summary refs log tree commit diff stats
path: root/security/nss/lib/freebl
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl')
-rw-r--r--security/nss/lib/freebl/Makefile806
-rw-r--r--security/nss/lib/freebl/aes-armv8.c1169
-rw-r--r--security/nss/lib/freebl/aes-armv8.h103
-rw-r--r--security/nss/lib/freebl/aes-x86.c184
-rw-r--r--security/nss/lib/freebl/aeskeywrap.c642
-rw-r--r--security/nss/lib/freebl/alghmac.c209
-rw-r--r--security/nss/lib/freebl/alghmac.h70
-rw-r--r--security/nss/lib/freebl/altivec-types.h25
-rw-r--r--security/nss/lib/freebl/arcfive.c87
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-gas.s88
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-masm.asm107
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-sun.s84
-rw-r--r--security/nss/lib/freebl/arcfour.c594
-rw-r--r--security/nss/lib/freebl/blake2b.c428
-rw-r--r--security/nss/lib/freebl/blake2b.h23
-rw-r--r--security/nss/lib/freebl/blapi.h1926
-rw-r--r--security/nss/lib/freebl/blapii.h123
-rw-r--r--security/nss/lib/freebl/blapit.h474
-rw-r--r--security/nss/lib/freebl/blinit.c584
-rw-r--r--security/nss/lib/freebl/blname.c100
-rw-r--r--security/nss/lib/freebl/camellia.c1896
-rw-r--r--security/nss/lib/freebl/camellia.h42
-rw-r--r--security/nss/lib/freebl/chacha20-ppc64le.S668
-rw-r--r--security/nss/lib/freebl/chacha20poly1305-ppc.c588
-rw-r--r--security/nss/lib/freebl/chacha20poly1305.c549
-rw-r--r--security/nss/lib/freebl/chacha20poly1305.h21
-rw-r--r--security/nss/lib/freebl/cmac.c323
-rw-r--r--security/nss/lib/freebl/cmac.h47
-rw-r--r--security/nss/lib/freebl/config.mk93
-rw-r--r--security/nss/lib/freebl/crypto_primitives.c36
-rw-r--r--security/nss/lib/freebl/crypto_primitives.h66
-rw-r--r--security/nss/lib/freebl/ctr.c276
-rw-r--r--security/nss/lib/freebl/ctr.h52
-rw-r--r--security/nss/lib/freebl/cts.c303
-rw-r--r--security/nss/lib/freebl/cts.h33
-rw-r--r--security/nss/lib/freebl/deprecated/alg2268.c509
-rw-r--r--security/nss/lib/freebl/deprecated/seed.c671
-rw-r--r--security/nss/lib/freebl/deprecated/seed.h125
-rw-r--r--security/nss/lib/freebl/des.c676
-rw-r--r--security/nss/lib/freebl/des.h43
-rw-r--r--security/nss/lib/freebl/desblapi.c256
-rw-r--r--security/nss/lib/freebl/det_rng.c163
-rw-r--r--security/nss/lib/freebl/det_rng.h12
-rw-r--r--security/nss/lib/freebl/dh.c480
-rw-r--r--security/nss/lib/freebl/drbg.c1024
-rw-r--r--security/nss/lib/freebl/dsa.c691
-rw-r--r--security/nss/lib/freebl/ec.c1319
-rw-r--r--security/nss/lib/freebl/ec.h24
-rw-r--r--security/nss/lib/freebl/ecdecode.c269
-rw-r--r--security/nss/lib/freebl/ecl/README163
-rw-r--r--security/nss/lib/freebl/ecl/curve25519_32.c1213
-rw-r--r--security/nss/lib/freebl/ecl/curve25519_64.c24
-rw-r--r--security/nss/lib/freebl/ecl/ec_naf.c68
-rw-r--r--security/nss/lib/freebl/ecl/ecl-curve.h245
-rw-r--r--security/nss/lib/freebl/ecl/ecl-exp.h167
-rw-r--r--security/nss/lib/freebl/ecl/ecl-priv.h252
-rw-r--r--security/nss/lib/freebl/ecl/ecl.c329
-rw-r--r--security/nss/lib/freebl/ecl/ecl.h64
-rw-r--r--security/nss/lib/freebl/ecl/ecl_gf.c958
-rw-r--r--security/nss/lib/freebl/ecl/ecl_mult.c305
-rw-r--r--security/nss/lib/freebl/ecl/eclt.h30
-rw-r--r--security/nss/lib/freebl/ecl/ecp.h106
-rw-r--r--security/nss/lib/freebl/ecl/ecp_25519.c145
-rw-r--r--security/nss/lib/freebl/ecl/ecp_256.c401
-rw-r--r--security/nss/lib/freebl/ecl/ecp_256_32.c1535
-rw-r--r--security/nss/lib/freebl/ecl/ecp_384.c258
-rw-r--r--security/nss/lib/freebl/ecl/ecp_521.c137
-rw-r--r--security/nss/lib/freebl/ecl/ecp_aff.c308
-rw-r--r--security/nss/lib/freebl/ecl/ecp_jac.c513
-rw-r--r--security/nss/lib/freebl/ecl/ecp_jm.c297
-rw-r--r--security/nss/lib/freebl/ecl/ecp_mont.c154
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp256r1.c258
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp384r1.c20411
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp384r1.h41
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp384r1_wrap.c228
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp521r1.c11622
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp521r1.h33
-rw-r--r--security/nss/lib/freebl/ecl/ecp_secp521r1_wrap.c255
-rw-r--r--security/nss/lib/freebl/exports.gyp51
-rw-r--r--security/nss/lib/freebl/fipsfreebl.c2059
-rw-r--r--security/nss/lib/freebl/freebl.def26
-rw-r--r--security/nss/lib/freebl/freebl.gyp952
-rw-r--r--security/nss/lib/freebl/freebl.rc68
-rw-r--r--security/nss/lib/freebl/freebl_base.gypi249
-rw-r--r--security/nss/lib/freebl/freebl_hash.def39
-rw-r--r--security/nss/lib/freebl/freebl_hash_vector.def34
-rw-r--r--security/nss/lib/freebl/freeblver.c18
-rw-r--r--security/nss/lib/freebl/gcm-aarch64.c96
-rw-r--r--security/nss/lib/freebl/gcm-arm32-neon.c202
-rw-r--r--security/nss/lib/freebl/gcm-ppc.c109
-rw-r--r--security/nss/lib/freebl/gcm-x86.c127
-rw-r--r--security/nss/lib/freebl/gcm.c1171
-rw-r--r--security/nss/lib/freebl/gcm.h125
-rw-r--r--security/nss/lib/freebl/genload.c167
-rw-r--r--security/nss/lib/freebl/hmacct.c325
-rw-r--r--security/nss/lib/freebl/hmacct.h38
-rw-r--r--security/nss/lib/freebl/intel-aes-x64-masm.asm964
-rw-r--r--security/nss/lib/freebl/intel-aes-x86-masm.asm942
-rw-r--r--security/nss/lib/freebl/intel-aes.h143
-rw-r--r--security/nss/lib/freebl/intel-aes.s2485
-rw-r--r--security/nss/lib/freebl/intel-gcm-wrap.c475
-rw-r--r--security/nss/lib/freebl/intel-gcm-x64-masm.asm1294
-rw-r--r--security/nss/lib/freebl/intel-gcm-x86-masm.asm1207
-rw-r--r--security/nss/lib/freebl/intel-gcm.h97
-rw-r--r--security/nss/lib/freebl/intel-gcm.s1340
-rw-r--r--security/nss/lib/freebl/jpake.c495
-rw-r--r--security/nss/lib/freebl/kyber-pqcrystals-ref.c2693
-rw-r--r--security/nss/lib/freebl/kyber-pqcrystals-ref.h144
-rw-r--r--security/nss/lib/freebl/kyber.c205
-rw-r--r--security/nss/lib/freebl/ldvector.c493
-rw-r--r--security/nss/lib/freebl/loader.c2856
-rw-r--r--security/nss/lib/freebl/loader.h1028
-rw-r--r--security/nss/lib/freebl/lowhash_vector.c224
-rw-r--r--security/nss/lib/freebl/manifest.mn212
-rw-r--r--security/nss/lib/freebl/md2.c269
-rw-r--r--security/nss/lib/freebl/md5.c598
-rw-r--r--security/nss/lib/freebl/mknewpc2.c208
-rw-r--r--security/nss/lib/freebl/mksp.c119
-rw-r--r--security/nss/lib/freebl/mpi/README646
-rw-r--r--security/nss/lib/freebl/mpi/doc/LICENSE11
-rw-r--r--security/nss/lib/freebl/mpi/doc/LICENSE-MPL3
-rw-r--r--security/nss/lib/freebl/mpi/doc/basecvt.pod65
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/doc/build30
-rw-r--r--security/nss/lib/freebl/mpi/doc/div.txt64
-rw-r--r--security/nss/lib/freebl/mpi/doc/expt.txt94
-rw-r--r--security/nss/lib/freebl/mpi/doc/gcd.pod28
-rw-r--r--security/nss/lib/freebl/mpi/doc/invmod.pod34
-rw-r--r--security/nss/lib/freebl/mpi/doc/isprime.pod63
-rw-r--r--security/nss/lib/freebl/mpi/doc/lap.pod36
-rw-r--r--security/nss/lib/freebl/mpi/doc/mpi-test.pod51
-rw-r--r--security/nss/lib/freebl/mpi/doc/mul.txt77
-rw-r--r--security/nss/lib/freebl/mpi/doc/pi.txt53
-rw-r--r--security/nss/lib/freebl/mpi/doc/prime.txt6542
-rw-r--r--security/nss/lib/freebl/mpi/doc/prng.pod38
-rw-r--r--security/nss/lib/freebl/mpi/doc/redux.txt86
-rw-r--r--security/nss/lib/freebl/mpi/doc/sqrt.txt50
-rw-r--r--security/nss/lib/freebl/mpi/doc/square.txt72
-rw-r--r--security/nss/lib/freebl/mpi/doc/timing.txt213
-rw-r--r--security/nss/lib/freebl/mpi/hpma512.s615
-rw-r--r--security/nss/lib/freebl/mpi/hppa20.s904
-rw-r--r--security/nss/lib/freebl/mpi/logtab.h28
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.c286
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.h65
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.il108
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.s1938
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv8.il108
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv8.s1818
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv9.il93
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv9.s2346
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba.c3235
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm13066
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s16097
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m-priv.h73
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m.c677
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m.h28
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache.c788
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache_amd64.s861
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache_x86.s902
-rw-r--r--security/nss/lib/freebl/mpi/mpi-config.h56
-rw-r--r--security/nss/lib/freebl/mpi/mpi-priv.h246
-rw-r--r--security/nss/lib/freebl/mpi/mpi.c5241
-rw-r--r--security/nss/lib/freebl/mpi/mpi.h363
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64.c32
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_common.S409
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_masm.asm388
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_sun.s385
-rw-r--r--security/nss/lib/freebl/mpi/mpi_arm.c175
-rw-r--r--security/nss/lib/freebl/mpi/mpi_hp.c81
-rw-r--r--security/nss/lib/freebl/mpi/mpi_i86pc.s313
-rw-r--r--security/nss/lib/freebl/mpi/mpi_mips.s472
-rw-r--r--security/nss/lib/freebl/mpi/mpi_sparc.c226
-rw-r--r--security/nss/lib/freebl/mpi/mpi_sse2.s294
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86.s541
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86_asm.c531
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86_os2.s538
-rw-r--r--security/nss/lib/freebl/mpi/mplogic.c460
-rw-r--r--security/nss/lib/freebl/mpi/mplogic.h55
-rw-r--r--security/nss/lib/freebl/mpi/mpmontg.c1160
-rw-r--r--security/nss/lib/freebl/mpi/mpprime.c610
-rw-r--r--security/nss/lib/freebl/mpi/mpprime.h48
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparc.c221
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparcv8.s1607
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparcv9.s1645
-rw-r--r--security/nss/lib/freebl/mpi/mpvalpha.c183
-rw-r--r--security/nss/lib/freebl/mpi/mulsqr.c84
-rw-r--r--security/nss/lib/freebl/mpi/primes.c841
-rw-r--r--security/nss/lib/freebl/mpi/vis_32.il1291
-rw-r--r--security/nss/lib/freebl/mpi/vis_64.il997
-rw-r--r--security/nss/lib/freebl/mpi/vis_proto.h234
-rw-r--r--security/nss/lib/freebl/nsslowhash.c131
-rw-r--r--security/nss/lib/freebl/nsslowhash.h33
-rw-r--r--security/nss/lib/freebl/ppc-crypto.h31
-rw-r--r--security/nss/lib/freebl/ppc-gcm-wrap.c458
-rw-r--r--security/nss/lib/freebl/ppc-gcm.h76
-rw-r--r--security/nss/lib/freebl/ppc-gcm.s1051
-rw-r--r--security/nss/lib/freebl/pqg.c1926
-rw-r--r--security/nss/lib/freebl/pqg.h28
-rw-r--r--security/nss/lib/freebl/rawhash.c198
-rw-r--r--security/nss/lib/freebl/ret_cr16.s27
-rw-r--r--security/nss/lib/freebl/rijndael.c1265
-rw-r--r--security/nss/lib/freebl/rijndael.h80
-rw-r--r--security/nss/lib/freebl/rijndael32.tab1219
-rw-r--r--security/nss/lib/freebl/rijndael_tables.c213
-rw-r--r--security/nss/lib/freebl/rsa.c1725
-rw-r--r--security/nss/lib/freebl/rsa_blind.c471
-rw-r--r--security/nss/lib/freebl/rsapkcs.c1706
-rw-r--r--security/nss/lib/freebl/scripts/LICENSE36
-rwxr-xr-xsecurity/nss/lib/freebl/scripts/gen.sh19
-rw-r--r--security/nss/lib/freebl/scripts/ppc-xlate.pl352
-rw-r--r--security/nss/lib/freebl/scripts/sha512p8-ppc.pl413
-rw-r--r--security/nss/lib/freebl/secmpi.c28
-rw-r--r--security/nss/lib/freebl/secmpi.h63
-rw-r--r--security/nss/lib/freebl/secrng.h65
-rw-r--r--security/nss/lib/freebl/sha-fast-amd64-sun.s2151
-rw-r--r--security/nss/lib/freebl/sha1-armv8.c264
-rw-r--r--security/nss/lib/freebl/sha256-armv8.c203
-rw-r--r--security/nss/lib/freebl/sha256-x86.c236
-rw-r--r--security/nss/lib/freebl/sha256.h27
-rw-r--r--security/nss/lib/freebl/sha3.c288
-rw-r--r--security/nss/lib/freebl/sha512-p8.s851
-rw-r--r--security/nss/lib/freebl/sha512.c1776
-rw-r--r--security/nss/lib/freebl/sha_fast.c592
-rw-r--r--security/nss/lib/freebl/sha_fast.h186
-rw-r--r--security/nss/lib/freebl/shake.c128
-rw-r--r--security/nss/lib/freebl/shsign.h26
-rw-r--r--security/nss/lib/freebl/shvfy.c664
-rw-r--r--security/nss/lib/freebl/stubs.c879
-rw-r--r--security/nss/lib/freebl/stubs.h72
-rw-r--r--security/nss/lib/freebl/sysrand.c18
-rw-r--r--security/nss/lib/freebl/tlsprfalg.c134
-rw-r--r--security/nss/lib/freebl/unix_rand.c811
-rw-r--r--security/nss/lib/freebl/unix_urandom.c84
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20.c227
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20.h60
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c1216
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h104
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c1218
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h104
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c630
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h104
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c821
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h60
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c1209
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h60
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Curve25519_51.c320
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Curve25519_51.h72
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Curve25519_64.c424
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Curve25519_64.h72
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Hash_SHA3.c713
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Hash_SHA3.h121
-rw-r--r--security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics.h83
-rw-r--r--security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics_128.h72
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Krmllib.h59
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P256.c1829
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P256.h237
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P384.c126
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P384.h68
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P521.c131
-rw-r--r--security/nss/lib/freebl/verified/Hacl_P521.h59
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_128.c1617
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_128.h64
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_256.c2088
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_256.h64
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_32.c573
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Poly1305_32.h56
-rw-r--r--security/nss/lib/freebl/verified/Hacl_Streaming_Types.h78
-rw-r--r--security/nss/lib/freebl/verified/config.h0
-rw-r--r--security/nss/lib/freebl/verified/curve25519-inline.h942
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Bignum.h315
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Bignum25519_51.h680
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Bignum_Base.h444
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h51
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h55
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA3.h62
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h67
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_P256.h56
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_P256_PrecompTable.h508
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h53
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h53
-rw-r--r--security/nss/lib/freebl/verified/internal/Hacl_Spec.h55
-rw-r--r--security/nss/lib/freebl/verified/internal/Vale.h185
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h13
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h89
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h16
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h27
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h32
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h57
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h375
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h105
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h5
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h243
-rw-r--r--security/nss/lib/freebl/verified/karamel/include/krmllib.h28
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h76
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h328
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h219
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h25
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic56
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include5
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h225
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h571
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h84
-rw-r--r--security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def11
-rw-r--r--security/nss/lib/freebl/verified/lib_intrinsics.h93
-rw-r--r--security/nss/lib/freebl/verified/libintvector.h914
-rw-r--r--security/nss/lib/freebl/win_rand.c161
305 files changed, 198994 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile
new file mode 100644
index 0000000000..eeee90a97f
--- /dev/null
+++ b/security/nss/lib/freebl/Makefile
@@ -0,0 +1,806 @@
+#! gmake
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#######################################################################
+# (1) Include initial platform-independent assignments (MANDATORY). #
+#######################################################################
+
+include manifest.mn
+
+#######################################################################
+# (2) Include "global" configuration information. (OPTIONAL) #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/config.mk
+
+#######################################################################
+# (3) Include "component" configuration information. (OPTIONAL) #
+#######################################################################
+
+
+
+#######################################################################
+# (4) Include "local" platform-dependent assignments (OPTIONAL). #
+#######################################################################
+
+include config.mk
+
+# default for all platforms
+# unset this on those that have multiple freebl libraries
+FREEBL_BUILD_SINGLE_SHLIB = 1
+
+ifdef USE_64
+ DEFINES += -DNSS_USE_64
+endif
+
+ifdef USE_ABI32_FPU
+ DEFINES += -DNSS_USE_ABI32_FPU
+endif
+
+ifeq ($(FREEBL_NO_DEPEND),1)
+ DEFINES += -DFREEBL_NO_DEPEND
+ STUBS_SRCS = stubs.c
+endif
+
+ifeq ($(FREEBL_LOWHASH),1)
+ DEFINES += -DFREEBL_LOWHASH
+ LOWHASH_SRCS = nsslowhash.c
+ LOWHASH_EXPORTS = nsslowhash.h
+ MAPFILE_SOURCE = freebl_hash_vector.def
+ NEED_STUB_BUILD = 1
+else
+ MAPFILE_SOURCE = freebl.def
+endif
+
+ifdef USE_STUB_BUILD
+ CSRCS = lowhash_vector.c
+ SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX))
+ OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+ ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+ $(NOSUCHFILE) so_locations
+ MAPFILE_SOURCE = freebl_hash.def
+endif
+
+# FREEBL_USE_PRELINK
+#
+# Most modern versions of Linux support a speed optimization scheme where an
+# application called prelink modifies programs and shared libraries to quickly
+# load if they fit into an already designed address space. In short, prelink
+# scans the list of programs and libraries on your system, assigns them a
+# predefined space in the address space, then provides the fixups to the
+# library.
+#
+# The modification of the shared library is correctly detected by the freebl
+# FIPS checksum scheme where we check a signed hash of the library against the
+# library itself.
+#
+# The prelink command itself can reverse the process of modification and output
+# the pristine shared library as it was before prelink made its changes.
+# This option tells Freebl that it may use prelink to output the original copy
+# of the shared library before prelink modified it.
+#
+# FREEBL_PRELINK_COMMAND
+#
+# This is an optional environment variable which can override the default
+# prelink command. It could be used on systems that did something similar to
+# prelink but used a different command and syntax. The only requirement is the
+# program must take the library as the last argument, the program must output
+# the original library to standard out, and the program does not need to take
+# any quoted or embedded spaces in its arguments (except the path to the
+# library itself, which can have embedded spaces or special characters).
+#
+ifdef FREEBL_USE_PRELINK
+ DEFINES += -DFREEBL_USE_PRELINK
+ifdef LINUX
+ DEFINES += -D__GNU_SOURCE=1
+endif
+endif
+ifdef NSS_NO_INIT_SUPPORT
+ DEFINES += -DNSS_NO_INIT_SUPPORT
+endif
+ifdef NSS_STRICT_INTEGRITY
+ DEFINES += -DNSS_STRICT_INTEGRITY
+endif
+
+ifdef FREEBL_PRELINK_COMMAND
+ DEFINES +=-DFREEBL_PRELINK_COMMAND=\"$(FREEBL_PRELINK_COMMAND)\"
+endif
+# NSS_X86 means the target is a 32-bit x86 CPU architecture
+# NSS_X64 means the target is a 64-bit x64 CPU architecture
+# NSS_X86_OR_X64 means the target is either x86 or x64
+ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+ DEFINES += -DNSS_X86_OR_X64
+ EXTRA_SRCS += gcm-x86.c aes-x86.c
+$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes
+$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes
+ifneq (,$(USE_64)$(USE_X32))
+ DEFINES += -DNSS_X64
+else
+ DEFINES += -DNSS_X86
+endif
+ ifdef CC_IS_CLANG
+ EXTRA_SRCS += sha256-x86.c
+ DEFINES += -DUSE_HW_SHA2
+ else ifeq (1,$(CC_IS_GCC))
+ # GCC versions older than 4.9 don't support the Intel SHA extension
+ ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+ EXTRA_SRCS += sha256-x86.c
+ DEFINES += -DUSE_HW_SHA2
+ endif
+ ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+ EXTRA_SRCS += sha256-x86.c
+ DEFINES += -DUSE_HW_SHA2
+ endif
+ endif
+endif
+ifeq ($(CPU_ARCH),aarch64)
+ ifdef CC_IS_CLANG
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c
+ else ifeq (1,$(CC_IS_GCC))
+ # GCC versions older than 4.9 don't support ARM AES. The check
+ # is done in two parts, first allows "major.minor" == "4.9",
+ # and then rejects any major versions prior to 5. Note that
+ # there has been no GCC 4.10, as it was renamed to GCC 5.
+ ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c
+ endif
+ ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c gcm-aarch64.c sha1-armv8.c sha256-armv8.c
+ endif
+ endif
+endif
+ifeq ($(CPU_ARCH),arm)
+ifndef NSS_DISABLE_ARM32_NEON
+ EXTRA_SRCS += gcm-arm32-neon.c
+endif
+ ifdef CC_IS_CLANG
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
+ else ifeq (1,$(CC_IS_GCC))
+ # GCC versions older than 4.9 don't support ARM AES. The check
+ # is done in two parts, first allows "major.minor" == "4.9",
+ # and then rejects any major versions prior to 5. Note that
+ # there has been no GCC 4.10, as it was renamed to GCC 5.
+ ifneq (,$(filter 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
+ endif
+ ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+ DEFINES += -DUSE_HW_AES -DUSE_HW_SHA1 -DUSE_HW_SHA2
+ EXTRA_SRCS += aes-armv8.c sha1-armv8.c sha256-armv8.c
+ endif
+ endif
+endif
+
+ifeq (OS2,$(OS_TARGET))
+ ASFILES = mpi_x86_os2.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+ DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+endif
+
+ifeq (,$(filter-out WINNT WIN95,$(OS_TARGET)))
+ifndef USE_64
+# 32-bit Windows
+ifdef NS_USE_GCC
+# Ideally, we want to use assembler
+# ASFILES = mpi_x86.s
+# DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE \
+# -DMP_ASSEMBLY_DIV_2DX1D
+# but we haven't figured out how to make it work, so we are not
+# using assembler right now.
+ ASFILES =
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT
+else
+# MSVC
+ MPI_SRCS += mpi_x86_asm.c
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ ifdef BUILD_OPT
+ OPTIMIZER += -Ox # maximum optimization for freebl
+ endif
+ # The Intel AES assembly code requires Visual C++ 2010.
+ # if $(_MSC_VER) >= 1600 (Visual C++ 2010)
+ ifeq ($(firstword $(sort $(_MSC_VER) 1600)),1600)
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes-x86-masm.asm intel-gcm-x86-masm.asm
+ EXTRA_SRCS += intel-gcm-wrap.c
+ ifeq ($(CLANG_CL),1)
+ INTEL_GCM_CLANG_CL = 1
+ endif
+ endif
+ # The Intel SHA extension requires Visual C++ 2015.
+ ifeq ($(_MSC_VER_GE_14),1)
+ DEFINES += -DUSE_HW_SHA2
+ EXTRA_SRCS += sha256-x86.c
+ endif
+endif
+else
+ # -DMP_NO_MP_WORD
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+ifdef NS_USE_GCC
+# Ideally, we should use amd64 assembly code, but it's not yet mingw-w64
+# compatible.
+else
+# MSVC
+ ifdef BUILD_OPT
+ OPTIMIZER += -Ox # maximum optimization for freebl
+ endif
+ifeq ($(CPU_ARCH),x86_64)
+ ASFILES = arcfour-amd64-masm.asm mpi_amd64_masm.asm mp_comba_amd64_masm.asm
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA
+ # The Intel AES assembly code requires Visual C++ 2010 (10.0). The _xgetbv
+ # compiler intrinsic function requires Visual C++ 2010 (10.0) SP1.
+ ifeq ($(_MSC_VER_GE_10SP1),1)
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes-x64-masm.asm intel-gcm-x64-masm.asm
+ EXTRA_SRCS += intel-gcm-wrap.c
+ ifeq ($(CLANG_CL),1)
+ INTEL_GCM_CLANG_CL = 1
+ endif
+ endif
+ # The Intel SHA extension requires Visual C++ 2015.
+ ifeq ($(_MSC_VER_GE_14),1)
+ DEFINES += -DUSE_HW_SHA2
+ EXTRA_SRCS += sha256-x86.c
+ endif
+ MPI_SRCS += mpi_amd64.c
+endif
+endif
+endif
+endif
+
+ifeq ($(OS_TARGET),Darwin)
+ifeq ($(CPU_ARCH),x86_64)
+ ASFILES = mpi_amd64_common.s
+ DEFINES += -DMPI_AMD64 -DMP_IS_LITTLE_ENDIAN
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DNSS_USE_COMBA
+ MPI_SRCS += mpi_amd64.c mp_comba.c
+else ifeq ($(CPU_ARCH),x86)
+ ASFILES = mpi_sse2.s
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+endif
+endif # Darwin
+
+ifeq ($(OS_TARGET),Linux)
+ifeq ($(CPU_ARCH),x86_64)
+ # Lower case s on mpi_amd64_common due to make implicit rules.
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
+ ASFLAGS += -fPIC -Wa,--noexecstack
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+# DEFINES += -DMPI_AMD64_ADD
+ # comment the next four lines to turn off Intel HW acceleration.
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes.s intel-gcm.s
+ EXTRA_SRCS += intel-gcm-wrap.c
+ INTEL_GCM = 1
+ MPI_SRCS += mpi_amd64.c mp_comba.c
+endif
+ifeq ($(CPU_ARCH),x86)
+ ASFILES = mpi_x86.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+endif
+ifeq ($(CPU_ARCH),arm)
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+ MPI_SRCS += mpi_arm.c
+endif
+ifeq ($(CPU_ARCH),ppc)
+ EXTRA_SRCS += gcm-ppc.c
+ifdef USE_64
+ DEFINES += -DNSS_NO_INIT_SUPPORT
+ PPC_ABI := $(shell $(CC) -dM -E - < /dev/null | awk '$$2 == "_CALL_ELF" {print $$3}')
+ ifeq ($(PPC_ABI),2)
+ ASFILES += sha512-p8.s
+ ifeq ($(OS_TEST),ppc64le)
+ DEFINES += -DPPC_GCM
+ EXTRA_SRCS += chacha20poly1305-ppc.c ppc-gcm-wrap.c
+ ASFILES += chacha20-ppc64le.s ppc-gcm.s
+ endif # ppc64le
+ endif
+endif # USE_64
+endif # ppc
+endif # Linux
+
+ifeq ($(OS_TARGET),AIX)
+ DEFINES += -DMP_USE_UINT_DIGIT
+ ifndef USE_64
+ DEFINES += -DMP_NO_DIV_WORD -DMP_NO_ADD_WORD -DMP_NO_SUB_WORD
+ endif
+endif # AIX
+
+ifeq ($(OS_TARGET), HP-UX)
+ifneq ($(OS_TEST), ia64)
+# PA-RISC
+ASFILES += ret_cr16.s
+ifndef USE_64
+ FREEBL_BUILD_SINGLE_SHLIB =
+ HAVE_ABI32_INT32 = 1
+ HAVE_ABI32_FPU = 1
+endif
+ifdef FREEBL_CHILD_BUILD
+ifdef USE_ABI32_INT32
+# build for DA1.1 (HP PA 1.1) 32-bit ABI build with 32-bit arithmetic
+ DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+else
+ifdef USE_64
+# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits
+ MPI_SRCS += mpi_hp.c
+ ASFILES += hpma512.s hppa20.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+else
+# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model
+# (the 32-bit ABI with 64-bit registers) using 64-bit digits
+ MPI_SRCS += mpi_hp.c
+ ASFILES += hpma512.s hppa20.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ifndef NS_USE_GCC
+ ARCHFLAG = -Aa +e +DA2.0 +DS2.0
+endif
+endif
+endif
+endif
+endif
+endif
+
+# The blapi functions are defined not only in the freebl shared
+# libraries but also in the shared libraries linked with loader.c
+# (libsoftokn3.so and libssl3.so). We need to use GNU ld's
+# -Bsymbolic option or the equivalent option for other linkers
+# to bind the blapi function references in FREEBLVector vector
+# (ldvector.c) to the blapi functions defined in the freebl
+# shared libraries.
+ifeq (,$(filter-out BSD_OS FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET)))
+ MKSHLIB += -Wl,-Bsymbolic
+endif
+
+ifeq ($(OS_TARGET),SunOS)
+
+ifdef NS_USE_GCC
+ ifdef GCC_USE_GNU_LD
+ MKSHLIB += -Wl,-Bsymbolic,-z,now,-z,text
+ else
+ MKSHLIB += -Wl,-B,symbolic,-z,now,-z,text
+ endif # GCC_USE_GNU_LD
+else
+ MKSHLIB += -B symbolic -z now -z text
+endif # NS_USE_GCC
+
+# Sun's WorkShop defines v8, v8plus and v9 architectures.
+# gcc on Solaris defines v8 and v9 "cpus".
+# gcc's v9 is equivalent to Workshop's v8plus.
+# gcc's -m64 is equivalent to Workshop's v9
+# We always use Sun's assembler, which uses Sun's naming convention.
+ifeq ($(CPU_ARCH),sparc)
+ FREEBL_BUILD_SINGLE_SHLIB=
+ ifdef USE_64
+ HAVE_ABI64_INT = 1
+ HAVE_ABI64_FPU = 1
+ else
+ HAVE_ABI32_FPU = 1
+ HAVE_ABI32_INT64 = 1
+ endif
+ SYSV_SPARC = 1
+ SOLARIS_AS = /usr/ccs/bin/as
+ #### set arch, asm, c flags
+ ifdef NS_USE_GCC
+ ifdef USE_ABI32_INT64
+ ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plus
+ SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+ endif
+ ifdef USE_ABI32_FPU
+ ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plusa
+ SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+ endif # USE_ABI32_FPU
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture
+ ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9
+ SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size
+ ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9a
+ SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+ endif
+ else # NS_USE_GCC
+ # FPU_TARGET_OPTIMIZER specifies the target processor and cache
+ # properties of the ABI32_FPU and ABI64_FPU architectures for use
+ # by the optimizer.
+ ifeq (,$(findstring Sun WorkShop 6,$(shell $(CC) -V 2>&1)))
+ # if the compiler is not Forte 6
+ FPU_TARGET_OPTIMIZER = -xcache=64/32/4:1024/64/4 -xchip=ultra3
+ else
+ # Forte 6 C compiler generates incorrect code for rijndael.c
+ # if -xchip=ultra3 is used (Bugzilla bug 333925). So we revert
+ # to what we used in NSS 3.10.
+ FPU_TARGET_OPTIMIZER = -xchip=ultra2
+ endif
+ ifdef USE_ABI32_INT64
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses 64-bit words, integer arithmetic,
+ # no FPU (non-VIS cpus).
+ # These flags were suggested by the compiler group for building
+ # with SunStudio 10.
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -xO4
+ endif
+ SOL_CFLAGS += -xtarget=generic
+ ARCHFLAG = -xarch=v8plus
+ SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+ endif
+ ifdef USE_ABI32_FPU
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses FPU code, and 32-bit word size.
+ # these flags were determined by running cc -### -fast and copying
+ # the generated flag settings
+ SOL_CFLAGS += -fsingle -xmemalign=8s
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+ SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+ SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+ SOL_CFLAGS += -xlibmil -xO5
+ endif
+ ARCHFLAG = -xarch=v8plusa
+ SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+ endif
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture,
+ # no FPU (non-VIS cpus). For building with SunStudio 10.
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -xO4
+ endif
+ SOL_CFLAGS += -xtarget=generic
+ ARCHFLAG = -xarch=v9
+ SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size.
+ # See comment for USE_ABI32_FPU.
+ SOL_CFLAGS += -fsingle -xmemalign=8s
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+ SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+ SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+ SOL_CFLAGS += -xlibmil -xO5
+ endif
+ ARCHFLAG = -xarch=v9a
+ SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+ endif
+ endif # NS_USE_GCC
+
+ ### set flags for both GCC and Sun cc
+ ifdef USE_ABI32_INT64
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU
+ # best times are with no MP_ flags specified
+ endif
+ ifdef USE_ABI32_FPU
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses FPU code, and 32-bit word size
+ MPI_SRCS += mpi_sparc.c
+ ASFILES = mpv_sparcv8.s montmulfv8.s
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+ endif
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture
+ # best times are with no MP_ flags specified
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size
+ MPI_SRCS += mpi_sparc.c
+ ASFILES = mpv_sparcv9.s montmulfv9.s
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+ endif
+
+else
+ # Solaris for non-sparc family CPUs
+ ifdef NS_USE_GCC
+ LD = gcc
+ AS = gcc
+ ASFLAGS = -x assembler-with-cpp
+ endif
+ ifeq ($(USE_64),1)
+ # Solaris for AMD64
+ ifdef NS_USE_GCC
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_common.s
+ ASFLAGS += -march=opteron -m64 -fPIC
+ MPI_SRCS += mp_comba.c
+ # comment the next four lines to turn off Intel HW acceleration
+ ASFILES += intel-gcm.s
+ EXTRA_SRCS += intel-gcm-wrap.c
+ INTEL_GCM = 1
+ DEFINES += -DINTEL_GCM
+ else
+ ASFILES = arcfour-amd64-sun.s mpi_amd64_sun.s sha-fast-amd64-sun.s
+ ASFILES += mp_comba_amd64_sun.s mpcpucache_amd64.s
+ ASFLAGS += -xarch=generic64 -K PIC
+ SOL_CFLAGS += -xprefetch=no
+ SHA_SRCS =
+ MPCPU_SRCS =
+ # Intel acceleration for GCM does not build currently with Studio
+ endif
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA -DMP_IS_LITTLE_ENDIAN
+ # comment the next two lines to turn off Intel HW acceleration
+ DEFINES += -DUSE_HW_AES
+ ASFILES += intel-aes.s
+ MPI_SRCS += mpi_amd64.c
+ else
+ # Solaris x86
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+ ASFILES = mpi_i86pc.s
+ ifndef NS_USE_GCC
+ MPCPU_SRCS =
+ ASFILES += mpcpucache_x86.s
+ endif
+ endif
+endif # Solaris for non-sparc family CPUs
+endif # target == SunO
+
+ifdef USE_64
+# Decide, for 64-bit builds, which compiler-dependent features are usable:
+# __int128 (HAVE_INT128_SUPPORT) and the AVX2 / SSE3 / SSE4 code paths.
+#
+# no __int128 at least up to lcc 1.23 (pretending to be gcc5)
+# NB: CC_NAME is not defined here
+ifneq ($(shell $(CC) -? 2>&1 >/dev/null </dev/null | sed -e 's/:.*//;1q'),lcc)
+    ifdef CC_IS_CLANG
+        HAVE_INT128_SUPPORT = 1
+        DEFINES += -DHAVE_INT128_SUPPORT
+    else ifeq (1,$(CC_IS_GCC))
+        # GCC 4.6 - 4.9 are matched explicitly on "major.minor".
+        ifneq (,$(filter 4.6 4.7 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+            HAVE_INT128_SUPPORT = 1
+            DEFINES += -DHAVE_INT128_SUPPORT
+        endif
+        # GCC major version 0-4: do not build the AVX2 code.
+        ifneq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+            NSS_DISABLE_AVX2 = 1
+        endif
+        # GCC major version 5 and later: __int128 is available.
+        ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+            HAVE_INT128_SUPPORT = 1
+            DEFINES += -DHAVE_INT128_SUPPORT
+        endif
+        # GCC major version 0-4: do not build the SSE3/SSE4 code either.
+        # This must be the same "old compiler" test as for AVX2 above; with
+        # ifeq the SSE paths were switched off for every GCC >= 5 instead.
+        ifneq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+            NSS_DISABLE_SSE3 = 1
+            NSS_DISABLE_SSE4_1 = 1
+            NSS_DISABLE_SSE4_2 = 1
+        endif
+    endif
+endif # lcc
+endif # USE_64
+
+# Without compiler __int128 support, build with KRML_VERIFIED_UINT128 defined.
+ifndef HAVE_INT128_SUPPORT
+ DEFINES += -DKRML_VERIFIED_UINT128
+endif
+
+# ChaCha20/Poly1305: the 32-bit HACL sources are always added; on x86_64 the
+# 256-bit and 128-bit vector variants are added unless AVX2 / SSE3 have been
+# disabled, together with the matching HACL_CAN_COMPILE_VEC* define.
+ifndef NSS_DISABLE_CHACHAPOLY
+ ifeq ($(CPU_ARCH),x86_64)
+ ifndef NSS_DISABLE_AVX2
+ EXTRA_SRCS += Hacl_Poly1305_256.c Hacl_Chacha20_Vec256.c Hacl_Chacha20Poly1305_256.c
+ DEFINES += -DHACL_CAN_COMPILE_VEC256
+ endif # NSS_DISABLE_AVX2
+ ifndef NSS_DISABLE_SSE3
+ EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c
+ DEFINES += -DHACL_CAN_COMPILE_VEC128
+ endif
+ endif # x86_64
+
+ VERIFIED_SRCS += Hacl_Poly1305_32.c Hacl_Chacha20.c Hacl_Chacha20Poly1305_32.c
+endif # NSS_DISABLE_CHACHAPOLY
+
+# Verified sources that are built unconditionally.
+VERIFIED_SRCS += Hacl_Hash_SHA3.c Hacl_P256.c Hacl_P384.c Hacl_P521.c
+
+# filter-out leaves nothing when CPU_ARCH is x86_64 or aarch64.
+ifeq (,$(filter-out x86_64 aarch64,$(CPU_ARCH)))
+ # x86_64 and aarch64 get the 64 bit version.
+ ECL_SRCS += curve25519_64.c
+ VERIFIED_SRCS += Hacl_Curve25519_51.c
+else
+ # All other architectures get the generic 32 bit implementation
+ ECL_SRCS += curve25519_32.c
+endif
+
+#######################################################################
+# (5) Execute "global" rules. (OPTIONAL) #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/rules.mk
+
+#######################################################################
+# (6) Execute "component" rules. (OPTIONAL) #
+#######################################################################
+
+
+
+#######################################################################
+# (7) Execute "local" rules. (OPTIONAL). #
+#######################################################################
+
+
+# Manual target: compile rijndael_tables.c against $(OBJDIR)/libfreebl.a into
+# $(OBJDIR)/make_rijndael_tab and then run the resulting program.
+rijndael_tables:
+ $(CC) -o $(OBJDIR)/make_rijndael_tab rijndael_tables.c \
+ $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a
+ $(OBJDIR)/make_rijndael_tab
+
+# Let make find sources and headers that live in the subdirectories, and put
+# the same subdirectories on the compiler include path.
+vpath %.h mpi ecl verified deprecated
+vpath %.c mpi ecl verified deprecated
+vpath %.S mpi ecl
+vpath %.s mpi ecl
+vpath %.asm mpi ecl
+INCLUDES += -Impi -Iecl -Iverified -Iverified/internal -Iverified/karamel/include -Iverified/karamel/krmllib/dist/minimal -Ideprecated
+
+
+DEFINES += -DMP_API_COMPATIBLE
+
+# Files outside mpi/ that must be rebuilt when the MPI headers change.
+MPI_USERS = secmpi.c dh.c pqg.c dsa.c rsa.c ec.c
+
+MPI_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_SRCS:.c=$(OBJ_SUFFIX)))
+MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_USERS:.c=$(OBJ_SUFFIX)))
+
+$(MPI_OBJS): $(MPI_HDRS)
+
+# Files outside ecl/ that must be rebuilt when the ECL headers change.
+ECL_USERS = ec.c
+
+ECL_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_SRCS:.c=$(OBJ_SUFFIX)) $(ECL_ASM_SRCS:$(ASM_SUFFIX)=$(OBJ_SUFFIX)))
+ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX)))
+
+$(ECL_OBJS): $(ECL_HDRS)
+
+# Extra prerequisites for individual objects (no recipes attached here).
+$(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c
+
+$(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c
+
+$(OBJDIR)/ldvector$(OBJ_SUFFIX) $(OBJDIR)/loader$(OBJ_SUFFIX) : loader.h
+
+# When SYSV_SPARC is 1, the listed sparc v8/v9 assembly objects are built by
+# static pattern rules that invoke $(SOLARIS_AS) with $(SOLARIS_AS_FLAGS).
+# The order-only prerequisite $$(@D)/d stands for the object's directory.
+ifeq ($(SYSV_SPARC),1)
+
+$(OBJDIR)/mpv_sparcv8.o $(OBJDIR)/mpv_sparcv8x.o $(OBJDIR)/montmulfv8.o : $(OBJDIR)/%.o : %.s | $$(@D)/d
+ $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpv_sparcv9.o $(OBJDIR)/montmulfv9.o : $(OBJDIR)/%.o : %.s | $$(@D)/d
+ $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpmontg.o: mpmontg.c montmulf.h
+
+endif
+
+ifndef FREEBL_CHILD_BUILD
+
+# Parent build. This is where we decide which shared libraries to build
+
+# to suppress the SINGLE_SHLIB override warning
+FREEBL_OBJDIRS :=
+
+# target_freebl_SHLIB(name, guard, flag)
+#   name  - used for the <name>_DIR variable and the per-target object
+#           directory under OBJDIR
+#   guard - name of a variable; the whole expansion is inside "ifdef guard",
+#           and the generated target is called freebl_<guard>
+#   flag  - variable passed as <flag>=1 to the child make
+# The generated target is hooked into release_md and libs and re-runs make
+# with FREEBL_CHILD_BUILD=1 and OBJDIR pointing at the per-target directory.
+# The text is expanded once by call and again by eval, hence the $$ escapes.
+define target_freebl_SHLIB
+ifdef $(2)
+$(1)_DIR = $$(OBJDIR)/$$(OS_TARGET)_$(1)
+ALL_TRASH += $$($(1)_DIR)
+
+ifeq (,$$(filter $$($(1)_DIR)/d,$$(FREEBL_OBJDIRS)))
+FREEBL_OBJDIRS += $$($(1)_DIR)/d
+endif
+
+release_md:: freebl_$(2)
+libs: freebl_$(2)
+freebl_$(2): | $$($(1)_DIR)/d
+ $$(MAKE) FREEBL_CHILD_BUILD=1 $(3)=1 OBJDIR=$$($(1)_DIR) libs
+endif
+endef # target_freebl_SHLIB
+
+# Shorthand for ABI targets: guard is HAVE_<abi>, child flag is USE_<abi>.
+target_freebl_ABI = $(call target_freebl_SHLIB,$(1),HAVE_$(1),USE_$(1))
+
+$(eval $(call target_freebl_SHLIB,SINGLE_SHLIB,FREEBL_BUILD_SINGLE_SHLIB,NEEDED_DUMMY))
+$(eval $(call target_freebl_SHLIB,SINGLE_SHLIB,NEED_STUB_BUILD,USE_STUB_BUILD))
+
+$(eval $(call target_freebl_ABI,ABI32_FPU))
+$(eval $(call target_freebl_ABI,ABI32_INT32))
+$(eval $(call target_freebl_ABI,ABI32_INT64))
+$(eval $(call target_freebl_ABI,ABI64_FPU))
+$(eval $(call target_freebl_ABI,ABI64_INT))
+
+endif # FREEBL_CHILD_BUILD
+
+
+# Bugzilla Bug 333917: the non-x86 code in desblapi.c seems to violate
+# ANSI C's strict aliasing rules.
+# On Linux, for every CPU_ARCH other than x86, desblapi is therefore compiled
+# by this explicit rule, which adds -fno-strict-aliasing to $(CFLAGS).
+# With NEED_ABSOLUTE_PATH the source is passed through core_abspath.
+ifeq ($(OS_TARGET),Linux)
+ifneq ($(CPU_ARCH),x86)
+$(OBJDIR)/$(PROG_PREFIX)desblapi$(OBJ_SUFFIX): desblapi.c | $$(@D)/d
+ifdef NEED_ABSOLUTE_PATH
+ $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $(call core_abspath,$<)
+else
+ $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $<
+endif
+endif
+endif
+
+# Target-specific CFLAGS for the Intel GCM objects; these apply only to the
+# named object files, not to the rest of the build.
+ifdef INTEL_GCM
+#
+# GCM binary needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+
+# The integrated assembler in Clang 3.2 does not support % in the
+# expression of a .set directive. intel-gcm.s uses .set to give
+# symbolic names to registers, for example,
+# .set Htbl, %rdi
+# So we can't use Clang's integrated assembler with intel-gcm.s.
+ifdef CC_IS_CLANG
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm$(OBJ_SUFFIX): CFLAGS += -no-integrated-as
+endif
+endif
+
+# Same -mssse3 addition for the wrapper when INTEL_GCM_CLANG_CL is defined.
+ifdef INTEL_GCM_CLANG_CL
+#
+# clang-cl needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+endif
+
+# Target-specific CFLAGS: each line below adds instruction-set options for
+# one object file only.
+
+# sha256-x86 is compiled with the SHA, SSSE3 and SSE4.1 options.
+$(OBJDIR)/$(PROG_PREFIX)sha256-x86$(OBJ_SUFFIX): CFLAGS += -msha -mssse3 -msse4.1
+
+ifeq ($(CPU_ARCH),arm)
+# When the compiler uses the softfloat ABI, we want to use the compatible softfp ABI when
+# enabling NEON for these objects.
+# Confusingly, __SOFTFP__ is the name of the define for the softfloat ABI, not for the softfp ABI.
+# USES_SOFTFLOAT_ABI is 1 when the preprocessor's macro dump mentions
+# __SOFTFP__, and empty otherwise.
+USES_SOFTFLOAT_ABI := $(shell $(CC) -o - -E -dM - $(CFLAGS) < /dev/null | grep __SOFTFP__ > /dev/null && echo 1)
+$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a -mfpu=crypto-neon-fp-armv8$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+ifndef NSS_DISABLE_ARM32_NEON
+$(OBJDIR)/$(PROG_PREFIX)gcm-arm32-neon$(OBJ_SUFFIX): CFLAGS += -mfpu=neon$(if $(USES_SOFTFLOAT_ABI), -mfloat-abi=softfp)
+endif
+endif
+
+# aarch64: the AES, GCM, SHA-1 and SHA-256 objects get -march=armv8-a+crypto.
+ifeq ($(CPU_ARCH),aarch64)
+$(OBJDIR)/$(PROG_PREFIX)aes-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+$(OBJDIR)/$(PROG_PREFIX)gcm-aarch64$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+$(OBJDIR)/$(PROG_PREFIX)sha1-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+$(OBJDIR)/$(PROG_PREFIX)sha256-armv8$(OBJ_SUFFIX): CFLAGS += -march=armv8-a+crypto
+endif
+
+# ppc: sha512 always gets the loop options; -maltivec and -mcrypto -mvsx are
+# added to the listed objects unless NSS_DISABLE_ALTIVEC or
+# NSS_DISABLE_CRYPTO_VSX is set to 1.
+ifeq ($(CPU_ARCH),ppc)
+$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -funroll-loops -fpeel-loops
+ifneq ($(NSS_DISABLE_ALTIVEC),1)
+$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -maltivec
+$(OBJDIR)/$(PROG_PREFIX)gcm$(OBJ_SUFFIX): CFLAGS += -maltivec
+$(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -maltivec
+$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -maltivec
+$(OBJDIR)/$(PROG_PREFIX)chacha20poly1305-ppc$(OBJ_SUFFIX): CFLAGS += -maltivec
+endif
+ifneq ($(NSS_DISABLE_CRYPTO_VSX),1)
+$(OBJDIR)/$(PROG_PREFIX)gcm-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx
+$(OBJDIR)/$(PROG_PREFIX)gcm$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx
+$(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx
+$(OBJDIR)/$(PROG_PREFIX)sha512$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx
+$(OBJDIR)/$(PROG_PREFIX)chacha20poly1305-ppc$(OBJ_SUFFIX): CFLAGS += -mcrypto -mvsx
+endif
+endif
+
+# HACL 128-bit vector objects: these flags are attached unconditionally.
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx
+
+# HACL 256-bit vector objects: flags are attached only when AVX2 is enabled.
+ifndef NSS_DISABLE_AVX2
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx2
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx -mavx2
+$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4.1 -msse4.2 -mavx -mavx2
+endif
diff --git a/security/nss/lib/freebl/aes-armv8.c b/security/nss/lib/freebl/aes-armv8.c
new file mode 100644
index 0000000000..7be39ede89
--- /dev/null
+++ b/security/nss/lib/freebl/aes-armv8.c
@@ -0,0 +1,1169 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "secerr.h"
+#include "rijndael.h"
+
+#if ((defined(__clang__) || \
+ (defined(__GNUC__) && defined(__GNUC_MINOR__) && \
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \
+ defined(IS_LITTLE_ENDIAN))
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error "Compiler option is invalid"
+#endif
+
+#include <arm_neon.h>
+
+/*
+ * ECB encryption with 11 round keys (the 128-bit key schedule) taken from
+ * cx->k.expandedKey.
+ *
+ * Processes input 16 bytes at a time and writes the result to output.  The
+ * loop only terminates when inputLen has been counted down to zero in steps
+ * of 16, so the caller is expected to pass a multiple of 16.  outputLen,
+ * maxOutputLen and blocksize are not touched here.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Rounds: every vaeseq_u8 but the last is followed by vaesmcq_u8. */
+        block = vaesmcq_u8(vaeseq_u8(block, rk0));
+        block = vaesmcq_u8(vaeseq_u8(block, rk1));
+        block = vaesmcq_u8(vaeseq_u8(block, rk2));
+        block = vaesmcq_u8(vaeseq_u8(block, rk3));
+        block = vaesmcq_u8(vaeseq_u8(block, rk4));
+        block = vaesmcq_u8(vaeseq_u8(block, rk5));
+        block = vaesmcq_u8(vaeseq_u8(block, rk6));
+        block = vaesmcq_u8(vaeseq_u8(block, rk7));
+        block = vaesmcq_u8(vaeseq_u8(block, rk8));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaeseq_u8(block, rk9), rk10);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * ECB decryption with 11 round keys (the 128-bit key schedule) taken from
+ * cx->k.expandedKey; the keys are applied from the last one back to the
+ * first.
+ *
+ * Processes input 16 bytes at a time and writes the result to output.  The
+ * loop only terminates when inputLen has been counted down to zero in steps
+ * of 16, so the caller is expected to pass a multiple of 16.  outputLen,
+ * maxOutputLen and blocksize are not touched here.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (!inputLen) {
+        return SECSuccess;
+    }
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Rounds: every vaesdq_u8 but the last is followed by vaesimcq_u8. */
+        block = vaesimcq_u8(vaesdq_u8(block, rk10));
+        block = vaesimcq_u8(vaesdq_u8(block, rk9));
+        block = vaesimcq_u8(vaesdq_u8(block, rk8));
+        block = vaesimcq_u8(vaesdq_u8(block, rk7));
+        block = vaesimcq_u8(vaesdq_u8(block, rk6));
+        block = vaesimcq_u8(vaesdq_u8(block, rk5));
+        block = vaesimcq_u8(vaesdq_u8(block, rk4));
+        block = vaesimcq_u8(vaesdq_u8(block, rk3));
+        block = vaesimcq_u8(vaesdq_u8(block, rk2));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaesdq_u8(block, rk1), rk0);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * CBC encryption with 11 round keys (the 128-bit key schedule) taken from
+ * cx->k.expandedKey.
+ *
+ * The chaining value starts as cx->iv; each input block is XORed with it
+ * before the rounds, and the block just produced becomes the next chaining
+ * value.  The final chaining value is written back to cx->iv.  The loop only
+ * terminates when inputLen has been counted down to zero in steps of 16, so
+ * the caller is expected to pass a multiple of 16.  outputLen, maxOutputLen
+ * and blocksize are not touched here.  Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t chain;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Initial chaining value. */
+    chain = vld1q_u8(cx->iv);
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Mix in the chaining value before the rounds. */
+        block = veorq_u8(block, chain);
+
+        /* Rounds: every vaeseq_u8 but the last is followed by vaesmcq_u8. */
+        block = vaesmcq_u8(vaeseq_u8(block, rk0));
+        block = vaesmcq_u8(vaeseq_u8(block, rk1));
+        block = vaesmcq_u8(vaeseq_u8(block, rk2));
+        block = vaesmcq_u8(vaeseq_u8(block, rk3));
+        block = vaesmcq_u8(vaeseq_u8(block, rk4));
+        block = vaesmcq_u8(vaeseq_u8(block, rk5));
+        block = vaesmcq_u8(vaeseq_u8(block, rk6));
+        block = vaesmcq_u8(vaeseq_u8(block, rk7));
+        block = vaesmcq_u8(vaeseq_u8(block, rk8));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaeseq_u8(block, rk9), rk10);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+
+        /* The block just written chains into the next one. */
+        chain = block;
+    }
+    vst1q_u8(cx->iv, chain);
+
+    return SECSuccess;
+}
+
+/*
+ * CBC decryption with 11 round keys (the 128-bit key schedule) taken from
+ * cx->k.expandedKey; the keys are applied from the last one back to the
+ * first.
+ *
+ * The chaining value starts as cx->iv; after the rounds each block is XORed
+ * with it, and the input block just consumed becomes the next chaining
+ * value.  The final chaining value is written back to cx->iv.  The loop only
+ * terminates when inputLen has been counted down to zero in steps of 16, so
+ * the caller is expected to pass a multiple of 16.  outputLen, maxOutputLen
+ * and blocksize are not touched here.  Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t chain;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Initial chaining value. */
+    chain = vld1q_u8(cx->iv);
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block, cipherIn;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+        /* Keep the untouched input block; it is the next chaining value. */
+        cipherIn = block;
+
+        /* Rounds: every vaesdq_u8 but the last is followed by vaesimcq_u8. */
+        block = vaesimcq_u8(vaesdq_u8(block, rk10));
+        block = vaesimcq_u8(vaesdq_u8(block, rk9));
+        block = vaesimcq_u8(vaesdq_u8(block, rk8));
+        block = vaesimcq_u8(vaesdq_u8(block, rk7));
+        block = vaesimcq_u8(vaesdq_u8(block, rk6));
+        block = vaesimcq_u8(vaesdq_u8(block, rk5));
+        block = vaesimcq_u8(vaesdq_u8(block, rk4));
+        block = vaesimcq_u8(vaesdq_u8(block, rk3));
+        block = vaesimcq_u8(vaesdq_u8(block, rk2));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaesdq_u8(block, rk1), rk0);
+
+        /* Undo the chaining. */
+        block = veorq_u8(block, chain);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+
+        chain = cipherIn;
+    }
+    vst1q_u8(cx->iv, chain);
+
+    return SECSuccess;
+}
+
+/*
+ * ECB encryption with 13 round keys (the 192-bit key schedule) taken from
+ * cx->k.expandedKey.
+ *
+ * Processes input 16 bytes at a time and writes the result to output.  The
+ * loop only terminates when inputLen has been counted down to zero in steps
+ * of 16, so the caller is expected to pass a multiple of 16.  outputLen,
+ * maxOutputLen and blocksize are not touched here.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t rk11, rk12;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+    rk11 = vld1q_u8(rkBytes + 176);
+    rk12 = vld1q_u8(rkBytes + 192);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Rounds: every vaeseq_u8 but the last is followed by vaesmcq_u8. */
+        block = vaesmcq_u8(vaeseq_u8(block, rk0));
+        block = vaesmcq_u8(vaeseq_u8(block, rk1));
+        block = vaesmcq_u8(vaeseq_u8(block, rk2));
+        block = vaesmcq_u8(vaeseq_u8(block, rk3));
+        block = vaesmcq_u8(vaeseq_u8(block, rk4));
+        block = vaesmcq_u8(vaeseq_u8(block, rk5));
+        block = vaesmcq_u8(vaeseq_u8(block, rk6));
+        block = vaesmcq_u8(vaeseq_u8(block, rk7));
+        block = vaesmcq_u8(vaeseq_u8(block, rk8));
+        block = vaesmcq_u8(vaeseq_u8(block, rk9));
+        block = vaesmcq_u8(vaeseq_u8(block, rk10));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaeseq_u8(block, rk11), rk12);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * ECB decryption with 13 round keys (the 192-bit key schedule) taken from
+ * cx->k.expandedKey; the keys are applied from the last one back to the
+ * first.
+ *
+ * Processes input 16 bytes at a time and writes the result to output.  The
+ * loop only terminates when inputLen has been counted down to zero in steps
+ * of 16, so the caller is expected to pass a multiple of 16.  outputLen,
+ * maxOutputLen and blocksize are not touched here.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t rk11, rk12;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+    rk11 = vld1q_u8(rkBytes + 176);
+    rk12 = vld1q_u8(rkBytes + 192);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Rounds: every vaesdq_u8 but the last is followed by vaesimcq_u8. */
+        block = vaesimcq_u8(vaesdq_u8(block, rk12));
+        block = vaesimcq_u8(vaesdq_u8(block, rk11));
+        block = vaesimcq_u8(vaesdq_u8(block, rk10));
+        block = vaesimcq_u8(vaesdq_u8(block, rk9));
+        block = vaesimcq_u8(vaesdq_u8(block, rk8));
+        block = vaesimcq_u8(vaesdq_u8(block, rk7));
+        block = vaesimcq_u8(vaesdq_u8(block, rk6));
+        block = vaesimcq_u8(vaesdq_u8(block, rk5));
+        block = vaesimcq_u8(vaesdq_u8(block, rk4));
+        block = vaesimcq_u8(vaesdq_u8(block, rk3));
+        block = vaesimcq_u8(vaesdq_u8(block, rk2));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaesdq_u8(block, rk1), rk0);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * CBC encryption with 13 round keys (the 192-bit key schedule) taken from
+ * cx->k.expandedKey.
+ *
+ * The chaining value starts as cx->iv; each input block is XORed with it
+ * before the rounds, and the block just produced becomes the next chaining
+ * value.  The final chaining value is written back to cx->iv.  The loop only
+ * terminates when inputLen has been counted down to zero in steps of 16, so
+ * the caller is expected to pass a multiple of 16.  outputLen, maxOutputLen
+ * and blocksize are not touched here.  Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t rk11, rk12;
+    uint8x16_t chain;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Initial chaining value. */
+    chain = vld1q_u8(cx->iv);
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+    rk11 = vld1q_u8(rkBytes + 176);
+    rk12 = vld1q_u8(rkBytes + 192);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+
+        /* Mix in the chaining value before the rounds. */
+        block = veorq_u8(block, chain);
+
+        /* Rounds: every vaeseq_u8 but the last is followed by vaesmcq_u8. */
+        block = vaesmcq_u8(vaeseq_u8(block, rk0));
+        block = vaesmcq_u8(vaeseq_u8(block, rk1));
+        block = vaesmcq_u8(vaeseq_u8(block, rk2));
+        block = vaesmcq_u8(vaeseq_u8(block, rk3));
+        block = vaesmcq_u8(vaeseq_u8(block, rk4));
+        block = vaesmcq_u8(vaeseq_u8(block, rk5));
+        block = vaesmcq_u8(vaeseq_u8(block, rk6));
+        block = vaesmcq_u8(vaeseq_u8(block, rk7));
+        block = vaesmcq_u8(vaeseq_u8(block, rk8));
+        block = vaesmcq_u8(vaeseq_u8(block, rk9));
+        block = vaesmcq_u8(vaeseq_u8(block, rk10));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaeseq_u8(block, rk11), rk12);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+
+        /* The block just written chains into the next one. */
+        chain = block;
+    }
+    vst1q_u8(cx->iv, chain);
+
+    return SECSuccess;
+}
+
+/*
+ * CBC decryption with 13 round keys (the 192-bit key schedule) taken from
+ * cx->k.expandedKey; the keys are applied from the last one back to the
+ * first.
+ *
+ * The chaining value starts as cx->iv; after the rounds each block is XORed
+ * with it, and the input block just consumed becomes the next chaining
+ * value.  The final chaining value is written back to cx->iv.  The loop only
+ * terminates when inputLen has been counted down to zero in steps of 16, so
+ * the caller is expected to pass a multiple of 16.  outputLen, maxOutputLen
+ * and blocksize are not touched here.  Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
+                        unsigned int *outputLen,
+                        unsigned int maxOutputLen,
+                        const unsigned char *input,
+                        unsigned int inputLen,
+                        unsigned int blocksize)
+{
+    const PRUint8 *rkBytes = (const PRUint8 *)cx->k.expandedKey;
+    uint8x16_t rk0, rk1, rk2, rk3, rk4, rk5, rk6, rk7, rk8, rk9, rk10;
+    uint8x16_t rk11, rk12;
+    uint8x16_t chain;
+#if !defined(HAVE_UNALIGNED_ACCESS)
+    pre_align unsigned char bounce[16] post_align;
+#endif
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    /* Initial chaining value. */
+    chain = vld1q_u8(cx->iv);
+
+    /* Read all round keys once, before the block loop. */
+    rk0 = vld1q_u8(rkBytes);
+    rk1 = vld1q_u8(rkBytes + 16);
+    rk2 = vld1q_u8(rkBytes + 32);
+    rk3 = vld1q_u8(rkBytes + 48);
+    rk4 = vld1q_u8(rkBytes + 64);
+    rk5 = vld1q_u8(rkBytes + 80);
+    rk6 = vld1q_u8(rkBytes + 96);
+    rk7 = vld1q_u8(rkBytes + 112);
+    rk8 = vld1q_u8(rkBytes + 128);
+    rk9 = vld1q_u8(rkBytes + 144);
+    rk10 = vld1q_u8(rkBytes + 160);
+    rk11 = vld1q_u8(rkBytes + 176);
+    rk12 = vld1q_u8(rkBytes + 192);
+
+    for (; inputLen > 0; inputLen -= 16, input += 16, output += 16) {
+        uint8x16_t block, cipherIn;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        block = vld1q_u8(input);
+#else
+        if (((uintptr_t)input & 0x7) == 0) {
+            block = vld1q_u8(__builtin_assume_aligned(input, 8));
+        } else {
+            /* input is not 8-byte aligned: go through the staging buffer */
+            memcpy(bounce, input, 16);
+            block = vld1q_u8(__builtin_assume_aligned(bounce, 16));
+        }
+#endif
+        /* Keep the untouched input block; it is the next chaining value. */
+        cipherIn = block;
+
+        /* Rounds: every vaesdq_u8 but the last is followed by vaesimcq_u8. */
+        block = vaesimcq_u8(vaesdq_u8(block, rk12));
+        block = vaesimcq_u8(vaesdq_u8(block, rk11));
+        block = vaesimcq_u8(vaesdq_u8(block, rk10));
+        block = vaesimcq_u8(vaesdq_u8(block, rk9));
+        block = vaesimcq_u8(vaesdq_u8(block, rk8));
+        block = vaesimcq_u8(vaesdq_u8(block, rk7));
+        block = vaesimcq_u8(vaesdq_u8(block, rk6));
+        block = vaesimcq_u8(vaesdq_u8(block, rk5));
+        block = vaesimcq_u8(vaesdq_u8(block, rk4));
+        block = vaesimcq_u8(vaesdq_u8(block, rk3));
+        block = vaesimcq_u8(vaesdq_u8(block, rk2));
+        /* Last round, then the final AddRoundKey as a plain XOR. */
+        block = veorq_u8(vaesdq_u8(block, rk1), rk0);
+
+        /* Undo the chaining. */
+        block = veorq_u8(block, chain);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+        vst1q_u8(output, block);
+#else
+        if (((uintptr_t)output & 0x7) == 0) {
+            vst1q_u8(__builtin_assume_aligned(output, 8), block);
+        } else {
+            vst1q_u8(__builtin_assume_aligned(bounce, 16), block);
+            memcpy(output, bounce, 16);
+        }
+#endif
+
+        chain = cipherIn;
+    }
+    vst1q_u8(cx->iv, chain);
+
+    return SECSuccess;
+}
+
+/*
+ * ECB encryption with a 15-entry round-key schedule (14 AES rounds, i.e. the
+ * 256-bit key size) using the ARMv8 AES intrinsics.
+ *
+ * cx supplies the round keys (cx->k.expandedKey, read as 15 consecutive
+ * 16-byte blocks).
+ * output receives one 16-byte block per input block.
+ * input is consumed 16 bytes at a time.
+ * inputLen is a byte count; zero returns immediately. The loop subtracts 16
+ * from this unsigned value per block, so it must be a multiple of 16.
+ * outputLen, maxOutputLen and blocksize are not referenced in this function.
+ *
+ * Each block is processed independently. Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize)
+{
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ /* Aligned bounce buffer for input/output pointers that are not 8-byte aligned. */
+ pre_align unsigned char buf[16] post_align;
+#endif
+ uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
+ uint8x16_t key11, key12, key13, key14, key15;
+ PRUint8 *key = (PRUint8 *)cx->k.expandedKey;
+
+ if (inputLen == 0) {
+ return SECSuccess;
+ }
+
+ /* Load all round keys once, outside the per-block loop. */
+ key1 = vld1q_u8(key);
+ key2 = vld1q_u8(key + 16);
+ key3 = vld1q_u8(key + 32);
+ key4 = vld1q_u8(key + 48);
+ key5 = vld1q_u8(key + 64);
+ key6 = vld1q_u8(key + 80);
+ key7 = vld1q_u8(key + 96);
+ key8 = vld1q_u8(key + 112);
+ key9 = vld1q_u8(key + 128);
+ key10 = vld1q_u8(key + 144);
+ key11 = vld1q_u8(key + 160);
+ key12 = vld1q_u8(key + 176);
+ key13 = vld1q_u8(key + 192);
+ key14 = vld1q_u8(key + 208);
+ key15 = vld1q_u8(key + 224);
+
+ while (inputLen > 0) {
+ uint8x16_t state;
+#if defined(HAVE_UNALIGNED_ACCESS)
+ state = vld1q_u8(input);
+#else
+ if ((uintptr_t)input & 0x7) {
+ /* input is not 8-byte aligned: copy it into the aligned buffer first */
+ memcpy(buf, input, 16);
+ state = vld1q_u8(__builtin_assume_aligned(buf, 16));
+ } else {
+ state = vld1q_u8(__builtin_assume_aligned(input, 8));
+ }
+#endif
+ input += 16;
+ inputLen -= 16;
+
+ /* Rounds */
+ state = vaeseq_u8(state, key1);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key2);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key3);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key4);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key5);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key6);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key7);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key8);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key9);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key10);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key11);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key12);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key13);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key14);
+ /* AddRoundKey */
+ state = veorq_u8(state, key15);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ vst1q_u8(output, state);
+#else
+ if ((uintptr_t)output & 0x7) {
+ /* output is not 8-byte aligned: store to the aligned buffer, then copy out */
+ vst1q_u8(__builtin_assume_aligned(buf, 16), state);
+ memcpy(output, buf, 16);
+ } else {
+ vst1q_u8(__builtin_assume_aligned(output, 8), state);
+ }
+#endif
+ output += 16;
+ }
+ return SECSuccess;
+}
+
+/*
+ * ECB decryption with a 15-entry round-key schedule (14 AES rounds, i.e. the
+ * 256-bit key size) using the ARMv8 AES intrinsics.
+ *
+ * cx supplies the round keys (cx->k.expandedKey, read as 15 consecutive
+ * 16-byte blocks); they are applied from key15 down to key1.
+ * output receives one 16-byte block per input block.
+ * input is consumed 16 bytes at a time.
+ * inputLen is a byte count; zero returns immediately. The loop subtracts 16
+ * from this unsigned value per block, so it must be a multiple of 16.
+ * outputLen, maxOutputLen and blocksize are not referenced in this function.
+ *
+ * Each block is processed independently. Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize)
+{
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ /* Aligned bounce buffer for input/output pointers that are not 8-byte aligned. */
+ pre_align unsigned char buf[16] post_align;
+#endif
+ uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
+ uint8x16_t key11, key12, key13, key14, key15;
+ const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
+
+ if (!inputLen) {
+ return SECSuccess;
+ }
+
+ /* Load all round keys once, outside the per-block loop. */
+ key1 = vld1q_u8(key);
+ key2 = vld1q_u8(key + 16);
+ key3 = vld1q_u8(key + 32);
+ key4 = vld1q_u8(key + 48);
+ key5 = vld1q_u8(key + 64);
+ key6 = vld1q_u8(key + 80);
+ key7 = vld1q_u8(key + 96);
+ key8 = vld1q_u8(key + 112);
+ key9 = vld1q_u8(key + 128);
+ key10 = vld1q_u8(key + 144);
+ key11 = vld1q_u8(key + 160);
+ key12 = vld1q_u8(key + 176);
+ key13 = vld1q_u8(key + 192);
+ key14 = vld1q_u8(key + 208);
+ key15 = vld1q_u8(key + 224);
+
+ while (inputLen > 0) {
+ uint8x16_t state;
+#if defined(HAVE_UNALIGNED_ACCESS)
+ state = vld1q_u8(input);
+#else
+ if ((uintptr_t)input & 0x7) {
+ /* input is not 8-byte aligned: copy it into the aligned buffer first */
+ memcpy(buf, input, 16);
+ state = vld1q_u8(__builtin_assume_aligned(buf, 16));
+ } else {
+ state = vld1q_u8(__builtin_assume_aligned(input, 8));
+ }
+#endif
+ input += 16;
+ inputLen -= 16;
+
+ /* Rounds */
+ state = vaesdq_u8(state, key15);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key14);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key13);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key12);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key11);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key10);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key9);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key8);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key7);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key6);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key5);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key4);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key3);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key2);
+ /* AddRoundKey */
+ state = veorq_u8(state, key1);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ vst1q_u8(output, state);
+#else
+ if ((uintptr_t)output & 0x7) {
+ /* output is not 8-byte aligned: store to the aligned buffer, then copy out */
+ vst1q_u8(__builtin_assume_aligned(buf, 16), state);
+ memcpy(output, buf, 16);
+ } else {
+ vst1q_u8(__builtin_assume_aligned(output, 8), state);
+ }
+#endif
+ output += 16;
+ }
+
+ return SECSuccess;
+}
+
+/*
+ * CBC encryption with a 15-entry round-key schedule (14 AES rounds, i.e. the
+ * 256-bit key size) using the ARMv8 AES intrinsics.
+ *
+ * cx supplies the chaining value (cx->iv) and the round keys
+ * (cx->k.expandedKey, read as 15 consecutive 16-byte blocks).
+ * output receives one 16-byte block per input block.
+ * input is consumed 16 bytes at a time.
+ * inputLen is a byte count; zero returns immediately. The loop subtracts 16
+ * from this unsigned value per block, so it must be a multiple of 16.
+ * outputLen, maxOutputLen and blocksize are not referenced in this function.
+ *
+ * On return cx->iv holds the last output (ciphertext) block so that a later
+ * call continues the chain. Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize)
+{
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ /* Aligned bounce buffer for input/output pointers that are not 8-byte aligned. */
+ pre_align unsigned char buf[16] post_align;
+#endif
+ uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
+ uint8x16_t key11, key12, key13, key14, key15;
+ uint8x16_t iv;
+ const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
+
+ if (!inputLen) {
+ return SECSuccess;
+ }
+
+ /* iv */
+ iv = vld1q_u8(cx->iv);
+
+ /* expandedKey */
+ key1 = vld1q_u8(key);
+ key2 = vld1q_u8(key + 16);
+ key3 = vld1q_u8(key + 32);
+ key4 = vld1q_u8(key + 48);
+ key5 = vld1q_u8(key + 64);
+ key6 = vld1q_u8(key + 80);
+ key7 = vld1q_u8(key + 96);
+ key8 = vld1q_u8(key + 112);
+ key9 = vld1q_u8(key + 128);
+ key10 = vld1q_u8(key + 144);
+ key11 = vld1q_u8(key + 160);
+ key12 = vld1q_u8(key + 176);
+ key13 = vld1q_u8(key + 192);
+ key14 = vld1q_u8(key + 208);
+ key15 = vld1q_u8(key + 224);
+
+ while (inputLen > 0) {
+ uint8x16_t state;
+#if defined(HAVE_UNALIGNED_ACCESS)
+ state = vld1q_u8(input);
+#else
+ if ((uintptr_t)input & 0x7) {
+ /* input is not 8-byte aligned: copy it into the aligned buffer first */
+ memcpy(buf, input, 16);
+ state = vld1q_u8(__builtin_assume_aligned(buf, 16));
+ } else {
+ state = vld1q_u8(__builtin_assume_aligned(input, 8));
+ }
+#endif
+ input += 16;
+ inputLen -= 16;
+
+ /* CBC: XOR the input block with the previous output block (cx->iv for the first). */
+ state = veorq_u8(state, iv);
+
+ /* Rounds */
+ state = vaeseq_u8(state, key1);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key2);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key3);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key4);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key5);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key6);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key7);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key8);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key9);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key10);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key11);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key12);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key13);
+ state = vaesmcq_u8(state);
+ state = vaeseq_u8(state, key14);
+ /* AddRoundKey */
+ state = veorq_u8(state, key15);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ vst1q_u8(output, state);
+#else
+ if ((uintptr_t)output & 0x7) {
+ /* output is not 8-byte aligned: store to the aligned buffer, then copy out */
+ vst1q_u8(__builtin_assume_aligned(buf, 16), state);
+ memcpy(output, buf, 16);
+ } else {
+ vst1q_u8(__builtin_assume_aligned(output, 8), state);
+ }
+#endif
+ output += 16;
+ /* The block just written chains into the next block. */
+ iv = state;
+ }
+ /* Save the chaining value for a subsequent call. */
+ vst1q_u8(cx->iv, iv);
+
+ return SECSuccess;
+}
+
+/*
+ * CBC decryption with a 15-entry round-key schedule (14 AES rounds, i.e. the
+ * 256-bit key size) using the ARMv8 AES intrinsics.
+ *
+ * cx supplies the chaining value (cx->iv) and the round keys
+ * (cx->k.expandedKey, read as 15 consecutive 16-byte blocks); they are
+ * applied from key15 down to key1.
+ * output receives one 16-byte block per input block.
+ * input is consumed 16 bytes at a time.
+ * inputLen is a byte count; zero returns immediately. The loop subtracts 16
+ * from this unsigned value per block, so it must be a multiple of 16.
+ * outputLen, maxOutputLen and blocksize are not referenced in this function.
+ *
+ * On return cx->iv holds the last input (ciphertext) block so that a later
+ * call continues the chain. Always returns SECSuccess.
+ */
+SECStatus
+arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize)
+{
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ /* Aligned bounce buffer for input/output pointers that are not 8-byte aligned. */
+ pre_align unsigned char buf[16] post_align;
+#endif
+ uint8x16_t iv;
+ uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10;
+ uint8x16_t key11, key12, key13, key14, key15;
+ const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey;
+
+ if (!inputLen) {
+ return SECSuccess;
+ }
+
+ /* iv */
+ iv = vld1q_u8(cx->iv);
+
+ /* expandedKey */
+ key1 = vld1q_u8(key);
+ key2 = vld1q_u8(key + 16);
+ key3 = vld1q_u8(key + 32);
+ key4 = vld1q_u8(key + 48);
+ key5 = vld1q_u8(key + 64);
+ key6 = vld1q_u8(key + 80);
+ key7 = vld1q_u8(key + 96);
+ key8 = vld1q_u8(key + 112);
+ key9 = vld1q_u8(key + 128);
+ key10 = vld1q_u8(key + 144);
+ key11 = vld1q_u8(key + 160);
+ key12 = vld1q_u8(key + 176);
+ key13 = vld1q_u8(key + 192);
+ key14 = vld1q_u8(key + 208);
+ key15 = vld1q_u8(key + 224);
+
+ while (inputLen > 0) {
+ uint8x16_t state, old_state;
+#if defined(HAVE_UNALIGNED_ACCESS)
+ state = vld1q_u8(input);
+#else
+ if ((uintptr_t)input & 0x7) {
+ /* input is not 8-byte aligned: copy it into the aligned buffer first */
+ memcpy(buf, input, 16);
+ state = vld1q_u8(__builtin_assume_aligned(buf, 16));
+ } else {
+ state = vld1q_u8(__builtin_assume_aligned(input, 8));
+ }
+#endif
+ /* Keep the ciphertext block: it becomes the chaining value for the next block. */
+ old_state = state;
+ input += 16;
+ inputLen -= 16;
+
+ /* Rounds */
+ state = vaesdq_u8(state, key15);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key14);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key13);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key12);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key11);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key10);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key9);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key8);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key7);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key6);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key5);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key4);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key3);
+ state = vaesimcq_u8(state);
+ state = vaesdq_u8(state, key2);
+ /* AddRoundKey */
+ state = veorq_u8(state, key1);
+
+ /* CBC: XOR with the previous ciphertext block (cx->iv for the first block). */
+ state = veorq_u8(state, iv);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ vst1q_u8(output, state);
+#else
+ if ((uintptr_t)output & 0x7) {
+ /* output is not 8-byte aligned: store to the aligned buffer, then copy out */
+ vst1q_u8(__builtin_assume_aligned(buf, 16), state);
+ memcpy(output, buf, 16);
+ } else {
+ vst1q_u8(__builtin_assume_aligned(output, 8), state);
+ }
+#endif
+ output += 16;
+
+ iv = old_state;
+ }
+ /* Save the chaining value for a subsequent call. */
+ vst1q_u8(cx->iv, iv);
+
+ return SECSuccess;
+}
+
+#endif
diff --git a/security/nss/lib/freebl/aes-armv8.h b/security/nss/lib/freebl/aes-armv8.h
new file mode 100644
index 0000000000..b0ef1c8708
--- /dev/null
+++ b/security/nss/lib/freebl/aes-armv8.h
@@ -0,0 +1,103 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * ARMv8 AES entry points, one per (direction, mode, key size) combination:
+ * encrypt/decrypt x ecb/cbc x 128/192/256.
+ * All twelve share one parameter list, so the native_aes_ecb_worker() and
+ * native_aes_cbc_worker() selector macros below can yield any of them.
+ */
+SECStatus arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+
+/*
+ * Evaluates to the ECB worker function for the given direction and key size.
+ * encrypt: non-zero selects an arm_aes_encrypt_ecb_* function, zero a
+ * arm_aes_decrypt_ecb_* one.
+ * keysize: 16 selects the _128 variant, 24 the _192 variant, and any other
+ * value the _256 variant.
+ */
+#define native_aes_ecb_worker(encrypt, keysize) \
+ ((encrypt) \
+ ? ((keysize) == 16 ? arm_aes_encrypt_ecb_128 \
+ : (keysize) == 24 ? arm_aes_encrypt_ecb_192 \
+ : arm_aes_encrypt_ecb_256) \
+ : ((keysize) == 16 ? arm_aes_decrypt_ecb_128 \
+ : (keysize) == 24 ? arm_aes_decrypt_ecb_192 \
+ : arm_aes_decrypt_ecb_256))
+
+/*
+ * Evaluates to the CBC worker function for the given direction and key size.
+ * encrypt: non-zero selects an arm_aes_encrypt_cbc_* function, zero a
+ * arm_aes_decrypt_cbc_* one.
+ * keysize: 16 selects the _128 variant, 24 the _192 variant, and any other
+ * value the _256 variant.
+ */
+#define native_aes_cbc_worker(encrypt, keysize) \
+ ((encrypt) \
+ ? ((keysize) == 16 ? arm_aes_encrypt_cbc_128 \
+ : (keysize) == 24 ? arm_aes_encrypt_cbc_192 \
+ : arm_aes_encrypt_cbc_256) \
+ : ((keysize) == 16 ? arm_aes_decrypt_cbc_128 \
+ : (keysize) == 24 ? arm_aes_decrypt_cbc_192 \
+ : arm_aes_decrypt_cbc_256))
+
+/*
+ * Runs the key expansion for the requested direction:
+ * rijndael_key_expansion() when encrypt is non-zero, otherwise
+ * rijndael_invkey_expansion().
+ *
+ * NOTE: the expansion refers to the identifiers cx, key and Nk, which are
+ * not macro parameters and must therefore be in scope where the macro is
+ * used. The keysize parameter is not used by the expansion.
+ */
+#define native_aes_init(encrypt, keysize) \
+ do { \
+ if (encrypt) { \
+ rijndael_key_expansion(cx, key, Nk); \
+ } else { \
+ rijndael_invkey_expansion(cx, key, Nk); \
+ } \
+ } while (0)
diff --git a/security/nss/lib/freebl/aes-x86.c b/security/nss/lib/freebl/aes-x86.c
new file mode 100644
index 0000000000..0cebb202a8
--- /dev/null
+++ b/security/nss/lib/freebl/aes-x86.c
@@ -0,0 +1,184 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "rijndael.h"
+#include "secerr.h"
+
+#include <wmmintrin.h> /* aes-ni */
+
+/*
+ * Derive round key "res" from the previous round key "k" using round
+ * constant "rcon".
+ * Expands to several statements that assign to tmp_key and tmp, so both
+ * must be declared as __m128i in the enclosing scope, and the macro must be
+ * used as a complete statement (it is not wrapped in do { } while (0)).
+ */
+#define EXPAND_KEY128(k, rcon, res) \
+ tmp_key = _mm_aeskeygenassist_si128(k, rcon); \
+ tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \
+ tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \
+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
+ res = _mm_xor_si128(tmp, tmp_key)
+
+/*
+ * Fill cx->k.keySchedule[0..10] from a 16-byte key.
+ * Entry 0 is the key itself (loaded with an unaligned load); each following
+ * entry is derived from the one before it.
+ */
+static void
+native_key_expansion128(AESContext *cx, const unsigned char *key)
+{
+ __m128i *keySchedule = cx->k.keySchedule;
+ /* Scratch registers written by EXPAND_KEY128. */
+ pre_align __m128i tmp_key post_align;
+ pre_align __m128i tmp post_align;
+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+ EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
+ EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
+ EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
+ EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
+ EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
+ EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
+ EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
+ EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
+ EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
+ EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
+}
+
+/*
+ * Helper macros for the 192-bit key schedule.
+ * They expand to several statements that assign to tmp1, tmp2 and tmp3, so
+ * those must be declared as __m128i in the enclosing scope, and each macro
+ * must be used as a complete statement (none is wrapped in
+ * do { } while (0)).
+ */
+#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \
+ tmp2 = _mm_slli_si128(k0, 4); \
+ tmp1 = _mm_xor_si128(k0, tmp2); \
+ tmp2 = _mm_slli_si128(tmp2, 4); \
+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
+ tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \
+ res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
+
+#define EXPAND_KEY192_PART2(res, k1, k2) \
+ tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
+ res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
+
+/*
+ * One step of the 192-bit schedule: reads k0 and res1, rewrites res1 in
+ * place, and produces res2, res3 and carry. rcon1 and rcon2 are the two
+ * round constants consumed by the step.
+ */
+#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \
+ EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \
+ EXPAND_KEY192_PART2(carry, res1, tmp3); \
+ res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \
+ _mm_castsi128_pd(tmp3), 0)); \
+ res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \
+ _mm_castsi128_pd(carry), 1)); \
+ EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
+
+/*
+ * Fill cx->k.keySchedule[0..12] from a 192-bit key.
+ *
+ * NOTE(review): the second load below reads 16 bytes starting at key + 16,
+ * i.e. up to byte 31, although a 192-bit key is 24 bytes long. Confirm that
+ * callers always pass a buffer with at least 32 readable bytes.
+ */
+static void
+native_key_expansion192(AESContext *cx, const unsigned char *key)
+{
+ __m128i *keySchedule = cx->k.keySchedule;
+ /* Scratch registers written by the EXPAND_KEY192* macros. */
+ pre_align __m128i tmp1 post_align;
+ pre_align __m128i tmp2 post_align;
+ pre_align __m128i tmp3 post_align;
+ pre_align __m128i carry post_align;
+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
+ EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
+ keySchedule[3], carry, 0x1, 0x2);
+ EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
+ EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
+ keySchedule[6], carry, 0x4, 0x8);
+ EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
+ EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
+ keySchedule[9], carry, 0x10, 0x20);
+ EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
+ EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
+ keySchedule[12], carry, 0x40, 0x80);
+}
+
+/*
+ * Derive one round key "res" for the 256-bit schedule from k1x and k2x.
+ * X is the _mm_shuffle_epi32 selector applied to the key-generation-assist
+ * result (0xFF or 0xAA at the call sites in this file).
+ * Expands to several statements that assign to tmp_key, tmp1 and tmp2, so
+ * those must be declared as __m128i in the enclosing scope. Unlike the other
+ * EXPAND_KEY* macros in this file, the expansion already ends with a
+ * semicolon.
+ */
+#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \
+ tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \
+ tmp2 = _mm_slli_si128(k1x, 4); \
+ tmp1 = _mm_xor_si128(k1x, tmp2); \
+ tmp2 = _mm_slli_si128(tmp2, 4); \
+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
+ res = _mm_xor_si128(tmp1, tmp_key);
+
+/* Derive the next two round keys (res1, res2) from the previous two (k1, k2). */
+#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \
+ EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
+ EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
+
+/*
+ * Fill cx->k.keySchedule[0..14] from a 32-byte key.
+ * Entries 0 and 1 are the two halves of the key (unaligned loads); each
+ * EXPAND_KEY256 step derives the next pair from the previous pair, and the
+ * final EXPAND_KEY256_PART produces entry 14 on its own.
+ */
+static void
+native_key_expansion256(AESContext *cx, const unsigned char *key)
+{
+ __m128i *keySchedule = cx->k.keySchedule;
+ /* Scratch registers written by the EXPAND_KEY256* macros. */
+ pre_align __m128i tmp_key post_align;
+ pre_align __m128i tmp1 post_align;
+ pre_align __m128i tmp2 post_align;
+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
+ EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
+ keySchedule[1], 0x01);
+ EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
+ keySchedule[3], 0x02);
+ EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
+ keySchedule[5], 0x04);
+ EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
+ keySchedule[7], 0x08);
+ EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
+ keySchedule[9], 0x10);
+ EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
+ keySchedule[11], 0x20);
+ EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
+ keySchedule[13], 0xFF);
+}
+
+/*
+ * AES key expansion using aes-ni instructions.
+ *
+ * Nk selects the schedule to build: 4 uses the 128-bit expansion, 6 the
+ * 192-bit expansion and 8 the 256-bit expansion. Any other value leaves cx
+ * untouched.
+ */
+void
+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
+                              unsigned int Nk)
+{
+    if (Nk == 4) {
+        native_key_expansion128(cx, key);
+    } else if (Nk == 6) {
+        native_key_expansion192(cx, key);
+    } else if (Nk == 8) {
+        native_key_expansion256(cx, key);
+    }
+    /* Any other Nk shouldn't happen (checked by the caller). */
+}
+
+/*
+ * Encrypt one 16-byte block from "input" into "output" with AES-NI, using
+ * round keys cx->k.keySchedule[0 .. cx->Nr]. Both buffers are accessed with
+ * unaligned loads/stores.
+ */
+void
+rijndael_native_encryptBlock(AESContext *cx,
+                             unsigned char *output,
+                             const unsigned char *input)
+{
+    unsigned int round = 1;
+    pre_align __m128i block post_align = _mm_loadu_si128((__m128i *)input);
+
+    /* Whitening with round key 0. */
+    block = _mm_xor_si128(block, cx->k.keySchedule[0]);
+
+    /* Rounds 1 .. Nr-1. */
+    while (round < cx->Nr) {
+        block = _mm_aesenc_si128(block, cx->k.keySchedule[round]);
+        ++round;
+    }
+
+    /* Last round uses the dedicated "last" instruction with round key Nr. */
+    block = _mm_aesenclast_si128(block, cx->k.keySchedule[cx->Nr]);
+    _mm_storeu_si128((__m128i *)output, block);
+}
+
+/*
+ * Decrypt one 16-byte block from "input" into "output" with AES-NI, walking
+ * the round keys cx->k.keySchedule[cx->Nr .. 0] in descending order. Both
+ * buffers are accessed with unaligned loads/stores.
+ */
+void
+rijndael_native_decryptBlock(AESContext *cx,
+                             unsigned char *output,
+                             const unsigned char *input)
+{
+    int round;
+    pre_align __m128i block post_align = _mm_loadu_si128((__m128i *)input);
+
+    /* Whitening with the last round key. */
+    block = _mm_xor_si128(block, cx->k.keySchedule[cx->Nr]);
+
+    /* Rounds Nr-1 .. 1. */
+    round = cx->Nr - 1;
+    while (round > 0) {
+        block = _mm_aesdec_si128(block, cx->k.keySchedule[round]);
+        --round;
+    }
+
+    /* Last round uses the dedicated "last" instruction with round key 0. */
+    block = _mm_aesdeclast_si128(block, cx->k.keySchedule[0]);
+    _mm_storeu_si128((__m128i *)output, block);
+}
+
+/*
+ * out = a ^ b for one 16-byte block.
+ * All three pointers are accessed with unaligned loads/stores.
+ */
+void
+native_xorBlock(unsigned char *out,
+                const unsigned char *a,
+                const unsigned char *b)
+{
+    pre_align __m128i post_align lhs = _mm_loadu_si128((__m128i *)(a));
+    pre_align __m128i post_align rhs = _mm_loadu_si128((__m128i *)(b));
+
+    _mm_storeu_si128((__m128i *)(out), _mm_xor_si128(lhs, rhs));
+}
diff --git a/security/nss/lib/freebl/aeskeywrap.c b/security/nss/lib/freebl/aeskeywrap.c
new file mode 100644
index 0000000000..09c0667c7a
--- /dev/null
+++ b/security/nss/lib/freebl/aeskeywrap.c
@@ -0,0 +1,642 @@
+/*
+ * aeskeywrap.c - implement AES Key Wrap algorithm from RFC 3394
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <stddef.h>
+
+#include "prcpucfg.h"
+#if defined(IS_LITTLE_ENDIAN) || defined(SHA_NO_LONG_LONG)
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 0
+#else
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 1
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "secport.h" /* for PORT_XXX */
+#include "secerr.h"
+#include "blapi.h" /* for AES_ functions */
+#include "rijndael.h"
+
+/* State for one AES key wrap context. */
+struct AESKeyWrapContextStr {
+ AESContext aescx; /* AES context, set up via AES_InitContext() in AESKeyWrap_InitContext() */
+ unsigned char iv[AES_KEY_WRAP_IV_BYTES]; /* IV supplied at init, or all 0xA6 bytes when none was supplied */
+ void *mem; /* Pointer to beginning of allocated memory. */
+};
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+/*
+** Allocate a zero-filled AES key wrap context whose address is rounded up
+** to a 16-byte boundary.
+**
+** The address returned by the allocator is recorded in the "mem" field of
+** the aligned context; that is the pointer that must be passed to
+** PORT_Free(). It has to be stored through the aligned pointer, because
+** that is the only pointer callers ever see: storing it through the raw
+** pointer puts it at a different address whenever the two differ.
+**
+** Returns NULL and sets SEC_ERROR_NO_MEMORY if the allocation fails.
+*/
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    /* aligned_alloc is C11 so we have to do it the old way. */
+    AESKeyWrapContext *ctx;
+    void *mem = PORT_ZAlloc(sizeof(AESKeyWrapContext) + 15);
+
+    if (mem == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    /* The 15 spare bytes guarantee the rounded-up struct still fits. */
+    ctx = (AESKeyWrapContext *)(((uintptr_t)mem + 15) & ~(uintptr_t)0x0F);
+    ctx->mem = mem;
+    return ctx;
+}
+
+/*
+** Initialize an AES key wrap context.
+** "cx" the context; NULL fails with SEC_ERROR_INVALID_ARGS
+** "key" raw key data, handed to AES_InitContext
+** "keylen" the number of bytes of key data, handed to AES_InitContext
+** "iv" the key wrap IV (sizeof cx->iv bytes); when NULL the IV is filled
+**      with 0xA6 bytes
+** "encrypt" direction flag, handed to AES_InitContext
+** "x1", "x2" are not used by this function
+** Returns the result of AES_InitContext.
+*/
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+                       const unsigned char *key,
+                       unsigned int keylen,
+                       const unsigned char *iv,
+                       int x1,
+                       unsigned int encrypt,
+                       unsigned int x2)
+{
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (iv == NULL) {
+        memset(cx->iv, 0xA6, sizeof cx->iv);
+    } else {
+        memcpy(cx->iv, iv, sizeof cx->iv);
+    }
+
+    return AES_InitContext(&cx->aescx, key, keylen, NULL, NSS_AES, encrypt,
+                           AES_BLOCK_SIZE);
+}
+
+/*
+** Allocate and initialize an AES key wrap context in one step.
+** "key" raw key data
+** "iv" the key wrap IV, or NULL (see AESKeyWrap_InitContext)
+** "encrypt" direction flag passed on to AESKeyWrap_InitContext
+** "keylen" the number of bytes of key data (16, 24, or 32)
+** Returns the new context, or NULL if allocation or initialization failed;
+** in the latter case the allocation is released before returning.
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    AESKeyWrapContext *cx = AESKeyWrap_AllocateContext();
+
+    if (cx == NULL) {
+        return NULL; /* error is already set */
+    }
+    if (AESKeyWrap_InitContext(cx, key, keylen, iv, 0, encrypt, 0) != SECSuccess) {
+        PORT_Free(cx->mem);
+        return NULL; /* error should already be set */
+    }
+    return cx;
+}
+
+/*
+** Destroy an AES KeyWrap context.
+** "cx" the context; NULL is accepted and ignored
+** "freeit" if PR_TRUE then also release the allocation recorded in cx->mem
+** The embedded AES context is always torn down with AES_DestroyContext
+** (without freeing it, since it lives inside the key wrap context).
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+    AES_DestroyContext(&cx->aescx, PR_FALSE);
+    if (freeit) {
+        PORT_Free(cx->mem);
+    }
+}
+
+#if !BIG_ENDIAN_WITH_64_BIT_REGISTERS
+
+/* The AES Key Wrap algorithm has 64-bit values that are ALWAYS big-endian
+** (Most significant byte first) in memory. The only ALU operations done
+** on them are increment, decrement, and XOR. So, on little-endian CPUs,
+** and on CPUs that lack 64-bit registers, these big-endian 64-bit operations
+** are simulated in the following code. This is thought to be faster and
+** simpler than trying to convert the data to little-endian and back.
+*/
+
+/* A and T point to two 64-bit values stored most significant byte first
+** (big endian). This function increments the 64-bit value T, and then
+** XORs it with A, changing A.
+*/
+static void
+increment_and_xor(unsigned char *A, unsigned char *T)
+{
+    unsigned int pos;
+
+    /* Add one starting at the least significant byte (index 7); keep
+     * carrying towards index 0 for as long as a byte wraps to zero. */
+    for (pos = 8; pos-- > 0;) {
+        if (++T[pos] != 0) {
+            break;
+        }
+    }
+
+    for (pos = 0; pos < 8; ++pos) {
+        A[pos] ^= T[pos];
+    }
+}
+
+/* A and T point to two 64-bit values stored most significant byte first
+** (big endian). This function XORs T with A, giving a new A, then
+** decrements the 64-bit value T.
+**
+** XOR acts on every bit independently, so XORing the two words whole yields
+** the same bytes in memory whatever the host byte order. The decrement does
+** depend on byte order, so it is done bytewise, borrowing from the least
+** significant byte (index 7) towards the most significant one (index 0).
+*/
+static void
+xor_and_decrement(PRUint64 *A, PRUint64 *T)
+{
+    unsigned char *TP = (unsigned char *)T;
+    unsigned int pos;
+
+    *A ^= *T;
+
+    /* Post-decrement each byte: one that was 0 wraps to 0xFF and borrows
+     * from the next more significant byte; any other value ends the chain. */
+    for (pos = 8; pos-- > 0;) {
+        if (TP[pos]-- != 0) {
+            break;
+        }
+    }
+}
+
+/* Given an unsigned long t (in host byte order), store this value as a
+** 64-bit big-endian value (MSB first) in *pt.
+*/
+static void
+set_t(unsigned char *pt, unsigned long t)
+{
+    unsigned int pos;
+
+    /* Emit the low byte at index 7 and work back towards index 0. */
+    for (pos = 8; pos-- > 0;) {
+        pt[pos] = (unsigned char)t;
+        t >>= 8;
+    }
+}
+
+#endif
+
+/* Write val into data[0 .. sizeof(PRUint32)-1], one byte at a time, using
+** PORT_GET_BYTE_BE to pick the byte for each position.
+*/
+static void
+encode_PRUint32_BE(unsigned char *data, PRUint32 val)
+{
+    size_t idx = 0;
+
+    while (idx < sizeof(PRUint32)) {
+        data[idx] = PORT_GET_BYTE_BE(val, idx, sizeof(PRUint32));
+        ++idx;
+    }
+}
+
+/* Read sizeof(PRUint32) bytes from data, most significant byte first, and
+** return them as a PRUint32.
+*/
+static PRUint32
+decode_PRUint32_BE(unsigned char *data)
+{
+    PRUint32 result = 0;
+    size_t idx = 0;
+
+    while (idx < sizeof(PRUint32)) {
+        /* Shift what we have up by one byte and append the next one. */
+        result = (result << PR_BITS_PER_BYTE) | data[idx];
+        ++idx;
+    }
+    return result;
+}
+
+/*
+** Perform AES key wrap W function.
+** "cx" the context
+** "iv" the iv is concatenated to the plain text for executing the function
+** "output" the output buffer to store the encrypted data.
+** "pOutputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** inputLen must be a multiple of AES_KEY_WRAP_BLOCK_SIZE and at least two
+** such blocks (else SEC_ERROR_INPUT_LEN); maxOutputLen must be at least
+** inputLen + AES_KEY_WRAP_BLOCK_SIZE (else SEC_ERROR_OUTPUT_LEN); cx, iv,
+** output and input must be non-NULL (else SEC_ERROR_INVALID_ARGS).
+** If any AES_Encrypt call fails, processing stops at once, that failure is
+** returned, and *pOutputLen (when given) is set to 0.
+*/
+extern SECStatus
+AESKeyWrap_W(AESKeyWrapContext *cx, unsigned char *iv, unsigned char *output,
+             unsigned int *pOutputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen = inputLen + AES_KEY_WRAP_BLOCK_SIZE;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+#define A B[0]
+
+    /* Check args */
+    if (inputLen < 2 * AES_KEY_WRAP_BLOCK_SIZE ||
+        0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    /* iv is copied from unconditionally below, so it must not be NULL. */
+    if (cx == NULL || iv == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks + 1);
+    if (!R)
+        return s; /* error is already set. */
+    /*
+    ** 1) Initialize variables.
+    */
+    memcpy(&A, iv, AES_KEY_WRAP_IV_BYTES);
+    memcpy(&R[1], input, inputLen);
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+    t = 0;
+#else
+    memset(&t, 0, sizeof t);
+#endif
+    /*
+    ** 2) Calculate intermediate values.
+    */
+    for (j = 0; j < 6; ++j) {
+        for (i = 1; i <= nBlocks; ++i) {
+            B[1] = R[i];
+            s = AES_Encrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+/* here, increment t and XOR A with t (in big endian order); */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            A ^= ++t;
+#else
+            increment_and_xor((unsigned char *)&A, (unsigned char *)&t);
+#endif
+        }
+        /* The break above only leaves the inner loop. Stop the outer loop
+         * too, so that a later successful pass cannot overwrite the
+         * failure status. */
+        if (s != SECSuccess)
+            break;
+    }
+    /*
+    ** 3) Output the results.
+    */
+    if (s == SECSuccess) {
+        R[0] = A;
+        memcpy(output, &R[0], outLen);
+        if (pOutputLen)
+            *pOutputLen = outLen;
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    /* R holds nBlocks + 1 words, i.e. outLen bytes; zero them before freeing. */
+    PORT_ZFree(R, outLen);
+    return s;
+}
+#undef A
+
+/*
+** Perform AES key wrap W^-1 function.
+** "cx" the context
+** "iv" the input IV to verify against. If NULL, then skip verification.
+** "ivOut" the output buffer to store the IV (optional).
+** "output" the output buffer to store the decrypted data.
+** "pOutputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** inputLen must be a multiple of AES_KEY_WRAP_BLOCK_SIZE and at least three
+** such blocks (else SEC_ERROR_INPUT_LEN); maxOutputLen must be at least
+** inputLen - AES_KEY_WRAP_BLOCK_SIZE (else SEC_ERROR_OUTPUT_LEN); cx,
+** output and input must be non-NULL (else SEC_ERROR_INVALID_ARGS).
+** If "iv" is given and does not match the recovered IV, nothing is written
+** to "output" and the call fails with SEC_ERROR_BAD_DATA.
+** If any AES_Decrypt call fails, processing stops at once, that failure is
+** returned, and *pOutputLen (when given) is set to 0.
+*/
+extern SECStatus
+AESKeyWrap_Winv(AESKeyWrapContext *cx, unsigned char *iv,
+                unsigned char *ivOut, unsigned char *output,
+                unsigned int *pOutputLen, unsigned int maxOutputLen,
+                const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+    /* Check args */
+    if (inputLen < 3 * AES_KEY_WRAP_BLOCK_SIZE ||
+        0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+    outLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE;
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    if (cx == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks);
+    if (!R)
+        return s; /* error is already set. */
+    /* From here on nBlocks counts the data blocks only; R[0] is the IV word. */
+    nBlocks--;
+    /*
+    ** 1) Initialize variables.
+    */
+    memcpy(&R[0], input, inputLen);
+    B[0] = R[0];
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+    t = 6UL * nBlocks;
+#else
+    set_t((unsigned char *)&t, 6UL * nBlocks);
+#endif
+    /*
+    ** 2) Calculate intermediate values.
+    */
+    for (j = 0; j < 6; ++j) {
+        for (i = nBlocks; i; --i) {
+/* here, XOR A with t (in big endian order) and decrement t; */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            B[0] ^= t--;
+#else
+            xor_and_decrement(&B[0], &t);
+#endif
+            B[1] = R[i];
+            s = AES_Decrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+        }
+        /* The break above only leaves the inner loop. Stop the outer loop
+         * too, so that a later successful pass cannot overwrite the
+         * failure status. */
+        if (s != SECSuccess)
+            break;
+    }
+    /*
+    ** 3) Output the results.
+    */
+    if (s == SECSuccess) {
+        int bad = (iv) && memcmp(&B[0], iv, AES_KEY_WRAP_IV_BYTES);
+        if (!bad) {
+            memcpy(output, &R[1], outLen);
+            if (pOutputLen)
+                *pOutputLen = outLen;
+            if (ivOut) {
+                memcpy(ivOut, &B[0], AES_KEY_WRAP_IV_BYTES);
+            }
+        } else {
+            s = SECFailure;
+            PORT_SetError(SEC_ERROR_BAD_DATA);
+            if (pOutputLen)
+                *pOutputLen = 0;
+        }
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    /* R was allocated with inputLen bytes; zero them before freeing. */
+    PORT_ZFree(R, inputLen);
+    return s;
+}
+
+/*
+** AES key wrap using the IV stored in the context.
+**  "cx" the context; its "iv" member is handed to AESKeyWrap_W
+**  "output" receives the wrapped data
+**  "pOutputLen" receives the number of bytes placed in "output"
+**  "maxOutputLen" capacity of "output" in bytes
+**  "input" the data to wrap
+**  "inputLen" number of bytes in "input"
+**
+** The result is whatever AESKeyWrap_W returns.
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus wrapStatus;
+
+    wrapStatus = AESKeyWrap_W(cx, cx->iv, output, pOutputLen, maxOutputLen,
+                              input, inputLen);
+    return wrapStatus;
+}
+
+/*
+** AES key unwrap, verifying against the IV stored in the context.
+**  "cx" the context; its "iv" member is the value AESKeyWrap_Winv checks
+**  "output" receives the unwrapped data
+**  "pOutputLen" receives the number of bytes placed in "output"
+**  "maxOutputLen" capacity of "output" in bytes
+**  "input" the wrapped data
+**  "inputLen" number of bytes in "input"
+**
+** The recovered IV is not returned (NULL is passed for "ivOut"). The
+** result is whatever AESKeyWrap_Winv returns.
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus unwrapStatus;
+
+    unwrapStatus = AESKeyWrap_Winv(cx, cx->iv, NULL, output, pOutputLen,
+                                   maxOutputLen, input, inputLen);
+    return unwrapStatus;
+}
+
+#define BLOCK_PAD_POWER2(x, bs) (((bs) - ((x) & ((bs)-1))) & ((bs)-1)) /* bytes needed to round x up to a multiple of bs; the mask arithmetic assumes bs is a power of two */
+#define AES_KEY_WRAP_ICV2 0xa6, 0x59, 0x59, 0xa6 /* integrity-check prefix used by the padded (KWP) variants, as an initializer byte list */
+#define AES_KEY_WRAP_ICV2_INT32 0xa65959a6 /* the same four bytes read as one big-endian 32-bit value */
+#define AES_KEY_WRAP_ICV2_LEN 4 /* number of bytes in AES_KEY_WRAP_ICV2 */
+
+/*
+** Perform AES key wrap with padding.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "pOutputLen" how much data is stored in "output". Must not be NULL. It
+**      is set to the required size before "maxOutputLen" is checked, so a
+**      caller that gets SEC_ERROR_OUTPUT_LEN can read the needed size.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+**
+** The input is zero-padded up to a multiple of AES_KEY_WRAP_BLOCK_SIZE and
+** wrapped under an IV made of AES_KEY_WRAP_ICV2 followed by the big-endian
+** 32-bit input length. Returns SECFailure with SEC_ERROR_INPUT_LEN when
+** the padded size does not fit in an unsigned int.
+*/
+extern SECStatus
+AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+                      unsigned int *pOutputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen)
+{
+    unsigned int padLen = BLOCK_PAD_POWER2(inputLen, AES_KEY_WRAP_BLOCK_SIZE);
+    unsigned int paddedInputLen = inputLen + padLen;
+    unsigned int outLen = paddedInputLen + AES_KEY_WRAP_BLOCK_SIZE;
+    unsigned char iv[AES_BLOCK_SIZE] = { AES_KEY_WRAP_ICV2 };
+    unsigned char *newBuf;
+    SECStatus rv;
+
+    /* Unsigned wrap-around in either sum above would leave paddedInputLen
+     * smaller than inputLen, and the copy into newBuf below would then run
+     * past the end of the allocation. Refuse such lengths up front. */
+    if (paddedInputLen < inputLen || outLen < paddedInputLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    *pOutputLen = outLen;
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    PORT_Assert((AES_KEY_WRAP_ICV2_LEN + sizeof(PRUint32)) == AES_KEY_WRAP_BLOCK_SIZE);
+    encode_PRUint32_BE(iv + AES_KEY_WRAP_ICV2_LEN, inputLen);
+
+    /* If we can fit in an AES Block, just do an AES Encrypt,
+     * iv is big enough to handle this on the stack, so no need to allocate
+     */
+    if (outLen == AES_BLOCK_SIZE) {
+        PORT_Assert(inputLen <= AES_KEY_WRAP_BLOCK_SIZE);
+        /* second half of iv becomes the zero-padded plaintext */
+        PORT_Memset(iv + AES_KEY_WRAP_BLOCK_SIZE, 0, AES_KEY_WRAP_BLOCK_SIZE);
+        PORT_Memcpy(iv + AES_KEY_WRAP_BLOCK_SIZE, input, inputLen);
+        rv = AES_Encrypt(&cx->aescx, output, pOutputLen, maxOutputLen, iv,
+                         outLen);
+        PORT_Memset(iv, 0, sizeof(iv));
+        return rv;
+    }
+
+    /* add padding to our input block; PORT_ZAlloc supplies the zero bytes */
+    newBuf = PORT_ZAlloc(paddedInputLen);
+    if (newBuf == NULL) {
+        return SECFailure;
+    }
+    PORT_Memcpy(newBuf, input, inputLen);
+
+    rv = AESKeyWrap_W(cx, iv, output, pOutputLen, maxOutputLen,
+                      newBuf, paddedInputLen);
+    PORT_ZFree(newBuf, paddedInputLen);
+    /* a little overkill, we only need to clear out the length, but this
+     * is easier to verify we got it all */
+    PORT_Memset(iv, 0, sizeof(iv));
+    return rv;
+}
+
+/*
+** Perform AES key unwrap with padding.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "pOutputLen" how much data is stored in "output". Set to the unpadded
+**      plaintext length once the integrity checks have passed (also when
+**      the call then fails with SEC_ERROR_OUTPUT_LEN).
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data; at least AES_BLOCK_SIZE
+**
+** Returns SECFailure with SEC_ERROR_INPUT_LEN for input shorter than one
+** AES block, SEC_ERROR_BAD_DATA when the ICV, length or padding check
+** fails, SEC_ERROR_OUTPUT_LEN when "output" is too small, or when the
+** underlying unwrap fails. On failure nothing unwrapped is left in
+** "output".
+*/
+extern SECStatus
+AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+                      unsigned int *pOutputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen)
+{
+    unsigned int padLen;
+    unsigned int padLen2;
+    unsigned int outLen = 0;
+    unsigned int paddedLen;
+    unsigned int good;
+    unsigned char *newBuf = NULL;
+    unsigned char *allocBuf = NULL;
+    int i;
+    unsigned char iv[AES_BLOCK_SIZE];
+    PRUint32 magic;
+    SECStatus rv = SECFailure;
+
+    /* The shortest padded-wrap ciphertext is one AES block. Rejecting
+     * shorter input here keeps the unsigned subtraction below from
+     * wrapping around, which would otherwise request a huge scratch
+     * buffer before AESKeyWrap_Winv got the chance to refuse the length. */
+    if (inputLen < AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    paddedLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE;
+    /* unwrap the padded result */
+    if (inputLen == AES_BLOCK_SIZE) {
+        /* single block: iv receives the IV half followed by the data half */
+        rv = AES_Decrypt(&cx->aescx, iv, &outLen, inputLen, input, inputLen);
+        newBuf = &iv[AES_KEY_WRAP_BLOCK_SIZE];
+        /* only trust outLen if the decrypt actually produced it */
+        if (rv == SECSuccess) {
+            outLen -= AES_KEY_WRAP_BLOCK_SIZE;
+        }
+    } else {
+        /* if the caller supplied enough space to hold the unpadded buffer,
+         * we can unwrap directly into that unpadded buffer. Otherwise
+         * we allocate a buffer that can hold the padding, and we'll copy
+         * the result in a later step */
+        newBuf = output;
+        if (maxOutputLen < paddedLen) {
+            allocBuf = newBuf = PORT_Alloc(paddedLen);
+            if (!allocBuf) {
+                return SECFailure;
+            }
+        }
+        /* We pass NULL for the first IV argument because we don't know
+         * what the IV is, since it includes the length, so we don't have
+         * Winv verify it. We pass iv in the second argument to get the
+         * iv, which we verify below before we return anything */
+        rv = AESKeyWrap_Winv(cx, NULL, iv, newBuf, &outLen,
+                             paddedLen, input, inputLen);
+    }
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = SECFailure;
+    if (outLen != paddedLen) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        goto loser;
+    }
+
+    /* we verify the result in a constant time manner */
+    /* verify ICV magic */
+    magic = decode_PRUint32_BE(iv);
+    good = PORT_CT_EQ(magic, AES_KEY_WRAP_ICV2_INT32);
+    /* fetch and verify plain text length */
+    outLen = decode_PRUint32_BE(iv + AES_KEY_WRAP_ICV2_LEN);
+    good &= PORT_CT_LE(outLen, paddedLen);
+    /* now verify the padding */
+    padLen = paddedLen - outLen;
+    padLen2 = BLOCK_PAD_POWER2(outLen, AES_KEY_WRAP_BLOCK_SIZE);
+    good &= PORT_CT_EQ(padLen, padLen2);
+    /* always inspect the last AES_KEY_WRAP_BLOCK_SIZE bytes; only those
+     * inside the claimed padding have to be zero */
+    for (i = 0; i < AES_KEY_WRAP_BLOCK_SIZE; i++) {
+        unsigned int doTest = PORT_CT_GT(padLen, i);
+        unsigned int result = PORT_CT_ZERO(newBuf[paddedLen - i - 1]);
+        good &= PORT_CT_SEL(doTest, result, PORT_CT_TRUE);
+    }
+
+    /* now if anything was wrong, fail. At this point we will leak timing
+     * information, but we also 'leak' the error code as well. */
+    if (!good) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto loser;
+    }
+
+    /* now copy out the result */
+    *pOutputLen = outLen;
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto loser;
+    }
+    if (output != newBuf) {
+        PORT_Memcpy(output, newBuf, outLen);
+    }
+    rv = SECSuccess;
+loser:
+    /* if we failed, make sure we don't return any data to the user */
+    if ((rv != SECSuccess) && (output == newBuf)) {
+        PORT_Memset(newBuf, 0, paddedLen);
+    }
+    /* clear out CSP sensitive data from the heap and stack */
+    if (allocBuf) {
+        PORT_ZFree(allocBuf, paddedLen);
+    }
+    PORT_Memset(iv, 0, sizeof(iv));
+    return rv;
+}
diff --git a/security/nss/lib/freebl/alghmac.c b/security/nss/lib/freebl/alghmac.c
new file mode 100644
index 0000000000..58bbaa9ec8
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.c
@@ -0,0 +1,209 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "alghmac.h"
+#include "secerr.h"
+
+#define HMAC_PAD_SIZE HASH_BLOCK_LENGTH_MAX /* size of the ipad/opad buffers */
+
+struct HMACContextStr {
+ void *hash; /* hash state obtained from hashobj->create() or hashobj->clone() */
+ const SECHashObject *hashobj; /* hash implementation used for the inner and outer digests */
+ PRBool wasAllocated; /* PR_TRUE when HMAC_Create or HMAC_Clone allocated this context */
+ unsigned char ipad[HMAC_PAD_SIZE]; /* key XORed into 0x36 bytes; first hashobj->blocklength bytes are used */
+ unsigned char opad[HMAC_PAD_SIZE]; /* key XORed into 0x5c bytes; first hashobj->blocklength bytes are used */
+};
+
+/* Tear down an HMAC context. A NULL "cx" is ignored. When the context owns
+ * a hash state, that state is destroyed and the whole context (including
+ * the key-derived pads) is zeroed. "freeit" additionally releases the
+ * context itself and is expected to match how the context was obtained. */
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+    if (cx != NULL) {
+        PORT_Assert(!freeit == !cx->wasAllocated);
+        if (cx->hash != NULL) {
+            cx->hashobj->destroy(cx->hash, PR_TRUE);
+            PORT_Memset(cx, 0, sizeof *cx);
+        }
+        if (freeit) {
+            PORT_Free(cx);
+        }
+    }
+}
+
+/* Derive the inner and outer pads of "cx" from "secret".
+ *
+ * A secret longer than the hash block length is first replaced by its
+ * digest. In FIPS mode a secret shorter than half the digest length is
+ * refused with SEC_ERROR_INVALID_ARGS. The stack copy of the digested
+ * secret is wiped before returning. */
+static SECStatus
+hmac_initKey(HMACContext *cx, const unsigned char *secret,
+             unsigned int secret_len, PRBool isFIPS)
+{
+    unsigned char digest[HASH_LENGTH_MAX];
+    SECStatus status = SECFailure;
+    unsigned int k;
+
+    /* required by FIPS 198 Section 3 */
+    if (isFIPS && secret_len < cx->hashobj->length / 2) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* over-long keys are hashed down to digest size */
+    if (secret_len > cx->hashobj->blocklength) {
+        cx->hashobj->begin(cx->hash);
+        cx->hashobj->update(cx->hash, secret, secret_len);
+        PORT_Assert(cx->hashobj->length <= sizeof digest);
+        cx->hashobj->end(cx->hash, digest, &secret_len, sizeof digest);
+        if (secret_len != cx->hashobj->length) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            goto cleanup;
+        }
+        secret = (const unsigned char *)&digest[0];
+    }
+
+    PORT_Memset(cx->ipad, 0x36, cx->hashobj->blocklength);
+    PORT_Memset(cx->opad, 0x5c, cx->hashobj->blocklength);
+
+    /* mix the key bytes into both pads */
+    for (k = 0; k < secret_len; k++) {
+        cx->ipad[k] ^= secret[k];
+        cx->opad[k] ^= secret[k];
+    }
+    status = SECSuccess;
+
+cleanup:
+    PORT_Memset(digest, 0, sizeof digest);
+    return status;
+}
+
+/* Initialize a caller-allocated HMAC context.
+ *   cx          context to fill in
+ *   hash_obj    hash implementation to use
+ *   secret      the HMAC key
+ *   secret_len  length of the key in bytes
+ *   isFIPS      enforce the FIPS 198 minimum key length
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when "cx" or "hash_obj"
+ * is NULL, or when the hash state cannot be created or the key is
+ * rejected. After a failure with a non-NULL "cx", cx->hash is NULL, so a
+ * later HMAC_Destroy on the same context cannot destroy the hash state a
+ * second time. */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+    SECStatus rv;
+
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->wasAllocated = PR_FALSE;
+    cx->hashobj = hash_obj;
+    cx->hash = NULL;
+    if (hash_obj == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->hash = hash_obj->create();
+    if (cx->hash == NULL)
+        return SECFailure;
+
+    rv = hmac_initKey(cx, secret, secret_len, isFIPS);
+    if (rv != SECSuccess) {
+        /* drop the hash state and forget the pointer so it is not
+         * destroyed again later */
+        cx->hashobj->destroy(cx->hash, PR_TRUE);
+        cx->hash = NULL;
+        return SECFailure;
+    }
+
+    return rv;
+}
+
+/* Allocate a zeroed HMAC context and initialize it with HMAC_Init.
+ * Returns the new context, or NULL when the allocation or the
+ * initialization fails. */
+HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS)
+{
+    HMACContext *ctx = PORT_ZNew(HMACContext);
+
+    if (ctx != NULL) {
+        SECStatus status = HMAC_Init(ctx, hash_obj, secret, secret_len, isFIPS);
+
+        /* HMAC_Init marks the context as caller-owned; correct that here */
+        ctx->wasAllocated = PR_TRUE;
+        if (status != SECSuccess) {
+            PORT_Free(ctx); /* contains no secret info */
+            ctx = NULL;
+        }
+    }
+    return ctx;
+}
+
+/* This allows us to reuse an existing HMACContext with a new key and
+ * hash function.
+ *
+ * When the hash function is unchanged and a hash state exists, only the
+ * key is re-derived. Otherwise the context is torn down and initialized
+ * from scratch. On failure of that second path the context keeps its
+ * original wasAllocated flag and has no hash state, so the caller can
+ * still pass it to HMAC_Destroy exactly as before. */
+SECStatus
+HMAC_ReInit(HMACContext *cx, const SECHashObject *hash_obj,
+            const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+    PRBool wasAllocated;
+    SECStatus rv;
+
+    /* if we are using the same hash, keep the hash contexts and only
+     * init the key */
+    if ((cx->hashobj == hash_obj) && (cx->hash != NULL)) {
+        return hmac_initKey(cx, secret, secret_len, isFIPS);
+    }
+    /* otherwise we destroy the contents of the context and
+     * initialize it from scratch. We need to preserve the current state
+     * of wasAllocated so the final destroy works correctly */
+    wasAllocated = cx->wasAllocated;
+    cx->wasAllocated = PR_FALSE;
+    HMAC_Destroy(cx, PR_FALSE);
+    rv = HMAC_Init(cx, hash_obj, secret, secret_len, isFIPS);
+    /* Restore ownership on every path: HMAC_Init always reports the
+     * context as caller-owned, and the caller's eventual HMAC_Destroy
+     * asserts on (and acts on) the original value. */
+    cx->wasAllocated = wasAllocated;
+    if (rv != SECSuccess) {
+        /* any hash state HMAC_Init created has already been destroyed;
+         * make sure no stale pointer is left behind for HMAC_Destroy */
+        cx->hash = NULL;
+        return rv;
+    }
+    return SECSuccess;
+}
+
+/* Restart the inner hash and feed it the first blocklength bytes of the
+ * inner pad. */
+void
+HMAC_Begin(HMACContext *cx)
+{
+    const SECHashObject *h = cx->hashobj;
+
+    h->begin(cx->hash);
+    h->update(cx->hash, cx->ipad, h->blocklength);
+}
+
+/* Feed "data_len" bytes of message data into the running inner hash. */
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len)
+{
+    const SECHashObject *h = cx->hashobj;
+
+    h->update(cx->hash, data, data_len);
+}
+
+/* Complete the HMAC: finish the inner hash into "result", then hash the
+ * outer pad followed by that inner digest and write the final value to
+ * "result" again.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when "max_result_len" is smaller than
+ * the digest length, and fails (without setting an error here) when the
+ * inner hash reports an unexpected length. */
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len)
+{
+    const SECHashObject *h = cx->hashobj;
+
+    if (max_result_len < h->length) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* inner digest */
+    h->end(cx->hash, result, result_len, max_result_len);
+    if (*result_len != h->length) {
+        return SECFailure;
+    }
+
+    /* outer digest over opad || inner digest */
+    h->begin(cx->hash);
+    h->update(cx->hash, cx->opad, h->blocklength);
+    h->update(cx->hash, result, *result_len);
+    h->end(cx->hash, result, result_len, max_result_len);
+    return SECSuccess;
+}
+
+/* Duplicate an HMAC context, including the running hash state and both
+ * pads. Returns a newly allocated context, or NULL when the allocation or
+ * the hash clone fails. */
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+    HMACContext *copy = (HMACContext *)PORT_ZAlloc(sizeof(HMACContext));
+
+    if (copy == NULL) {
+        return NULL;
+    }
+
+    copy->wasAllocated = PR_TRUE;
+    copy->hashobj = cx->hashobj;
+    copy->hash = cx->hashobj->clone(cx->hash);
+    if (copy->hash == NULL) {
+        /* nothing but the bare context to release */
+        HMAC_Destroy(copy, PR_TRUE);
+        return NULL;
+    }
+
+    PORT_Memcpy(copy->ipad, cx->ipad, cx->hashobj->blocklength);
+    PORT_Memcpy(copy->opad, cx->opad, cx->hashobj->blocklength);
+    return copy;
+}
diff --git a/security/nss/lib/freebl/alghmac.h b/security/nss/lib/freebl/alghmac.h
new file mode 100644
index 0000000000..0e0d66a344
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.h
@@ -0,0 +1,70 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALGHMAC_H_
+#define _ALGHMAC_H_
+
+typedef struct HMACContextStr HMACContext;
+
+SEC_BEGIN_PROTOS
+
+/* destroy HMAC context; a NULL cx is ignored. freeit releases the context itself and should be PR_TRUE only for contexts returned by HMAC_Create or HMAC_Clone */
+extern void
+HMAC_Destroy(HMACContext *cx, PRBool freeit);
+
+/* create HMAC context
+ * hash_obj hash object from SECRawHashObjects[]
+ * secret the secret with which the HMAC is performed.
+ * secret_len the length of the secret.
+ * isFIPS true if conforming to FIPS 198.
+ *
+ * NULL is returned if an error occurs.
+ */
+extern HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Create, except caller allocates HMACContext. Returns SECFailure with SEC_ERROR_INVALID_ARGS when cx is NULL. */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+ const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Init, except caller passes in an existing context
+ * previously used by either HMAC_Create or HMAC_Init. When hash_obj is
+ * the hash the context already uses, only the key is replaced. */
+SECStatus
+HMAC_ReInit(HMACContext *cx, const SECHashObject *hash_obj,
+ const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* reset HMAC for a fresh round */
+extern void
+HMAC_Begin(HMACContext *cx);
+
+/* update HMAC
+ * cx HMAC Context
+ * data the data to perform HMAC on
+ * data_len the length of the data to process
+ */
+extern void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len);
+
+/* Finish HMAC -- place the results within result
+ * cx HMAC context
+ * result buffer for resulting hmac'd data
+ * result_len where the resultant hmac length is stored
+ * max_result_len maximum possible length that can be stored in result;
+ * the call fails if this is smaller than the hash output length
+ */
+extern SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+ unsigned int max_result_len);
+
+/* clone a copy of the HMAC state. This is useful when you need
+ * to keep a running hmac but also need to extract portions
+ * partway through the process. Returns NULL on failure.
+ */
+extern HMACContext *
+HMAC_Clone(HMACContext *cx);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/altivec-types.h b/security/nss/lib/freebl/altivec-types.h
new file mode 100644
index 0000000000..118a7e0879
--- /dev/null
+++ b/security/nss/lib/freebl/altivec-types.h
@@ -0,0 +1,25 @@
+/*
+ * altivec-types.h - shorter vector typedefs
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALTIVEC_TYPES_H_
+#define _ALTIVEC_TYPES_H_ 1
+
+#include <altivec.h>
+
+typedef __vector unsigned char vec_u8; /* vector of unsigned char lanes */
+typedef __vector signed char vec_s8; /* vector of signed char lanes */
+typedef __vector unsigned short vec_u16; /* vector of unsigned short lanes */
+typedef __vector signed short vec_s16; /* vector of signed short lanes */
+typedef __vector unsigned int vec_u32; /* vector of unsigned int lanes */
+typedef __vector signed int vec_s32; /* vector of signed int lanes */
+#ifdef __VSX__ /* the long long vector types are only declared when __VSX__ is defined */
+typedef __vector unsigned long long vec_u64; /* vector of unsigned long long lanes */
+typedef __vector signed long long vec_s64; /* vector of signed long long lanes */
+#endif
+typedef __vector float vec_f; /* vector of float lanes */
+
+#endif
diff --git a/security/nss/lib/freebl/arcfive.c b/security/nss/lib/freebl/arcfive.c
new file mode 100644
index 0000000000..dda77710fe
--- /dev/null
+++ b/security/nss/lib/freebl/arcfive.c
@@ -0,0 +1,87 @@
+/*
+ * arcfive.c - stubs for RC5 - NOT a working implementation!
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "prerror.h"
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Stub for creating an RC5 encryption/decryption context.
+**  "key" raw key data
+**  "rounds" and "wordSize" RC5 parameters
+**  "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+**  "mode" one of NSS_RC5 or NSS_RC5_CBC
+**
+** None of the arguments are examined: the call always sets
+** PR_NOT_IMPLEMENTED_ERROR and returns NULL.
+*/
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                  unsigned int wordSize, const unsigned char *iv, int mode)
+{
+    RC5Context *noContext = NULL;
+
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return noContext;
+}
+
+/*
+** Stub for destroying an RC5 encryption/decryption context.
+**  "cx" the context
+**  "freeit" if PR_TRUE then free the object as well as its sub-objects
+**
+** Nothing is released: the call only sets PR_NOT_IMPLEMENTED_ERROR.
+*/
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return;
+}
+
+/*
+** Stub for RC5 encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output".
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+**
+** No data is written and "outputLen" is left untouched: the call always
+** sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+*/
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status = SECFailure;
+
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return status;
+}
+
+/*
+** Stub for RC5 decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output".
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data
+**
+** No data is written and "outputLen" is left untouched: the call always
+** sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+*/
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status = SECFailure;
+
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return status;
+}
diff --git a/security/nss/lib/freebl/arcfour-amd64-gas.s b/security/nss/lib/freebl/arcfour-amd64-gas.s
new file mode 100644
index 0000000000..7c4f5358f1
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-gas.s
@@ -0,0 +1,88 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ** ARCFOUR implementation optimized for AMD64.
+# **
+# ** The throughput achieved by this code is about 320 MBytes/sec, on
+# ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR: # on entry: %rdi = key, %rsi = len, %rdx = in, %rcx = out
+ pushq %rbp # preserve %rbp (restored before ret)
+ pushq %rbx # preserve %rbx (restored before ret)
+ movq %rdi, %rbp # key = ARG(key)
+ movq %rsi, %rbx # rbx = ARG(len)
+ movq %rdx, %rsi # in = ARG(in)
+ movq %rcx, %rdi # out = ARG(out)
+ movq (%rbp), %rcx # x = key->x
+ movq 8(%rbp), %rdx # y = key->y
+ addq $16, %rbp # d = key->data
+ incq %rcx # x++
+ andq $255, %rcx # x &= 0xff
+ leaq -8(%rbx,%rsi), %rbx # rbx = in+len-8
+ movq %rbx, %r9 # tmp = in+len-8
+ movq 0(%rbp,%rcx,8), %rax # tx = d[x]
+ cmpq %rsi, %rbx # cmp in with in+len-8
+ jl .Lend # jump if (in+len-8 < in), i.e. fewer than 8 bytes to process
+
+.Lstart: # main loop: one 8-byte chunk per iteration
+ addq $8, %rsi # increment in
+ addq $8, %rdi # increment out
+
+ # generate the next 8 bytes of the rc4 stream into %r8
+ movq $8, %r11 # byte counter
+1: addb %al, %dl # y += tx
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val]
+ decb %r11b # one fewer stream byte to generate; sets ZF for the jnz below
+ rorq $8, %r8 # (ror does not change ZF)
+ jnz 1b # loop until the byte counter reaches zero
+
+ # xor 8 bytes
+ xorq -8(%rsi), %r8 # combine the stream bytes with the 8 input bytes just passed
+ cmpq %r9, %rsi # cmp in+len-8 with in
+ movq %r8, -8(%rdi) # store the 8 result bytes (mov leaves the flags alone)
+ jle .Lstart # jump if (in <= in+len-8)
+
+.Lend:
+ addq $8, %r9 # tmp = in+len
+
+ # handle the last bytes, one by one
+1: cmpq %rsi, %r9 # cmp in with in+len
+ jle .Lfinished # jump if (in+len <= in)
+ addb %al, %dl # y += tx
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val]
+ xorb (%rsi), %r8b # xor 1 byte
+ movb %r8b, (%rdi) # store the result byte
+ incq %rsi # in++
+ incq %rdi # out++
+ jmp 1b
+
+.Lfinished:
+ decq %rcx # x-- (undo the increment made for the NEXT ROUND)
+ movb %dl, -8(%rbp) # key->y = y
+ movb %cl, -16(%rbp) # key->x = x
+ popq %rbx # restore %rbx
+ popq %rbp # restore %rbp
+ ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack,"",@progbits
+.previous
diff --git a/security/nss/lib/freebl/arcfour-amd64-masm.asm b/security/nss/lib/freebl/arcfour-amd64-masm.asm
new file mode 100644
index 0000000000..1601c4f899
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-masm.asm
@@ -0,0 +1,107 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+; ** ARCFOUR implementation optimized for AMD64.
+; **
+; ** The throughput achieved by this code is about 320 MBytes/sec, on
+; ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.CODE
+
+; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
+; const unsigned char *input, unsigned char *output);
+
+
+ARCFOUR PROC
+
+ push rbp ; preserve rbp (restored before ret)
+ push rbx ; preserve rbx (restored before ret)
+ push rsi ; preserve rsi (restored before ret)
+ push rdi ; preserve rdi (restored before ret)
+
+ mov rbp, rcx ; key = ARG(key)
+ mov rbx, rdx ; rbx = ARG(len)
+ mov rsi, r8 ; in = ARG(in)
+ mov rdi, r9 ; out = ARG(out)
+ mov rcx, [rbp] ; x = key->x
+ mov rdx, [rbp+8] ; y = key->y
+ add rbp, 16 ; d = key->data
+ inc rcx ; x++
+ and rcx, 0ffh ; x &= 0xff
+ lea rbx, [rbx+rsi-8] ; rbx = in+len-8
+ mov r9, rbx ; tmp = in+len-8
+ mov rax, [rbp+rcx*8] ; tx = d[x]
+ cmp rbx, rsi ; cmp in with in+len-8
+ jl Lend ; jump if (in+len-8 < in), i.e. fewer than 8 bytes to process
+
+Lstart:
+ add rsi, 8 ; increment in
+ add rdi, 8 ; increment out
+
+ ;
+ ; generate the next 8 bytes of the rc4 stream into r8
+ ;
+
+ mov r11, 8 ; byte counter
+
+@@:
+ add dl, al ; y += tx
+ mov ebx, [rbp+rdx*8] ; ty = d[y]
+ mov [rbp+rcx*8], ebx ; d[x] = ty
+ add bl, al ; val = ty + tx
+ mov [rbp+rdx*8], eax ; d[y] = tx
+ inc cl ; x++ (NEXT ROUND)
+ mov eax, [rbp+rcx*8] ; tx = d[x] (NEXT ROUND)
+ mov r8b, [rbp+rbx*8] ; val = d[val]
+ dec r11b ; one fewer stream byte to generate; sets ZF for the jnz below
+ ror r8, 8 ; (ror does not change ZF)
+ jnz @b ; loop until the byte counter reaches zero
+
+ ;
+ ; xor 8 bytes
+ ;
+
+ xor r8, [rsi-8] ; combine the stream bytes with the 8 input bytes just passed
+ cmp rsi, r9 ; cmp in+len-8 with in
+ mov [rdi-8], r8 ; store the 8 result bytes (mov leaves the flags alone)
+ jle Lstart ; jump if (in <= in+len-8)
+
+Lend:
+ add r9, 8 ; tmp = in+len
+
+ ;
+ ; handle the last bytes, one by one
+ ;
+
+@@:
+ cmp r9, rsi ; cmp in with in+len
+ jle Lfinished ; jump if (in+len <= in)
+ add dl, al ; y += tx
+ mov ebx, [rbp+rdx*8] ; ty = d[y]
+ mov [rbp+rcx*8], ebx ; d[x] = ty
+ add bl, al ; val = ty + tx
+ mov [rbp+rdx*8], eax ; d[y] = tx
+ inc cl ; x++ (NEXT ROUND)
+ mov eax, [rbp+rcx*8] ; tx = d[x] (NEXT ROUND)
+ mov r8b, [rbp+rbx*8] ; val = d[val]
+ xor r8b, [rsi] ; xor 1 byte
+ mov [rdi], r8b ; store the result byte
+ inc rsi ; in++
+ inc rdi ; out++
+ jmp @b
+
+Lfinished:
+ dec rcx ; x-- (undo the increment made for the NEXT ROUND)
+ mov [rbp-8], dl ; key->y = y
+ mov [rbp-16], cl ; key->x = x
+
+ pop rdi ; restore rdi
+ pop rsi ; restore rsi
+ pop rbx ; restore rbx
+ pop rbp ; restore rbp
+ ret
+
+ARCFOUR ENDP
+
+END
diff --git a/security/nss/lib/freebl/arcfour-amd64-sun.s b/security/nss/lib/freebl/arcfour-amd64-sun.s
new file mode 100644
index 0000000000..8b649f9014
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-sun.s
@@ -0,0 +1,84 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/ ** ARCFOUR implementation optimized for AMD64.
+/ **
+/ ** The throughput achieved by this code is about 320 MBytes/sec, on
+/ ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR: / on entry: %rdi = key, %rsi = len, %rdx = in, %rcx = out
+ pushq %rbp / preserve %rbp (restored before ret)
+ pushq %rbx / preserve %rbx (restored before ret)
+ movq %rdi, %rbp / key = ARG(key)
+ movq %rsi, %rbx / rbx = ARG(len)
+ movq %rdx, %rsi / in = ARG(in)
+ movq %rcx, %rdi / out = ARG(out)
+ movq (%rbp), %rcx / x = key->x
+ movq 8(%rbp), %rdx / y = key->y
+ addq $16, %rbp / d = key->data
+ incq %rcx / x++
+ andq $255, %rcx / x &= 0xff
+ leaq -8(%rbx,%rsi), %rbx / rbx = in+len-8
+ movq %rbx, %r9 / tmp = in+len-8
+ movq 0(%rbp,%rcx,8), %rax / tx = d[x]
+ cmpq %rsi, %rbx / cmp in with in+len-8
+ jl .Lend / jump if (in+len-8 < in), i.e. fewer than 8 bytes to process
+
+.Lstart:
+ addq $8, %rsi / increment in
+ addq $8, %rdi / increment out
+
+ / generate the next 8 bytes of the rc4 stream into %r8
+ movq $8, %r11 / byte counter
+1: addb %al, %dl / y += tx
+ movl 0(%rbp,%rdx,8), %ebx / ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) / d[x] = ty
+ addb %al, %bl / val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) / d[y] = tx
+ incb %cl / x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax / tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b / val = d[val]
+ decb %r11b / one fewer stream byte to generate; sets ZF for the jnz below
+ rorq $8, %r8 / (ror does not change ZF)
+ jnz 1b / loop until the byte counter reaches zero
+
+ / xor 8 bytes
+ xorq -8(%rsi), %r8 / combine the stream bytes with the 8 input bytes just passed
+ cmpq %r9, %rsi / cmp in+len-8 with in
+ movq %r8, -8(%rdi) / store the 8 result bytes (mov leaves the flags alone)
+ jle .Lstart / jump if (in <= in+len-8)
+
+.Lend:
+ addq $8, %r9 / tmp = in+len
+
+ / handle the last bytes, one by one
+1: cmpq %rsi, %r9 / cmp in with in+len
+ jle .Lfinished / jump if (in+len <= in)
+ addb %al, %dl / y += tx
+ movl 0(%rbp,%rdx,8), %ebx / ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) / d[x] = ty
+ addb %al, %bl / val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) / d[y] = tx
+ incb %cl / x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax / tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b / val = d[val]
+ xorb (%rsi), %r8b / xor 1 byte
+ movb %r8b, (%rdi) / store the result byte
+ incq %rsi / in++
+ incq %rdi / out++
+ jmp 1b
+
+.Lfinished:
+ decq %rcx / x-- (undo the increment made for the NEXT ROUND)
+ movb %dl, -8(%rbp) / key->y = y
+ movb %cl, -16(%rbp) / key->x = x
+ popq %rbx / restore %rbx
+ popq %rbp / restore %rbp
+ ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
diff --git a/security/nss/lib/freebl/arcfour.c b/security/nss/lib/freebl/arcfour.c
new file mode 100644
index 0000000000..72e696e523
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour.c
@@ -0,0 +1,594 @@
+/* arcfour.c - the arc four algorithm.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+
+/* Architecture-dependent defines */
+
+#if defined(SOLARIS) || defined(HPUX) || defined(NSS_X86) || \
+ defined(_WIN64)
+/* Convert the byte-stream to a word-stream: with this defined,
+ * RC4_Encrypt/RC4_Decrypt dispatch to rc4_wordconv instead of
+ * rc4_unrolled. */
+#define CONVERT_TO_WORDS
+#endif
+
+#if defined(AIX) || defined(NSS_BEVAND_ARCFOUR)
+/* Treat array variables as words, not bytes, on CPUs that take
+ * much longer to write bytes than to write words, or when using
+ * assembler code that requires it.
+ * When set, Stype (the element type of the state array) is WORD
+ * rather than PRUint8.
+ */
+#define USE_WORD
+#endif
+
+#if defined(IS_64) || defined(NSS_BEVAND_ARCFOUR)
+typedef PRUint64 WORD; /* unit used by the word-at-a-time code paths */
+#else
+typedef PRUint32 WORD; /* unit used by the word-at-a-time code paths */
+#endif
+#define WORDSIZE sizeof(WORD) /* bytes per WORD */
+
+#if defined(USE_WORD)
+typedef WORD Stype;
+#else
+typedef PRUint8 Stype;
+#endif
+
+#define ARCFOUR_STATE_SIZE 256 /* entries in S; key length must be below this */
+
+#define MASK1BYTE (WORD)(0xff) /* NOTE(review): not referenced elsewhere in this file */
+
+#define SWAP(a, b) \
+ tmp = a; \
+ a = b; \
+ b = tmp; /* NOTE(review): expands to three bare statements and needs a
+ * variable named tmp in scope at the use site; arguments
+ * are evaluated twice and are not parenthesized. */
+
+/*
+ * State information for stream cipher.
+ *
+ * S is the ARCFOUR_STATE_SIZE-entry state array; i and j are the two
+ * running indices into it (RC4_InitContext resets both to 0).
+ * The members are the same in both layouts; only their order differs:
+ * i and j come first when NSS_ARCFOUR_IJ_B4_S or NSS_BEVAND_ARCFOUR is
+ * defined, presumably to match what the assembler implementations expect.
+ */
+struct RC4ContextStr {
+#if defined(NSS_ARCFOUR_IJ_B4_S) || defined(NSS_BEVAND_ARCFOUR)
+ Stype i;
+ Stype j;
+ Stype S[ARCFOUR_STATE_SIZE];
+#else
+ Stype S[ARCFOUR_STATE_SIZE];
+ Stype i;
+ Stype j;
+#endif
+};
+
+/*
+ * array indices [0..255] to initialize cx->S array (faster than loop).
+ *
+ * Identity table: Kinit[n] == n for every n. RC4_InitContext copies it
+ * into cx->S with a single memcpy before stirring in the key.
+ */
+static const Stype Kinit[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+RC4Context *
+RC4_AllocateContext(void)
+{
+    /* Hand back whatever PORT_ZNew produces; callers in this file treat a
+     * NULL result as allocation failure. The context still has to be keyed
+     * with RC4_InitContext before use. */
+    RC4Context *fresh = PORT_ZNew(RC4Context);
+
+    return fresh;
+}
+
+/*
+ * Run the ARCFOUR key schedule on an already-allocated context.
+ *
+ * cx       context to initialize; must not be NULL.
+ * key,len  key bytes; len must satisfy 0 < len < ARCFOUR_STATE_SIZE and
+ *          key must not be NULL.
+ * unused1..unused4 are ignored by this function.
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_BAD_KEY (bad length)
+ * or SEC_ERROR_INVALID_ARGS (NULL cx or key).
+ */
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *unused1, int unused2,
+                unsigned int unused3, unsigned int unused4)
+{
+    unsigned int i;
+    PRUint8 j, tmp;
+    PRUint8 K[256];
+    PRUint8 *L;
+    volatile PRUint8 *scrub;
+
+    /* verify the key length. */
+    PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE);
+    if (len == 0 || len >= ARCFOUR_STATE_SIZE) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+    /* A NULL key would otherwise be handed straight to memcpy below. */
+    if (cx == NULL || key == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize the state using array indices. */
+    memcpy(cx->S, Kinit, sizeof cx->S);
+    /* Fill in K repeatedly with values from key: whole copies first, then
+     * the i (<= len) bytes that are still free at the end of K. */
+    L = K;
+    for (i = sizeof K; i > len; i -= len) {
+        memcpy(L, key, len);
+        L += len;
+    }
+    memcpy(L, key, i);
+    /* Stir the state of the generator. K now holds the key repeated out
+     * to the size of the state buffer. j is a PRUint8, so the sum wraps
+     * modulo 256 on assignment. */
+    j = 0;
+    for (i = 0; i < ARCFOUR_STATE_SIZE; i++) {
+        j = j + cx->S[i] + K[i];
+        SWAP(cx->S[i], cx->S[j]);
+    }
+    cx->i = 0;
+    cx->j = 0;
+    /* Do not leave the expanded key on the stack. The stores go through a
+     * volatile pointer so that they are not treated as dead stores. */
+    scrub = K;
+    for (i = 0; i < sizeof K; i++) {
+        scrub[i] = 0;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Allocate a context and key it in one call.
+ *
+ * Returns the keyed context, or NULL when either the allocation or
+ * RC4_InitContext fails; in the latter case the context is released with
+ * PORT_ZFree before returning.
+ */
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    RC4Context *ctx = RC4_AllocateContext();
+
+    if (!ctx) {
+        return ctx;
+    }
+    if (RC4_InitContext(ctx, key, len, NULL, 0, 0, 0) == SECSuccess) {
+        return ctx;
+    }
+    PORT_ZFree(ctx, sizeof(*ctx));
+    return NULL;
+}
+
+/*
+ * Release a context. Nothing is done unless freeit is true, in which case
+ * the context is handed to PORT_ZFree together with its size.
+ */
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+    if (!freeit) {
+        return;
+    }
+    PORT_ZFree(cx, sizeof(*cx));
+}
+
+#if defined(NSS_BEVAND_ARCFOUR)
+extern void ARCFOUR(RC4Context *cx, WORD inputLen,
+ const unsigned char *input, unsigned char *output); /* defined outside this file; note the argument order (length, input, output) */
+#else
+/*
+ * Generate the next byte in the stream.
+ *
+ * Operates on names at the expansion site rather than on arguments:
+ * cx (the context), tmpi and tmpj (working copies of cx->i and cx->j),
+ * tmpSi and tmpSj (scratch) and t. It advances tmpi, adds S[tmpi] to
+ * tmpj, swaps S[tmpi] with S[tmpj] and leaves their sum in t; the callers
+ * in this file then use cx->S[t] as the key-stream byte.
+ * Expands to several bare statements, so it must not be used as the
+ * unbraced body of an if/for/while.
+ */
+#define ARCFOUR_NEXT_BYTE() \
+ tmpSi = cx->S[++tmpi]; \
+ tmpj += tmpSi; \
+ tmpSj = cx->S[tmpj]; \
+ cx->S[tmpi] = tmpSj; \
+ cx->S[tmpj] = tmpSi; \
+ t = tmpSi + tmpSj;
+
+#ifdef CONVERT_TO_WORDS
+/*
+ * Reference ARCFOUR loop: one key-stream byte per iteration, no unrolling
+ * and no word tricks.
+ *
+ * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen; otherwise
+ * writes inputLen bytes to output, stores inputLen in *outputLen and
+ * saves the advanced indices back into cx.
+ */
+static SECStatus
+rc4_no_opt(RC4Context *cx, unsigned char *output,
+           unsigned int *outputLen, unsigned int maxOutputLen,
+           const unsigned char *input, unsigned int inputLen)
+{
+    /* t, tmpSi, tmpSj, tmpi and tmpj are the names ARCFOUR_NEXT_BYTE uses. */
+    PRUint8 t;
+    Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i;
+    register PRUint8 tmpj = cx->j;
+    unsigned int pos = 0;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    while (pos < inputLen) {
+        /* Advance the generator, then XOR its byte into the data. */
+        ARCFOUR_NEXT_BYTE();
+        output[pos] = cx->S[t] ^ input[pos];
+        pos++;
+    }
+    *outputLen = inputLen;
+    cx->i = tmpi;
+    cx->j = tmpj;
+    return SECSuccess;
+}
+
+#else
+/* !CONVERT_TO_WORDS */
+
+/*
+ * Byte-at-a-time ARCFOUR, unrolling the loop into 8 pieces.
+ *
+ * The first loop handles inputLen / 8 groups of eight bytes. The
+ * remaining inputLen % 8 bytes are handled by a switch whose cases fall
+ * through, so entering at case k processes exactly k bytes.
+ * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen; otherwise
+ * stores inputLen in *outputLen and saves the advanced indices in cx.
+ */
+static SECStatus
+rc4_unrolled(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ PRUint8 t;
+ Stype tmpSi, tmpSj;
+ register PRUint8 tmpi = cx->i;
+ register PRUint8 tmpj = cx->j;
+ int index;
+ PORT_Assert(maxOutputLen >= inputLen);
+ if (maxOutputLen < inputLen) {
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+ for (index = inputLen / 8; index-- > 0; input += 8, output += 8) {
+ ARCFOUR_NEXT_BYTE();
+ output[0] = cx->S[t] ^ input[0];
+ ARCFOUR_NEXT_BYTE();
+ output[1] = cx->S[t] ^ input[1];
+ ARCFOUR_NEXT_BYTE();
+ output[2] = cx->S[t] ^ input[2];
+ ARCFOUR_NEXT_BYTE();
+ output[3] = cx->S[t] ^ input[3];
+ ARCFOUR_NEXT_BYTE();
+ output[4] = cx->S[t] ^ input[4];
+ ARCFOUR_NEXT_BYTE();
+ output[5] = cx->S[t] ^ input[5];
+ ARCFOUR_NEXT_BYTE();
+ output[6] = cx->S[t] ^ input[6];
+ ARCFOUR_NEXT_BYTE();
+ output[7] = cx->S[t] ^ input[7];
+ }
+ index = inputLen % 8;
+ if (index) {
+ input += index; /* point just past the tail; the cases below index backwards */
+ output += index;
+ switch (index) {
+ case 7:
+ ARCFOUR_NEXT_BYTE();
+ output[-7] = cx->S[t] ^ input[-7]; /* FALLTHRU */
+ case 6:
+ ARCFOUR_NEXT_BYTE();
+ output[-6] = cx->S[t] ^ input[-6]; /* FALLTHRU */
+ case 5:
+ ARCFOUR_NEXT_BYTE();
+ output[-5] = cx->S[t] ^ input[-5]; /* FALLTHRU */
+ case 4:
+ ARCFOUR_NEXT_BYTE();
+ output[-4] = cx->S[t] ^ input[-4]; /* FALLTHRU */
+ case 3:
+ ARCFOUR_NEXT_BYTE();
+ output[-3] = cx->S[t] ^ input[-3]; /* FALLTHRU */
+ case 2:
+ ARCFOUR_NEXT_BYTE();
+ output[-2] = cx->S[t] ^ input[-2]; /* FALLTHRU */
+ case 1:
+ ARCFOUR_NEXT_BYTE();
+ output[-1] = cx->S[t] ^ input[-1]; /* FALLTHRU */
+ default:
+ /* FALLTHRU */
+ ; /* hp-ux build breaks without this */
+ }
+ }
+ cx->i = tmpi;
+ cx->j = tmpj;
+ *outputLen = inputLen;
+ return SECSuccess;
+}
+#endif
+
+#ifdef IS_LITTLE_ENDIAN /* NEXT4BYTES: OR four key-stream bytes into streamWord, starting at bit n */
+#define ARCFOUR_NEXT4BYTES_L(n) \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 8); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 16); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 24); /* first generated byte lands in the lowest bits */
+#else
+#define ARCFOUR_NEXT4BYTES_B(n) \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 24); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 16); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 8); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n); /* first generated byte lands in the highest of the four */
+#endif
+
+#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64)
+/* 64-bit wordsize: ARCFOUR_NEXT_WORD clears streamWord and fills it with
+ * eight key-stream bytes. */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_L(0); \
+ ARCFOUR_NEXT4BYTES_L(32); \
+ }
+#else
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_B(32); \
+ ARCFOUR_NEXT4BYTES_B(0); \
+ }
+#endif
+#else
+/* 32-bit wordsize: ARCFOUR_NEXT_WORD clears streamWord and fills it with
+ * four key-stream bytes. */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_L(0); \
+ }
+#else
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_B(0); \
+ }
+#endif
+#endif
+
+#ifdef IS_LITTLE_ENDIAN /* RSH/LSH are swapped between the two byte orders */
+#define RSH << /* on little-endian RSH is a C left shift; presumably "right" means toward higher addresses */
+#define LSH >>
+#else
+#define RSH >>
+#define LSH <<
+#endif
+
+#ifdef IS_LITTLE_ENDIAN /* bit position of the first-in-memory byte of a WORD, and the step to the next byte */
+#define LEFTMOST_BYTE_SHIFT 0
+#define NEXT_BYTE_SHIFT(shift) shift + 8 /* NOTE(review): expansion is not parenthesized; safe only as a whole right-hand side */
+#else
+#define LEFTMOST_BYTE_SHIFT 8 * (WORDSIZE - 1) /* NOTE(review): expansion is not parenthesized */
+#define NEXT_BYTE_SHIFT(shift) shift - 8 /* NOTE(review): expansion is not parenthesized */
+#endif
+
+#ifdef CONVERT_TO_WORDS
+static SECStatus
+rc4_wordconv(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{ /* Word-at-a-time ARCFOUR. Once the output pointer is word-aligned the
+ * main loop XORs one WORD of key stream against one WORD of input per
+ * iteration; the unaligned head and the tail are done byte by byte.
+ * Inputs shorter than two WORDs are passed to rc4_no_opt.
+ * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen.
+ * NOTE(review): the byte buffers are accessed through WORD pointers
+ * derived by rounding the addresses down; confirm this is acceptable
+ * on every platform that defines CONVERT_TO_WORDS. */
+ PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t));
+ unsigned int inOffset = (PRUword)input % WORDSIZE; /* misalignment of input, in bytes */
+ unsigned int outOffset = (PRUword)output % WORDSIZE; /* misalignment of output, in bytes */
+ register WORD streamWord;
+ register const WORD *pInWord;
+ register WORD *pOutWord;
+ register WORD inWord, nextInWord;
+ PRUint8 t;
+ register Stype tmpSi, tmpSj;
+ register PRUint8 tmpi = cx->i;
+ register PRUint8 tmpj = cx->j;
+ unsigned int bufShift, invBufShift;
+ unsigned int i;
+ const unsigned char *finalIn;
+ unsigned char *finalOut;
+
+ PORT_Assert(maxOutputLen >= inputLen);
+ if (maxOutputLen < inputLen) {
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+ if (inputLen < 2 * WORDSIZE) {
+ /* Ignore word conversion, do byte-at-a-time */
+ return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen);
+ }
+ *outputLen = inputLen; /* set up front; inputLen is decremented below */
+ pInWord = (const WORD *)(input - inOffset); /* word containing the first input byte */
+ pOutWord = (WORD *)(output - outOffset); /* word containing the first output byte */
+ if (inOffset <= outOffset) {
+ bufShift = 8 * (outOffset - inOffset);
+ invBufShift = 8 * WORDSIZE - bufShift;
+ } else {
+ invBufShift = 8 * (inOffset - outOffset);
+ bufShift = 8 * WORDSIZE - invBufShift;
+ } /* bufShift is 0 exactly when both buffers have the same misalignment */
+ /*****************************************************************/
+ /* Step 1: */
+ /* If the first output word is partial, consume the bytes in the */
+ /* first partial output word by loading one or two words of */
+ /* input and shifting them accordingly. Otherwise, just load */
+ /* in the first word of input. At the end of this block, at */
+ /* least one partial word of input should ALWAYS be loaded. */
+ /*****************************************************************/
+ if (outOffset) {
+ unsigned int byteCount = WORDSIZE - outOffset;
+ for (i = 0; i < byteCount; i++) {
+ ARCFOUR_NEXT_BYTE();
+ output[i] = cx->S[t] ^ input[i];
+ }
+ /* Consumed byteCount bytes of input */
+ inputLen -= byteCount;
+ pInWord++;
+
+ /* move to next word of output */
+ pOutWord++;
+
+ /* If buffers are relatively misaligned, shift the bytes in inWord
+ * to be aligned to the output buffer.
+ */
+ if (inOffset < outOffset) {
+ /* The first input word (which may be partial) has more bytes
+ * than needed. Copy the remainder to inWord.
+ */
+ unsigned int shift = LEFTMOST_BYTE_SHIFT;
+ inWord = 0;
+ for (i = 0; i < outOffset - inOffset; i++) {
+ inWord |= (WORD)input[byteCount + i] << shift;
+ shift = NEXT_BYTE_SHIFT(shift);
+ }
+ } else if (inOffset > outOffset) {
+ /* Consumed some bytes in the second input word. Copy the
+ * remainder to inWord.
+ */
+ inWord = *pInWord++;
+ inWord = inWord LSH invBufShift;
+ } else {
+ inWord = 0;
+ }
+ } else {
+ /* output is word-aligned */
+ if (inOffset) {
+ /* Input is not word-aligned. The first word load of input
+ * will not produce a full word of input bytes, so one word
+ * must be pre-loaded. The main loop below will load in the
+ * next input word and shift some of its bytes into inWord
+ * in order to create a full input word. Note that the main
+ * loop must execute at least once because the input must
+ * be at least two words.
+ */
+ unsigned int shift = LEFTMOST_BYTE_SHIFT;
+ inWord = 0;
+ for (i = 0; i < WORDSIZE - inOffset; i++) {
+ inWord |= (WORD)input[i] << shift;
+ shift = NEXT_BYTE_SHIFT(shift);
+ }
+ pInWord++;
+ } else {
+ /* Input is word-aligned. The first word load of input
+ * will produce a full word of input bytes, so nothing
+ * needs to be loaded here.
+ */
+ inWord = 0;
+ }
+ }
+ /*****************************************************************/
+ /* Step 2: main loop */
+ /* At this point the output buffer is word-aligned. Any unused */
+ /* bytes from above will be in inWord (shifted correctly). If */
+ /* the input buffer is unaligned relative to the output buffer, */
+ /* shifting has to be done. */
+ /*****************************************************************/
+ if (bufShift) {
+ /* preloadedByteCount is the number of input bytes pre-loaded
+ * in inWord.
+ */
+ unsigned int preloadedByteCount = bufShift / 8;
+ for (; inputLen >= preloadedByteCount + WORDSIZE;
+ inputLen -= WORDSIZE) {
+ nextInWord = *pInWord++;
+ inWord |= nextInWord RSH bufShift; /* complete the current word */
+ nextInWord = nextInWord LSH invBufShift; /* keep the leftover bytes for the next one */
+ ARCFOUR_NEXT_WORD();
+ *pOutWord++ = inWord ^ streamWord;
+ inWord = nextInWord;
+ }
+ if (inputLen == 0) {
+ /* Nothing left to do. */
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+ }
+ finalIn = (const unsigned char *)pInWord - preloadedByteCount; /* step back over the bytes that were only pre-loaded */
+ } else {
+ for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) {
+ inWord = *pInWord++;
+ ARCFOUR_NEXT_WORD();
+ *pOutWord++ = inWord ^ streamWord;
+ }
+ if (inputLen == 0) {
+ /* Nothing left to do. */
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+ }
+ finalIn = (const unsigned char *)pInWord;
+ }
+ /*****************************************************************/
+ /* Step 3: */
+ /* Do the remaining partial word of input one byte at a time. */
+ /*****************************************************************/
+ finalOut = (unsigned char *)pOutWord;
+ for (i = 0; i < inputLen; i++) {
+ ARCFOUR_NEXT_BYTE();
+ finalOut[i] = cx->S[t] ^ finalIn[i];
+ }
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+}
+#endif
+#endif /* NSS_BEVAND_ARCFOUR */
+
+/*
+ * Encrypt inputLen bytes from input into output.
+ *
+ * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen. Otherwise
+ * the work goes to whichever implementation was selected at build time:
+ * the external ARCFOUR routine, rc4_wordconv or rc4_unrolled. On success
+ * *outputLen is set to inputLen.
+ */
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#if defined(NSS_BEVAND_ARCFOUR)
+    /* External implementation; it returns nothing, so report success. */
+    ARCFOUR(cx, inputLen, input, output);
+    *outputLen = inputLen;
+    status = SECSuccess;
+#elif defined(CONVERT_TO_WORDS)
+    /* Convert the byte-stream to a word-stream */
+    status = rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
+#else
+    /* Operate on bytes, but unroll the main loop */
+    status = rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
+#endif
+    return status;
+}
+
+/*
+ * Decrypt inputLen bytes from input into output.
+ *
+ * decrypt and encrypt are same operation, so this simply forwards to
+ * RC4_Encrypt, which performs the same length check and build-time
+ * dispatch.
+ */
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    return RC4_Encrypt(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
+
+#undef CONVERT_TO_WORDS
+#undef USE_WORD
diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c
new file mode 100644
index 0000000000..2f14bfc978
--- /dev/null
+++ b/security/nss/lib/freebl/blake2b.c
@@ -0,0 +1,428 @@
+/*
+ * blake2b.c - definitions for the blake2b hash function
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+#include "blapi.h"
+#include "blake2b.h"
+#include "crypto_primitives.h"
+
+/**
+ * This contains the BLAKE2b initialization vectors.
+ * blake2b_Begin copies these eight words into ctx->h, and
+ * blake2b_Compress loads them into the upper half of its working
+ * vector (v[8..15]).
+ */
+static const uint64_t iv[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+/**
+ * This contains the table of permutations for blake2b compression function.
+ * Row r lists the order in which ROUND(r) takes the sixteen message
+ * words m[0..15]. There are twelve rows, one per round; rows 10 and 11
+ * repeat rows 0 and 1.
+ */
+static const uint8_t sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+
+/**
+ * Add inc to the two-word counter held in ctx->t: t[0] is the low word
+ * and t[1] receives the carry when t[0] wraps around.
+ */
+void
+blake2b_IncrementCounter(BLAKE2BContext* ctx, const uint64_t inc)
+{
+    const uint64_t low = ctx->t[0] + inc;
+
+    ctx->t[0] = low;
+    if (low < inc) {
+        /* The unsigned addition wrapped, so propagate a carry. */
+        ctx->t[1] += 1;
+    }
+}
+
+/**
+ * This macro implements the blake2b mixing function which mixes two 8-byte
+ * words from the message into the hash.
+ *
+ * a, b, c and d are modified in place; x and y are the two message words.
+ * G expands to several statements without braces and without a final
+ * semicolon, so it must be used as a full statement inside a braced
+ * block. The arguments are expanded more than once.
+ *
+ * ROUND(i) applies G eight times to the working vector v[0..15], first
+ * to the four columns and then to the four diagonals, taking message
+ * words from m[] in the order given by row i of sigma. It relies on
+ * arrays named v and m being in scope at the use site.
+ */
+#define G(a, b, c, d, x, y) \
+ a += b + x; \
+ d = ROTR64(d ^ a, 32); \
+ c += d; \
+ b = ROTR64(b ^ c, 24); \
+ a += b + y; \
+ d = ROTR64(d ^ a, 16); \
+ c += d; \
+ b = ROTR64(b ^ c, 63)
+
+#define ROUND(i) \
+ G(v[0], v[4], v[8], v[12], m[sigma[i][0]], m[sigma[i][1]]); \
+ G(v[1], v[5], v[9], v[13], m[sigma[i][2]], m[sigma[i][3]]); \
+ G(v[2], v[6], v[10], v[14], m[sigma[i][4]], m[sigma[i][5]]); \
+ G(v[3], v[7], v[11], v[15], m[sigma[i][6]], m[sigma[i][7]]); \
+ G(v[0], v[5], v[10], v[15], m[sigma[i][8]], m[sigma[i][9]]); \
+ G(v[1], v[6], v[11], v[12], m[sigma[i][10]], m[sigma[i][11]]); \
+ G(v[2], v[7], v[8], v[13], m[sigma[i][12]], m[sigma[i][13]]); \
+ G(v[3], v[4], v[9], v[14], m[sigma[i][14]], m[sigma[i][15]])
+
+/**
+ * BLAKE2b compression function: absorbs one full BLAKE2B_BLOCK_LENGTH-byte
+ * block into the chained state ctx->h.
+ *
+ * The block is loaded into m[0..15] (passed through FREEBL_HTONLL on
+ * builds without IS_LITTLE_ENDIAN), the working vector v is seeded from
+ * ctx->h and iv, the counter ctx->t and the final-block flag ctx->f are
+ * XORed into v[12..14], twelve rounds are applied, and the two halves of
+ * v are folded back into ctx->h.
+ */
+void
+blake2b_Compress(BLAKE2BContext* ctx, const uint8_t* block)
+{
+    /* v and m are the names the ROUND macro expects. */
+    uint64_t v[16], m[16];
+    size_t i;
+
+    PORT_Memcpy(m, block, BLAKE2B_BLOCK_LENGTH);
+#if !defined(IS_LITTLE_ENDIAN)
+    for (i = 0; i < 16; ++i) {
+        m[i] = FREEBL_HTONLL(m[i]);
+    }
+#endif
+
+    /* Lower half from the chained state, upper half from the IV. */
+    for (i = 0; i < 8; i++) {
+        v[i] = ctx->h[i];
+        v[i + 8] = iv[i];
+    }
+
+    v[12] = v[12] ^ ctx->t[0];
+    v[13] = v[13] ^ ctx->t[1];
+    v[14] = v[14] ^ ctx->f;
+
+    /* The twelve rounds stay unrolled so that every sigma lookup uses a
+     * constant row index. */
+    ROUND(0);
+    ROUND(1);
+    ROUND(2);
+    ROUND(3);
+    ROUND(4);
+    ROUND(5);
+    ROUND(6);
+    ROUND(7);
+    ROUND(8);
+    ROUND(9);
+    ROUND(10);
+    ROUND(11);
+
+    for (i = 0; i < 8; i++) {
+        ctx->h[i] = ctx->h[i] ^ v[i] ^ v[i + 8];
+    }
+}
+
+/**
+ * Allocate a context with PORT_ZNew. This function can be used for both
+ * keyed and unkeyed version; the context still has to be started with
+ * BLAKE2B_Begin or BLAKE2B_MAC_Begin.
+ *
+ * Declared with an explicit (void) parameter list so that the definition
+ * is a real prototype rather than an old-style empty list.
+ */
+BLAKE2BContext*
+BLAKE2B_NewContext(void)
+{
+    return PORT_ZNew(BLAKE2BContext);
+}
+
+/**
+ * Zero the context and, when freeit is true, release it with PORT_Free.
+ * Can be used for both keyed and unkeyed version.
+ *
+ * A NULL ctx is accepted and ignored; previously it was passed straight
+ * to PORT_Memset.
+ */
+void
+BLAKE2B_DestroyContext(BLAKE2BContext* ctx, PRBool freeit)
+{
+    if (ctx == NULL) {
+        return;
+    }
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+
+/**
+ * This function initializes blake2b ctx and can be used for both keyed and
+ * unkeyed version. It also checks ctx and sets error states.
+ *
+ * ctx     context to start; must not be NULL.
+ * outlen  digest length in bytes, 1..BLAKE2B512_LENGTH.
+ * key     key bytes, or NULL for the unkeyed hash.
+ * keylen  0 when key is NULL, otherwise 1..BLAKE2B_KEY_SIZE.
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS. On a
+ * parameter error the context is zeroed (unless ctx itself is NULL).
+ * If absorbing the key block fails, the failure is now reported instead
+ * of being ignored, and the context is left as it is so that its state
+ * keeps being rejected by BLAKE2B_Update and BLAKE2B_End.
+ *
+ * Only h and outlen are written here; t, f and buflen keep whatever the
+ * context already held.
+ */
+static SECStatus
+blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key,
+              size_t keylen)
+{
+    if (!ctx) {
+        goto failure_noclean;
+    }
+    if (outlen == 0 || outlen > BLAKE2B512_LENGTH) {
+        goto failure;
+    }
+    /* Note: key can be null if it's unkeyed. The key pointer and the key
+     * length have to agree: both present or both absent. */
+    if (keylen > BLAKE2B_KEY_SIZE || (key == NULL) != (keylen == 0)) {
+        goto failure;
+    }
+
+    /* Mix key size(keylen) and desired hash length(outlen) into h0. The
+     * two constant bits are the fanout and depth fields of the parameter
+     * block, both set to 1 (see RFC 7693). */
+    uint64_t param = outlen ^ (keylen << 8) ^ (1 << 16) ^ (1 << 24);
+    PORT_Memcpy(ctx->h, iv, 8 * 8);
+    ctx->h[0] ^= param;
+    ctx->outlen = outlen;
+
+    /* This updates the context for only the keyed version: the key is
+     * padded with zeros to a full block and fed in as the first block. */
+    if (key != NULL) {
+        uint8_t block[BLAKE2B_BLOCK_LENGTH] = { 0 };
+        SECStatus rv;
+
+        PORT_Memcpy(block, key, keylen);
+        rv = BLAKE2B_Update(ctx, block, BLAKE2B_BLOCK_LENGTH);
+        PORT_Memset(block, 0, BLAKE2B_BLOCK_LENGTH);
+        if (rv != SECSuccess) {
+            /* The key was not absorbed; do not claim success. */
+            goto failure_noclean;
+        }
+    }
+
+    return SECSuccess;
+
+failure:
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+failure_noclean:
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return SECFailure;
+}
+
+/* Start an unkeyed hash with a BLAKE2B512_LENGTH-byte digest. */
+SECStatus
+BLAKE2B_Begin(BLAKE2BContext* ctx)
+{
+    const uint8_t* const noKey = NULL;
+
+    return blake2b_Begin(ctx, BLAKE2B512_LENGTH, noKey, 0);
+}
+
+/*
+ * Start a keyed hash with a BLAKE2B512_LENGTH-byte digest. A NULL key is
+ * rejected with SEC_ERROR_INVALID_ARGS; the remaining checks are done by
+ * blake2b_Begin.
+ */
+SECStatus
+BLAKE2B_MAC_Begin(BLAKE2BContext* ctx, const PRUint8* key, const size_t keylen)
+{
+    PORT_Assert(key != NULL);
+    if (key != NULL) {
+        return blake2b_Begin(ctx, BLAKE2B512_LENGTH, (const uint8_t*)key, keylen);
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return SECFailure;
+}
+
+/* Account for blockLength bytes in the counter, then compress the block
+ * that input points to. */
+static void
+blake2b_IncrementCompress(BLAKE2BContext* ctx, size_t blockLength,
+                          const unsigned char* input)
+{
+    const uint64_t consumed = blockLength;
+
+    blake2b_IncrementCounter(ctx, consumed);
+    blake2b_Compress(ctx, input);
+}
+
+/**
+ * This function updates blake2b ctx and can be used for both keyed and unkeyed
+ * version.
+ *
+ * Feeds inlen bytes from in into the hash. Whole blocks are compressed
+ * right away, except that the most recent data (between 1 byte and one
+ * full block) is always kept in ctx->buf for BLAKE2B_End.
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS when ctx
+ * or in is NULL or when the context has already been finalized
+ * (ctx->f != 0). A call with inlen == 0 succeeds without looking at the
+ * other arguments.
+ */
+SECStatus
+BLAKE2B_Update(BLAKE2BContext* ctx, const unsigned char* in,
+ unsigned int inlen)
+{
+ /* An empty update is a no-op. */
+ if (inlen == 0) {
+ return SECSuccess;
+ }
+
+ if (!ctx || !in) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ /* Is this a reused context? */
+ if (ctx->f) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ size_t left = ctx->buflen; /* bytes already waiting in ctx->buf */
+ PORT_Assert(left <= BLAKE2B_BLOCK_LENGTH);
+ size_t fill = BLAKE2B_BLOCK_LENGTH - left; /* free space in ctx->buf */
+
+ if (inlen > fill) { /* strictly greater: an exactly full buffer is kept for later */
+ if (ctx->buflen) {
+ /* There's some remaining data in ctx->buf that we have to prepend
+ * to in. */
+ PORT_Memcpy(ctx->buf + left, in, fill);
+ ctx->buflen = 0;
+ blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, ctx->buf);
+ in += fill;
+ inlen -= fill;
+ }
+ while (inlen > BLAKE2B_BLOCK_LENGTH) { /* leaves 1..BLAKE2B_BLOCK_LENGTH bytes */
+ blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, in);
+ in += BLAKE2B_BLOCK_LENGTH;
+ inlen -= BLAKE2B_BLOCK_LENGTH;
+ }
+ }
+
+ /* Store the remaining data from in in ctx->buf to process later.
+ * Note that ctx->buflen can be BLAKE2B_BLOCK_LENGTH. We can't process that
+ * here because we have to update ctx->f before compressing the last block.
+ */
+ PORT_Assert(inlen <= BLAKE2B_BLOCK_LENGTH);
+ PORT_Memcpy(ctx->buf + ctx->buflen, in, inlen);
+ ctx->buflen += inlen;
+
+ return SECSuccess;
+}
+
+/**
+ * Finalize the hash: zero-pad and compress the buffered block with the
+ * last-block flag set, then write the digest to out. It can be used for
+ * both keyed and unkeyed version.
+ *
+ * The number of bytes written is the smaller of BLAKE2B512_LENGTH and
+ * maxDigestLen, and is stored in *digestLen when digestLen is not NULL.
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when ctx or out is NULL,
+ * when that length exceeds the outlen recorded in the context, or when
+ * the context has already been finalized.
+ */
+SECStatus
+BLAKE2B_End(BLAKE2BContext* ctx, unsigned char* out,
+            unsigned int* digestLen, size_t maxDigestLen)
+{
+    size_t pos;
+    unsigned int outlen = PR_MIN(BLAKE2B512_LENGTH, maxDigestLen);
+
+    /* Argument checks, the sanity check against the context's outlen and
+     * the reuse check all report the same error; the evaluation order
+     * keeps ctx from being dereferenced when it is NULL. */
+    if (!ctx || !out || ctx->outlen < outlen || ctx->f != 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Process the remaining data from ctx->buf (padded with 0).
+     * BLAKE2B_BLOCK_LENGTH - ctx->buflen can be 0. */
+    blake2b_IncrementCounter(ctx, ctx->buflen);
+    PORT_Memset(ctx->buf + ctx->buflen, 0, BLAKE2B_BLOCK_LENGTH - ctx->buflen);
+    ctx->f = UINT64_MAX;
+    blake2b_Compress(ctx, ctx->buf);
+
+    /* Serialize h byte by byte, least significant byte of each word
+     * first. */
+    for (pos = 0; pos < outlen; ++pos) {
+        const uint64_t word = ctx->h[pos / 8];
+        const size_t shift = (pos % 8) * 8;
+
+        out[pos] = word >> shift;
+    }
+
+    if (digestLen != NULL) {
+        *digestLen = outlen;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * One-shot hash of inlen bytes from input into output, with an optional
+ * key. Uses a context on the stack, which is zeroed before returning on
+ * every path.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when output is NULL or
+ * when input is NULL while inlen is non-zero; otherwise the result of the
+ * begin / update / end sequence.
+ * NOTE(review): inlen is a size_t but BLAKE2B_Update takes an unsigned
+ * int, so the value is converted at that call.
+ */
+SECStatus
+blake2b_HashBuf(uint8_t* output, const uint8_t* input, uint8_t outlen,
+                size_t inlen, const uint8_t* key, size_t keylen)
+{
+    BLAKE2BContext ctx = { { 0 } };
+    SECStatus rv = SECFailure;
+
+    PORT_Assert(inlen == 0 || input != NULL);
+    if (inlen != 0 && input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto done;
+    }
+
+    PORT_Assert(output != NULL);
+    if (output == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto done;
+    }
+
+    /* Each step runs only if the previous one succeeded. */
+    if (blake2b_Begin(&ctx, outlen, key, keylen) == SECSuccess &&
+        BLAKE2B_Update(&ctx, input, inlen) == SECSuccess &&
+        BLAKE2B_End(&ctx, output, NULL, outlen) == SECSuccess) {
+        rv = SECSuccess;
+    }
+
+done:
+    PORT_Memset(&ctx, 0, sizeof ctx);
+    return rv;
+}
+
+/*
+ * Hash the NUL-terminated string src (terminator excluded) into dest,
+ * producing BLAKE2B512_LENGTH bytes.
+ *
+ * A NULL src is rejected with SEC_ERROR_INVALID_ARGS; previously it was
+ * passed straight to PORT_Strlen. All other checks are done by
+ * blake2b_HashBuf.
+ */
+SECStatus
+BLAKE2B_Hash(unsigned char* dest, const char* src)
+{
+    PORT_Assert(src != NULL);
+    if (src == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return blake2b_HashBuf(dest, (const unsigned char*)src, BLAKE2B512_LENGTH,
+                           PORT_Strlen(src), NULL, 0);
+}
+
+/* Unkeyed one-shot hash of inlen bytes from input; writes a
+ * BLAKE2B512_LENGTH-byte digest to output. */
+SECStatus
+BLAKE2B_HashBuf(unsigned char* output, const unsigned char* input, PRUint32 inlen)
+{
+    const size_t len = inlen;
+
+    return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, len, NULL, 0);
+}
+
+/*
+ * Keyed one-shot hash of inlen bytes from input; writes a
+ * BLAKE2B512_LENGTH-byte digest to output.
+ *
+ * A NULL key is always rejected here with SEC_ERROR_INVALID_ARGS, in the
+ * same way as BLAKE2B_MAC_Begin does. The earlier guard only rejected a
+ * NULL key when keylen was within BLAKE2B_KEY_SIZE and relied on the
+ * callee for the other case. Key length limits are checked by
+ * blake2b_Begin via blake2b_HashBuf.
+ */
+SECStatus
+BLAKE2B_MAC_HashBuf(unsigned char* output, const unsigned char* input,
+                    unsigned int inlen, const unsigned char* key,
+                    unsigned int keylen)
+{
+    PORT_Assert(key != NULL);
+    if (!key) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, inlen, key, keylen);
+}
+
+/* Number of bytes BLAKE2B_Flatten writes: the size of the context
+ * structure. The ctx argument is not inspected. */
+unsigned int
+BLAKE2B_FlattenSize(BLAKE2BContext* ctx)
+{
+    const unsigned int flatSize = sizeof(BLAKE2BContext);
+
+    return flatSize;
+}
+
+/*
+ * Copy the raw context structure into space, which must hold at least
+ * BLAKE2B_FlattenSize() bytes.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when ctx or space is
+ * NULL. The ctx check is new; a NULL ctx used to be handed to
+ * PORT_Memcpy as the source.
+ */
+SECStatus
+BLAKE2B_Flatten(BLAKE2BContext* ctx, unsigned char* space)
+{
+    PORT_Assert(space != NULL);
+    if (!ctx || !space) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    PORT_Memcpy(space, ctx, sizeof(BLAKE2BContext));
+    return SECSuccess;
+}
+
+/*
+ * Rebuild a context from bytes produced by BLAKE2B_Flatten. arg is not
+ * used.
+ *
+ * Returns a newly allocated context, or NULL with
+ * SEC_ERROR_INVALID_ARGS when space is NULL, or NULL with
+ * SEC_ERROR_NO_MEMORY when the allocation fails. The allocation failure
+ * used to be reported as SEC_ERROR_INVALID_ARGS.
+ */
+BLAKE2BContext*
+BLAKE2B_Resurrect(unsigned char* space, void* arg)
+{
+    PORT_Assert(space != NULL);
+    if (!space) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    BLAKE2BContext* ctx = BLAKE2B_NewContext();
+    if (ctx == NULL) {
+        /* Out of memory, not a caller error. */
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    PORT_Memcpy(ctx, space, sizeof(BLAKE2BContext));
+    return ctx;
+}
+
+/*
+ * Copy the whole context structure from src to dest. When either pointer
+ * is NULL nothing is copied and SEC_ERROR_INVALID_ARGS is set; the
+ * function has no return value to report this.
+ */
+void
+BLAKE2B_Clone(BLAKE2BContext* dest, BLAKE2BContext* src)
+{
+    PORT_Assert(dest != NULL);
+    PORT_Assert(src != NULL);
+    if (dest && src) {
+        PORT_Memcpy(dest, src, sizeof(BLAKE2BContext));
+        return;
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+}
diff --git a/security/nss/lib/freebl/blake2b.h b/security/nss/lib/freebl/blake2b.h
new file mode 100644
index 0000000000..d19a49f0ea
--- /dev/null
+++ b/security/nss/lib/freebl/blake2b.h
@@ -0,0 +1,23 @@
+/*
+ * blake2b.h - header file for blake2b hash function
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef BLAKE_H
+#define BLAKE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct Blake2bContextStr {
+ uint64_t h[8]; /* chained state */
+ uint64_t t[2]; /* total number of bytes; t[0] is the low word, t[1] takes the carry */
+ uint64_t f; /* last block flag; set to UINT64_MAX by BLAKE2B_End, non-zero means finalized */
+ uint8_t buf[BLAKE2B_BLOCK_LENGTH]; /* input buffer. NOTE(review): BLAKE2B_BLOCK_LENGTH is not
+ * defined by the two headers included above, so this
+ * header depends on what its includer pulled in first. */
+ size_t buflen; /* size of remaining bytes in buf */
+ size_t outlen; /* digest size */
+};
+
+#endif /* BLAKE_H */
diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h
new file mode 100644
index 0000000000..4b4de66916
--- /dev/null
+++ b/security/nss/lib/freebl/blapi.h
@@ -0,0 +1,1926 @@
+/*
+ * blapi.h - public prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPI_H_
+#define _BLAPI_H_
+
+#include "blapit.h"
+#include "hasht.h"
+#include "cmac.h"
+#include "alghmac.h"
+#include "kyber.h"
+
+SEC_BEGIN_PROTOS
+
+/*
+** RSA encryption/decryption. When encrypting/decrypting the output
+** buffer must be at least the size of the public key modulus.
+*/
+
+extern SECStatus BL_Init(void);
+
+/*
+** Generate and return a new RSA public and private key.
+** Both keys are encoded in a single RSAPrivateKey structure.
+** "cx" is the random number generator context
+** "keySizeInBits" is the size of the key to be generated, in bits.
+** 512, 1024, etc.
+** "publicExponent" when not NULL is a pointer to some data that
+** represents the public exponent to use. The data is a byte
+** encoded integer, in "big endian" order.
+*/
+extern RSAPrivateKey *RSA_NewKey(int keySizeInBits,
+ SECItem *publicExponent);
+
+/*
+** Perform a raw public-key operation
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PublicKeyOp(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a raw private-key operation
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOp(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a raw private-key operation, and check the parameters used in
+** the operation for validity by performing a test operation first.
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a check of private key parameters for consistency.
+*/
+extern SECStatus RSA_PrivateKeyCheck(const RSAPrivateKey *key);
+
+/*
+** Given only minimal private key parameters, fill in the rest of the
+** parameters.
+**
+**
+** All the entries, including those supplied by the caller, will be
+** overwritten with data allocated out of the arena.
+**
+** If no arena is supplied, one will be created.
+**
+** The following fields must be supplied in order for this function
+** to succeed:
+** one of either publicExponent or privateExponent
+** two more of the following 5 parameters (not counting the above).
+** modulus (n)
+** prime1 (p)
+** prime2 (q)
+** publicExponent (e)
+** privateExponent (d)
+**
+** NOTE: if only the publicExponent, privateExponent, and one prime is given,
+** then there may be more than one RSA key that matches that combination. If
+** we find 2 possible valid keys that meet this criteria, we return an error.
+** If we return the wrong key, and the original modulus is compared to the
+** new modulus, both can be factored by calculating gcd(n_old,n_new) to get
+** the common prime.
+**
+** NOTE: in some cases the publicExponent must be less than 2^23 for this
+** function to work correctly. (The case where we have only one of: modulus
+** prime1 and prime2).
+**
+** All parameters will be replaced in the key structure with new parameters
+** allocated out of the arena. There is no attempt to free the old structures.
+** prime1 will always be greater than prime2 (even if the caller supplies the
+** smaller prime as prime1 or the larger prime as prime2). The parameters are
+** not overwritten on failure.
+**
+** While the remaining Chinese remainder theorem parameters (dp, dq, and qinv)
+** can also be used in reconstructing the private key, they are currently
+** ignored in this implementation.
+*/
+extern SECStatus RSA_PopulatePrivateKey(RSAPrivateKey *key);
+
+/********************************************************************
+** RSA algorithm
+*/
+
+/********************************************************************
+** Raw signing/encryption/decryption operations.
+**
+** No padding or formatting will be applied.
+** inputLen MUST be equivalent to the modulus size (in bytes).
+*/
+extern SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+
+extern SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+ unsigned char *data,
+ unsigned int *dataLen,
+ unsigned int maxDataLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+extern SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSAES-OAEP encryption/decryption, as defined in RFC 3447, Section 7.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "seed" should be NULL, indicating that
+** freebl should generate a random value. Otherwise, it should be an octet
+** string of seedLen bytes, which should be the same size as the output of
+** hashAlg.
+*/
+extern SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ const unsigned char *seed,
+ unsigned int seedLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSAES-PKCS1-v1_5 encryption/decryption, as defined in RFC 3447, Section 7.2.
+*/
+extern SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSASSA-PSS signing/verifying, as defined in RFC 3447, Section 8.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "salt" should be NULL, indicating that
+** freebl should generate a random value.
+*/
+extern SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ unsigned int saltLen,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+
+/********************************************************************
+** RSASSA-PKCS1-v1_5 signing/verifying, as defined in RFC 3447, Section 8.2.
+**
+** These functions expect as input to be the raw value to be signed. For most
+** cases using PKCS1-v1_5, this should be the value of T, the DER-encoded
+** DigestInfo structure defined in Section 9.2, Step 2.
+** Note: This can also be used for signatures that use PKCS1-v1_5 padding, such
+** as the signatures used in SSL/TLS, which sign a raw hash.
+*/
+extern SECStatus
+RSA_Sign(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+/********************************************************************
+** DSA signing algorithm
+*/
+
+/* Generate a new random value within the interval [2, q-1].
+ */
+extern SECStatus DSA_NewRandom(PLArenaPool *arena, const SECItem *q,
+ SECItem *random);
+
+/*
+** Generate and return a new DSA public and private key pair,
+** both of which are encoded into a single DSAPrivateKey struct.
+** "params" is a pointer to the PQG parameters for the domain
+** Uses a random seed.
+*/
+extern SECStatus DSA_NewKey(const PQGParams *params,
+ DSAPrivateKey **privKey);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus DSA_SignDigest(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+*/
+extern SECStatus DSA_VerifyDigest(DSAPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes long */
+extern SECStatus DSA_NewKeyFromSeed(const PQGParams *params,
+ const unsigned char *seed,
+ DSAPrivateKey **privKey);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes. */
+extern SECStatus DSA_SignDigestWithSeed(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed);
+
+/******************************************************
+** Diffie-Hellman key exchange algorithm
+*/
+
+/* Generates parameters for Diffie-Hellman key generation.
+** primeLen is the length in bytes of prime P to be generated.
+*/
+extern SECStatus DH_GenParam(int primeLen, DHParams **params);
+
+/* Generates a public and private key, both of which are encoded in a single
+** DHPrivateKey struct. Params is input, privKey is output.
+** This is Phase 1 of Diffie Hellman.
+*/
+extern SECStatus DH_NewKey(DHParams *params,
+ DHPrivateKey **privKey);
+
+/*
+** DH_Derive does the Diffie-Hellman phase 2 calculation, using the
+** other party's publicValue, and the prime and our privateValue.
+** outBytes is the requested length of the generated secret in bytes.
+** A zero value means produce a value of any length up to the size of
+** the prime. If successful, derivedSecret->data is set
+** to the address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret produced.
+** The size of the secret produced will depend on the value of outBytes.
+** If outBytes is 0, the key length will be all the significant bytes of
+** the derived secret (leading zeros are dropped). This length could be less
+** than the length of the prime. If outBytes is nonzero, the length of the
+** produced key will be outBytes long. If the key is truncated, the most
+** significant bytes are truncated. If it is expanded, zero bytes are added
+** at the beginning.
+** It is the caller's responsibility to free the allocated buffer
+** containing the derived secret.
+*/
+extern SECStatus DH_Derive(SECItem *publicValue,
+ SECItem *prime,
+ SECItem *privateValue,
+ SECItem *derivedSecret,
+ unsigned int outBytes);
+
+/*
+** KEA_Derive returns an octet string with the private key for a dual
+** Diffie-Hellman key generation as specified for government key exchange.
+*/
+extern SECStatus KEA_Derive(SECItem *prime,
+ SECItem *public1,
+ SECItem *public2,
+ SECItem *private1,
+ SECItem *private2,
+ SECItem *derivedSecret);
+
+/*
+ * verify that a KEA or DSA public key is a valid key for this prime and
+ * subprime domain.
+ */
+extern PRBool KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+/* verify a value is prime */
+PRBool KEA_PrimeCheck(SECItem *prime);
+
+/****************************************
+ * J-PAKE key transport
+ */
+
+/* Given gx == g^x, create a Schnorr zero-knowledge proof for the value x
+ * using the specified hash algorithm and signer ID. The signature is
+ * returned in the values gv and r. testRandom must be NULL for a PRNG
+ * generated random commitment to be used in the signature. When testRandom
+ * is non-NULL, that value must contain a value in the subgroup q; that
+ * value will be used instead of a PRNG-generated commitment in order to
+ * facilitate known-answer tests.
+ *
+ * If gxIn is non-NULL then it must contain a pre-computed value of g^x that
+ * will be used by the function; in this case, the gxOut parameter must be NULL.
+ * If the gxIn parameter is NULL then gxOut must be non-NULL; in this case
+ * gxOut will contain the value g^x on output.
+ *
+ * gx (if not supplied by the caller), gv, and r will be allocated in the arena.
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ * The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *x,
+ const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r);
+
+/* Given gx == g^x, verify the Schnorr zero-knowledge proof (gv, r) for the
+ * value x using the specified hash algorithm and signer ID.
+ *
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *peerID, const SECItem *gx,
+ const SECItem *gv, const SECItem *r);
+
+/* Call before round 2 with x2, s, and x2s all non-NULL. This will calculate
+ * base = g^(x1+x3+x4) (mod p) and x2s = x2*s (mod q). The values to send in
+ * round 2 (A and the proof of knowledge of x2s) can then be calculated with
+ * JPAKE_Sign using pqg->base = base and x = x2s.
+ *
+ * Call after round 2 with x2, s, and x2s all NULL, and passing (gx1, gx2, gx3)
+ * instead of (gx1, gx3, gx4). This will calculate base = g^(x1+x2+x3). Then call
+ * JPAKE_Verify with pqg->base = base and then JPAKE_Final.
+ *
+ * base and x2s will be allocated in the arena. The arena is *not* optional so
+ * do not pass NULL for the arena parameter. The arena should be zeroed when it
+ * is freed.
+ */
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+ SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s);
+
+/* K = (B/g^(x2*x4*s))^x2 (mod p)
+ *
+ * K will be allocated in the arena. The arena is *not* optional so do not pass
+ * NULL for the arena parameter. The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+ const SECItem *B, SECItem *K);
+
+/******************************************************
+** Elliptic Curve algorithms
+*/
+
+/* Generates a public and private key, both of which are encoded
+** in a single ECPrivateKey struct. Params is input, privKey is
+** output.
+*/
+extern SECStatus EC_NewKey(ECParams *params,
+ ECPrivateKey **privKey);
+
+extern SECStatus EC_NewKeyFromSeed(ECParams *params,
+ ECPrivateKey **privKey,
+ const unsigned char *seed,
+ int seedlen);
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. Such validation prevents against small subgroup attacks
+ * when the ECDH primitive is used with the cofactor.
+ */
+extern SECStatus EC_ValidatePublicKey(ECParams *params,
+ SECItem *publicValue);
+
+/*
+** ECDH_Derive performs a scalar point multiplication of a point
+** representing a (peer's) public key and a large integer representing
+** a private key (its own). Both keys must use the same elliptic curve
+** parameters. If the withCofactor parameter is true, the
+** multiplication also uses the cofactor associated with the curve
+** parameters. The output of this scheme is the x-coordinate of the
+** resulting point. If successful, derivedSecret->data is set to the
+** address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+*/
+extern SECStatus ECDH_Derive(SECItem *publicValue,
+ ECParams *params,
+ SECItem *privateValue,
+ PRBool withCofactor,
+ SECItem *derivedSecret);
+
+/* On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus ECDSA_SignDigest(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+/* On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+*/
+extern SECStatus ECDSA_VerifyDigest(ECPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+/* Uses the provided seed. */
+extern SECStatus ECDSA_SignDigestWithSeed(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed,
+ const int seedlen);
+
+/******************************************/
+/*
+** RC4 symmetric stream cypher
+*/
+
+/*
+** Create a new RC4 context suitable for RC4 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+*/
+extern RC4Context *RC4_CreateContext(const unsigned char *key, int len);
+
+extern RC4Context *RC4_AllocateContext(void);
+extern SECStatus RC4_InitContext(RC4Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+
+/*
+** Destroy an RC4 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC4_DestroyContext(RC4Context *cx, PRBool freeit);
+
+/*
+** Perform RC4 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Encrypt(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC4 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC2 symmetric block cypher
+*/
+
+/*
+** Create a new RC2 context suitable for RC2 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC)
+** "mode" one of NSS_RC2 or NSS_RC2_CBC
+** "effectiveKeyLen" is the effective key length (as specified in
+** RFC 2268) in bytes (not bits).
+**
+** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC2Context *RC2_CreateContext(const unsigned char *key, unsigned int len,
+ const unsigned char *iv, int mode,
+ unsigned effectiveKeyLen);
+extern RC2Context *RC2_AllocateContext(void);
+extern SECStatus RC2_InitContext(RC2Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int effectiveKeyLen,
+ unsigned int);
+
+/*
+** Destroy an RC2 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC2_DestroyContext(RC2Context *cx, PRBool freeit);
+
+/*
+** Perform RC2 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Encrypt(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC2 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Decrypt(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Create a new RC5 context suitable for RC5 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+** "mode" one of NSS_RC5 or NSS_RC5_CBC
+**
+** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC5Context *RC5_CreateContext(const SECItem *key, unsigned int rounds,
+ unsigned int wordSize, const unsigned char *iv, int mode);
+extern RC5Context *RC5_AllocateContext(void);
+extern SECStatus RC5_InitContext(RC5Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int rounds,
+ unsigned int wordSize);
+
+/*
+** Destroy an RC5 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC5_DestroyContext(RC5Context *cx, PRBool freeit);
+
+/*
+** Perform RC5 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC5_Encrypt(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC5 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+
+extern SECStatus RC5_Decrypt(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** DES symmetric block cypher
+*/
+
+/*
+** Create a new DES context suitable for DES encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_DES_CBC or
+** mode is NSS_DES_EDE3_CBC)
+** "mode" one of NSS_DES, NSS_DES_CBC, NSS_DES_EDE3 or NSS_DES_EDE3_CBC
+** "encrypt" is PR_TRUE if the context will be used for encryption
+**
+** When mode is set to NSS_DES_CBC or NSS_DES_EDE3_CBC then the DES
+** cipher is run in "cipher block chaining" mode.
+*/
+extern DESContext *DES_CreateContext(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+extern DESContext *DES_AllocateContext(void);
+extern SECStatus DES_InitContext(DESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+
+/*
+** Destroy a DES encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void DES_DestroyContext(DESContext *cx, PRBool freeit);
+
+/*
+** Perform DES encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Encrypt(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform DES decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Decrypt(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** SEED symmetric block cypher
+*/
+extern SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, PRBool encrypt);
+extern SEEDContext *SEED_AllocateContext(void);
+extern SECStatus SEED_InitContext(SEEDContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode, unsigned int encrypt,
+ unsigned int);
+extern void SEED_DestroyContext(SEEDContext *cx, PRBool freeit);
+extern SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+extern SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** AES symmetric block cypher (Rijndael)
+*/
+
+/*
+** Create a new AES context suitable for AES encryption/decryption.
+** "key" raw key data
+** "keylen" the number of bytes of key data (16, 24, or 32)
+** "blocklen" is the blocksize to use. NOTE: only 16 is supported!
+*/
+extern AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, int encrypt,
+ unsigned int keylen, unsigned int blocklen);
+extern AESContext *AES_AllocateContext(void);
+extern SECStatus AES_InitContext(AESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+
+/*
+** Destroy an AES encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AES_DestroyContext(AESContext *cx, PRBool freeit);
+
+/*
+** Perform AES encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+/*
+** Perform AES AEAD operation (either encrypt or decrypt), controlled by
+** the context.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+** "params" pointer to an AEAD specific param PKCS #11 param structure
+** "paramsLen" length of the param structure pointed to by params
+** "aad" additional authenticated data
+** "aadLen" the amount of additional authenticated data.
+*/
+extern SECStatus
+AES_AEAD(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ void *params, unsigned int paramsLen,
+ const unsigned char *aad, unsigned int aadLen);
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+/*
+** Create a new AES key wrap context suitable for key wrapping/unwrapping.
+** "key" raw key data
+** "iv" The 8 byte "initial value"
+** "encrypt", a boolean, true for key wrapping, false for unwrapping.
+** "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int encrypt, unsigned int keylen);
+extern AESKeyWrapContext *AESKeyWrap_AllocateContext(void);
+extern SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int,
+ unsigned int encrypt,
+ unsigned int);
+
+/*
+** Destroy an AES KeyWrap context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit);
+
+/*
+** Perform AES key wrap.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES key unwrap.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES padded key wrap.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES padded key unwrap.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** Camellia symmetric block cipher
+*/
+
+/*
+** Create a new Camellia context suitable for Camellia encryption/decryption.
+** "key" raw key data
+** "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, int encrypt, unsigned int keylen);
+
+extern CamelliaContext *Camellia_AllocateContext(void);
+extern SECStatus Camellia_InitContext(CamelliaContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int unused);
+/*
+** Destroy a Camellia encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit);
+
+/*
+** Perform Camellia encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform Camellia decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** ChaCha20 stream cipher
+*/
+
+extern SECStatus ChaCha20_InitContext(ChaCha20Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ const unsigned char *nonce,
+ unsigned int nonceLen,
+ PRUint32 ctr);
+
+extern ChaCha20Context *ChaCha20_CreateContext(const unsigned char *key,
+ unsigned int keyLen,
+ const unsigned char *nonce,
+ unsigned int nonceLen,
+ PRUint32 ctr);
+
+extern void ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit);
+
+/******************************************/
+/*
+** ChaCha20+Poly1305 AEAD
+*/
+
+extern SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ unsigned int tagLen);
+
+extern ChaCha20Poly1305Context *ChaCha20Poly1305_CreateContext(
+ const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+extern void ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx,
+ PRBool freeit);
+
+extern SECStatus ChaCha20Poly1305_Seal(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+extern SECStatus ChaCha20Poly1305_Open(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+extern SECStatus ChaCha20Poly1305_Encrypt(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen, unsigned char *tagOut);
+
+extern SECStatus ChaCha20Poly1305_Decrypt(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen, unsigned char *tagIn);
+
+extern SECStatus ChaCha20_Xor(
+ unsigned char *output, const unsigned char *block, unsigned int len,
+ const unsigned char *k, const unsigned char *nonce, PRUint32 ctr);
+
+/******************************************/
+/*
+** MD5 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+/*
+** Create a new MD5 context
+*/
+extern MD5Context *MD5_NewContext(void);
+
+/*
+** Destroy an MD5 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD5_DestroyContext(MD5Context *cx, PRBool freeit);
+
+/*
+** Reset an MD5 context, preparing it for a fresh round of hashing
+*/
+extern void MD5_Begin(MD5Context *cx);
+
+/*
+** Update the MD5 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void MD5_Update(MD5Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD5 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD5_End(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the MD5 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD5 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int MD5_FlattenSize(MD5Context *cx);
+
+/*
+ * Flatten the MD5 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus MD5_Flatten(MD5Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into an MD5 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern MD5Context *MD5_Resurrect(unsigned char *space, void *arg);
+extern void MD5_Clone(MD5Context *dest, MD5Context *src);
+
+/*
+** trace the intermediate state info of the MD5 hash.
+*/
+extern void MD5_TraceState(MD5Context *cx);
+
+/******************************************/
+/*
+** MD2 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD2
+*/
+extern SECStatus MD2_Hash(unsigned char *dest, const char *src);
+
+/*
+** Create a new MD2 context
+*/
+extern MD2Context *MD2_NewContext(void);
+
+/*
+** Destroy an MD2 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD2_DestroyContext(MD2Context *cx, PRBool freeit);
+
+/*
+** Reset an MD2 context, preparing it for a fresh round of hashing
+*/
+extern void MD2_Begin(MD2Context *cx);
+
+/*
+** Update the MD2 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void MD2_Update(MD2Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD2 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD2_End(MD2Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD2 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int MD2_FlattenSize(MD2Context *cx);
+
+/*
+ * Flatten the MD2 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus MD2_Flatten(MD2Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into an MD2 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern MD2Context *MD2_Resurrect(unsigned char *space, void *arg);
+extern void MD2_Clone(MD2Context *dest, MD2Context *src);
+
+/******************************************/
+/*
+** SHA-1 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+/*
+** Create a new SHA-1 context
+*/
+extern SHA1Context *SHA1_NewContext(void);
+
+/*
+** Destroy a SHA-1 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void SHA1_DestroyContext(SHA1Context *cx, PRBool freeit);
+
+/*
+** Reset a SHA-1 context, preparing it for a fresh round of hashing
+*/
+extern void SHA1_Begin(SHA1Context *cx);
+
+/*
+** Update the SHA-1 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void SHA1_Update(SHA1Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+
+/*
+** Finish the SHA-1 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 20 bytes of digest data are stored
+** "digestLen" where the digest length (20) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA1_End(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the SHA-1 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 20 bytes of digest data are stored
+** "digestLen" where the digest length (20) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA1_EndRaw(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** trace the intermediate state info of the SHA1 hash.
+*/
+extern void SHA1_TraceState(SHA1Context *cx);
+
+/*
+ * Return the size of a buffer needed to flatten the SHA-1 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int SHA1_FlattenSize(SHA1Context *cx);
+
+/*
+ * Flatten the SHA-1 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus SHA1_Flatten(SHA1Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a SHA-1 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern SHA1Context *SHA1_Resurrect(unsigned char *space, void *arg);
+extern void SHA1_Clone(SHA1Context *dest, SHA1Context *src);
+
+/******************************************/
+
+/******************************************/
+/*
+** SHA-2 secure hash function
+** The SHA-2 family includes SHA224, SHA256, SHA384, and SHA512
+*/
+
+extern SHA224Context *SHA224_NewContext(void);
+extern void SHA224_DestroyContext(SHA224Context *cx, PRBool freeit);
+extern void SHA224_Begin(SHA224Context *cx);
+extern void SHA224_Update(SHA224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA224_End(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-224 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 28 bytes of digest data are stored
+** "digestLen" where the digest length (28) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA224_EndRaw(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA224_Hash(unsigned char *dest, const char *src);
+extern void SHA224_TraceState(SHA224Context *cx);
+extern unsigned int SHA224_FlattenSize(SHA224Context *cx);
+extern SECStatus SHA224_Flatten(SHA224Context *cx, unsigned char *space);
+extern SHA224Context *SHA224_Resurrect(unsigned char *space, void *arg);
+extern void SHA224_Clone(SHA224Context *dest, SHA224Context *src);
+
+/******************************************/
+
+extern SHA256Context *SHA256_NewContext(void);
+extern void SHA256_DestroyContext(SHA256Context *cx, PRBool freeit);
+extern void SHA256_Begin(SHA256Context *cx);
+extern void SHA256_Update(SHA256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA256_End(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-256 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 32 bytes of digest data are stored
+** "digestLen" where the digest length (32) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA256_EndRaw(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA256_Hash(unsigned char *dest, const char *src);
+extern void SHA256_TraceState(SHA256Context *cx);
+extern unsigned int SHA256_FlattenSize(SHA256Context *cx);
+extern SECStatus SHA256_Flatten(SHA256Context *cx, unsigned char *space);
+extern SHA256Context *SHA256_Resurrect(unsigned char *space, void *arg);
+extern void SHA256_Clone(SHA256Context *dest, SHA256Context *src);
+
+/******************************************/
+
+extern SHA512Context *SHA512_NewContext(void);
+extern void SHA512_DestroyContext(SHA512Context *cx, PRBool freeit);
+extern void SHA512_Begin(SHA512Context *cx);
+extern void SHA512_Update(SHA512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+/*
+** Export the current state of the SHA-512 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 64 bytes of digest data are stored
+** "digestLen" where the digest length (64) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA512_EndRaw(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern void SHA512_End(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA512_Hash(unsigned char *dest, const char *src);
+extern void SHA512_TraceState(SHA512Context *cx);
+extern unsigned int SHA512_FlattenSize(SHA512Context *cx);
+extern SECStatus SHA512_Flatten(SHA512Context *cx, unsigned char *space);
+extern SHA512Context *SHA512_Resurrect(unsigned char *space, void *arg);
+extern void SHA512_Clone(SHA512Context *dest, SHA512Context *src);
+
+/******************************************/
+
+extern SHA384Context *SHA384_NewContext(void);
+extern void SHA384_DestroyContext(SHA384Context *cx, PRBool freeit);
+extern void SHA384_Begin(SHA384Context *cx);
+extern void SHA384_Update(SHA384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA384_End(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-384 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 48 bytes of digest data are stored
+** "digestLen" where the digest length (48) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA384_EndRaw(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA384_Hash(unsigned char *dest, const char *src);
+extern void SHA384_TraceState(SHA384Context *cx);
+extern unsigned int SHA384_FlattenSize(SHA384Context *cx);
+extern SECStatus SHA384_Flatten(SHA384Context *cx, unsigned char *space);
+extern SHA384Context *SHA384_Resurrect(unsigned char *space, void *arg);
+extern void SHA384_Clone(SHA384Context *dest, SHA384Context *src);
+
+/******************************************/
+/*
+** SHA-3 secure hash function
+** The SHA-3 family includes SHA3_224, SHA3_256, SHA3_384, and SHA3_512
+*/
+
+extern SHA3_224Context *SHA3_224_NewContext(void);
+extern void SHA3_224_DestroyContext(SHA3_224Context *cx, PRBool freeit);
+extern unsigned int SHA3_224_FlattenSize(SHA3_224Context *cx);
+extern void SHA3_224_Begin(SHA3_224Context *cx);
+extern void SHA3_224_Update(SHA3_224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA3_224_End(SHA3_224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+extern SECStatus SHA3_224_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA3_224_Hash(unsigned char *dest, const char *src);
+
+/******************************************/
+
+extern SHA3_256Context *SHA3_256_NewContext(void);
+extern void SHA3_256_DestroyContext(SHA3_256Context *cx, PRBool freeit);
+extern unsigned int SHA3_256_FlattenSize(SHA3_256Context *cx);
+extern void SHA3_256_Begin(SHA3_256Context *cx);
+extern void SHA3_256_Update(SHA3_256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA3_256_End(SHA3_256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+extern SECStatus SHA3_256_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA3_256_Hash(unsigned char *dest, const char *src);
+
+/******************************************/
+
+extern SHA3_384Context *SHA3_384_NewContext(void);
+extern void SHA3_384_DestroyContext(SHA3_384Context *cx, PRBool freeit);
+extern unsigned int SHA3_384_FlattenSize(SHA3_384Context *cx);
+extern void SHA3_384_Begin(SHA3_384Context *cx);
+extern void SHA3_384_Update(SHA3_384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA3_384_End(SHA3_384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+extern SECStatus SHA3_384_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA3_384_Hash(unsigned char *dest, const char *src);
+
+/******************************************/
+
+extern SHA3_512Context *SHA3_512_NewContext(void);
+extern void SHA3_512_DestroyContext(SHA3_512Context *cx, PRBool freeit);
+extern unsigned int SHA3_512_FlattenSize(SHA3_512Context *cx);
+extern void SHA3_512_Begin(SHA3_512Context *cx);
+extern void SHA3_512_Update(SHA3_512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA3_512_End(SHA3_512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+extern SECStatus SHA3_512_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA3_512_Hash(unsigned char *dest, const char *src);
+
+/******************************************/
+/*
+** SHAKE XOF functions from SHA-3
+** The SHAKE family includes SHAKE_128 and SHAKE_256
+*/
+
+extern SHAKE_128Context *SHAKE_128_NewContext(void);
+extern void SHAKE_128_DestroyContext(SHAKE_128Context *cx, PRBool freeit);
+extern void SHAKE_128_Begin(SHAKE_128Context *cx);
+extern void SHAKE_128_Absorb(SHAKE_128Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHAKE_128_SqueezeEnd(SHAKE_128Context *cx, unsigned char *digest,
+ unsigned int digestLen);
+extern SECStatus SHAKE_128_HashBuf(unsigned char *dest, unsigned int dest_len,
+ const unsigned char *src, PRUint32 src_length);
+extern SECStatus SHAKE_128_Hash(unsigned char *dest, unsigned int dest_len, const char *src);
+
+/******************************************/
+
+extern SHAKE_256Context *SHAKE_256_NewContext(void);
+extern void SHAKE_256_DestroyContext(SHAKE_256Context *cx, PRBool freeit);
+extern void SHAKE_256_Begin(SHAKE_256Context *cx);
+extern void SHAKE_256_Absorb(SHAKE_256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHAKE_256_SqueezeEnd(SHAKE_256Context *cx, unsigned char *digest,
+ unsigned int digestLen);
+extern SECStatus SHAKE_256_HashBuf(unsigned char *dest, unsigned int dest_len,
+ const unsigned char *src, PRUint32 src_length);
+extern SECStatus SHAKE_256_Hash(unsigned char *dest, unsigned int dest_len, const char *src);
+
+/****************************************
+ * implement TLS 1.0 Pseudo Random Function (PRF) and TLS P_hash function
+ */
+
+extern SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+ SECItem *result, PRBool isFIPS);
+
+extern SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS);
+
+/******************************************/
+/*
+** Implements the Blake2b hash function.
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using Blake2b
+*/
+extern SECStatus BLAKE2B_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash the "inlen"-byte buffer "input" into "output" using Blake2b
+*/
+extern SECStatus BLAKE2B_HashBuf(unsigned char *output,
+ const unsigned char *input, PRUint32 inlen);
+
+extern SECStatus BLAKE2B_MAC_HashBuf(unsigned char *output,
+ const unsigned char *input,
+ unsigned int inlen,
+ const unsigned char *key,
+ unsigned int keylen);
+
+/*
+** Create a new Blake2b context
+*/
+extern BLAKE2BContext *BLAKE2B_NewContext(void);
+
+/*
+** Destroy a Blake2b secure hash context.
+** "ctx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit);
+
+/*
+** Reset a Blake2b context, preparing it for a fresh round of hashing
+*/
+extern SECStatus BLAKE2B_Begin(BLAKE2BContext *ctx);
+
+extern SECStatus BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key,
+ const size_t keylen);
+
+/*
+** Update the Blake2b hash function with more data.
+*/
+extern SECStatus BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in,
+ unsigned int inlen);
+
+/*
+** Finish the Blake2b hash function. Produce the digested results in "out"
+*/
+extern SECStatus BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out,
+ unsigned int *digestLen, size_t maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the Blake2b Context into
+ * "ctx" the context
+ * returns size;
+ */
+extern unsigned int BLAKE2B_FlattenSize(BLAKE2BContext *ctx);
+
+/*
+ * Flatten the Blake2b Context into a buffer:
+ * "ctx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a Blake2b Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context
+ */
+extern BLAKE2BContext *BLAKE2B_Resurrect(unsigned char *space, void *arg);
+extern void BLAKE2B_Clone(BLAKE2BContext *dest, BLAKE2BContext *src);
+
+/******************************************/
+/*
+** Pseudo Random Number Generation. FIPS compliance desirable.
+*/
+
+/*
+** Initialize the global RNG context and give it some seed input taken
+** from the system. This function is thread-safe and will only allow
+** the global context to be initialized once. The seed input is likely
+** small, so it is imperative that RNG_RandomUpdate() be called with
+** additional seed data before the generator is used. A good way to
+** provide the generator with additional entropy is to call
+** RNG_SystemInfoForRNG(). Note that NSS_Init() does exactly that.
+*/
+extern SECStatus RNG_RNGInit(void);
+
+/*
+** Update the global random number generator with more seeding
+** material
+*/
+extern SECStatus RNG_RandomUpdate(const void *data, size_t bytes);
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.
+*/
+extern SECStatus RNG_GenerateGlobalRandomBytes(void *dest, size_t len);
+
+/* Destroy the global RNG context. After a call to RNG_RNGShutdown()
+** a call to RNG_RNGInit() is required in order to use the generator again,
+** along with seed data (see the comment above RNG_RNGInit()).
+*/
+extern void RNG_RNGShutdown(void);
+
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+ * FIPS 186-2 Change Notice 1 RNG Algorithm 1, used both to
+ * generate the DSA X parameter and as a generic purpose RNG.
+ *
+ * The following two FIPS186Change functions are needed for
+ * NIST RNG Validation System.
+ */
+
+/*
+ * FIPS186Change_GenerateX is now deprecated. It will return SECFailure with
+ * the error set to PR_NOT_IMPLEMENTED_ERROR.
+ */
+extern SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY,
+ const unsigned char *XSEEDj,
+ unsigned char *x_j);
+
+/*
+ * When generating the DSA X parameter, we generate 2*GSIZE bytes
+ * of random output and reduce it mod q.
+ *
+ * Input: w, 2*GSIZE bytes
+ * q, DSA_SUBPRIME_LEN bytes
+ * Output: xj, DSA_SUBPRIME_LEN bytes
+ */
+extern SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+ const unsigned char *q,
+ unsigned char *xj);
+
+/* To allow NIST KAT tests */
+extern SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len);
+
+/*
+ * The following functions are for FIPS poweron self test and FIPS algorithm
+ * testing.
+ */
+extern SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len);
+
+extern SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+ const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Uninstantiate(void);
+
+extern SECStatus
+PRNGTEST_RunHealthTests(void);
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of seed and length of h both equal length of P.
+ * All lengths are specified by "j", according to the table above.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGen(unsigned int j, /* input : determines length of P. */
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by j. Length of h will match length of P.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [20..255] or an error will result.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGenSeedLen(
+ unsigned int j, /* input : determines length of P. */
+ unsigned int seedBytes, /* input : length of seed in bytes.*/
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by L in bits.
+ * Length of Q specified by N in bits.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [N..L*2] or an error will result.
+ *
+ * Note that J uses the above table; L is the exact length. L and N must
+ * match the table below or an error will result:
+ *
+ * L N
+ * 1024 160
+ * 2048 224
+ * 2048 256
+ * 3072 256
+ *
+ * If N or seedBytes are set to zero, then PQG_ParamGenSeedLen will
+ * pick a default value (typically the smallest secure value for these
+ * variables).
+ *
+ * The verify parameters will conform to FIPS186-3 using the smallest
+ * permissible hash for the key strength.
+ */
+extern SECStatus
+PQG_ParamGenV2(
+ unsigned int L, /* input : determines length of P. */
+ unsigned int N, /* input : determines length of Q. */
+ unsigned int seedBytes, /* input : length of seed in bytes.*/
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Test PQGParams for validity as DSS PQG values.
+ * If vfy is non-NULL, test PQGParams to make sure they were generated
+ * using the specified seed, counter, and h values.
+ *
+ * Return value indicates whether Verification operation ran successfully
+ * to completion, but does not indicate if PQGParams are valid or not.
+ * If return value is SECSuccess, then *pResult has these meanings:
+ * SECSuccess: PQGParams are valid.
+ * SECFailure: PQGParams are invalid.
+ *
+ * Verify the PQG against the counter, SEED and h.
+ * These tests are specified in FIPS 186-3 Appendix A.1.1.1, A.1.1.3, and A.2.2
+ * PQG_VerifyParams will automatically choose the appropriate test.
+ */
+
+extern SECStatus PQG_VerifyParams(const PQGParams *params,
+ const PQGVerify *vfy, SECStatus *result);
+
+extern void PQG_DestroyParams(PQGParams *params);
+
+extern void PQG_DestroyVerify(PQGVerify *vfy);
+
+/*
+ * clean-up any global tables freebl may have allocated after it starts up.
+ * This function is not thread safe and should be called only after the
+ * library has been quiesced.
+ */
+extern void BL_Cleanup(void);
+
+/* unload freebl shared library from memory */
+extern void BL_Unload(void);
+
+/**************************************************************************
+ * Verify a given Shared library signature *
+ **************************************************************************/
+PRBool BLAPI_SHVerify(const char *name, PRFuncPtr addr);
+
+/**************************************************************************
+ * Verify a given filename's signature *
+ **************************************************************************/
+PRBool BLAPI_SHVerifyFile(const char *shName);
+
+/**************************************************************************
+ * Verify Our Own Shared library signature *
+ **************************************************************************/
+PRBool BLAPI_VerifySelf(const char *name);
+
+/*********************************************************************/
+extern const SECHashObject *HASH_GetRawHashObject(HASH_HashType hashType);
+
+extern void BL_SetForkState(PRBool forked);
+
+/*
+** prepare an ECParam structure from DEREncoded params
+*/
+extern SECStatus EC_FillParams(PLArenaPool *arena,
+ const SECItem *encodedParams, ECParams *params);
+extern SECStatus EC_DecodeParams(const SECItem *encodedParams,
+ ECParams **ecparams);
+extern SECStatus EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+ const ECParams *srcParams);
+
+/*
+ * use the internal table to get the size in bytes of a single EC point
+ */
+extern int EC_GetPointSize(const ECParams *params);
+
+/*
+ * use the internal table to get the size in bytes of a single EC coordinate
+ */
+extern int EC_GetScalarSize(const ECParams *params);
+
+/* Generate a Kyber key pair with parameters given by |params|. If |seed| is
+ * null this function generates its own randomness internally, otherwise the
+ * key is derived from |seed| using the method defined by |params|. The caller
+ * is responsible for allocating appropriately sized `privKey` and `pubKey`
+ * items.
+ */
+extern SECStatus Kyber_NewKey(KyberParams params, const SECItem *seed, SECItem *privKey, SECItem *pubKey);
+
+/* Encapsulate a random secret to the Kyber public key `pubKey`. If `seed` is
+ * null this function generates its own randomness internally, otherwise the
+ * secret is derived from `seed` using the method defined by `params`. The
+ * caller is responsible for allocating appropriately sized `ciphertext` and
+ * `secret` items. Returns an error if any arguments' length is incompatible
+ * with `params`.
+ */
+extern SECStatus Kyber_Encapsulate(KyberParams params, const SECItem *seed, const SECItem *pubKey, SECItem *ciphertext, SECItem *secret);
+
+/* Decapsulate a secret from a Kyber ciphertext `ciphertext` using the private
+ * key `privKey`. The caller is responsible for allocating an appropriately sized
+ * `secret` item. Returns an error if any arguments' length is incompatible
+ * with `params`.
+ */
+extern SECStatus Kyber_Decapsulate(KyberParams params, const SECItem *privKey, const SECItem *ciphertext, SECItem *secret);
+
+SEC_END_PROTOS
+
+#endif /* _BLAPI_H_ */
diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h
new file mode 100644
index 0000000000..a34ad28a8c
--- /dev/null
+++ b/security/nss/lib/freebl/blapii.h
@@ -0,0 +1,123 @@
+/*
+ * blapii.h - private data structures and prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPII_H_
+#define _BLAPII_H_
+
+#include "blapit.h"
+#include "mpi.h"
+#include "hasht.h"
+
+/* max block size of supported block ciphers */
+#define MAX_BLOCK_SIZE 16
+
+typedef SECStatus (*freeblCipherFunc)(void *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ unsigned int blocksize);
+typedef SECStatus (*freeblAeadFunc)(void *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ void *params, unsigned int paramsLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit);
+
+SEC_BEGIN_PROTOS
+
+#ifndef NSS_FIPS_DISABLED
+SECStatus BL_FIPSEntryOK(PRBool freeblOnly, PRBool rerun);
+PRBool BL_POSTRan(PRBool freeblOnly);
+#endif
+
+#if defined(XP_UNIX) && !defined(NO_FORK_CHECK)
+
+extern PRBool bl_parentForkedAfterC_Initialize;
+
+#define SKIP_AFTER_FORK(x) \
+ if (!bl_parentForkedAfterC_Initialize) \
+ x
+
+#else
+
+#define SKIP_AFTER_FORK(x) x
+
+#endif
+
+SEC_END_PROTOS
+
+#if defined(NSS_X86_OR_X64)
+#define HAVE_UNALIGNED_ACCESS 1
+#endif
+
+#if defined(__clang__)
+#define HAVE_NO_SANITIZE_ATTR __has_attribute(no_sanitize)
+#else
+#define HAVE_NO_SANITIZE_ATTR 0
+#endif
+
+/* Alignment helpers. */
+#if defined(_MSC_VER)
+#define pre_align __declspec(align(16))
+#define post_align
+#elif defined(__GNUC__)
+#define pre_align
+#define post_align __attribute__((aligned(16)))
+#else
+#define pre_align
+#define post_align
+#endif
+
+#if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR
+#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment")))
+#else
+#define NO_SANITIZE_ALIGNMENT
+#endif
+
+#undef HAVE_NO_SANITIZE_ATTR
+
+/* RSA_Init takes no arguments; spelling that as (void) makes the declaration
+ * a real prototype, so a call that passes arguments is rejected at compile
+ * time instead of being silently accepted. */
+SECStatus RSA_Init(void);
+SECStatus generate_prime(mp_int *prime, int primeLen);
+
+SECStatus
+RSA_EMSAEncodePSS(unsigned char *em,
+ unsigned int emLen,
+ unsigned int emBits,
+ const unsigned char *mHash,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen);
+
+/* Freebl state.
+ *
+ * CPU feature queries. Each takes no arguments and returns a PRBool; they are
+ * declared with (void) so that the compiler checks calls against a real
+ * prototype (an empty parameter list in C leaves the arguments unspecified).
+ */
+PRBool aesni_support(void);
+PRBool clmul_support(void);
+PRBool sha_support(void);
+PRBool avx_support(void);
+PRBool avx2_support(void);
+PRBool adx_support(void);
+PRBool ssse3_support(void);
+PRBool sse4_1_support(void);
+PRBool sse4_2_support(void);
+PRBool arm_neon_support(void);
+PRBool arm_aes_support(void);
+PRBool arm_pmull_support(void);
+PRBool arm_sha1_support(void);
+PRBool arm_sha2_support(void);
+PRBool ppc_crypto_support(void);
+
+#ifdef NSS_FIPS_DISABLED
+#define BLAPI_CLEAR_STACK(stack_size)
+#else
+#define BLAPI_CLEAR_STACK(stack_size) \
+ { \
+ volatile char _stkclr[stack_size]; \
+ PORT_Memset((void *)&_stkclr[0], 0, stack_size); \
+ }
+#endif
+
+#endif /* _BLAPII_H_ */
diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h
new file mode 100644
index 0000000000..7b59a812b0
--- /dev/null
+++ b/security/nss/lib/freebl/blapit.h
@@ -0,0 +1,474 @@
+/*
+ * blapit.h - public data structures for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPIT_H_
+#define _BLAPIT_H_
+
+#include "seccomon.h"
+#include "prlink.h"
+#include "plarena.h"
+#include "ecl-exp.h"
+
+/* RC2 operation modes */
+#define NSS_RC2 0
+#define NSS_RC2_CBC 1
+
+/* RC5 operation modes */
+#define NSS_RC5 0
+#define NSS_RC5_CBC 1
+
+/* DES operation modes */
+#define NSS_DES 0
+#define NSS_DES_CBC 1
+#define NSS_DES_EDE3 2
+#define NSS_DES_EDE3_CBC 3
+
+#define DES_KEY_LENGTH 8 /* Bytes */
+
+/* AES operation modes */
+#define NSS_AES 0
+#define NSS_AES_CBC 1
+#define NSS_AES_CTS 2
+#define NSS_AES_CTR 3
+#define NSS_AES_GCM 4
+
+/* Camellia operation modes */
+#define NSS_CAMELLIA 0
+#define NSS_CAMELLIA_CBC 1
+
+/* SEED operation modes */
+#define NSS_SEED 0
+#define NSS_SEED_CBC 1
+
+#define DSA1_SUBPRIME_LEN 20 /* Bytes */
+#define DSA1_SIGNATURE_LEN (DSA1_SUBPRIME_LEN * 2) /* Bytes */
+#define DSA_MAX_SUBPRIME_LEN 32 /* Bytes */
+#define DSA_MAX_SIGNATURE_LEN (DSA_MAX_SUBPRIME_LEN * 2) /* Bytes */
+
+/*
+ * Mark the old defines as deprecated. This will warn code that expected
+ * DSA1 only that they need to change if they are to support DSA2.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 3)
+/* make GCC warn when we use these #defines: each macro expands to a cast
+ * through a typedef that carries the deprecated attribute. */
+typedef int __BLAPI_DEPRECATED __attribute__((deprecated));
+#define DSA_SUBPRIME_LEN ((__BLAPI_DEPRECATED)DSA1_SUBPRIME_LEN)
+#define DSA_SIGNATURE_LEN ((__BLAPI_DEPRECATED)DSA1_SIGNATURE_LEN)
+#define DSA_Q_BITS ((__BLAPI_DEPRECATED)(DSA1_SUBPRIME_LEN * 8))
+#else
+#ifdef _WIN32
+/* This magic gets the windows compiler to give us a deprecation
+ * warning. The names listed here must match the macros defined below. */
+#pragma deprecated(DSA_SUBPRIME_LEN, DSA_SIGNATURE_LEN, DSA_Q_BITS)
+#endif
+#define DSA_SUBPRIME_LEN DSA1_SUBPRIME_LEN
+#define DSA_SIGNATURE_LEN DSA1_SIGNATURE_LEN
+#define DSA_Q_BITS (DSA1_SUBPRIME_LEN * 8)
+#endif
+
+/* XXX We shouldn't have to hard code this limit. For
+ * now, this is the quickest way to support ECDSA signature
+ * processing (ECDSA signature lengths depend on curve
+ * size). This limit is sufficient for curves up to
+ * 576 bits.
+ */
+#define MAX_ECKEY_LEN 72 /* Bytes */
+
+#define EC_MAX_KEY_BITS 521 /* in bits */
+#define EC_MIN_KEY_BITS 256 /* in bits */
+
+/* EC point compression format */
+#define EC_POINT_FORM_COMPRESSED_Y0 0x02
+#define EC_POINT_FORM_COMPRESSED_Y1 0x03
+#define EC_POINT_FORM_UNCOMPRESSED 0x04
+#define EC_POINT_FORM_HYBRID_Y0 0x06
+#define EC_POINT_FORM_HYBRID_Y1 0x07
+
+/*
+ * Number of bytes each hash algorithm produces
+ */
+#define MD2_LENGTH 16 /* Bytes */
+#define MD5_LENGTH 16 /* Bytes */
+#define SHA1_LENGTH 20 /* Bytes */
+#define SHA256_LENGTH 32 /* bytes */
+#define SHA384_LENGTH 48 /* bytes */
+#define SHA512_LENGTH 64 /* bytes */
+#define SHA3_224_LENGTH 28 /* bytes */
+#define SHA3_256_LENGTH 32 /* bytes */
+#define SHA3_384_LENGTH 48 /* bytes */
+#define SHA3_512_LENGTH 64 /* bytes */
+#define BLAKE2B512_LENGTH 64 /* Bytes */
+#define HASH_LENGTH_MAX SHA512_LENGTH
+
+/*
+ * Input block size for each hash algorithm.
+ */
+
+#define MD2_BLOCK_LENGTH 64 /* bytes */
+#define MD5_BLOCK_LENGTH 64 /* bytes */
+#define SHA1_BLOCK_LENGTH 64 /* bytes */
+#define SHA224_BLOCK_LENGTH 64 /* bytes */
+#define SHA256_BLOCK_LENGTH 64 /* bytes */
+#define SHA384_BLOCK_LENGTH 128 /* bytes */
+#define SHA512_BLOCK_LENGTH 128 /* bytes */
+#define SHA3_224_BLOCK_LENGTH 144 /* bytes */
+#define SHA3_256_BLOCK_LENGTH 136 /* bytes */
+#define SHA3_384_BLOCK_LENGTH 104 /* bytes */
+#define SHA3_512_BLOCK_LENGTH 72 /* bytes */
+#define BLAKE2B_BLOCK_LENGTH 128 /* Bytes */
+#define HASH_BLOCK_LENGTH_MAX SHA3_224_BLOCK_LENGTH
+
+#define AES_BLOCK_SIZE 16 /* bytes */
+#define AES_KEY_WRAP_BLOCK_SIZE (AES_BLOCK_SIZE / 2)
+#define AES_KEY_WRAP_IV_BYTES AES_KEY_WRAP_BLOCK_SIZE
+
+#define AES_128_KEY_LENGTH 16 /* bytes */
+#define AES_192_KEY_LENGTH 24 /* bytes */
+#define AES_256_KEY_LENGTH 32 /* bytes */
+
+#define CAMELLIA_BLOCK_SIZE 16 /* bytes */
+
+#define SEED_BLOCK_SIZE 16 /* bytes */
+#define SEED_KEY_LENGTH 16 /* bytes */
+
+#define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048
+
+#define BLAKE2B_KEY_SIZE 64
+
+/*
+ * These values come from the initial key size limits from the PKCS #11
+ * module. They may be arbitrarily adjusted to any value freebl supports.
+ */
+#define RSA_MIN_MODULUS_BITS 128
+#define RSA_MAX_MODULUS_BITS 16384
+#define RSA_MAX_EXPONENT_BITS 64
+#define DH_MIN_P_BITS 128
+#define DH_MAX_P_BITS 16384
+
+/*
+ * The FIPS 186-1 algorithm for generating primes P and Q allows only 9
+ * distinct values for the length of P, and only one value for the
+ * length of Q.
+ * The algorithm uses a variable j to indicate which of the 9 lengths
+ * of P is to be used.
+ * The following table relates j to the lengths of P and Q in bits.
+ *
+ * j bits in P bits in Q
+ * _ _________ _________
+ * 0 512 160
+ * 1 576 160
+ * 2 640 160
+ * 3 704 160
+ * 4 768 160
+ * 5 832 160
+ * 6 896 160
+ * 7 960 160
+ * 8 1024 160
+ *
+ * The FIPS-186-1 compliant PQG generator takes j as an input parameter.
+ *
+ * FIPS 186-3 algorithm specifies 4 distinct P and Q sizes:
+ *
+ * bits in P bits in Q
+ * _________ _________
+ * 1024 160
+ * 2048 224
+ * 2048 256
+ * 3072 256
+ *
+ * The FIPS-186-3 compliant PQG generator (PQG V2) takes arbitrary p and q
+ * lengths as input and returns an error if they aren't in this list.
+ */
+
+#define DSA1_Q_BITS 160
+#define DSA_MAX_P_BITS 3072
+#define DSA_MIN_P_BITS 512
+#define DSA_MAX_Q_BITS 256
+#define DSA_MIN_Q_BITS 160
+
+#if DSA_MAX_Q_BITS != DSA_MAX_SUBPRIME_LEN * 8
+#error "Inconsistent declaration of DSA SUBPRIME/Q parameters in blapit.h"
+#endif
+
+/*
+ * function takes desired number of bits in P,
+ * returns index (0..8) or -1 if number of bits is invalid.
+ */
+#define PQG_PBITS_TO_INDEX(bits) \
+ (((bits) < 512 || (bits) > 1024 || (bits) % 64) ? -1 : (int)((bits)-512) / 64)
+
+/*
+ * function takes index (0-8)
+ * returns number of bits in P for that index, or -1 if index is invalid.
+ */
+#define PQG_INDEX_TO_PBITS(j) (((unsigned)(j) > 8) ? -1 : (512 + 64 * (j)))
+
+/* When we are generating a gcm iv from a random number, we need to calculate
+ * an acceptable iteration count to avoid birthday attacks. (randomly
+ * generating the same IV twice).
+ *
+ * We use the approximation n = sqrt(2*m*p) to find an acceptable n given m
+ * and p.
+ * where n is the number of iterations.
+ * m is the number of possible random values.
+ * p is the probability of collision (0-1).
+ *
+ * We want to calculate the constant number GCMIV_RANDOM_BIRTHDAY_BITS, which
+ * is the number of bits we subtract off of the length of the iv (in bits) to
+ * get a safe count value (log2).
+ *
+ * Since we do the calculation in bits, we need to take log2 of the whole
+ * equation:
+ * log2 n = (1+(log2 m)+(log2 p))/2
+ * Since p < 1, log2 p is negative. Also note that the length of the iv in
+ * bits is log2 m, so if we set GCMIV_RANDOM_BIRTHDAY_BITS = -log2 p - 1,
+ * then we can calculate a safe counter value with:
+ * n = 2^((ivLenBits - GCMIV_RANDOM_BIRTHDAY_BITS)/2)
+ *
+ * If we arbitrarily set p = 10^-18 (1 chance in a billion billion operations)
+ * we get GCMIV_RANDOM_BIRTHDAY_BITS = -(-18)/.301 -1 = 59 (.301 = log10 2)
+ * GCMIV_RANDOM_BIRTHDAY_BITS should be at least 59, call it a round 64. NOTE:
+ * the variable IV size for TLS is 64 bits, which explains why it's not safe
+ * to use a random value for the nonce in TLS. */
+#define GCMIV_RANDOM_BIRTHDAY_BITS 64
+
+/* flag to tell BLAPI_Verify* to rerun the post and integrity tests */
+#define BLAPI_FIPS_RERUN_FLAG '\377' /* 0xff, 255 invalid code for UTF8/ASCII */
+#define BLAPI_FIPS_RERUN_FLAG_STRING "\377" /* The above as a C string */
+
+/***************************************************************************
+** Opaque objects
+*/
+
+struct DESContextStr;
+struct RC2ContextStr;
+struct RC4ContextStr;
+struct RC5ContextStr;
+struct AESContextStr;
+struct CamelliaContextStr;
+struct MD2ContextStr;
+struct MD5ContextStr;
+struct SHA1ContextStr;
+struct SHA256ContextStr;
+struct SHA512ContextStr;
+struct SHA3ContextStr;
+struct SHAKEContextStr;
+struct AESKeyWrapContextStr;
+struct SEEDContextStr;
+struct ChaCha20ContextStr;
+struct ChaCha20Poly1305ContextStr;
+struct Blake2bContextStr;
+
+typedef struct DESContextStr DESContext;
+typedef struct RC2ContextStr RC2Context;
+typedef struct RC4ContextStr RC4Context;
+typedef struct RC5ContextStr RC5Context;
+typedef struct AESContextStr AESContext;
+typedef struct CamelliaContextStr CamelliaContext;
+typedef struct MD2ContextStr MD2Context;
+typedef struct MD5ContextStr MD5Context;
+typedef struct SHA1ContextStr SHA1Context;
+typedef struct SHA256ContextStr SHA256Context;
+/* SHA224Context is really a SHA256ContextStr. This is not a mistake. */
+typedef struct SHA256ContextStr SHA224Context;
+typedef struct SHA512ContextStr SHA512Context;
+/* SHA384Context is really a SHA512ContextStr. This is not a mistake. */
+typedef struct SHA512ContextStr SHA384Context;
+/* All SHA3_*Contexts are the same. This is not a mistake. */
+typedef struct SHA3ContextStr SHA3_224Context;
+typedef struct SHA3ContextStr SHA3_256Context;
+typedef struct SHA3ContextStr SHA3_384Context;
+typedef struct SHA3ContextStr SHA3_512Context;
+typedef struct SHAKEContextStr SHAKE_128Context;
+typedef struct SHAKEContextStr SHAKE_256Context;
+typedef struct AESKeyWrapContextStr AESKeyWrapContext;
+typedef struct SEEDContextStr SEEDContext;
+typedef struct ChaCha20ContextStr ChaCha20Context;
+typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context;
+typedef struct Blake2bContextStr BLAKE2BContext;
+
+/***************************************************************************
+** RSA Public and Private Key structures
+*/
+
+/* member names from PKCS#1, section 7.1 */
+struct RSAPublicKeyStr {
+ PLArenaPool *arena;
+ SECItem modulus;
+ SECItem publicExponent;
+};
+typedef struct RSAPublicKeyStr RSAPublicKey;
+
+/* member names from PKCS#1, section 7.2 */
+struct RSAPrivateKeyStr {
+ PLArenaPool *arena;
+ SECItem version;
+ SECItem modulus;
+ SECItem publicExponent;
+ SECItem privateExponent;
+ SECItem prime1;
+ SECItem prime2;
+ SECItem exponent1;
+ SECItem exponent2;
+ SECItem coefficient;
+};
+typedef struct RSAPrivateKeyStr RSAPrivateKey;
+
+/***************************************************************************
+** DSA Public and Private Key and related structures
+*/
+
+struct PQGParamsStr {
+ PLArenaPool *arena;
+ SECItem prime; /* p */
+ SECItem subPrime; /* q */
+ SECItem base; /* g */
+ /* XXX chrisk: this needs to be expanded to hold j and validationParms (RFC2459 7.3.2) */
+};
+typedef struct PQGParamsStr PQGParams;
+
+struct PQGVerifyStr {
+ PLArenaPool *arena; /* includes this struct, seed, & h. */
+ unsigned int counter;
+ SECItem seed;
+ SECItem h;
+};
+typedef struct PQGVerifyStr PQGVerify;
+
+struct DSAPublicKeyStr {
+ PQGParams params;
+ SECItem publicValue;
+};
+typedef struct DSAPublicKeyStr DSAPublicKey;
+
+struct DSAPrivateKeyStr {
+ PQGParams params;
+ SECItem publicValue;
+ SECItem privateValue;
+};
+typedef struct DSAPrivateKeyStr DSAPrivateKey;
+
+/***************************************************************************
+** Diffie-Hellman Public and Private Key and related structures
+** Structure member names suggested by PKCS#3.
+*/
+
+struct DHParamsStr {
+ PLArenaPool *arena;
+ SECItem prime; /* p */
+ SECItem base; /* g */
+};
+typedef struct DHParamsStr DHParams;
+
+struct DHPublicKeyStr {
+ PLArenaPool *arena;
+ SECItem prime;
+ SECItem base;
+ SECItem publicValue;
+};
+typedef struct DHPublicKeyStr DHPublicKey;
+
+struct DHPrivateKeyStr {
+ PLArenaPool *arena;
+ SECItem prime;
+ SECItem base;
+ SECItem publicValue;
+ SECItem privateValue;
+};
+typedef struct DHPrivateKeyStr DHPrivateKey;
+
+/***************************************************************************
+** Data structures used for elliptic curve parameters and
+** public and private keys.
+*/
+
+/*
+** The ECParams data structures can encode elliptic curve
+** parameters for both GFp and GF2m curves.
+*/
+
+typedef enum { ec_params_explicit,
+ ec_params_named,
+ ec_params_edwards_named,
+ ec_params_montgomery_named,
+} ECParamsType;
+
+typedef enum { ec_field_GFp = 1,
+ ec_field_GF2m,
+ ec_field_plain
+} ECFieldType;
+
+struct ECFieldIDStr {
+ int size; /* field size in bits */
+ ECFieldType type;
+ union {
+ SECItem prime; /* prime p for (GFp) */
+ SECItem poly; /* irreducible binary polynomial for (GF2m) */
+ } u;
+ int k1; /* first coefficient of pentanomial or
+ * the only coefficient of trinomial
+ */
+ int k2; /* two remaining coefficients of pentanomial */
+ int k3;
+};
+typedef struct ECFieldIDStr ECFieldID;
+
+struct ECCurveStr {
+ SECItem a; /* contains octet stream encoding of
+ * field element (X9.62 section 4.3.3)
+ */
+ SECItem b;
+ SECItem seed;
+};
+typedef struct ECCurveStr ECCurve;
+
+struct ECParamsStr {
+ PLArenaPool *arena;
+ ECParamsType type;
+ ECFieldID fieldID;
+ ECCurve curve;
+ SECItem base;
+ SECItem order;
+ int cofactor;
+ SECItem DEREncoding;
+ ECCurveName name;
+ SECItem curveOID;
+};
+typedef struct ECParamsStr ECParams;
+
+struct ECPublicKeyStr {
+ ECParams ecParams;
+ SECItem publicValue; /* elliptic curve point encoded as
+ * octet stream.
+ */
+};
+typedef struct ECPublicKeyStr ECPublicKey;
+
+struct ECPrivateKeyStr {
+ ECParams ecParams;
+ SECItem publicValue; /* encoded ec point */
+ SECItem privateValue; /* private big integer */
+ SECItem version; /* As per SEC 1, Appendix C, Section C.4 */
+};
+typedef struct ECPrivateKeyStr ECPrivateKey;
+
+typedef void *(*BLapiAllocateFunc)(void);
+typedef void (*BLapiDestroyContextFunc)(void *cx, PRBool freeit);
+typedef SECStatus (*BLapiInitContextFunc)(void *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+typedef SECStatus (*BLapiEncrypt)(void *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+#endif /* _BLAPIT_H_ */
diff --git a/security/nss/lib/freebl/blinit.c b/security/nss/lib/freebl/blinit.c
new file mode 100644
index 0000000000..7eb80b397a
--- /dev/null
+++ b/security/nss/lib/freebl/blinit.c
@@ -0,0 +1,584 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "mpi.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "prinit.h"
+#include "prenv.h"
+
+#if defined(_MSC_VER) && !defined(_M_IX86)
+#include <intrin.h> /* for _xgetbv() */
+#endif
+
+#if defined(_WIN64) && defined(__aarch64__)
+#include <windows.h>
+#endif
+
+#if defined(DARWIN)
+#include <TargetConditionals.h>
+#endif
+
+static PRCallOnceType coFreeblInit;
+
+/* State variables. */
+static PRBool aesni_support_ = PR_FALSE;
+static PRBool clmul_support_ = PR_FALSE;
+static PRBool sha_support_ = PR_FALSE;
+static PRBool avx_support_ = PR_FALSE;
+static PRBool avx2_support_ = PR_FALSE;
+static PRBool adx_support_ = PR_FALSE;
+static PRBool ssse3_support_ = PR_FALSE;
+static PRBool sse4_1_support_ = PR_FALSE;
+static PRBool sse4_2_support_ = PR_FALSE;
+static PRBool arm_neon_support_ = PR_FALSE;
+static PRBool arm_aes_support_ = PR_FALSE;
+static PRBool arm_sha1_support_ = PR_FALSE;
+static PRBool arm_sha2_support_ = PR_FALSE;
+static PRBool arm_pmull_support_ = PR_FALSE;
+static PRBool ppc_crypto_support_ = PR_FALSE;
+
+#ifdef NSS_X86_OR_X64
+/*
+ * Adapted from the example code in "How to detect New Instruction support in
+ * the 4th generation Intel Core processor family" by Max Locktyukhin.
+ * https://www.intel.com/content/dam/develop/external/us/en/documents/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
+ *
+ * XGETBV:
+ * Reads an extended control register (XCR) specified by ECX into EDX:EAX.
+ *
+ * check_xcr0_ymm selects XCR0 (ECX = 0) and keeps only the low 32 bits of
+ * the result (EAX). It returns true only when both bits covered by the
+ * mask 6 are set, i.e. when the XMM and YMM state are enabled in XCR0.
+ * In this file it is only reached after CheckX86CPUSupport has confirmed
+ * the XSAVE, OSXSAVE and AVX CPUID bits.
+ */
+static PRBool
+check_xcr0_ymm()
+{
+ PRUint32 xcr0;
+#if defined(_MSC_VER)
+#if defined(_M_IX86)
+ __asm {
+ mov ecx, 0
+ xgetbv
+ mov xcr0, eax
+ }
+#else
+ xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */
+#endif /* _M_IX86 */
+#else /* _MSC_VER */
+ /* Old OSX compilers don't support the xgetbv mnemonic, so the instruction
+ * is emitted in byte form. ECX is loaded with 0, the low half of the
+ * result is read from EAX, and EDX is declared clobbered. */
+ __asm__(".byte 0x0F, 0x01, 0xd0"
+ : "=a"(xcr0)
+ : "c"(0)
+ : "%edx");
+#endif /* _MSC_VER */
+ /* Check if xmm and ymm state are enabled in XCR0 (both bits of mask 6). */
+ return (xcr0 & 6) == 6;
+}
+
+#define ECX_AESNI (1 << 25)
+#define ECX_CLMUL (1 << 1)
+#define ECX_XSAVE (1 << 26)
+#define ECX_OSXSAVE (1 << 27)
+#define ECX_AVX (1 << 28)
+#define EBX_AVX2 (1 << 5)
+#define EBX_ADX (1 << 19)
+#define EBX_BMI1 (1 << 3)
+#define EBX_BMI2 (1 << 8)
+#define EBX_SHA (1 << 29)
+#define ECX_FMA (1 << 12)
+#define ECX_MOVBE (1 << 22)
+#define ECX_SSSE3 (1 << 9)
+#define ECX_SSE4_1 (1 << 19)
+#define ECX_SSE4_2 (1 << 20)
+#define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX)
+#define AVX2_EBX_BITS (EBX_AVX2 | EBX_BMI1 | EBX_BMI2)
+#define AVX2_ECX_BITS (ECX_FMA | ECX_MOVBE)
+
+/*
+ * Query CPUID leaves 1 and 7 and record which x86 instruction-set extensions
+ * freebl may use. A feature flag is set only when the CPU advertises the
+ * feature and the matching NSS_DISABLE_* environment variable is absent.
+ */
+void
+CheckX86CPUSupport()
+{
+    unsigned long eax, ebx, ecx, edx;
+    unsigned long eax7, ebx7, ecx7, edx7;
+    PRBool has_avx = PR_FALSE;
+    /* An opt-out variable vetoes its feature merely by being present. */
+    const PRBool allow_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES") == NULL;
+    const PRBool allow_clmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL") == NULL;
+    const PRBool allow_sha = PR_GetEnvSecure("NSS_DISABLE_HW_SHA") == NULL;
+    const PRBool allow_avx = PR_GetEnvSecure("NSS_DISABLE_AVX") == NULL;
+    const PRBool allow_avx2 = PR_GetEnvSecure("NSS_DISABLE_AVX2") == NULL;
+    const PRBool allow_adx = PR_GetEnvSecure("NSS_DISABLE_ADX") == NULL;
+    const PRBool allow_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3") == NULL;
+    const PRBool allow_sse4_1 = PR_GetEnvSecure("NSS_DISABLE_SSE4_1") == NULL;
+    const PRBool allow_sse4_2 = PR_GetEnvSecure("NSS_DISABLE_SSE4_2") == NULL;
+
+    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+    freebl_cpuid(7, &eax7, &ebx7, &ecx7, &edx7);
+
+    aesni_support_ = (ecx & ECX_AESNI) ? allow_aes : PR_FALSE;
+    clmul_support_ = (ecx & ECX_CLMUL) ? allow_clmul : PR_FALSE;
+    sha_support_ = (ebx7 & EBX_SHA) ? allow_sha : PR_FALSE;
+
+    /* AVX requires both of:
+     * - the AVX, OSXSAVE, and XSAVE bits of ECX from CPUID(EAX=1), and
+     * - the SSE and AVX state bits of XCR0 (check_xcr0_ymm).
+     * XCR0 is only read once the CPUID bits are confirmed.
+     */
+    if ((ecx & AVX_BITS) == AVX_BITS && check_xcr0_ymm()) {
+        has_avx = allow_avx;
+    }
+    avx_support_ = has_avx;
+
+    /* AVX2 requires all of:
+     * - AVX support as determined above,
+     * - the AVX2, BMI1, and BMI2 bits of EBX from CPUID(EAX=7), and
+     * - the FMA and MOVBE bits of ECX from CPUID(EAX=1).
+     * LZCNT support is not checked.
+     */
+    avx2_support_ = (has_avx &&
+                     (ebx7 & AVX2_EBX_BITS) == AVX2_EBX_BITS &&
+                     (ecx & AVX2_ECX_BITS) == AVX2_ECX_BITS)
+                        ? allow_avx2
+                        : PR_FALSE;
+
+    /* CPUID.(EAX=07H, ECX=0H):EBX.ADX[bit 19]=1 means the processor
+     * supports the ADCX and ADOX instructions. */
+    adx_support_ = (ebx7 & EBX_ADX) ? allow_adx : PR_FALSE;
+
+    ssse3_support_ = (ecx & ECX_SSSE3) ? allow_ssse3 : PR_FALSE;
+    sse4_1_support_ = (ecx & ECX_SSE4_1) ? allow_sse4_1 : PR_FALSE;
+    sse4_2_support_ = (ecx & ECX_SSE4_2) ? allow_sse4_2 : PR_FALSE;
+}
+#endif /* NSS_X86_OR_X64 */
+
+/* clang-format off */
+#if (defined(__aarch64__) || defined(__arm__)) && !defined(TARGET_OS_IPHONE)
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+#if (__has_include(<sys/auxv.h>) || defined(__linux__)) && \
+ defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)
+/* This might conflict with the host compiler */
+#if !defined(__ANDROID__)
+#include <sys/auxv.h>
+#endif
+extern unsigned long getauxval(unsigned long type) __attribute__((weak));
+#elif defined(__arm__) || (!defined(__OpenBSD__) && !defined(_WIN64))
+static unsigned long (*getauxval)(unsigned long) = NULL;
+#endif /* defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)*/
+
+#if defined(__FreeBSD__) && !defined(__aarch64__) && __has_include(<sys/auxv.h>)
+/* Avoid conflict with static declaration above */
+#define getauxval freebl_getauxval
+static unsigned long getauxval(unsigned long type)
+{
+ /* Only AT_HWCAP* return unsigned long */
+ if (type != AT_HWCAP && type != AT_HWCAP2) {
+ return 0;
+ }
+
+ unsigned long ret = 0;
+ elf_aux_info(type, &ret, sizeof(ret));
+ return ret;
+}
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26
+#endif
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
+#endif
+
+#endif /* defined(__aarch64__) || defined(__arm__) */
+/* clang-format on */
+
+#if defined(__aarch64__)
+
+#if defined(__linux__)
+// Defines from hwcap.h in Linux kernel - ARM64
+#ifndef HWCAP_AES
+#define HWCAP_AES (1 << 3)
+#endif
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1 << 4)
+#endif
+#ifndef HWCAP_SHA1
+#define HWCAP_SHA1 (1 << 5)
+#endif
+#ifndef HWCAP_SHA2
+#define HWCAP_SHA2 (1 << 6)
+#endif
+#endif /* defined(__linux__) */
+
+#if defined(__FreeBSD__)
+#include <stdint.h>
+#include <machine/armreg.h>
+// Support for older version of armreg.h
+#ifndef ID_AA64ISAR0_AES_VAL
+#define ID_AA64ISAR0_AES_VAL ID_AA64ISAR0_AES
+#endif
+#ifndef ID_AA64ISAR0_SHA1_VAL
+#define ID_AA64ISAR0_SHA1_VAL ID_AA64ISAR0_SHA1
+#endif
+#ifndef ID_AA64ISAR0_SHA2_VAL
+#define ID_AA64ISAR0_SHA2_VAL ID_AA64ISAR0_SHA2
+#endif
+#endif /* defined(__FreeBSD__) */
+
+#if defined(__OpenBSD__)
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#include <machine/armreg.h>
+#endif /* defined(__OpenBSD__) */
+
+/*
+ * Detect the AArch64 crypto extensions (AES, PMULL, SHA-1, SHA-2) with the
+ * mechanism available on the current platform and record the result in the
+ * arm_*_support_ flags. NEON is treated as always present on aarch64. Every
+ * flag can be forced off through its NSS_DISABLE_* environment variable.
+ *
+ * No platform branch returns early: whatever the probe outcome, control
+ * reaches the NEON assignment and the environment overrides at the end.
+ */
+void
+CheckARMSupport()
+{
+#if defined(_WIN64)
+    BOOL arm_crypto_support = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
+    arm_aes_support_ = arm_crypto_support;
+    arm_pmull_support_ = arm_crypto_support;
+    arm_sha1_support_ = arm_crypto_support;
+    arm_sha2_support_ = arm_crypto_support;
+#elif defined(__linux__)
+    /* getauxval may be a weak symbol or a NULL function pointer. */
+    if (getauxval) {
+        long hwcaps = getauxval(AT_HWCAP);
+        arm_aes_support_ = (hwcaps & HWCAP_AES) == HWCAP_AES;
+        arm_pmull_support_ = (hwcaps & HWCAP_PMULL) == HWCAP_PMULL;
+        arm_sha1_support_ = (hwcaps & HWCAP_SHA1) == HWCAP_SHA1;
+        arm_sha2_support_ = (hwcaps & HWCAP_SHA2) == HWCAP_SHA2;
+    }
+#elif defined(__FreeBSD__)
+    /* qemu-user does not support register access from userspace */
+    if (PR_GetEnvSecure("QEMU_EMULATING") == NULL) {
+        uint64_t isar0 = READ_SPECIALREG(id_aa64isar0_el1);
+        arm_aes_support_ = ID_AA64ISAR0_AES_VAL(isar0) >= ID_AA64ISAR0_AES_BASE;
+        arm_pmull_support_ = ID_AA64ISAR0_AES_VAL(isar0) >= ID_AA64ISAR0_AES_PMULL;
+        arm_sha1_support_ = ID_AA64ISAR0_SHA1_VAL(isar0) >= ID_AA64ISAR0_SHA1_BASE;
+        arm_sha2_support_ = ID_AA64ISAR0_SHA2_VAL(isar0) >= ID_AA64ISAR0_SHA2_BASE;
+    }
+#elif defined(__OpenBSD__)
+    const int isar0_mib[] = { CTL_MACHDEP, CPU_ID_AA64ISAR0 };
+    uint64_t isar0;
+    size_t len = sizeof(isar0);
+    /* If the sysctl fails, leave the crypto flags at PR_FALSE but still fall
+     * through, so NEON and the environment overrides below are handled. */
+    if (sysctl(isar0_mib, 2, &isar0, &len, NULL, 0) >= 0) {
+        arm_aes_support_ = ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_BASE;
+        arm_pmull_support_ = ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL;
+        arm_sha1_support_ = ID_AA64ISAR0_SHA1(isar0) >= ID_AA64ISAR0_SHA1_BASE;
+        arm_sha2_support_ = ID_AA64ISAR0_SHA2(isar0) >= ID_AA64ISAR0_SHA2_BASE;
+    }
+#elif defined(__ARM_FEATURE_CRYPTO)
+    /*
+     * Although no feature detection, default compiler option allows ARM
+     * Crypto Extension.
+     */
+    arm_aes_support_ = PR_TRUE;
+    arm_pmull_support_ = PR_TRUE;
+    arm_sha1_support_ = PR_TRUE;
+    arm_sha2_support_ = PR_TRUE;
+#endif
+    /* aarch64 must support NEON. */
+    arm_neon_support_ = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON") == NULL;
+    arm_aes_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_AES") == NULL;
+    arm_pmull_support_ &= PR_GetEnvSecure("NSS_DISABLE_PMULL") == NULL;
+    arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
+    arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
+}
+#endif /* defined(__aarch64__) */
+
+#if defined(__arm__)
+// Defines from hwcap.h in Linux kernel - ARM
+/*
+ * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ */
+#ifndef HWCAP_NEON
+#define HWCAP_NEON (1 << 12)
+#endif
+
+/*
+ * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2
+ */
+#ifndef HWCAP2_AES
+#define HWCAP2_AES (1 << 0)
+#endif
+#ifndef HWCAP2_PMULL
+#define HWCAP2_PMULL (1 << 1)
+#endif
+#ifndef HWCAP2_SHA1
+#define HWCAP2_SHA1 (1 << 2)
+#endif
+#ifndef HWCAP2_SHA2
+#define HWCAP2_SHA2 (1 << 3)
+#endif
+
+PRBool
+GetNeonSupport()
+{
+    /* Setting NSS_DISABLE_ARM_NEON turns NEON off unconditionally. */
+    if (PR_GetEnvSecure("NSS_DISABLE_ARM_NEON") != NULL) {
+        return PR_FALSE;
+    }
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+    // The compiler emits NEON instructions by default in this build, so
+    // NEON is already required even where getauxval is unavailable;
+    // report NEON support.
+    return PR_TRUE;
+#elif !defined(__ANDROID__)
+    // Android's cpu-features.c detects features with the following logic:
+    //
+    // - Call getauxval(AT_HWCAP)
+    // - Parse /proc/self/auxv if getauxval is missing or returns 0
+    // - Parse /proc/cpuinfo if neither of those detects the features
+    //
+    // We don't use AT_HWCAP on Android because the Android documentation
+    // (https://developer.android.com/ndk/guides/cpu-features) says that
+    // some devices (Nexus 4 and the emulator) get IDIV wrong in AT_HWCAP.
+    if (getauxval) {
+        // Collapse the masked bit to a strict PR_TRUE/PR_FALSE value.
+        return (getauxval(AT_HWCAP) & HWCAP_NEON) ? PR_TRUE : PR_FALSE;
+    }
+#endif /* defined(__ARM_NEON) || defined(__ARM_NEON__) */
+    return PR_FALSE;
+}
+
+#ifdef __linux__
+static long
+ReadCPUInfoForHWCAP2()
+{
+    /* Map the /proc/cpuinfo "Features" line onto HWCAP2_* bits. */
+    long caps = 0;
+    char line[512];
+    const char *hit;
+    FILE *fp = fopen("/proc/cpuinfo", "r");
+    if (fp == NULL) {
+        return 0;
+    }
+    while (fgets(line, 511, fp) != NULL) {
+        if (memcmp(line, "Features", 8) != 0) {
+            continue;
+        }
+        /* A name only counts when followed by a space or a newline. */
+        hit = strstr(line, " aes");
+        if (hit != NULL && (hit[4] == ' ' || hit[4] == '\n')) {
+            caps |= HWCAP2_AES;
+        }
+        hit = strstr(line, " sha1");
+        if (hit != NULL && (hit[5] == ' ' || hit[5] == '\n')) {
+            caps |= HWCAP2_SHA1;
+        }
+        hit = strstr(line, " sha2");
+        if (hit != NULL && (hit[5] == ' ' || hit[5] == '\n')) {
+            caps |= HWCAP2_SHA2;
+        }
+        hit = strstr(line, " pmull");
+        if (hit != NULL && (hit[6] == ' ' || hit[6] == '\n')) {
+            caps |= HWCAP2_PMULL;
+        }
+        break; /* only the first "Features" line is examined */
+    }
+    fclose(fp);
+    return caps;
+}
+#endif /* __linux__ */
+
+void
+CheckARMSupport()
+{
+    /* Detect 32-bit ARM crypto/NEON features; apply NSS_DISABLE_* overrides. */
+    char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
+    if (getauxval) {
+        // Android's cpu-features.c relies on AT_HWCAP2 for newer features and
+        // has no workaround / fallback, so AT_HWCAP2 can be trusted where
+        // present. glibc 2.18+ also supports it on Linux/arm; if glibc or
+        // the Linux kernel lacks AT_HWCAP2, getauxval returns 0.
+        long hwcaps = getauxval(AT_HWCAP2);
+#ifdef __linux__
+        if (!hwcaps) {
+            // Some ARMv8 devices may not implement AT_HWCAP2. So we also
+            // read /proc/cpuinfo if AT_HWCAP2 is 0.
+            hwcaps = ReadCPUInfoForHWCAP2();
+        }
+#endif
+        // Reduce each masked bit to 0/1 so the "&=" masking below works.
+        arm_aes_support_ = (hwcaps & HWCAP2_AES) == HWCAP2_AES && disable_hw_aes == NULL;
+        arm_pmull_support_ = (hwcaps & HWCAP2_PMULL) == HWCAP2_PMULL;
+        arm_sha1_support_ = (hwcaps & HWCAP2_SHA1) == HWCAP2_SHA1;
+        arm_sha2_support_ = (hwcaps & HWCAP2_SHA2) == HWCAP2_SHA2;
+    }
+    arm_neon_support_ = GetNeonSupport();
+    arm_sha1_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA1") == NULL;
+    arm_sha2_support_ &= PR_GetEnvSecure("NSS_DISABLE_HW_SHA2") == NULL;
+}
+#endif /* defined(__arm__) */
+
+// Enable when Firefox can use it for Android API 16 and 17.
+// #if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__))
+// #include <cpu-features.h>
+// void
+// CheckARMSupport()
+// {
+// char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON");
+// char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
+// AndroidCpuFamily family = android_getCpuFamily();
+// uint64_t features = android_getCpuFeatures();
+// if (family == ANDROID_CPU_FAMILY_ARM64) {
+// arm_aes_support_ = features & ANDROID_CPU_ARM64_FEATURE_AES &&
+// disable_hw_aes == NULL;
+// arm_pmull_support_ = features & ANDROID_CPU_ARM64_FEATURE_PMULL;
+// arm_sha1_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA1;
+// arm_sha2_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA2;
+// arm_neon_support_ = disable_arm_neon == NULL;
+// }
+// if (family == ANDROID_CPU_FAMILY_ARM) {
+// arm_aes_support_ = features & ANDROID_CPU_ARM_FEATURE_AES &&
+// disable_hw_aes == NULL;
+// arm_pmull_support_ = features & ANDROID_CPU_ARM_FEATURE_PMULL;
+// arm_sha1_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA1;
+// arm_sha2_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA2;
+// arm_neon_support_ = features & ANDROID_CPU_ARM_FEATURE_NEON &&
+// disable_arm_neon == NULL;
+// }
+// }
+// #endif /* defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) */
+
+PRBool
+aesni_support()
+{
+ return aesni_support_;
+}
+PRBool
+clmul_support()
+{
+ return clmul_support_;
+}
+PRBool
+sha_support()
+{
+ return sha_support_;
+}
+PRBool
+avx_support()
+{
+ return avx_support_;
+}
+PRBool
+avx2_support()
+{
+ return avx2_support_;
+}
+PRBool
+adx_support()
+{
+ return adx_support_;
+}
+PRBool
+ssse3_support()
+{
+ return ssse3_support_;
+}
+PRBool
+sse4_1_support()
+{
+ return sse4_1_support_;
+}
+PRBool
+sse4_2_support()
+{
+ return sse4_2_support_;
+}
+PRBool
+arm_neon_support()
+{
+ return arm_neon_support_;
+}
+PRBool
+arm_aes_support()
+{
+ return arm_aes_support_;
+}
+PRBool
+arm_pmull_support()
+{
+ return arm_pmull_support_;
+}
+PRBool
+arm_sha1_support()
+{
+ return arm_sha1_support_;
+}
+PRBool
+arm_sha2_support()
+{
+ return arm_sha2_support_;
+}
+PRBool
+ppc_crypto_support()
+{
+ return ppc_crypto_support_;
+}
+
+#if defined(__powerpc__)
+
+#ifndef __has_include
+#define __has_include(x) 0
+#endif
+
+/* clang-format off */
+#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD__ >= 12)
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#endif
+#elif (defined(__FreeBSD__) && __FreeBSD__ < 12)
+#include <sys/sysctl.h>
+#endif
+
+// Defines from cputable.h in Linux kernel - PPC, letting us build on older kernels
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#endif
+
+static void
+CheckPPCSupport()
+{
+ char *disable_hw_crypto = PR_GetEnvSecure("NSS_DISABLE_PPC_GHASH");
+ /* hwcaps stays 0 (crypto reported as unsupported) when no query below is compiled in. */
+ unsigned long hwcaps = 0;
+#if defined(__linux__)
+#if __has_include(<sys/auxv.h>)
+ hwcaps = getauxval(AT_HWCAP2);
+#endif
+#elif defined(__FreeBSD__)
+#if __FreeBSD__ >= 12
+#if __has_include(<sys/auxv.h>)
+ elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps));
+#endif
+#else
+ size_t len = sizeof(hwcaps);
+ sysctlbyname("hw.cpu_features2", &hwcaps, &len, NULL, 0);
+#endif
+#endif
+ /* Enabled only when the VEC_CRYPTO bit is set and NSS_DISABLE_PPC_GHASH is unset. */
+ ppc_crypto_support_ = hwcaps & PPC_FEATURE2_VEC_CRYPTO && disable_hw_crypto == NULL;
+}
+/* clang-format on */
+
+#endif /* __powerpc__ */
+
+static PRStatus
+FreeblInit(void)
+{
+#ifdef NSS_X86_OR_X64
+ CheckX86CPUSupport();
+#elif (defined(__aarch64__) || defined(__arm__))
+ CheckARMSupport();
+#elif (defined(__powerpc__))
+ CheckPPCSupport();
+#endif
+ return PR_SUCCESS;
+}
+
+SECStatus
+BL_Init()
+{
+ if (PR_CallOnce(&coFreeblInit, FreeblInit) != PR_SUCCESS) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ RSA_Init();
+ /* NOTE(review): any status RSA_Init() reports is discarded here -- confirm that is intended. */
+ return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/blname.c b/security/nss/lib/freebl/blname.c
new file mode 100644
index 0000000000..4bad74ada0
--- /dev/null
+++ b/security/nss/lib/freebl/blname.c
@@ -0,0 +1,100 @@
+/*
+ * blname.c - determine the freebl library name.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if defined(FREEBL_LOWHASH)
+static const char* default_name =
+ SHLIB_PREFIX "freeblpriv" SHLIB_VERSION "." SHLIB_SUFFIX;
+#else
+static const char* default_name =
+ SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+/* getLibName() returns the name of the library to load. */
+
+#if defined(SOLARIS) && defined(__sparc)
+#include <stddef.h>
+#include <strings.h>
+#include <sys/systeminfo.h>
+
+#if defined(NSS_USE_64)
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_64int_3.so";
+const static char non_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+
+const static char int_hybrid_isa[] = "sparcv9";
+const static char fpu_hybrid_isa[] = "sparcv9+vis";
+
+#else
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_32fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_32int64_3.so";
+/* This was for SPARC V8, now obsolete. */
+const static char* const non_hybrid_shared_lib = NULL;
+
+const static char int_hybrid_isa[] = "sparcv8plus";
+const static char fpu_hybrid_isa[] = "sparcv8plus+vis";
+
+#endif
+
+static const char*
+getLibName(void)
+{
+    /* Choose the SPARC freebl build from the ISA list reported by
+     * sysinfo(SI_ISALIST); NULL is returned when that query fails. */
+    char isa_list[256];
+    const char* int_hit;
+    const char* fpu_hit;
+    long isa_len = sysinfo(SI_ISALIST, isa_list, sizeof isa_list);
+
+    if (isa_len <= 0) {
+        return NULL;
+    }
+    /* sysinfo output is always supposed to be NUL terminated, but ... */
+    isa_list[(isa_len < sizeof isa_list) ? isa_len : (sizeof isa_list) - 1] = '\0';
+
+    /* The ISA list is a space separated string of names of ISAs and
+     * ISA extensions, in order of decreasing performance.
+     * Two different ISAs can accelerate NSS's crypto code. When both
+     * are listed, the one appearing first wins; when only one is
+     * listed, it is used; when neither is, the base unaccelerated
+     * code is used.
+     */
+    int_hit = strstr(isa_list, int_hybrid_isa);
+    fpu_hit = strstr(isa_list, fpu_hybrid_isa);
+    /* The int ISA name is a prefix of the fpu one, so both searches can
+     * land on the same token; that tie (equal pointers) goes to fpu. */
+    if (fpu_hit != NULL && (int_hit == NULL || int_hit >= fpu_hit)) {
+        return fpu_hybrid_shared_lib;
+    }
+    if (int_hit != NULL) {
+        return int_hybrid_shared_lib;
+    }
+    return non_hybrid_shared_lib;
+}
+
+#elif defined(HPUX) && !defined(NSS_USE_64) && !defined(__ia64)
+#include <unistd.h>
+
+/* This code tests to see if we're running on a PA-RISC 2.0 CPU (CPU_PA_RISC2_0).
+** It returns the "fpu" library name if so, and the "int" library name otherwise.
+*/
+static const char*
+getLibName(void)
+{
+ long cpu = sysconf(_SC_CPU_VERSION);
+ return (cpu == CPU_PA_RISC2_0)
+ ? "libfreebl_32fpu_3.sl"
+ : "libfreebl_32int_3.sl";
+}
+#else
+/* default case, for platforms/ABIs that have only one freebl shared lib: returns default_name. */
+static const char*
+getLibName(void)
+{
+ return default_name;
+}
+#endif
diff --git a/security/nss/lib/freebl/camellia.c b/security/nss/lib/freebl/camellia.c
new file mode 100644
index 0000000000..80a8472a79
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.c
@@ -0,0 +1,1896 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "camellia.h"
+#include "sha_fast.h" /* for SHA_HTONL and related configuration macros */
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ * macros
+ */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+
+/* require a CPU that allows unaligned access */
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+#define CAMELLIA_NEED_TMP_VARIABLE 1
+#endif
+
+#define GETU32(p) SHA_HTONL(*((PRUint32 *)(p)))
+#define PUTU32(ct, st) \
+ { \
+ *((PRUint32 *)(ct)) = SHA_HTONL(st); \
+ }
+
+#else /* no unaligned access */
+
+#define GETU32(pt) \
+ (((PRUint32)(pt)[0] << 24) ^ ((PRUint32)(pt)[1] << 16) ^ ((PRUint32)(pt)[2] << 8) ^ ((PRUint32)(pt)[3]))
+
+#define PUTU32(ct, st) \
+ { \
+ (ct)[0] = (PRUint8)((st) >> 24); \
+ (ct)[1] = (PRUint8)((st) >> 16); \
+ (ct)[2] = (PRUint8)((st) >> 8); \
+ (ct)[3] = (PRUint8)(st); \
+ }
+
+#endif
+
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotate a 32-bit word right by one byte (8 bits) */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotate a 32-bit word left by one bit */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotate a 32-bit word left by one byte (8 bits) */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ ll = (ll << bits) + (lr >> (32 - bits)); \
+ lr = (lr << bits) + (rl >> (32 - bits)); \
+ rl = (rl << bits) + (rr >> (32 - bits)); \
+ rr = (rr << bits) + (w0 >> (32 - bits)); \
+ } while (0)
+
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ w1 = lr; \
+ ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+ lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+ rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+ rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+ } while (0)
+
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ il = xl ^ kl; \
+ ir = xr ^ kr; \
+ t0 = il >> 16; \
+ t1 = ir >> 16; \
+ yl = CAMELLIA_SP1110(ir & 0xff) ^ \
+ CAMELLIA_SP0222((t1 >> 8) & 0xff) ^ \
+ CAMELLIA_SP3033(t1 & 0xff) ^ \
+ CAMELLIA_SP4404((ir >> 8) & 0xff); \
+ yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) ^ \
+ CAMELLIA_SP0222(t0 & 0xff) ^ \
+ CAMELLIA_SP3033((il >> 8) & 0xff) ^ \
+ CAMELLIA_SP4404(il & 0xff); \
+ yl ^= yr; \
+ yr = CAMELLIA_RR8(yr); \
+ yr ^= yl; \
+ } while (0)
+
+/*
+ * for speed up
+ *
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+ do { \
+ t0 = kll; \
+ t0 &= ll; \
+ lr ^= CAMELLIA_RL1(t0); \
+ t1 = klr; \
+ t1 |= lr; \
+ ll ^= t1; \
+ \
+ t2 = krr; \
+ t2 |= rr; \
+ rl ^= t2; \
+ t3 = krl; \
+ t3 &= rl; \
+ rr ^= CAMELLIA_RL1(t3); \
+ } while (0)
+
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ ir = CAMELLIA_SP1110(xr & 0xff) ^ \
+ CAMELLIA_SP0222((xr >> 24) & 0xff) ^ \
+ CAMELLIA_SP3033((xr >> 16) & 0xff) ^ \
+ CAMELLIA_SP4404((xr >> 8) & 0xff); \
+ il = CAMELLIA_SP1110((xl >> 24) & 0xff) ^ \
+ CAMELLIA_SP0222((xl >> 16) & 0xff) ^ \
+ CAMELLIA_SP3033((xl >> 8) & 0xff) ^ \
+ CAMELLIA_SP4404(xl & 0xff); \
+ il ^= kl; \
+ ir ^= kr; \
+ ir ^= il; \
+ il = CAMELLIA_RR8(il); \
+ il ^= ir; \
+ yl ^= ir; \
+ yr ^= il; \
+ } while (0)
+
+static const PRUint32 camellia_sp1110[256] = {
+ 0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00,
+ 0xb3b3b300, 0x27272700, 0xc0c0c000, 0xe5e5e500,
+ 0xe4e4e400, 0x85858500, 0x57575700, 0x35353500,
+ 0xeaeaea00, 0x0c0c0c00, 0xaeaeae00, 0x41414100,
+ 0x23232300, 0xefefef00, 0x6b6b6b00, 0x93939300,
+ 0x45454500, 0x19191900, 0xa5a5a500, 0x21212100,
+ 0xededed00, 0x0e0e0e00, 0x4f4f4f00, 0x4e4e4e00,
+ 0x1d1d1d00, 0x65656500, 0x92929200, 0xbdbdbd00,
+ 0x86868600, 0xb8b8b800, 0xafafaf00, 0x8f8f8f00,
+ 0x7c7c7c00, 0xebebeb00, 0x1f1f1f00, 0xcecece00,
+ 0x3e3e3e00, 0x30303000, 0xdcdcdc00, 0x5f5f5f00,
+ 0x5e5e5e00, 0xc5c5c500, 0x0b0b0b00, 0x1a1a1a00,
+ 0xa6a6a600, 0xe1e1e100, 0x39393900, 0xcacaca00,
+ 0xd5d5d500, 0x47474700, 0x5d5d5d00, 0x3d3d3d00,
+ 0xd9d9d900, 0x01010100, 0x5a5a5a00, 0xd6d6d600,
+ 0x51515100, 0x56565600, 0x6c6c6c00, 0x4d4d4d00,
+ 0x8b8b8b00, 0x0d0d0d00, 0x9a9a9a00, 0x66666600,
+ 0xfbfbfb00, 0xcccccc00, 0xb0b0b000, 0x2d2d2d00,
+ 0x74747400, 0x12121200, 0x2b2b2b00, 0x20202000,
+ 0xf0f0f000, 0xb1b1b100, 0x84848400, 0x99999900,
+ 0xdfdfdf00, 0x4c4c4c00, 0xcbcbcb00, 0xc2c2c200,
+ 0x34343400, 0x7e7e7e00, 0x76767600, 0x05050500,
+ 0x6d6d6d00, 0xb7b7b700, 0xa9a9a900, 0x31313100,
+ 0xd1d1d100, 0x17171700, 0x04040400, 0xd7d7d700,
+ 0x14141400, 0x58585800, 0x3a3a3a00, 0x61616100,
+ 0xdedede00, 0x1b1b1b00, 0x11111100, 0x1c1c1c00,
+ 0x32323200, 0x0f0f0f00, 0x9c9c9c00, 0x16161600,
+ 0x53535300, 0x18181800, 0xf2f2f200, 0x22222200,
+ 0xfefefe00, 0x44444400, 0xcfcfcf00, 0xb2b2b200,
+ 0xc3c3c300, 0xb5b5b500, 0x7a7a7a00, 0x91919100,
+ 0x24242400, 0x08080800, 0xe8e8e800, 0xa8a8a800,
+ 0x60606000, 0xfcfcfc00, 0x69696900, 0x50505000,
+ 0xaaaaaa00, 0xd0d0d000, 0xa0a0a000, 0x7d7d7d00,
+ 0xa1a1a100, 0x89898900, 0x62626200, 0x97979700,
+ 0x54545400, 0x5b5b5b00, 0x1e1e1e00, 0x95959500,
+ 0xe0e0e000, 0xffffff00, 0x64646400, 0xd2d2d200,
+ 0x10101000, 0xc4c4c400, 0x00000000, 0x48484800,
+ 0xa3a3a300, 0xf7f7f700, 0x75757500, 0xdbdbdb00,
+ 0x8a8a8a00, 0x03030300, 0xe6e6e600, 0xdadada00,
+ 0x09090900, 0x3f3f3f00, 0xdddddd00, 0x94949400,
+ 0x87878700, 0x5c5c5c00, 0x83838300, 0x02020200,
+ 0xcdcdcd00, 0x4a4a4a00, 0x90909000, 0x33333300,
+ 0x73737300, 0x67676700, 0xf6f6f600, 0xf3f3f300,
+ 0x9d9d9d00, 0x7f7f7f00, 0xbfbfbf00, 0xe2e2e200,
+ 0x52525200, 0x9b9b9b00, 0xd8d8d800, 0x26262600,
+ 0xc8c8c800, 0x37373700, 0xc6c6c600, 0x3b3b3b00,
+ 0x81818100, 0x96969600, 0x6f6f6f00, 0x4b4b4b00,
+ 0x13131300, 0xbebebe00, 0x63636300, 0x2e2e2e00,
+ 0xe9e9e900, 0x79797900, 0xa7a7a700, 0x8c8c8c00,
+ 0x9f9f9f00, 0x6e6e6e00, 0xbcbcbc00, 0x8e8e8e00,
+ 0x29292900, 0xf5f5f500, 0xf9f9f900, 0xb6b6b600,
+ 0x2f2f2f00, 0xfdfdfd00, 0xb4b4b400, 0x59595900,
+ 0x78787800, 0x98989800, 0x06060600, 0x6a6a6a00,
+ 0xe7e7e700, 0x46464600, 0x71717100, 0xbababa00,
+ 0xd4d4d400, 0x25252500, 0xababab00, 0x42424200,
+ 0x88888800, 0xa2a2a200, 0x8d8d8d00, 0xfafafa00,
+ 0x72727200, 0x07070700, 0xb9b9b900, 0x55555500,
+ 0xf8f8f800, 0xeeeeee00, 0xacacac00, 0x0a0a0a00,
+ 0x36363600, 0x49494900, 0x2a2a2a00, 0x68686800,
+ 0x3c3c3c00, 0x38383800, 0xf1f1f100, 0xa4a4a400,
+ 0x40404000, 0x28282800, 0xd3d3d300, 0x7b7b7b00,
+ 0xbbbbbb00, 0xc9c9c900, 0x43434300, 0xc1c1c100,
+ 0x15151500, 0xe3e3e300, 0xadadad00, 0xf4f4f400,
+ 0x77777700, 0xc7c7c700, 0x80808000, 0x9e9e9e00
+};
+
+static const PRUint32 camellia_sp0222[256] = {
+ 0x00e0e0e0, 0x00050505, 0x00585858, 0x00d9d9d9,
+ 0x00676767, 0x004e4e4e, 0x00818181, 0x00cbcbcb,
+ 0x00c9c9c9, 0x000b0b0b, 0x00aeaeae, 0x006a6a6a,
+ 0x00d5d5d5, 0x00181818, 0x005d5d5d, 0x00828282,
+ 0x00464646, 0x00dfdfdf, 0x00d6d6d6, 0x00272727,
+ 0x008a8a8a, 0x00323232, 0x004b4b4b, 0x00424242,
+ 0x00dbdbdb, 0x001c1c1c, 0x009e9e9e, 0x009c9c9c,
+ 0x003a3a3a, 0x00cacaca, 0x00252525, 0x007b7b7b,
+ 0x000d0d0d, 0x00717171, 0x005f5f5f, 0x001f1f1f,
+ 0x00f8f8f8, 0x00d7d7d7, 0x003e3e3e, 0x009d9d9d,
+ 0x007c7c7c, 0x00606060, 0x00b9b9b9, 0x00bebebe,
+ 0x00bcbcbc, 0x008b8b8b, 0x00161616, 0x00343434,
+ 0x004d4d4d, 0x00c3c3c3, 0x00727272, 0x00959595,
+ 0x00ababab, 0x008e8e8e, 0x00bababa, 0x007a7a7a,
+ 0x00b3b3b3, 0x00020202, 0x00b4b4b4, 0x00adadad,
+ 0x00a2a2a2, 0x00acacac, 0x00d8d8d8, 0x009a9a9a,
+ 0x00171717, 0x001a1a1a, 0x00353535, 0x00cccccc,
+ 0x00f7f7f7, 0x00999999, 0x00616161, 0x005a5a5a,
+ 0x00e8e8e8, 0x00242424, 0x00565656, 0x00404040,
+ 0x00e1e1e1, 0x00636363, 0x00090909, 0x00333333,
+ 0x00bfbfbf, 0x00989898, 0x00979797, 0x00858585,
+ 0x00686868, 0x00fcfcfc, 0x00ececec, 0x000a0a0a,
+ 0x00dadada, 0x006f6f6f, 0x00535353, 0x00626262,
+ 0x00a3a3a3, 0x002e2e2e, 0x00080808, 0x00afafaf,
+ 0x00282828, 0x00b0b0b0, 0x00747474, 0x00c2c2c2,
+ 0x00bdbdbd, 0x00363636, 0x00222222, 0x00383838,
+ 0x00646464, 0x001e1e1e, 0x00393939, 0x002c2c2c,
+ 0x00a6a6a6, 0x00303030, 0x00e5e5e5, 0x00444444,
+ 0x00fdfdfd, 0x00888888, 0x009f9f9f, 0x00656565,
+ 0x00878787, 0x006b6b6b, 0x00f4f4f4, 0x00232323,
+ 0x00484848, 0x00101010, 0x00d1d1d1, 0x00515151,
+ 0x00c0c0c0, 0x00f9f9f9, 0x00d2d2d2, 0x00a0a0a0,
+ 0x00555555, 0x00a1a1a1, 0x00414141, 0x00fafafa,
+ 0x00434343, 0x00131313, 0x00c4c4c4, 0x002f2f2f,
+ 0x00a8a8a8, 0x00b6b6b6, 0x003c3c3c, 0x002b2b2b,
+ 0x00c1c1c1, 0x00ffffff, 0x00c8c8c8, 0x00a5a5a5,
+ 0x00202020, 0x00898989, 0x00000000, 0x00909090,
+ 0x00474747, 0x00efefef, 0x00eaeaea, 0x00b7b7b7,
+ 0x00151515, 0x00060606, 0x00cdcdcd, 0x00b5b5b5,
+ 0x00121212, 0x007e7e7e, 0x00bbbbbb, 0x00292929,
+ 0x000f0f0f, 0x00b8b8b8, 0x00070707, 0x00040404,
+ 0x009b9b9b, 0x00949494, 0x00212121, 0x00666666,
+ 0x00e6e6e6, 0x00cecece, 0x00ededed, 0x00e7e7e7,
+ 0x003b3b3b, 0x00fefefe, 0x007f7f7f, 0x00c5c5c5,
+ 0x00a4a4a4, 0x00373737, 0x00b1b1b1, 0x004c4c4c,
+ 0x00919191, 0x006e6e6e, 0x008d8d8d, 0x00767676,
+ 0x00030303, 0x002d2d2d, 0x00dedede, 0x00969696,
+ 0x00262626, 0x007d7d7d, 0x00c6c6c6, 0x005c5c5c,
+ 0x00d3d3d3, 0x00f2f2f2, 0x004f4f4f, 0x00191919,
+ 0x003f3f3f, 0x00dcdcdc, 0x00797979, 0x001d1d1d,
+ 0x00525252, 0x00ebebeb, 0x00f3f3f3, 0x006d6d6d,
+ 0x005e5e5e, 0x00fbfbfb, 0x00696969, 0x00b2b2b2,
+ 0x00f0f0f0, 0x00313131, 0x000c0c0c, 0x00d4d4d4,
+ 0x00cfcfcf, 0x008c8c8c, 0x00e2e2e2, 0x00757575,
+ 0x00a9a9a9, 0x004a4a4a, 0x00575757, 0x00848484,
+ 0x00111111, 0x00454545, 0x001b1b1b, 0x00f5f5f5,
+ 0x00e4e4e4, 0x000e0e0e, 0x00737373, 0x00aaaaaa,
+ 0x00f1f1f1, 0x00dddddd, 0x00595959, 0x00141414,
+ 0x006c6c6c, 0x00929292, 0x00545454, 0x00d0d0d0,
+ 0x00787878, 0x00707070, 0x00e3e3e3, 0x00494949,
+ 0x00808080, 0x00505050, 0x00a7a7a7, 0x00f6f6f6,
+ 0x00777777, 0x00939393, 0x00868686, 0x00838383,
+ 0x002a2a2a, 0x00c7c7c7, 0x005b5b5b, 0x00e9e9e9,
+ 0x00eeeeee, 0x008f8f8f, 0x00010101, 0x003d3d3d
+};
+
+static const PRUint32 camellia_sp3033[256] = {
+ 0x38003838, 0x41004141, 0x16001616, 0x76007676,
+ 0xd900d9d9, 0x93009393, 0x60006060, 0xf200f2f2,
+ 0x72007272, 0xc200c2c2, 0xab00abab, 0x9a009a9a,
+ 0x75007575, 0x06000606, 0x57005757, 0xa000a0a0,
+ 0x91009191, 0xf700f7f7, 0xb500b5b5, 0xc900c9c9,
+ 0xa200a2a2, 0x8c008c8c, 0xd200d2d2, 0x90009090,
+ 0xf600f6f6, 0x07000707, 0xa700a7a7, 0x27002727,
+ 0x8e008e8e, 0xb200b2b2, 0x49004949, 0xde00dede,
+ 0x43004343, 0x5c005c5c, 0xd700d7d7, 0xc700c7c7,
+ 0x3e003e3e, 0xf500f5f5, 0x8f008f8f, 0x67006767,
+ 0x1f001f1f, 0x18001818, 0x6e006e6e, 0xaf00afaf,
+ 0x2f002f2f, 0xe200e2e2, 0x85008585, 0x0d000d0d,
+ 0x53005353, 0xf000f0f0, 0x9c009c9c, 0x65006565,
+ 0xea00eaea, 0xa300a3a3, 0xae00aeae, 0x9e009e9e,
+ 0xec00ecec, 0x80008080, 0x2d002d2d, 0x6b006b6b,
+ 0xa800a8a8, 0x2b002b2b, 0x36003636, 0xa600a6a6,
+ 0xc500c5c5, 0x86008686, 0x4d004d4d, 0x33003333,
+ 0xfd00fdfd, 0x66006666, 0x58005858, 0x96009696,
+ 0x3a003a3a, 0x09000909, 0x95009595, 0x10001010,
+ 0x78007878, 0xd800d8d8, 0x42004242, 0xcc00cccc,
+ 0xef00efef, 0x26002626, 0xe500e5e5, 0x61006161,
+ 0x1a001a1a, 0x3f003f3f, 0x3b003b3b, 0x82008282,
+ 0xb600b6b6, 0xdb00dbdb, 0xd400d4d4, 0x98009898,
+ 0xe800e8e8, 0x8b008b8b, 0x02000202, 0xeb00ebeb,
+ 0x0a000a0a, 0x2c002c2c, 0x1d001d1d, 0xb000b0b0,
+ 0x6f006f6f, 0x8d008d8d, 0x88008888, 0x0e000e0e,
+ 0x19001919, 0x87008787, 0x4e004e4e, 0x0b000b0b,
+ 0xa900a9a9, 0x0c000c0c, 0x79007979, 0x11001111,
+ 0x7f007f7f, 0x22002222, 0xe700e7e7, 0x59005959,
+ 0xe100e1e1, 0xda00dada, 0x3d003d3d, 0xc800c8c8,
+ 0x12001212, 0x04000404, 0x74007474, 0x54005454,
+ 0x30003030, 0x7e007e7e, 0xb400b4b4, 0x28002828,
+ 0x55005555, 0x68006868, 0x50005050, 0xbe00bebe,
+ 0xd000d0d0, 0xc400c4c4, 0x31003131, 0xcb00cbcb,
+ 0x2a002a2a, 0xad00adad, 0x0f000f0f, 0xca00caca,
+ 0x70007070, 0xff00ffff, 0x32003232, 0x69006969,
+ 0x08000808, 0x62006262, 0x00000000, 0x24002424,
+ 0xd100d1d1, 0xfb00fbfb, 0xba00baba, 0xed00eded,
+ 0x45004545, 0x81008181, 0x73007373, 0x6d006d6d,
+ 0x84008484, 0x9f009f9f, 0xee00eeee, 0x4a004a4a,
+ 0xc300c3c3, 0x2e002e2e, 0xc100c1c1, 0x01000101,
+ 0xe600e6e6, 0x25002525, 0x48004848, 0x99009999,
+ 0xb900b9b9, 0xb300b3b3, 0x7b007b7b, 0xf900f9f9,
+ 0xce00cece, 0xbf00bfbf, 0xdf00dfdf, 0x71007171,
+ 0x29002929, 0xcd00cdcd, 0x6c006c6c, 0x13001313,
+ 0x64006464, 0x9b009b9b, 0x63006363, 0x9d009d9d,
+ 0xc000c0c0, 0x4b004b4b, 0xb700b7b7, 0xa500a5a5,
+ 0x89008989, 0x5f005f5f, 0xb100b1b1, 0x17001717,
+ 0xf400f4f4, 0xbc00bcbc, 0xd300d3d3, 0x46004646,
+ 0xcf00cfcf, 0x37003737, 0x5e005e5e, 0x47004747,
+ 0x94009494, 0xfa00fafa, 0xfc00fcfc, 0x5b005b5b,
+ 0x97009797, 0xfe00fefe, 0x5a005a5a, 0xac00acac,
+ 0x3c003c3c, 0x4c004c4c, 0x03000303, 0x35003535,
+ 0xf300f3f3, 0x23002323, 0xb800b8b8, 0x5d005d5d,
+ 0x6a006a6a, 0x92009292, 0xd500d5d5, 0x21002121,
+ 0x44004444, 0x51005151, 0xc600c6c6, 0x7d007d7d,
+ 0x39003939, 0x83008383, 0xdc00dcdc, 0xaa00aaaa,
+ 0x7c007c7c, 0x77007777, 0x56005656, 0x05000505,
+ 0x1b001b1b, 0xa400a4a4, 0x15001515, 0x34003434,
+ 0x1e001e1e, 0x1c001c1c, 0xf800f8f8, 0x52005252,
+ 0x20002020, 0x14001414, 0xe900e9e9, 0xbd00bdbd,
+ 0xdd00dddd, 0xe400e4e4, 0xa100a1a1, 0xe000e0e0,
+ 0x8a008a8a, 0xf100f1f1, 0xd600d6d6, 0x7a007a7a,
+ 0xbb00bbbb, 0xe300e3e3, 0x40004040, 0x4f004f4f
+};
+
+static const PRUint32 camellia_sp4404[256] = {
+ 0x70700070, 0x2c2c002c, 0xb3b300b3, 0xc0c000c0,
+ 0xe4e400e4, 0x57570057, 0xeaea00ea, 0xaeae00ae,
+ 0x23230023, 0x6b6b006b, 0x45450045, 0xa5a500a5,
+ 0xeded00ed, 0x4f4f004f, 0x1d1d001d, 0x92920092,
+ 0x86860086, 0xafaf00af, 0x7c7c007c, 0x1f1f001f,
+ 0x3e3e003e, 0xdcdc00dc, 0x5e5e005e, 0x0b0b000b,
+ 0xa6a600a6, 0x39390039, 0xd5d500d5, 0x5d5d005d,
+ 0xd9d900d9, 0x5a5a005a, 0x51510051, 0x6c6c006c,
+ 0x8b8b008b, 0x9a9a009a, 0xfbfb00fb, 0xb0b000b0,
+ 0x74740074, 0x2b2b002b, 0xf0f000f0, 0x84840084,
+ 0xdfdf00df, 0xcbcb00cb, 0x34340034, 0x76760076,
+ 0x6d6d006d, 0xa9a900a9, 0xd1d100d1, 0x04040004,
+ 0x14140014, 0x3a3a003a, 0xdede00de, 0x11110011,
+ 0x32320032, 0x9c9c009c, 0x53530053, 0xf2f200f2,
+ 0xfefe00fe, 0xcfcf00cf, 0xc3c300c3, 0x7a7a007a,
+ 0x24240024, 0xe8e800e8, 0x60600060, 0x69690069,
+ 0xaaaa00aa, 0xa0a000a0, 0xa1a100a1, 0x62620062,
+ 0x54540054, 0x1e1e001e, 0xe0e000e0, 0x64640064,
+ 0x10100010, 0x00000000, 0xa3a300a3, 0x75750075,
+ 0x8a8a008a, 0xe6e600e6, 0x09090009, 0xdddd00dd,
+ 0x87870087, 0x83830083, 0xcdcd00cd, 0x90900090,
+ 0x73730073, 0xf6f600f6, 0x9d9d009d, 0xbfbf00bf,
+ 0x52520052, 0xd8d800d8, 0xc8c800c8, 0xc6c600c6,
+ 0x81810081, 0x6f6f006f, 0x13130013, 0x63630063,
+ 0xe9e900e9, 0xa7a700a7, 0x9f9f009f, 0xbcbc00bc,
+ 0x29290029, 0xf9f900f9, 0x2f2f002f, 0xb4b400b4,
+ 0x78780078, 0x06060006, 0xe7e700e7, 0x71710071,
+ 0xd4d400d4, 0xabab00ab, 0x88880088, 0x8d8d008d,
+ 0x72720072, 0xb9b900b9, 0xf8f800f8, 0xacac00ac,
+ 0x36360036, 0x2a2a002a, 0x3c3c003c, 0xf1f100f1,
+ 0x40400040, 0xd3d300d3, 0xbbbb00bb, 0x43430043,
+ 0x15150015, 0xadad00ad, 0x77770077, 0x80800080,
+ 0x82820082, 0xecec00ec, 0x27270027, 0xe5e500e5,
+ 0x85850085, 0x35350035, 0x0c0c000c, 0x41410041,
+ 0xefef00ef, 0x93930093, 0x19190019, 0x21210021,
+ 0x0e0e000e, 0x4e4e004e, 0x65650065, 0xbdbd00bd,
+ 0xb8b800b8, 0x8f8f008f, 0xebeb00eb, 0xcece00ce,
+ 0x30300030, 0x5f5f005f, 0xc5c500c5, 0x1a1a001a,
+ 0xe1e100e1, 0xcaca00ca, 0x47470047, 0x3d3d003d,
+ 0x01010001, 0xd6d600d6, 0x56560056, 0x4d4d004d,
+ 0x0d0d000d, 0x66660066, 0xcccc00cc, 0x2d2d002d,
+ 0x12120012, 0x20200020, 0xb1b100b1, 0x99990099,
+ 0x4c4c004c, 0xc2c200c2, 0x7e7e007e, 0x05050005,
+ 0xb7b700b7, 0x31310031, 0x17170017, 0xd7d700d7,
+ 0x58580058, 0x61610061, 0x1b1b001b, 0x1c1c001c,
+ 0x0f0f000f, 0x16160016, 0x18180018, 0x22220022,
+ 0x44440044, 0xb2b200b2, 0xb5b500b5, 0x91910091,
+ 0x08080008, 0xa8a800a8, 0xfcfc00fc, 0x50500050,
+ 0xd0d000d0, 0x7d7d007d, 0x89890089, 0x97970097,
+ 0x5b5b005b, 0x95950095, 0xffff00ff, 0xd2d200d2,
+ 0xc4c400c4, 0x48480048, 0xf7f700f7, 0xdbdb00db,
+ 0x03030003, 0xdada00da, 0x3f3f003f, 0x94940094,
+ 0x5c5c005c, 0x02020002, 0x4a4a004a, 0x33330033,
+ 0x67670067, 0xf3f300f3, 0x7f7f007f, 0xe2e200e2,
+ 0x9b9b009b, 0x26260026, 0x37370037, 0x3b3b003b,
+ 0x96960096, 0x4b4b004b, 0xbebe00be, 0x2e2e002e,
+ 0x79790079, 0x8c8c008c, 0x6e6e006e, 0x8e8e008e,
+ 0xf5f500f5, 0xb6b600b6, 0xfdfd00fd, 0x59590059,
+ 0x98980098, 0x6a6a006a, 0x46460046, 0xbaba00ba,
+ 0x25250025, 0x42420042, 0xa2a200a2, 0xfafa00fa,
+ 0x07070007, 0x55550055, 0xeeee00ee, 0x0a0a000a,
+ 0x49490049, 0x68680068, 0x38380038, 0xa4a400a4,
+ 0x28280028, 0x7b7b007b, 0xc9c900c9, 0xc1c100c1,
+ 0xe3e300e3, 0xf4f400f4, 0xc7c700c7, 0x9e9e009e
+};
+
+/**
+ * Stuff related to the Camellia key schedule
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
+
+void
+camellia_setup128(const unsigned char *key, PRUint32 *subkey)
+{
+ PRUint32 kll, klr, krl, krr;
+ PRUint32 il, ir, t0, t1, w0, w1;
+ PRUint32 kw4l, kw4r, dw, tl, tr;
+ PRUint32 subL[26];
+ PRUint32 subR[26];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp;
+#endif
+
+ /**
+ * k == kll || klr || krl || krr (|| is concatenation)
+ */
+ kll = GETU32(key);
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ /**
+ * generate KL dependent subkeys
+ */
+ subl(0) = kll;
+ subr(0) = klr;
+ subl(1) = krl;
+ subr(1) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(4) = kll;
+ subr(4) = klr;
+ subl(5) = krl;
+ subr(5) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+ subl(10) = kll;
+ subr(10) = klr;
+ subl(11) = krl;
+ subr(11) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(13) = krl;
+ subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(16) = kll;
+ subr(16) = klr;
+ subl(17) = krl;
+ subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(18) = kll;
+ subr(18) = klr;
+ subl(19) = krl;
+ subr(19) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(22) = kll;
+ subr(22) = klr;
+ subl(23) = krl;
+ subr(23) = krr;
+
+ /* generate KA */
+ kll = subl(0);
+ klr = subr(0);
+ krl = subl(1);
+ krr = subr(1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0;
+ klr ^= w1;
+
+ /* generate KA dependent subkeys */
+ subl(2) = kll;
+ subr(2) = klr;
+ subl(3) = krl;
+ subr(3) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(6) = kll;
+ subr(6) = klr;
+ subl(7) = krl;
+ subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(8) = kll;
+ subr(8) = klr;
+ subl(9) = krl;
+ subr(9) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(12) = kll;
+ subr(12) = klr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(14) = kll;
+ subr(14) = klr;
+ subl(15) = krl;
+ subr(15) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+ subl(20) = kll;
+ subr(20) = klr;
+ subl(21) = krl;
+ subr(21) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(24) = kll;
+ subr(24) = klr;
+ subl(25) = krl;
+ subr(25) = krr;
+
+ /* absorb kw2 to other subkeys */
+ subl(3) ^= subl(1);
+ subr(3) ^= subr(1);
+ subl(5) ^= subl(1);
+ subr(5) ^= subr(1);
+ subl(7) ^= subl(1);
+ subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1);
+ subr(11) ^= subr(1);
+ subl(13) ^= subl(1);
+ subr(13) ^= subr(1);
+ subl(15) ^= subl(1);
+ subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1);
+ subr(19) ^= subr(1);
+ subl(21) ^= subl(1);
+ subr(21) ^= subr(1);
+ subl(23) ^= subl(1);
+ subr(23) ^= subr(1);
+ subl(24) ^= subl(1);
+ subr(24) ^= subr(1);
+
+ /* absorb kw4 to other subkeys */
+ kw4l = subl(25);
+ kw4r = subr(25);
+ subl(22) ^= kw4l;
+ subr(22) ^= kw4r;
+ subl(20) ^= kw4l;
+ subr(20) ^= kw4r;
+ subl(18) ^= kw4l;
+ subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l;
+ subr(14) ^= kw4r;
+ subl(12) ^= kw4l;
+ subr(12) ^= kw4r;
+ subl(10) ^= kw4l;
+ subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l;
+ subr(6) ^= kw4r;
+ subl(4) ^= kw4l;
+ subr(4) ^= kw4r;
+ subl(2) ^= kw4l;
+ subr(2) ^= kw4r;
+ subl(0) ^= kw4l;
+ subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ CamelliaSubkeyL(23) = subl(22);
+ CamelliaSubkeyR(23) = subr(22);
+ CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+ CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+ /* apply the inverse of the last half of P-function */
+ dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+ dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+ dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+ dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+ dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+ dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+ dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+ dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+ dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+ dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+ dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+ dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+ dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+ dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+ dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+ dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+ dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+ dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+
+ return;
+}
+
+void
+camellia_setup256(const unsigned char *key, PRUint32 *subkey)
+{ /*
+ * Expand a 256-bit Camellia key into the round-subkey table.
+ *
+ * Reads 32 bytes from key (eight GETU32 loads at offsets 0..28), builds
+ * intermediate subkey words through subl()/subr(), and stores the final
+ * words through CamelliaSubkeyL()/CamelliaSubkeyR() for entries 0 and
+ * 2..32. Those accessor macros are defined outside this block;
+ * presumably subl()/subr() index the local subL[]/subR[] arrays and the
+ * CamelliaSubkey accessors index the caller's subkey buffer -- confirm
+ * against the macro definitions.
+ *
+ * The rotation notes below assume CAMELLIA_ROLDQ and CAMELLIA_ROLDQo32
+ * rotate the 128-bit value held in their first four arguments left, in
+ * place, by the given bit count, using w0/w1 as scratch -- confirm
+ * against the macro definitions.
+ */
+ PRUint32 kll, klr, krl, krr; /* left half of key */
+ PRUint32 krll, krlr, krrl, krrr; /* right half of key */
+ PRUint32 il, ir, t0, t1, w0, w1; /* temporary variables */
+ PRUint32 kw4l, kw4r, dw, tl, tr;
+ PRUint32 subL[34];
+ PRUint32 subR[34];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* not referenced directly here; presumably scratch for the macros -- confirm */
+#endif
+
+ /**
+ * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+ * (|| is concatenation)
+ * Each word is loaded with GETU32 from successive 4-byte offsets.
+ */
+
+ kll = GETU32(key);
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ krll = GETU32(key + 16);
+ krlr = GETU32(key + 20);
+ krrl = GETU32(key + 24);
+ krrr = GETU32(key + 28);
+
+ /* generate KL dependent subkeys
+ * (KL = kll..krr; entries 0/1 take it unrotated, the rest take it
+ * after the cumulative rotations noted on each macro call) */
+ subl(0) = kll;
+ subr(0) = klr;
+ subl(1) = krl;
+ subr(1) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45); /* cumulative: 45 */
+ subl(12) = kll;
+ subr(12) = klr;
+ subl(13) = krl;
+ subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* cumulative: 60 */
+ subl(16) = kll;
+ subr(16) = klr;
+ subl(17) = krl;
+ subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); /* cumulative: 77 */
+ subl(22) = kll;
+ subr(22) = klr;
+ subl(23) = krl;
+ subr(23) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); /* cumulative: 111 */
+ subl(30) = kll;
+ subr(30) = klr;
+ subl(31) = krl;
+ subr(31) = krr;
+
+ /* generate KR dependent subkeys
+ * (KR = krll..krrr) */
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); /* cumulative: 15 */
+ subl(4) = krll;
+ subr(4) = krlr;
+ subl(5) = krrl;
+ subr(5) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); /* cumulative: 30 */
+ subl(8) = krll;
+ subr(8) = krlr;
+ subl(9) = krrl;
+ subr(9) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* cumulative: 60 */
+ subl(18) = krll;
+ subr(18) = krlr;
+ subl(19) = krrl;
+ subr(19) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); /* cumulative: 94 */
+ subl(26) = krll;
+ subr(26) = krlr;
+ subl(27) = krrl;
+ subr(27) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); /* cumulative: 128, i.e. KR is back to its loaded value */
+
+ /* generate KA
+ * Starts from KL ^ KR: subl(0)/subr(0)/subl(1)/subr(1) still hold the
+ * unrotated KL, and krll..krrr hold KR again after the full rotation
+ * above. Four CAMELLIA_F applications keyed by the SIGMA1..SIGMA4
+ * constants follow; afterwards kll, klr, krl, krr hold KA. */
+ kll = subl(0) ^ krll;
+ klr = subr(0) ^ krlr;
+ krl = subl(1) ^ krrl;
+ krr = subr(1) ^ krrr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ kll ^= krll;
+ klr ^= krlr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0 ^ krrl;
+ krr ^= w1 ^ krrr;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0;
+ klr ^= w1;
+
+ /* generate KB
+ * Starts from KA ^ KR and applies two CAMELLIA_F steps keyed by the
+ * SIGMA5/SIGMA6 constants; afterwards krll, krlr, krrl, krrr hold KB. */
+ krll ^= kll;
+ krlr ^= klr;
+ krrl ^= krl;
+ krrr ^= krr;
+ CAMELLIA_F(krll, krlr,
+ CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+ w0, w1, il, ir, t0, t1);
+ krrl ^= w0;
+ krrr ^= w1;
+ CAMELLIA_F(krrl, krrr,
+ CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+ w0, w1, il, ir, t0, t1);
+ krll ^= w0;
+ krlr ^= w1;
+
+ /* generate KA dependent subkeys
+ * (KA = kll..krr) */
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); /* cumulative: 15 */
+ subl(6) = kll;
+ subr(6) = klr;
+ subl(7) = krl;
+ subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); /* cumulative: 45 */
+ subl(14) = kll;
+ subr(14) = klr;
+ subl(15) = krl;
+ subr(15) = krr;
+ subl(24) = klr; /* entries 24/25 take the same four words shifted by one word position, */
+ subr(24) = krl; /* which amounts to a further 32-bit left rotation (cumulative: 77) */
+ subl(25) = krr; /* without modifying kll..krr themselves */
+ subr(25) = kll;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); /* cumulative: 94 */
+ subl(28) = kll;
+ subr(28) = klr;
+ subl(29) = krl;
+ subr(29) = krr;
+
+ /* generate KB dependent subkeys
+ * (KB = krll..krrr; entries 2/3 take it unrotated) */
+ subl(2) = krll;
+ subr(2) = krlr;
+ subl(3) = krrl;
+ subr(3) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* cumulative: 30 */
+ subl(10) = krll;
+ subr(10) = krlr;
+ subl(11) = krrl;
+ subr(11) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); /* cumulative: 60 */
+ subl(20) = krll;
+ subr(20) = krlr;
+ subl(21) = krrl;
+ subr(21) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); /* cumulative: 111 */
+ subl(32) = krll;
+ subr(32) = krlr;
+ subl(33) = krrl;
+ subr(33) = krrr;
+
+ /* absorb kw2 to other subkeys
+ * Entry 1 is XORed into the odd entries 3..31 and finally into entry
+ * 32. Before crossing entries 9, 17 and 25 it is first transformed
+ * with those entries (AND-NOT, AND, CAMELLIA_RL1); presumably this
+ * carries the value through the FL-layer keys -- confirm. */
+ subl(3) ^= subl(1);
+ subr(3) ^= subr(1);
+ subl(5) ^= subl(1);
+ subr(5) ^= subr(1);
+ subl(7) ^= subl(1);
+ subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1);
+ subr(11) ^= subr(1);
+ subl(13) ^= subl(1);
+ subr(13) ^= subr(1);
+ subl(15) ^= subl(1);
+ subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1);
+ subr(19) ^= subr(1);
+ subl(21) ^= subl(1);
+ subr(21) ^= subr(1);
+ subl(23) ^= subl(1);
+ subr(23) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(25);
+ dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(27) ^= subl(1);
+ subr(27) ^= subr(1);
+ subl(29) ^= subl(1);
+ subr(29) ^= subr(1);
+ subl(31) ^= subl(1);
+ subr(31) ^= subr(1);
+ subl(32) ^= subl(1);
+ subr(32) ^= subr(1);
+
+ /* absorb kw4 to other subkeys
+ * Mirror image of the step above: a copy of entry 33 is XORed into
+ * the even entries 30 down to 0, being transformed with entries 24,
+ * 16 and 8 on the way down. */
+ kw4l = subl(33);
+ kw4r = subr(33);
+ subl(30) ^= kw4l;
+ subr(30) ^= kw4r;
+ subl(28) ^= kw4l;
+ subr(28) ^= kw4r;
+ subl(26) ^= kw4l;
+ subr(26) ^= kw4r;
+ kw4l ^= kw4r & ~subr(24);
+ dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw);
+ subl(22) ^= kw4l;
+ subr(22) ^= kw4r;
+ subl(20) ^= kw4l;
+ subr(20) ^= kw4r;
+ subl(18) ^= kw4l;
+ subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l;
+ subr(14) ^= kw4r;
+ subl(12) ^= kw4l;
+ subr(12) ^= kw4r;
+ subl(10) ^= kw4l;
+ subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l;
+ subr(6) ^= kw4r;
+ subl(4) ^= kw4l;
+ subr(4) ^= kw4r;
+ subl(2) ^= kw4l;
+ subr(2) ^= kw4r;
+ subl(0) ^= kw4l;
+ subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function
+ * Final table entries are written here. Most are the XOR of the two
+ * neighbouring intermediate entries (i-1 and i+1); entries 8, 9, 16,
+ * 17, 24 and 25 are copied unchanged, and their neighbours use the
+ * tl/tr values computed just before them. Entry 1 is not written. */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ tl = subl(26) ^ (subr(26) & ~subr(24));
+ dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(23) = subl(22) ^ tl;
+ CamelliaSubkeyR(23) = subr(22) ^ tr;
+ CamelliaSubkeyL(24) = subl(24);
+ CamelliaSubkeyR(24) = subr(24);
+ CamelliaSubkeyL(25) = subl(25);
+ CamelliaSubkeyR(25) = subr(25);
+ tl = subl(23) ^ (subr(23) & ~subr(25));
+ dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(26) = tl ^ subl(27);
+ CamelliaSubkeyR(26) = tr ^ subr(27);
+ CamelliaSubkeyL(27) = subl(26) ^ subl(28);
+ CamelliaSubkeyR(27) = subr(26) ^ subr(28);
+ CamelliaSubkeyL(28) = subl(27) ^ subl(29);
+ CamelliaSubkeyR(28) = subr(27) ^ subr(29);
+ CamelliaSubkeyL(29) = subl(28) ^ subl(30);
+ CamelliaSubkeyR(29) = subr(28) ^ subr(30);
+ CamelliaSubkeyL(30) = subl(29) ^ subl(31);
+ CamelliaSubkeyR(30) = subr(29) ^ subr(31);
+ CamelliaSubkeyL(31) = subl(30);
+ CamelliaSubkeyR(31) = subr(30);
+ CamelliaSubkeyL(32) = subl(32) ^ subl(31);
+ CamelliaSubkeyR(32) = subr(32) ^ subr(31);
+
+ /* apply the inverse of the last half of P-function
+ * For each of entries 2..7, 10..15, 18..23 and 26..31:
+ * dw = CAMELLIA_RL8(L ^ R); R = L ^ dw; L = dw.
+ * Entries 0, 8, 9, 16, 17, 24, 25 and 32 are left as stored above. */
+ dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+ dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+ dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+ dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+ dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+ dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+ dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+ dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+ dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+ dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+ dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+ dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+ dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+ dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+ dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+ dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+ dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+ dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+ dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw;
+ dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw;
+ dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw;
+ dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw;
+ dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
+ dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw, CamelliaSubkeyL(31) = dw;
+
+ return;
+}
+
+/**
+ * Expand a 192-bit Camellia key.
+ *
+ * The 24-byte key is widened to 32 bytes: bytes 24..31 of the widened
+ * key are the bitwise complement of key bytes 16..23. The widened key
+ * is then handed to camellia_setup256(), which fills subkey.
+ */
+void
+camellia_setup192(const unsigned char *key, PRUint32 *subkey)
+{
+    unsigned char widened[32];
+    unsigned int i;
+
+    /* first 24 bytes: the caller's key, unchanged */
+    memcpy(widened, key, 24);
+
+    /* last 8 bytes: complement of key bytes 16..23, byte by byte */
+    for (i = 0; i < 8; ++i) {
+        widened[24 + i] = (unsigned char)~key[16 + i];
+    }
+
+    camellia_setup256(widened, subkey);
+}
+
+/**
+ * Stuff related to camellia encryption/decryption
+ *
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt128(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Encrypt one 16-byte block with a 128-bit-key subkey table.
+ *
+ * subkey: table read through CamelliaSubkeyL()/CamelliaSubkeyR();
+ * entries 0 and 2..24 are used (presumably those macros
+ * index this parameter -- confirm against their definition).
+ * output: receives 16 bytes through four PUTU32 stores.
+ * input: 16 bytes are read through four GETU32 loads.
+ *
+ * Structure: XOR with entry 0, then three groups of six
+ * CAMELLIA_ROUNDSM rounds separated by two CAMELLIA_FLS layers, then
+ * XOR with entry 24 and a swap of the two 64-bit halves.
+ * Always returns SECSuccess.
+ */
+ PRUint32 il, ir, t0, t1; /* scratch words handed to the round macros */
+ PRUint32 io[4]; /* the block as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* not referenced directly here; presumably scratch for the macros -- confirm */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening but absorb kw2: only the first half (io[0], io[1]) is XORed, with entry 0 */
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+ /* main iteration: rounds alternate between the two halves */
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening but kw4: only the second half (io[2], io[3]) is XORed, with entry 24 */
+ io[2] ^= CamelliaSubkeyL(24);
+ io[3] ^= CamelliaSubkeyR(24);
+
+ t0 = io[0]; /* swap the two 64-bit halves before output */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt128(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Decrypt one 16-byte block with a 128-bit-key subkey table.
+ *
+ * Same structure as camellia_encrypt128, with the subkey entries
+ * consumed in the opposite order: XOR with entry 24, rounds 23 down
+ * to 2 (with the CAMELLIA_FLS layers taking entries 17/16 and then
+ * 9/8), XOR with entry 0, and a swap of the two 64-bit halves.
+ *
+ * subkey: table read through CamelliaSubkeyL()/CamelliaSubkeyR()
+ * (presumably indexing this parameter -- confirm).
+ * output: receives 16 bytes through four PUTU32 stores.
+ * input: 16 bytes are read through four GETU32 loads.
+ * Always returns SECSuccess.
+ */
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4]; /* the block as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* not referenced directly here; presumably scratch for the macros -- confirm */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening: XOR the first half with entry 24, the entry the encryptor applies last */
+ io[0] ^= CamelliaSubkeyL(24);
+ io[1] ^= CamelliaSubkeyR(24);
+
+ /* main iteration: round entries in descending order */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening: XOR the second half with entry 0, the entry the encryptor applies first */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ t0 = io[0]; /* swap the two 64-bit halves before output */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+/**
+ * stuff for 192 and 256bit encryption/decryption
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt256(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Encrypt one 16-byte block with the longer subkey table
+ * (camellia_encryptECB/CBC select this function whenever the key
+ * size is not 16 bytes).
+ *
+ * subkey: table read through CamelliaSubkeyL()/CamelliaSubkeyR();
+ * entries 0 and 2..32 are used (presumably those macros
+ * index this parameter -- confirm against their definition).
+ * output: receives 16 bytes through four PUTU32 stores.
+ * input: 16 bytes are read through four GETU32 loads.
+ *
+ * Structure: XOR with entry 0, then four groups of six
+ * CAMELLIA_ROUNDSM rounds separated by three CAMELLIA_FLS layers,
+ * then XOR with entry 32 and a swap of the two 64-bit halves.
+ * Always returns SECSuccess.
+ */
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4]; /* the block as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* not referenced directly here; presumably scratch for the macros -- confirm */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening but absorb kw2: only the first half (io[0], io[1]) is XORed, with entry 0 */
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+
+ /* main iteration: rounds alternate between the two halves */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+ CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening but kw4: only the second half (io[2], io[3]) is XORed, with entry 32 */
+ io[2] ^= CamelliaSubkeyL(32);
+ io[3] ^= CamelliaSubkeyR(32);
+
+ t0 = io[0]; /* swap the two 64-bit halves before output */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt256(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Decrypt one 16-byte block with the longer subkey table.
+ *
+ * Same structure as camellia_encrypt256, with the subkey entries
+ * consumed in the opposite order: XOR with entry 32, rounds 31 down
+ * to 2 (with the CAMELLIA_FLS layers taking entries 25/24, 17/16 and
+ * then 9/8), XOR with entry 0, and a swap of the two 64-bit halves.
+ *
+ * subkey: table read through CamelliaSubkeyL()/CamelliaSubkeyR()
+ * (presumably indexing this parameter -- confirm).
+ * output: receives 16 bytes through four PUTU32 stores.
+ * input: 16 bytes are read through four GETU32 loads.
+ * Always returns SECSuccess.
+ */
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4]; /* the block as four 32-bit words */
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* not referenced directly here; presumably scratch for the macros -- confirm */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening: XOR the first half with entry 32, the entry the encryptor applies last */
+ io[0] ^= CamelliaSubkeyL(32);
+ io[1] ^= CamelliaSubkeyR(32);
+
+ /* main iteration: round entries in descending order */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+ CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening: XOR the second half with entry 0, the entry the encryptor applies first */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ t0 = io[0]; /* swap the two 64-bit halves before output */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Stuff related to the Camellia key schedule
+ *
+ *************************************************************************/
+
+/**
+ * Expand a raw Camellia key into cx->expandedKey and record its size.
+ *
+ * cx:      context whose expandedKey and keysize fields are filled in.
+ * key:     raw key bytes; keysize bytes are consumed.
+ * keysize: key length in bytes; 16, 24 and 32 are supported.
+ *
+ * Returns SECSuccess after a successful expansion. Returns SECFailure,
+ * leaving cx untouched, for any other keysize, so that a context with
+ * an unexpanded key schedule is never reported as ready. No error code
+ * is set here.
+ */
+SECStatus
+camellia_key_expansion(CamelliaContext *cx,
+                       const unsigned char *key,
+                       const unsigned int keysize)
+{
+    switch (keysize) {
+        case 16:
+            camellia_setup128(key, cx->expandedKey);
+            break;
+        case 24:
+            camellia_setup192(key, cx->expandedKey);
+            break;
+        case 32:
+            camellia_setup256(key, cx->expandedKey);
+            break;
+        default:
+            /* unsupported key length: nothing was expanded */
+            return SECFailure;
+    }
+
+    /* only record the size once the schedule really exists */
+    cx->keysize = keysize;
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Camellia modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+/*
+ * camellia_encryptECB
+ *
+ * ECB-mode encryption: every CAMELLIA_BLOCK_SIZE-byte block of input is
+ * encrypted independently into the matching position of output.  A 16-byte
+ * key selects the 128-bit block routine; every other key length is routed
+ * to the 256-bit routine.  outputLen and maxOutputLen are not used here.
+ * Always returns SECSuccess.
+ */
+SECStatus
+camellia_encryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    CamelliaBlockFunc *blockCipher;
+    unsigned int remaining;
+
+    if (cx->keysize == 16) {
+        blockCipher = &camellia_encrypt128;
+    } else {
+        blockCipher = &camellia_encrypt256;
+    }
+
+    for (remaining = inputLen; remaining > 0;
+         remaining -= CAMELLIA_BLOCK_SIZE) {
+        (*blockCipher)(cx->expandedKey, output, input);
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * camellia_encryptCBC
+ *
+ * CBC-mode encryption of inputLen bytes.  Each plaintext block is XORed
+ * with the preceding ciphertext block (cx->iv for the first one) before it
+ * goes through the block cipher.  The last ciphertext block produced is
+ * copied back into cx->iv so that a later call continues the chain.
+ * outputLen and maxOutputLen are not used here.  Always returns SECSuccess;
+ * a zero-length input leaves cx->iv unchanged.
+ */
+SECStatus
+camellia_encryptCBC(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    unsigned char xored[CAMELLIA_BLOCK_SIZE];
+    const unsigned char *chain;
+    CamelliaBlockFunc *blockCipher;
+    unsigned int remaining;
+    unsigned int i;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    if (cx->keysize == 16) {
+        blockCipher = &camellia_encrypt128;
+    } else {
+        blockCipher = &camellia_encrypt256;
+    }
+
+    chain = cx->iv;
+    for (remaining = inputLen; remaining > 0;
+         remaining -= CAMELLIA_BLOCK_SIZE) {
+        /* Mix in the previous ciphertext block (the IV on the first pass). */
+        for (i = 0; i < CAMELLIA_BLOCK_SIZE; i++) {
+            xored[i] = input[i] ^ chain[i];
+        }
+        (*blockCipher)(cx->expandedKey, output, xored);
+
+        /* The block just written becomes the chaining value. */
+        chain = output;
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    memcpy(cx->iv, chain, CAMELLIA_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/*
+ * camellia_decryptECB
+ *
+ * ECB-mode decryption: every CAMELLIA_BLOCK_SIZE-byte block of input is
+ * decrypted independently into the matching position of output.  A 16-byte
+ * key selects the 128-bit block routine; every other key length is routed
+ * to the 256-bit routine.  outputLen and maxOutputLen are not used here.
+ * Always returns SECSuccess.
+ */
+SECStatus
+camellia_decryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    CamelliaBlockFunc *blockCipher;
+    unsigned int remaining;
+
+    if (cx->keysize == 16) {
+        blockCipher = &camellia_decrypt128;
+    } else {
+        blockCipher = &camellia_decrypt256;
+    }
+
+    for (remaining = inputLen; remaining > 0;
+         remaining -= CAMELLIA_BLOCK_SIZE) {
+        (*blockCipher)(cx->expandedKey, output, input);
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * camellia_decryptCBC
+ *
+ * CBC-mode decryption of inputLen bytes.  The buffer is processed from the
+ * last block towards the first: each block is decrypted and then XORed with
+ * the ciphertext block that precedes it in input, and the first block is
+ * XORed with cx->iv.  Walking backwards means that when output and input
+ * are the same buffer, the preceding ciphertext block has not been
+ * overwritten yet at the moment it is needed.  The last ciphertext block is
+ * captured up front and becomes the new cx->iv.  outputLen and maxOutputLen
+ * are not used here.  Always returns SECSuccess.
+ */
+SECStatus
+camellia_decryptCBC(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    unsigned char savedIV[CAMELLIA_BLOCK_SIZE];
+    const unsigned char *cipherBlock;
+    const unsigned char *prevCipher;
+    unsigned char *plainBlock;
+    CamelliaBlockFunc *blockCipher;
+    unsigned int i;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
+
+    /* Start at the final block of both buffers. */
+    cipherBlock = input + (inputLen - CAMELLIA_BLOCK_SIZE);
+    plainBlock = output + (inputLen - CAMELLIA_BLOCK_SIZE);
+
+    /* Keep the last ciphertext block before anything can overwrite it. */
+    memcpy(savedIV, cipherBlock, CAMELLIA_BLOCK_SIZE);
+
+    if (cx->keysize == 16) {
+        blockCipher = &camellia_decrypt128;
+    } else {
+        blockCipher = &camellia_decrypt256;
+    }
+
+    /* Every block except the first chains to the ciphertext before it. */
+    for (; inputLen > CAMELLIA_BLOCK_SIZE; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        (*blockCipher)(cx->expandedKey, plainBlock, cipherBlock);
+
+        prevCipher = cipherBlock - CAMELLIA_BLOCK_SIZE;
+        for (i = 0; i < CAMELLIA_BLOCK_SIZE; i++) {
+            plainBlock[i] ^= prevCipher[i];
+        }
+
+        plainBlock -= CAMELLIA_BLOCK_SIZE;
+        cipherBlock -= CAMELLIA_BLOCK_SIZE;
+    }
+
+    /* The first block chains to the IV held in the context. */
+    if (cipherBlock == input) {
+        (*blockCipher)(cx->expandedKey, plainBlock, cipherBlock);
+
+        for (i = 0; i < CAMELLIA_BLOCK_SIZE; i++) {
+            plainBlock[i] ^= cx->iv[i];
+        }
+    }
+
+    memcpy(cx->iv, savedIV, CAMELLIA_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ *************************************************************************/
+
+/*
+ * Camellia_AllocateContext
+ *
+ * Allocate a CamelliaContext with PORT_ZNew and hand it back as is; no key,
+ * IV or worker is installed here.  The result is whatever PORT_ZNew
+ * returned, so callers should be prepared for NULL.
+ */
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+    CamelliaContext *cx = PORT_ZNew(CamelliaContext);
+
+    return cx;
+}
+
+/*
+ * Camellia_InitContext
+ *
+ * Prepare an already allocated context for use.
+ *
+ * cx      - context to initialize; must not be NULL
+ * key     - raw key; must not be NULL
+ * keysize - key length in bytes: 16, 24 or 32
+ * iv      - CAMELLIA_BLOCK_SIZE-byte IV; required for NSS_CAMELLIA_CBC
+ * mode    - NSS_CAMELLIA (ECB) or NSS_CAMELLIA_CBC
+ * encrypt - non-zero selects the encrypt worker, zero the decrypt worker
+ * unused  - ignored
+ *
+ * Any argument violation sets SEC_ERROR_INVALID_ARGS and returns
+ * SECFailure.  Otherwise the IV (CBC only) and worker are installed, the
+ * key is expanded, and the key schedule's status decides the result.
+ */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+                     unsigned int keysize,
+                     const unsigned char *iv, int mode, unsigned int encrypt,
+                     unsigned int unused)
+{
+    const int keysizeOk = (keysize == 16 || keysize == 24 || keysize == 32);
+    const int modeOk = (mode == NSS_CAMELLIA || mode == NSS_CAMELLIA_CBC);
+
+    if (key == NULL || !keysizeOk || !modeOk ||
+        (mode == NSS_CAMELLIA_CBC && iv == NULL) || cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode == NSS_CAMELLIA_CBC) {
+        memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE);
+        if (encrypt) {
+            cx->worker = &camellia_encryptCBC;
+        } else {
+            cx->worker = &camellia_decryptCBC;
+        }
+    } else if (encrypt) {
+        cx->worker = &camellia_encryptECB;
+    } else {
+        cx->worker = &camellia_decryptECB;
+    }
+
+    /* Generate the expanded key; its status is the overall result. */
+    if (camellia_key_expansion(cx, key, keysize) != SECSuccess) {
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Camellia_CreateContext
+ *
+ * Allocate and fully initialize a context for Camellia operations.
+ *
+ * key     - raw key; must not be NULL
+ * iv      - CAMELLIA_BLOCK_SIZE-byte IV; required for NSS_CAMELLIA_CBC
+ * mode    - NSS_CAMELLIA (ECB) or NSS_CAMELLIA_CBC
+ * encrypt - non-zero selects the encrypt worker, zero the decrypt worker
+ * keysize - key length in bytes: 16, 24 or 32
+ *
+ * Returns the new context, or NULL with SEC_ERROR_INVALID_ARGS set for bad
+ * arguments, SEC_ERROR_NO_MEMORY set when allocation fails, or after the
+ * context has been wiped and released because key expansion failed.
+ */
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt,
+                       unsigned int keysize)
+{
+    const int keysizeOk = (keysize == 16 || keysize == 24 || keysize == 32);
+    const int modeOk = (mode == NSS_CAMELLIA || mode == NSS_CAMELLIA_CBC);
+    CamelliaContext *cx;
+
+    if (key == NULL || !keysizeOk || !modeOk ||
+        (mode == NSS_CAMELLIA_CBC && iv == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    cx = PORT_ZNew(CamelliaContext);
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    /* Only CBC needs the IV; both modes need a worker. */
+    if (mode == NSS_CAMELLIA_CBC) {
+        memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE);
+        if (encrypt) {
+            cx->worker = &camellia_encryptCBC;
+        } else {
+            cx->worker = &camellia_decryptCBC;
+        }
+    } else if (encrypt) {
+        cx->worker = &camellia_encryptECB;
+    } else {
+        cx->worker = &camellia_decryptECB;
+    }
+
+    cx->keysize = keysize;
+
+    /* Generate the expanded key; on failure wipe and release the context. */
+    if (camellia_key_expansion(cx, key, keysize) != SECSuccess) {
+        PORT_ZFree(cx, sizeof *cx);
+        return NULL;
+    }
+    return cx;
+}
+
+/*
+ * Camellia_DestroyContext
+ *
+ * Wipe a Camellia cipher context by overwriting it with zero bytes.  When
+ * freeit is true the context pointer is additionally passed to PORT_Free.
+ * A NULL cx skips the wipe.
+ */
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+    if (cx != NULL) {
+        memset(cx, 0, sizeof(*cx));
+    }
+
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Camellia_Encrypt
+ *
+ * Encrypt a buffer whose length is a multiple of CAMELLIA_BLOCK_SIZE using
+ * the worker installed in the context.  The output buffer must hold at
+ * least inputLen bytes.
+ *
+ * Failure cases (each returns SECFailure):
+ *   SEC_ERROR_INVALID_ARGS - cx, output, input or outputLen is NULL
+ *   SEC_ERROR_INPUT_LEN    - inputLen is not a whole number of blocks
+ *   SEC_ERROR_OUTPUT_LEN   - maxOutputLen is smaller than inputLen
+ *
+ * Otherwise *outputLen is set to inputLen and the worker's status is
+ * returned.
+ */
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (!cx || !output || !input || !outputLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return cx->worker(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
+
+/*
+ * Camellia_Decrypt
+ *
+ * Decrypt a buffer whose length is a multiple of CAMELLIA_BLOCK_SIZE using
+ * the worker installed in the context.  The output buffer must hold at
+ * least inputLen bytes.
+ *
+ * Failure cases (each returns SECFailure):
+ *   SEC_ERROR_INVALID_ARGS - cx, output, input or outputLen is NULL
+ *   SEC_ERROR_INPUT_LEN    - inputLen is not a whole number of blocks
+ *   SEC_ERROR_OUTPUT_LEN   - maxOutputLen is smaller than inputLen
+ *
+ * Otherwise *outputLen is set to inputLen and the worker's status is
+ * returned.
+ */
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (!cx || !output || !input || !outputLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return cx->worker(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
diff --git a/security/nss/lib/freebl/camellia.h b/security/nss/lib/freebl/camellia.h
new file mode 100644
index 0000000000..15114db9a6
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.h
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CAMELLIA_H_
+#define _CAMELLIA_H_ 1
+
+#define CAMELLIA_BLOCK_SIZE 16 /* bytes per cipher block */
+#define CAMELLIA_MIN_KEYSIZE 16 /* bytes */
+#define CAMELLIA_MAX_KEYSIZE 32 /* bytes */
+
+#define CAMELLIA_MAX_EXPANDEDKEY (34 * 2) /* expanded-key length, in 32-bit words */
+
+typedef PRUint32 KEY_TABLE_TYPE[CAMELLIA_MAX_EXPANDEDKEY]; /* array type sized for one expanded key */
+
+typedef SECStatus CamelliaFunc(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen); /* whole-buffer mode worker; the type of CamelliaContextStr.worker */
+
+typedef SECStatus CamelliaBlockFunc(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input); /* single-block primitive driven by the expanded subkeys */
+
+/* CamelliaContextStr
+ *
+ * Values which maintain the state for Camellia encryption/decryption.
+ *
+ * keysize - the key length in bytes (not bits)
+ * worker - the encryption/decryption function to use with this context
+ * iv - initialization vector for CBC mode, one cipher block long
+ * expandedKey - the round keys in 4-byte words
+ */
+struct CamelliaContextStr {
+ PRUint32 keysize; /* bytes */
+ CamelliaFunc *worker;
+ PRUint32 expandedKey[CAMELLIA_MAX_EXPANDEDKEY];
+ PRUint8 iv[CAMELLIA_BLOCK_SIZE];
+};
+
+#endif /* _CAMELLIA_H_ */
diff --git a/security/nss/lib/freebl/chacha20-ppc64le.S b/security/nss/lib/freebl/chacha20-ppc64le.S
new file mode 100644
index 0000000000..487ff830a5
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20-ppc64le.S
@@ -0,0 +1,668 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# vs0 - vs15 : buffer for xor
+# vs32 - vs47 (v0 - v15) : 4 "converted" states
+# vs48 - vs51 (v16 - v19) : original state
+# vs52 - vs55 (v20 - v23) : "converted" constants
+# vs56 (v24) : "converted" counter
+# vs57 (v25) : increment for "converted" counter
+# vs60 - vs63 (v28 - v31) : constants for rotate left or vpermxor
+
+#define r0 0
+#define sp 1
+#define r2 2
+#define rSIZE 3
+#define rDST 4
+#define rSRC 5
+#define rKEY 6
+#define rNONCE 7
+#define rCNTR 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define r16 16
+#define r17 17
+#define r18 18
+#define r19 19
+#define r20 20
+#define r21 21
+#define r22 22
+#define r23 23
+#define r24 24
+#define r25 25
+#define r26 26
+#define r27 27
+#define r28 28
+#define r29 29
+#define r30 30
+#define r31 31
+
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
+
+#define vs0 0
+#define vs1 1
+#define vs2 2
+#define vs3 3
+#define vs4 4
+#define vs5 5
+#define vs6 6
+#define vs7 7
+#define vs8 8
+#define vs9 9
+#define vs10 10
+#define vs11 11
+#define vs12 12
+#define vs13 13
+#define vs14 14
+#define vs15 15
+#define vs16 16
+#define vs17 17
+#define vs18 18
+#define vs19 19
+#define vs20 20
+#define vs21 21
+#define vs22 22
+#define vs23 23
+#define vs24 24
+#define vs25 25
+#define vs26 26
+#define vs27 27
+#define vs28 28
+#define vs29 29
+#define vs30 30
+#define vs31 31
+#define vs32 32
+#define vs33 33
+#define vs34 34
+#define vs35 35
+#define vs36 36
+#define vs37 37
+#define vs38 38
+#define vs39 39
+#define vs40 40
+#define vs41 41
+#define vs42 42
+#define vs43 43
+#define vs44 44
+#define vs45 45
+#define vs46 46
+#define vs47 47
+#define vs48 48
+#define vs49 49
+#define vs50 50
+#define vs51 51
+#define vs52 52
+#define vs53 53
+#define vs54 54
+#define vs55 55
+#define vs56 56
+#define vs57 57
+#define vs58 58
+#define vs59 59
+#define vs60 60
+#define vs61 61
+#define vs62 62
+#define vs63 63
+
+.abiversion 2
+.section ".data" # NOTE(review): st4, cntr and lblock below are written on every call, so concurrent callers would appear to share this state - confirm callers serialize
+.align 5
+lblock: .skip 256 # scratch for the keystream of the final partial chunk; addressed as cnts0 - 256
+cnts0: .long 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 # offset 0 from cnts0: the four constant words (state row 0), loaded into vs48
+cnts1: .long 0x61707865, 0x61707865, 0x61707865, 0x61707865 # offset 16: constant word 0 in every lane, loaded into vs52
+cnts2: .long 0x3320646e, 0x3320646e, 0x3320646e, 0x3320646e # offset 32: constant word 1 in every lane, loaded into vs53
+cnts3: .long 0x79622d32, 0x79622d32, 0x79622d32, 0x79622d32 # offset 48: constant word 2 in every lane, loaded into vs54
+cnts4: .long 0x6b206574, 0x6b206574, 0x6b206574, 0x6b206574 # offset 64: constant word 3 in every lane, loaded into vs55
+st4: .long 0, 0, 0, 0 # offset 80: words 1..3 receive the nonce at run time; loaded into vs51
+cntr: .long 0, 0, 0, 0 # offset 96: receives counter, counter+1, counter+2, counter+3; loaded into vs56
+incr: .long 4, 4, 4, 4 # offset 112: per-iteration counter step, loaded into vs57
+rotl1: .long 0x22330011, 0x66774455, 0xAABB8899, 0xEEFFCCDD # offset 128: vpermxor selector for the xor + rotate-left-16 step (vs60)
+rotl2: .long 12, 12, 12, 12 # offset 144: vrlw amount for the rotate-left-12 step (vs61)
+rotl3: .long 0x11223300, 0x55667744, 0x99AABB88, 0xDDEEFFCC # offset 160: vpermxor selector for the xor + rotate-left-8 step (vs62)
+rotl4: .long 7, 7, 7, 7 # offset 176: vrlw amount for the rotate-left-7 step (vs63)
+
+.section ".text"
+.align 5
+.globl chacha20vsx
+.type chacha20vsx, @function
+chacha20vsx: # arguments by register: r3 = size, r4 = dst, r5 = src, r6 = key, r7 = nonce, r8 = counter (the rSIZE..rCNTR aliases)
+ # prologue: derive the TOC pointer from r12, then save r14-r31 in slots below sp (sp itself is never adjusted)
+ addis 2, r12, .TOC.-chacha20vsx@ha
+ addi 2, 2, .TOC.-chacha20vsx@l
+ .localentry chacha20vsx, .-chacha20vsx
+ std r14, -8(sp)
+ std r15, -16(sp)
+ std r16, -24(sp)
+ std r17, -32(sp)
+ std r18, -40(sp)
+ std r19, -48(sp)
+ std r20, -56(sp)
+ std r21, -64(sp)
+ std r22, -72(sp)
+ std r23, -80(sp)
+ std r24, -88(sp)
+ std r25, -96(sp)
+ std r26, -104(sp)
+ std r27, -112(sp)
+ std r28, -120(sp)
+ std r29, -128(sp)
+ std r30, -136(sp)
+ std r31, -144(sp)
+
+ addi r14, sp, -160 # r14 = top slot of the vector save area; slots run from sp-160 down to sp-368 (NOTE(review): no frame is allocated - confirm this stays inside the ABI protected zone)
+
+ li r16, -16 # r16..r28 = offsets of the successive 16-byte save slots relative to r14
+ li r17, -32
+ li r18, -48
+ li r19, -64
+ li r20, -80
+ li r21, -96
+ li r22, -112
+ li r23, -128
+ li r24, -144
+ li r25, -160
+ li r26, -176
+ li r27, -192
+ li r28, -208
+
+ # save f14, f15 (vs14 and vs15 are reused below as xor buffers)
+ stxvw4x vs14, 0, r14
+ stxvw4x vs15, r16, r14
+
+ # save v20 - v31 (vs52 - vs63)
+ stxvw4x vs52, r17, r14
+ stxvw4x vs53, r18, r14
+ stxvw4x vs54, r19, r14
+ stxvw4x vs55, r20, r14
+ stxvw4x vs56, r21, r14
+ stxvw4x vs57, r22, r14
+ stxvw4x vs58, r23, r14
+ stxvw4x vs59, r24, r14
+ stxvw4x vs60, r25, r14
+ stxvw4x vs61, r26, r14
+ stxvw4x vs62, r27, r14
+ stxvw4x vs63, r28, r14
+
+ # offset in src/dst: r17..r31 = 16, 32, ..., 240 (also reused as offsets into the constant table)
+ li r17, 16
+ li r18, 32
+ li r19, 48
+ li r20, 64
+ li r21, 80
+ li r22, 96
+ li r23, 112
+ li r24, 128
+ li r25, 144
+ li r26, 160
+ li r27, 176
+ li r28, 192
+ li r29, 208
+ li r30, 224
+ li r31, 240
+
+ # load address of the constant table (cnts0) through the TOC
+ addis r14, 2, cnts0@toc@ha
+ addi r14, r14, cnts0@toc@l
+
+ # save nonce to st4 words 1..3 (offsets 84, 88, 92 from cnts0); word 0 of st4 is left as is
+ lwz r15, 0(rNONCE)
+ stw r15, 84(r14)
+ lwz r15, 4(rNONCE)
+ stw r15, 88(r14)
+ lwz r15, 8(rNONCE)
+ stw r15, 92(r14)
+
+ # load state to vectors: v16 = constants, v17/v18 = the two key halves, v19 = st4
+ lxvw4x vs48, 0, r14
+ lxvw4x vs49, 0, rKEY
+ lxvw4x vs50, r17, rKEY
+ lxvw4x vs51, r21, r14
+
+ # load consts for x4 rounds: v20..v23 = cnts1..cnts4
+ lxvw4x vs52, r17, r14
+ lxvw4x vs53, r18, r14
+ lxvw4x vs54, r19, r14
+ lxvw4x vs55, r20, r14
+
+ # counter: write counter, +1, +2, +3 into cntr (offsets 96..108) so each lane gets its own block counter
+ stw rCNTR, 96(r14)
+ addi rCNTR, rCNTR, 1
+ stw rCNTR, 100(r14)
+ addi rCNTR, rCNTR, 1
+ stw rCNTR, 104(r14)
+ addi rCNTR, rCNTR, 1
+ stw rCNTR, 108(r14)
+ lxvw4x vs56, r22, r14
+
+ # load increment (incr, offset 112) into v25
+ lxvw4x vs57, r23, r14
+
+ # load rotl to vectors: v28..v31 = rotl1..rotl4
+ lxvw4x vs60, r24, r14
+ lxvw4x vs61, r25, r14
+ lxvw4x vs62, r26, r14
+ lxvw4x vs63, r27, r14
+
+ # counter for loop = size/256
+ li r15, 256
+ divdu. r16, rSIZE, r15 # the dot form sets CR0 from the quotient
+ beq lastblock # quotient zero: fewer than 256 bytes, skip the main loop
+ mtctr r16
+
+mainloop:
+ # init 16 vectors (4 states x4): v0..v15 each hold one state word, with one lane per block
+ vor v0, v20, v20 # v0..v3 = constant words copied from v20..v23
+ vor v1, v21, v21
+ vor v2, v22, v22
+ vor v3, v23, v23
+ vspltw v4, v17, v0 # v4..v7 = words 0..3 of the first key half (v17), one word splatted per register
+ vspltw v5, v17, v1
+ vspltw v6, v17, v2
+ vspltw v7, v17, v3
+ vspltw v8, v18, v0 # v8..v11 = words 0..3 of the second key half (v18)
+ vspltw v9, v18, v1
+ vspltw v10, v18, v2
+ vspltw v11, v18, v3
+ vor v12, v24, v24 # v12 = the four per-lane block counters
+ vspltw v13, v19, v1 # v13..v15 = nonce words taken from words 1..3 of st4 (v19)
+ vspltw v14, v19, v2
+ vspltw v15, v19, v3
+
+.macro _plus a b_y b_x
+ vadduwm \a, \a, \b_y*4+(\b_x)%4 # for k = 0..3: register a+k += register b_y*4 + (b_x+k)%4, so b_y picks a row of four registers and b_x rotates its columns
+ vadduwm \a+1, \a+1, \b_y*4+(\b_x+1)%4
+ vadduwm \a+2, \a+2, \b_y*4+(\b_x+2)%4
+ vadduwm \a+3, \a+3, \b_y*4+(\b_x+3)%4
+.endm
+
+.macro _xor a b_y b_x
+ vxor \a, \a, \b_y*4+(\b_x)%4 # same register selection as _plus, combining with xor
+ vxor \a+1, \a+1, \b_y*4+(\b_x+1)%4
+ vxor \a+2, \a+2, \b_y*4+(\b_x+2)%4
+ vxor \a+3, \a+3, \b_y*4+(\b_x+3)%4
+.endm
+
+.macro _rotl a b
+ vrlw \a, \a, \b # rotate each word of the four registers starting at a left by the per-word amounts held in register b
+ vrlw \a+1, \a+1, \b
+ vrlw \a+2, \a+2, \b
+ vrlw \a+3, \a+3, \b
+.endm
+
+.macro _pxor a b_y b_x c
+ vpermxor \a, \a, \b_y*4+(\b_x)%4, \c # same register selection as _plus; vpermxor with selector c is used here as a fused xor plus rotate
+ vpermxor \a+1, \a+1, \b_y*4+(\b_x+1)%4, \c
+ vpermxor \a+2, \a+2, \b_y*4+(\b_x+2)%4, \c
+ vpermxor \a+3, \a+3, \b_y*4+(\b_x+3)%4, \c
+.endm
+
+# 00 01 02 03
+# 04 05 06 07
+# 08 09 10 11
+# 12 13 14 15
+.macro doubleround
+ # column round: the third argument v0 is column rotation 0, so each row pairs with the same column
+ _plus v0, v1, v0 # a+=b
+ _pxor v12, v0, v0, v28 # d^=a; d<<<=16
+ _plus v8, v3, v0 # c+=d
+ _xor v4, v2, v0 # b^=c
+ _rotl v4, v29 # b<<<=12
+ _plus v0, v1, v0 # a+=b
+ _pxor v12, v0, v0, v30 # d^=a; d<<<=8
+ _plus v8, v3, v0 # c+=d
+ _xor v4, v2, v0 # b^=c
+ _rotl v4, v31 # b<<<=7
+
+ # diagonal round: the third argument v1 is column rotation 1, so each row pairs with the next column
+ _plus v0, v1, v1 # a+=b
+ _pxor v12, v0, v1, v28 # d^=a; d<<<=16
+ _plus v8, v3, v1 # c+=d
+ _xor v4, v2, v1 # b^=c
+ _rotl v4, v29 # b<<<=12
+ _plus v0, v1, v1 # a+=b
+ _pxor v12, v0, v1, v30 # d^=a; d<<<=8
+ _plus v8, v3, v1 # c+=d
+ _xor v4, v2, v1 # b^=c
+ _rotl v4, v31 # b<<<=7
+.endm
+
+ doubleround # 1
+ doubleround # 2
+ doubleround # 3
+ doubleround # 4
+ doubleround # 5
+ doubleround # 6
+ doubleround # 7
+ doubleround # 8
+ doubleround # 9
+ doubleround # 10
+
+ # counter += original counter (v24 holds the per-lane counters this iteration started from)
+ vadduwm v12, v12, v24
+
+.macro convert a
+ vmrgew 26, 0+\a, 1+\a # merges words, then doublewords, across registers a..a+3 (a 4x4 word transpose); v26/v27 (vs58/vs59) are scratch
+ vmrgew 27, 2+\a, 3+\a
+ vmrgow 0+\a, 0+\a, 1+\a
+ vmrgow 2+\a, 2+\a, 3+\a
+ xxmrghd 33+\a, 32+\a, 34+\a
+ xxmrgld 35+\a, 32+\a, 34+\a
+ xxmrghd 32+\a, 58, 59
+ xxmrgld 34+\a, 58, 59
+.endm
+
+ convert 0
+ convert 4
+ convert 8
+ convert 12
+
+.macro addition a
+ vadduwm 0+\a, 0+\a, 16 # adds the saved input state rows v16..v19 to registers a, a+4, a+8 and a+12
+ vadduwm 4+\a, 4+\a, 17
+ vadduwm 8+\a, 8+\a, 18
+ vadduwm 12+\a, 12+\a, 19
+.endm
+
+ addition 0
+ addition 1
+ addition 2
+ addition 3
+
+ # load text/cipher: 256 bytes from src into vs0..vs15
+ lxvw4x vs0, 0, rSRC
+ lxvw4x vs1, r17, rSRC
+ lxvw4x vs2, r18, rSRC
+ lxvw4x vs3, r19, rSRC
+ lxvw4x vs4, r20, rSRC
+ lxvw4x vs5, r21, rSRC
+ lxvw4x vs6, r22, rSRC
+ lxvw4x vs7, r23, rSRC
+ lxvw4x vs8, r24, rSRC
+ lxvw4x vs9, r25, rSRC
+ lxvw4x vs10, r26, rSRC
+ lxvw4x vs11, r27, rSRC
+ lxvw4x vs12, r28, rSRC
+ lxvw4x vs13, r29, rSRC
+ lxvw4x vs14, r30, rSRC
+ lxvw4x vs15, r31, rSRC
+ # xor (encrypt/decrypt): keystream registers are taken in the order v0, v4, v8, v12, then v1, v5, v9, v13, and so on
+ xxlxor vs0, vs0, vs32
+ xxlxor vs1, vs1, vs36
+ xxlxor vs2, vs2, vs40
+ xxlxor vs3, vs3, vs44
+ xxlxor vs4, vs4, vs33
+ xxlxor vs5, vs5, vs37
+ xxlxor vs6, vs6, vs41
+ xxlxor vs7, vs7, vs45
+ xxlxor vs8, vs8, vs34
+ xxlxor vs9, vs9, vs38
+ xxlxor vs10, vs10, vs42
+ xxlxor vs11, vs11, vs46
+ xxlxor vs12, vs12, vs35
+ xxlxor vs13, vs13, vs39
+ xxlxor vs14, vs14, vs43
+ xxlxor vs15, vs15, vs47
+ # store cipher/text: 256 bytes to dst
+ stxvw4x vs0, 0, rDST
+ stxvw4x vs1, r17, rDST
+ stxvw4x vs2, r18, rDST
+ stxvw4x vs3, r19, rDST
+ stxvw4x vs4, r20, rDST
+ stxvw4x vs5, r21, rDST
+ stxvw4x vs6, r22, rDST
+ stxvw4x vs7, r23, rDST
+ stxvw4x vs8, r24, rDST
+ stxvw4x vs9, r25, rDST
+ stxvw4x vs10, r26, rDST
+ stxvw4x vs11, r27, rDST
+ stxvw4x vs12, r28, rDST
+ stxvw4x vs13, r29, rDST
+ stxvw4x vs14, r30, rDST
+ stxvw4x vs15, r31, rDST
+
+ # src/dst increment
+ addi rSRC, rSRC, 256
+ addi rDST, rDST, 256
+
+ # counter increment: every lane advances by 4 (v25 = incr)
+ vadduwm v24, v24, v25
+
+ bdnz mainloop
+
+lastblock:
+ # remainder: r16 still holds size/256, so r16 = size - 256 * (size/256)
+ mulld r16, r16, r15
+ subf. r16, r16, rSIZE
+
+ # check remainder: zero means every byte was handled by the main loop
+ beq exitsub
+
+ addi r14, r14, -256 # r14 = lblock, the 256-byte scratch that sits just before cnts0
+ # last block x4
+ # init 16 vectors (4 states x4), same layout as at the top of mainloop
+ vor v0, v20, v20
+ vor v1, v21, v21
+ vor v2, v22, v22
+ vor v3, v23, v23
+ vspltw v4, v17, v0
+ vspltw v5, v17, v1
+ vspltw v6, v17, v2
+ vspltw v7, v17, v3
+ vspltw v8, v18, v0
+ vspltw v9, v18, v1
+ vspltw v10, v18, v2
+ vspltw v11, v18, v3
+ vor v12, v24, v24
+ vspltw v13, v19, v1
+ vspltw v14, v19, v2
+ vspltw v15, v19, v3
+
+ doubleround # 1
+ doubleround # 2
+ doubleround # 3
+ doubleround # 4
+ doubleround # 5
+ doubleround # 6
+ doubleround # 7
+ doubleround # 8
+ doubleround # 9
+ doubleround # 10
+
+ vadduwm v12, v12, v24 # counter += original counter
+
+ convert 0
+ convert 4
+ convert 8
+ convert 12
+
+ addition 0
+ addition 1
+ addition 2
+ addition 3
+
+ # store vectors: a full 256 bytes of keystream go to lblock, in the same register order the main loop xors with
+ stxvw4x vs32, 0, r14
+ stxvw4x vs36, r17, r14
+ stxvw4x vs40, r18, r14
+ stxvw4x vs44, r19, r14
+ stxvw4x vs33, r20, r14
+ stxvw4x vs37, r21, r14
+ stxvw4x vs41, r22, r14
+ stxvw4x vs45, r23, r14
+ stxvw4x vs34, r24, r14
+ stxvw4x vs38, r25, r14
+ stxvw4x vs42, r26, r14
+ stxvw4x vs46, r27, r14
+ stxvw4x vs35, r28, r14
+ stxvw4x vs39, r29, r14
+ stxvw4x vs43, r30, r14
+ stxvw4x vs47, r31, r14
+
+ mtctr r16 # loop count = number of remaining bytes
+ addi rSIZE, r14, -1 # rSIZE is reused as the keystream read pointer; all three pointers start one byte early for the update-form accesses below
+ addi rSRC, rSRC, -1
+ addi rDST, rDST, -1
+xorlast:
+ lbzu r15, 1(rSIZE) # keystream byte
+ lbzu r16, 1(rSRC) # input byte
+ xor r15, r15, r16
+ stbu r15, 1(rDST)
+ bdnz xorlast
+
+ # zeroing last block: overwrite all 256 bytes of lblock so no keystream is left in memory
+ xxlxor vs0, vs0, vs0
+ stxvw4x vs0, 0, r14
+ stxvw4x vs0, r17, r14
+ stxvw4x vs0, r18, r14
+ stxvw4x vs0, r19, r14
+ stxvw4x vs0, r20, r14
+ stxvw4x vs0, r21, r14
+ stxvw4x vs0, r22, r14
+ stxvw4x vs0, r23, r14
+ stxvw4x vs0, r24, r14
+ stxvw4x vs0, r25, r14
+ stxvw4x vs0, r26, r14
+ stxvw4x vs0, r27, r14
+ stxvw4x vs0, r28, r14
+ stxvw4x vs0, r29, r14
+ stxvw4x vs0, r30, r14
+ stxvw4x vs0, r31, r14
+
+exitsub:
+ # zeroing volatile registers: clear the xor buffers, the working state and the copies of key and nonce
+ xxlxor vs0, vs0, vs0
+ xxlxor vs1, vs1, vs1
+ xxlxor vs2, vs2, vs2
+ xxlxor vs3, vs3, vs3
+ xxlxor vs4, vs4, vs4
+ xxlxor vs5, vs5, vs5
+ xxlxor vs6, vs6, vs6
+ xxlxor vs7, vs7, vs7
+ xxlxor vs8, vs8, vs8
+ xxlxor vs9, vs9, vs9
+ xxlxor vs10, vs10, vs10
+ xxlxor vs11, vs11, vs11
+ xxlxor vs12, vs12, vs12
+ xxlxor vs13, vs13, vs13
+
+ xxlxor vs32, vs32, vs32
+ xxlxor vs33, vs33, vs33
+ xxlxor vs34, vs34, vs34
+ xxlxor vs35, vs35, vs35
+ xxlxor vs36, vs36, vs36
+ xxlxor vs37, vs37, vs37
+ xxlxor vs38, vs38, vs38
+ xxlxor vs39, vs39, vs39
+ xxlxor vs40, vs40, vs40
+ xxlxor vs41, vs41, vs41
+ xxlxor vs42, vs42, vs42
+ xxlxor vs43, vs43, vs43
+ xxlxor vs44, vs44, vs44
+ xxlxor vs45, vs45, vs45
+ xxlxor vs46, vs46, vs46
+ xxlxor vs47, vs47, vs47
+ xxlxor vs48, vs48, vs48
+ xxlxor vs49, vs49, vs49
+ xxlxor vs50, vs50, vs50
+ xxlxor vs51, vs51, vs51
+
+ li rSIZE, 0 # clear the argument registers as well
+ li rDST, 0
+ li rSRC, 0
+ li rKEY, 0
+ li rNONCE, 0
+ li rCNTR, 0
+
+ # epilogue: rebuild the save-area base and slot offsets exactly as the prologue did
+ addi r14, sp, -160
+
+ li r16, -16
+ li r17, -32
+ li r18, -48
+ li r19, -64
+ li r20, -80
+ li r21, -96
+ li r22, -112
+ li r23, -128
+ li r24, -144
+ li r25, -160
+ li r26, -176
+ li r27, -192
+ li r28, -208
+
+ # load f14, f15 (restores vs14 and vs15, which is why they are not zeroed above)
+ lxvw4x vs14, 0, r14
+ lxvw4x vs15, r16, r14
+
+ # load v20 - v31 (vs52 - vs63)
+ lxvw4x vs52, r17, r14
+ lxvw4x vs53, r18, r14
+ lxvw4x vs54, r19, r14
+ lxvw4x vs55, r20, r14
+ lxvw4x vs56, r21, r14
+ lxvw4x vs57, r22, r14
+ lxvw4x vs58, r23, r14
+ lxvw4x vs59, r24, r14
+ lxvw4x vs60, r25, r14
+ lxvw4x vs61, r26, r14
+ lxvw4x vs62, r27, r14
+ lxvw4x vs63, r28, r14
+
+ ld r14, -8(sp) # restore r14-r31 from the slots written in the prologue
+ ld r15, -16(sp)
+ ld r16, -24(sp)
+ ld r17, -32(sp)
+ ld r18, -40(sp)
+ ld r19, -48(sp)
+ ld r20, -56(sp)
+ ld r21, -64(sp)
+ ld r22, -72(sp)
+ ld r23, -80(sp)
+ ld r24, -88(sp)
+ ld r25, -96(sp)
+ ld r26, -104(sp)
+ ld r27, -112(sp)
+ ld r28, -120(sp)
+ ld r29, -128(sp)
+ ld r30, -136(sp)
+ ld r31, -144(sp)
+
+ blr
diff --git a/security/nss/lib/freebl/chacha20poly1305-ppc.c b/security/nss/lib/freebl/chacha20poly1305-ppc.c
new file mode 100644
index 0000000000..c9766cd403
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305-ppc.c
@@ -0,0 +1,588 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_32.h"
+
+/* Forward declaration from chacha20-ppc64le.S.  The assembly names its six
+ * argument registers, in order, size, destination, source, key, nonce and
+ * counter, so `block` is the input text that is XORed into `output`. */
+void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k,
+ uint8_t *nonce, uint32_t ctr);
+
+static inline void
+poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ uint64_t *pre0 = ctx + (uint32_t)5U;
+ uint64_t *acc0 = ctx;
+ uint32_t nb = n * (uint32_t)16U / (uint32_t)16U;
+ uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = blocks + i * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = blocks + nb * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ uint64_t mask = b;
+ uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask;
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(rem[0U]));
+ if (r > (uint32_t)0U) {
+ uint64_t *pre = ctx + (uint32_t)5U;
+ uint64_t *acc = ctx;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r1 = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+/* Computes the Poly1305 tag over (aad, m) for the AEAD functions in this file.
+ *
+ * k      - Poly1305 key handed to Hacl_Poly1305_32_poly1305_init and
+ *          Hacl_Poly1305_32_poly1305_finish
+ * aadlen - length of aad in bytes
+ * aad    - additional authenticated data
+ * mlen   - length of m in bytes
+ * m      - message bytes to authenticate (the callers in this file pass the
+ *          ciphertext)
+ * out    - receives the tag written by Hacl_Poly1305_32_poly1305_finish
+ *
+ * aad and m are absorbed through poly1305_padded_32; afterwards one extra
+ * 16-byte block holding aadlen and mlen as 64-bit little-endian values is
+ * absorbed by the inlined arithmetic below. */
+static inline void
+poly1305_do_32(
+ uint8_t *k,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *out)
+{
+ /* As used below: ctx[0..4] is the accumulator, ctx[5..9] is read as r and
+ * ctx[10..14] as r5 (presumably the 5*r multiples precomputed by
+ * Hacl_Poly1305_32_poly1305_init - TODO confirm against that function). */
+ uint64_t ctx[25U] = { 0U };
+ uint8_t block[16U] = { 0U };
+ Hacl_Poly1305_32_poly1305_init(ctx, k);
+ poly1305_padded_32(ctx, aadlen, aad);
+ poly1305_padded_32(ctx, mlen, m);
+ /* Length block: aadlen in bytes 0-7, mlen in bytes 8-15, little-endian. */
+ store64_le(block, (uint64_t)aadlen);
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen);
+ uint64_t *pre = ctx + (uint32_t)5U;
+ uint64_t *acc = ctx;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ /* Split the 128-bit block (lo, hi) into five limbs of 26 bits each. */
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ /* Limb 4 starts at bit 4 * 26 = 104, so bit 24 of it is bit 128 of the
+ * value: this sets the bit just above the 16-byte block. */
+ uint64_t b = (uint64_t)0x1000000U;
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ /* Add the block to the accumulator, limb by limb. */
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ /* Multiply the five-limb sum by r. Products that would land above limb 4
+ * use the r5 limbs instead and are accumulated into the low limbs
+ * (presumably the reduction modulo 2^130 - 5 - TODO confirm). */
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ /* Carry propagation: keep 26 bits per limb and push the excess upwards.
+ * Two carry chains (starting at limb 0 and limb 3) are interleaved. */
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ /* z12 = z11 + 4 * z11 = 5 * z11: the carry out of the top limb is
+ * multiplied by 5 and added back into limb 0. */
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ /* Store the updated accumulator back into ctx[0..4]. */
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_32_poly1305_finish(out, k, ctx);
+}
+
+/* ChaCha20-Poly1305 AEAD encryption using the VSX ChaCha20 routine.
+ *
+ * k      - ChaCha20 key
+ * n      - nonce
+ * aadlen - length of aad in bytes
+ * aad    - additional authenticated data
+ * mlen   - length of m (and of cipher) in bytes
+ * m      - plaintext input
+ * cipher - receives mlen bytes of ciphertext
+ * mac    - receives the tag produced by poly1305_do_32 */
+void
+Chacha20Poly1305_vsx_aead_encrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    uint8_t poly_key_block[64U] = { 0U };
+
+    /* The message is encrypted starting at block counter 1. */
+    chacha20vsx(mlen, cipher, m, k, n, (uint32_t)1U);
+
+    /* Running ChaCha20 over 64 zero bytes with counter 0 leaves the raw
+     * keystream block in the buffer; it is used as the Poly1305 key. */
+    chacha20vsx((uint32_t)64U, poly_key_block, poly_key_block, k, n, (uint32_t)0U);
+
+    /* The tag covers the aad and the ciphertext that was just produced. */
+    poly1305_do_32(poly_key_block, aadlen, aad, mlen, cipher, mac);
+}
+
+/* ChaCha20-Poly1305 AEAD decryption using the VSX ChaCha20 routine.
+ *
+ * k      - ChaCha20 key
+ * n      - nonce
+ * aadlen - length of aad in bytes
+ * aad    - additional authenticated data
+ * mlen   - length of cipher (and of m) in bytes
+ * m      - receives the plaintext, written only when the tag matches
+ * cipher - ciphertext input
+ * mac    - 16-byte tag to verify
+ *
+ * Returns 0 when the tag matches and m has been filled in, 1 otherwise. */
+uint32_t
+Chacha20Poly1305_vsx_aead_decrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    uint8_t expected_tag[16U] = { 0U };
+    uint8_t poly_key_block[64U] = { 0U };
+    uint8_t all_equal = (uint8_t)255U;
+
+    /* Poly1305 key: keystream block generated with counter 0. */
+    chacha20vsx((uint32_t)64U, poly_key_block, poly_key_block, k, n, (uint32_t)0U);
+    poly1305_do_32(poly_key_block, aadlen, aad, mlen, cipher, expected_tag);
+
+    /* Fold the per-byte comparison masks together without branching on
+     * individual bytes; the result stays 255 only if every mask was 255. */
+    for (uint32_t idx = (uint32_t)0U; idx < (uint32_t)16U; idx++) {
+        all_equal = FStar_UInt8_eq_mask(expected_tag[idx], mac[idx]) & all_equal;
+    }
+
+    if (all_equal != (uint8_t)255U) {
+        return (uint32_t)1U;
+    }
+
+    /* Tag verified: decrypt starting at block counter 1. */
+    chacha20vsx(mlen, m, cipher, k, n, (uint32_t)1U);
+    return (uint32_t)0U;
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c
new file mode 100644
index 0000000000..29bbc9d1c1
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.c
@@ -0,0 +1,549 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <string.h>
+#include <stdio.h>
+
+#include "seccomon.h"
+#include "secerr.h"
+#include "blapit.h"
+#include "blapii.h"
+#include "chacha20poly1305.h"
+
+// There are four implementations of ChaCha20Poly1305:
+// 1) 128-bit with AVX hardware acceleration used on x64
+// 2) 256-bit with AVX2 hardware acceleration used on x64
+// 3) VSX-accelerated, used on little-endian ppc64 when crypto support is detected
+// 4) 32-bit used on all other platforms
+
+// On x64 when AVX2 and other necessary registers are available,
+// the 256bit-vectorized version will be used. When AVX2 features
+// are unavailable or disabled but AVX registers are available, the
+// 128bit-vectorized version will be used. In all other cases the
+// scalar version of the HACL* code will be used.
+
+// Instead of including the headers (they bring other things we don't want),
+// we declare the functions here.
+// Usage is guarded by runtime checks of required hardware features.
+
+// Forward declaration from Hacl_Chacha20_Vec128.h and Hacl_Chacha20Poly1305_128.h.
+extern void Hacl_Chacha20_Vec128_chacha20_encrypt_128(uint32_t len, uint8_t *out,
+ uint8_t *text, uint8_t *key,
+ uint8_t *n1, uint32_t ctr);
+extern void
+Hacl_Chacha20Poly1305_128_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+extern uint32_t
+Hacl_Chacha20Poly1305_128_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+
+// Forward declaration from Hacl_Chacha20_Vec256.h and Hacl_Chacha20Poly1305_256.h.
+extern void Hacl_Chacha20_Vec256_chacha20_encrypt_256(uint32_t len, uint8_t *out,
+ uint8_t *text, uint8_t *key,
+ uint8_t *n1, uint32_t ctr);
+extern void
+Hacl_Chacha20Poly1305_256_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+extern uint32_t
+Hacl_Chacha20Poly1305_256_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+
+// Forward declaration from Hacl_Chacha20.h and Hacl_Chacha20Poly1305_32.h.
+extern void Hacl_Chacha20_chacha20_encrypt(uint32_t len, uint8_t *out,
+ uint8_t *text, uint8_t *key,
+ uint8_t *n1, uint32_t ctr);
+extern void
+Hacl_Chacha20Poly1305_32_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+extern uint32_t
+Hacl_Chacha20Poly1305_32_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+
+// Forward declaration from chacha20-ppc64le.S
+void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k,
+ uint8_t *nonce, uint32_t ctr);
+
+// Forward declaration from chacha20poly1305-ppc.c
+extern void
+Chacha20Poly1305_vsx_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+extern uint32_t
+Chacha20Poly1305_vsx_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen,
+ uint8_t *aad, uint32_t mlen, uint8_t *m,
+ uint8_t *cipher, uint8_t *mac);
+
+// Fills in an existing ChaCha20Context.
+//
+// The key must be exactly 32 bytes (else SEC_ERROR_BAD_KEY) and the nonce
+// exactly 12 bytes (else SEC_ERROR_INVALID_ARGS). On success the key, nonce
+// and counter are copied into |ctx| and SECSuccess is returned. When
+// NSS_DISABLE_CHACHAPOLY is defined the function always returns SECFailure.
+SECStatus
+ChaCha20_InitContext(ChaCha20Context *ctx, const unsigned char *key,
+                     unsigned int keyLen, const unsigned char *nonce,
+                     unsigned int nonceLen, PRUint32 ctr)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    int err = 0;
+
+    // The key length is validated first, so it wins when both are wrong.
+    if (keyLen != 32) {
+        err = SEC_ERROR_BAD_KEY;
+    } else if (nonceLen != 12) {
+        err = SEC_ERROR_INVALID_ARGS;
+    }
+    if (err != 0) {
+        PORT_SetError(err);
+        return SECFailure;
+    }
+
+    PORT_Memcpy(ctx->key, key, sizeof(ctx->key));
+    PORT_Memcpy(ctx->nonce, nonce, sizeof(ctx->nonce));
+    ctx->counter = ctr;
+
+    return SECSuccess;
+#endif
+}
+
+// Allocates a ChaCha20Context and initialises it with ChaCha20_InitContext.
+//
+// Returns the new context, or NULL when allocation or initialisation fails
+// (the allocation is released in the latter case). Always returns NULL when
+// NSS_DISABLE_CHACHAPOLY is defined.
+ChaCha20Context *
+ChaCha20_CreateContext(const unsigned char *key, unsigned int keyLen,
+                       const unsigned char *nonce, unsigned int nonceLen,
+                       PRUint32 ctr)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return NULL;
+#else
+    ChaCha20Context *newCtx = PORT_New(ChaCha20Context);
+
+    if (newCtx == NULL) {
+        return NULL;
+    }
+    if (ChaCha20_InitContext(newCtx, key, keyLen, nonce, nonceLen, ctr) == SECSuccess) {
+        return newCtx;
+    }
+
+    // Initialisation rejected the arguments: give the memory back.
+    PORT_Free(newCtx);
+    return NULL;
+#endif
+}
+
+// Wipes a ChaCha20Context and, when |freeit| is true, releases it.
+//
+// A NULL |ctx| is accepted and ignored: ChaCha20_CreateContext returns NULL
+// on failure, so cleanup paths may legitimately arrive here without a
+// context, and zeroing through a NULL pointer would crash.
+// The function does nothing when NSS_DISABLE_CHACHAPOLY is defined.
+void
+ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit)
+{
+#ifndef NSS_DISABLE_CHACHAPOLY
+    if (ctx == NULL) {
+        return;
+    }
+
+    // Clear the key material before the storage is reused or freed.
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+#endif
+}
+
+// Fills in an existing ChaCha20Poly1305Context.
+//
+// The key must be exactly 32 bytes (else SEC_ERROR_BAD_KEY) and the tag
+// length exactly 16 (else SEC_ERROR_INPUT_LEN). On success the key and tag
+// length are stored in |ctx| and SECSuccess is returned. When
+// NSS_DISABLE_CHACHAPOLY is defined the function always returns SECFailure.
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                             const unsigned char *key, unsigned int keyLen,
+                             unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    int err = 0;
+
+    // The key length is validated first, so it wins when both are wrong.
+    if (keyLen != 32) {
+        err = SEC_ERROR_BAD_KEY;
+    } else if (tagLen != 16) {
+        err = SEC_ERROR_INPUT_LEN;
+    }
+    if (err != 0) {
+        PORT_SetError(err);
+        return SECFailure;
+    }
+
+    ctx->tagLen = tagLen;
+    PORT_Memcpy(ctx->key, key, sizeof(ctx->key));
+
+    return SECSuccess;
+#endif
+}
+
+// Allocates a ChaCha20Poly1305Context and initialises it with
+// ChaCha20Poly1305_InitContext.
+//
+// Returns the new context, or NULL when allocation or initialisation fails
+// (the allocation is released in the latter case). Always returns NULL when
+// NSS_DISABLE_CHACHAPOLY is defined.
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen,
+                               unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return NULL;
+#else
+    ChaCha20Poly1305Context *newCtx = PORT_New(ChaCha20Poly1305Context);
+
+    if (newCtx == NULL) {
+        return NULL;
+    }
+    if (ChaCha20Poly1305_InitContext(newCtx, key, keyLen, tagLen) == SECSuccess) {
+        return newCtx;
+    }
+
+    // Initialisation rejected the arguments: give the memory back.
+    PORT_Free(newCtx);
+    return NULL;
+#endif
+}
+
+// Wipes a ChaCha20Poly1305Context and, when |freeit| is true, releases it.
+//
+// A NULL |ctx| is accepted and ignored: ChaCha20Poly1305_CreateContext
+// returns NULL on failure, so cleanup paths may legitimately arrive here
+// without a context, and zeroing through a NULL pointer would crash.
+// The function does nothing when NSS_DISABLE_CHACHAPOLY is defined.
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+#ifndef NSS_DISABLE_CHACHAPOLY
+    if (ctx == NULL) {
+        return;
+    }
+
+    // Clear the key material before the storage is reused or freed.
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+#endif
+}
+
+#ifndef NSS_DISABLE_CHACHAPOLY
+// XORs |len| bytes of |block| with the ChaCha20 keystream for key |k|,
+// nonce |nonce| and starting block counter |ctr|, writing to |output|.
+//
+// Exactly one implementation is run, chosen at run time:
+//   x64:        AVX2 (Vec256), else SSSE3+SSE4.1+AVX (Vec128), else scalar
+//   ppc64le:    VSX when ppc_crypto_support() reports it, else scalar
+//   elsewhere:  scalar HACL* code
+//
+// Every accelerated branch returns right after its call. Without those
+// returns control would fall through into the next implementation and the
+// keystream would be applied again; when |output| and |block| are the same
+// buffer an even number of passes cancels out and leaves plaintext behind.
+void
+ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k,
+            uint8_t *nonce, uint32_t ctr)
+{
+#ifdef NSS_X64
+#ifndef NSS_DISABLE_AVX2
+    if (avx2_support()) {
+        Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, output, block, k, nonce, ctr);
+        return;
+    }
+#endif
+
+#ifndef NSS_DISABLE_SSE3
+    if (ssse3_support() && sse4_1_support() && avx_support()) {
+        Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr);
+        return;
+    }
+#endif
+
+#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \
+    !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX)
+    if (ppc_crypto_support()) {
+        chacha20vsx(len, output, block, k, nonce, ctr);
+        return;
+    }
+#endif
+
+    // Portable fallback, reached only when no accelerated path was taken.
+    Hacl_Chacha20_chacha20_encrypt(len, output, block, k, nonce, ctr);
+}
+#endif /* NSS_DISABLE_CHACHAPOLY */
+
+// Public wrapper around ChaCha20Xor.
+//
+// XORs |len| bytes of |block| with the ChaCha20 keystream derived from |k|,
+// |nonce| and the starting block counter |ctr|, writing the result to
+// |output|. Returns SECFailure with SEC_ERROR_INPUT_LEN when |len| is too
+// large for the 32-bit block counter, and always returns SECFailure when
+// NSS_DISABLE_CHACHAPOLY is defined.
+SECStatus
+ChaCha20_Xor(unsigned char *output, const unsigned char *block, unsigned int len,
+             const unsigned char *k, const unsigned char *nonce, PRUint32 ctr)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    // ChaCha has a 64 octet block, with a 32-bit block counter, so at most
+    // 2^(6 + 32) bytes can be processed. The bound is only reachable when
+    // unsigned int is wider than 32 bits.
+    if (sizeof(len) > 4 && (unsigned long long)len >= (1ULL << (6 + 32))) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    ChaCha20Xor(output, (uint8_t *)block, len, (uint8_t *)k,
+                (uint8_t *)nonce, ctr);
+    return SECSuccess;
+#endif
+}
+
+// Encrypts |input| and writes ciphertext followed by the authentication tag
+// to |output|.
+//
+// ctx          - context holding the key and tag length
+// output       - receives inputLen bytes of ciphertext plus ctx->tagLen tag bytes
+// outputLen    - set to inputLen + ctx->tagLen on success
+// maxOutputLen - capacity of |output| in bytes
+// input        - plaintext, inputLen bytes
+// nonce        - nonce, nonceLen must be 12
+// ad           - additional authenticated data, adLen bytes
+//
+// Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce or
+// input length) or SEC_ERROR_OUTPUT_LEN (|output| too small). Always returns
+// SECFailure when NSS_DISABLE_CHACHAPOLY is defined.
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+
+    if (nonceLen != 12) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    // ChaCha has a 64 octet block, with a 32-bit block counter.
+    if (sizeof(inputLen) > 4) {
+        unsigned long long inputLen_ull = inputLen;
+        if (inputLen_ull >= (1ULL << (6 + 32))) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+    // Compare by subtraction instead of computing inputLen + ctx->tagLen:
+    // that sum wraps around for inputLen close to UINT_MAX, which would let
+    // an undersized output buffer pass the check.
+    if (maxOutputLen < ctx->tagLen || maxOutputLen - ctx->tagLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+#ifdef NSS_X64
+#ifndef NSS_DISABLE_AVX2
+    if (avx2_support()) {
+        Hacl_Chacha20Poly1305_256_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, output + inputLen);
+        goto finish;
+    }
+#endif
+
+#ifndef NSS_DISABLE_SSE3
+    if (ssse3_support() && sse4_1_support() && avx_support()) {
+        Hacl_Chacha20Poly1305_128_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, output + inputLen);
+        goto finish;
+    }
+#endif
+
+#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \
+    !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX)
+    if (ppc_crypto_support()) {
+        Chacha20Poly1305_vsx_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, output + inputLen);
+        goto finish;
+    }
+#endif
+    {
+        // Portable fallback. The explicit goto keeps the label referenced in
+        // builds where no accelerated branch is compiled in.
+        Hacl_Chacha20Poly1305_32_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, output + inputLen);
+        goto finish;
+    }
+
+finish:
+    *outputLen = inputLen + ctx->tagLen;
+    return SECSuccess;
+#endif
+}
+
+// Verifies and decrypts |input|, which holds ciphertext followed by the
+// authentication tag.
+//
+// ctx          - context holding the key and tag length
+// output       - receives the plaintext (inputLen - ctx->tagLen bytes)
+// outputLen    - set to the plaintext length on success
+// maxOutputLen - capacity of |output| in bytes
+// input        - ciphertext immediately followed by the tag
+// nonce        - nonce, nonceLen must be 12
+// ad           - additional authenticated data, adLen bytes
+//
+// Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce or
+// input length), SEC_ERROR_OUTPUT_LEN (|output| too small) or
+// SEC_ERROR_BAD_DATA (the decrypt routine reported failure). Always returns
+// SECFailure when NSS_DISABLE_CHACHAPOLY is defined.
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    unsigned int bodyLen;
+
+    // Both conditions report the same error; the input must at least hold
+    // the tag.
+    if (nonceLen != 12 || inputLen < ctx->tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    bodyLen = inputLen - ctx->tagLen;
+    if (maxOutputLen < bodyLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    // ChaCha has a 64 octet block, with a 32-bit block counter.
+    if (inputLen >= (1ULL << (6 + 32)) + ctx->tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    // The HACL*/VSX entry points take non-const byte pointers.
+    uint8_t *keyBytes = (uint8_t *)ctx->key;
+    uint8_t *nonceBytes = (uint8_t *)nonce;
+    uint8_t *adBytes = (uint8_t *)ad;
+    uint8_t *plainBytes = (uint8_t *)output;
+    uint8_t *cipherBytes = (uint8_t *)input;
+    uint8_t *tagBytes = (uint8_t *)input + bodyLen;
+    uint32_t authFailed = 1;
+
+#ifdef NSS_X64
+#ifndef NSS_DISABLE_AVX2
+    if (avx2_support()) {
+        authFailed = Hacl_Chacha20Poly1305_256_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+
+#ifndef NSS_DISABLE_SSE3
+    if (ssse3_support() && sse4_1_support() && avx_support()) {
+        authFailed = Hacl_Chacha20Poly1305_128_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+
+#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \
+    !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX)
+    if (ppc_crypto_support()) {
+        authFailed = Chacha20Poly1305_vsx_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+    {
+        // Portable fallback.
+        authFailed = Hacl_Chacha20Poly1305_32_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+
+finish:
+    if (authFailed) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    *outputLen = bodyLen;
+    return SECSuccess;
+#endif
+}
+
+// Encrypts |input| into |output| and writes the authentication tag to the
+// separate buffer |outTag|.
+//
+// ctx          - context holding the key
+// output       - receives inputLen bytes of ciphertext
+// outputLen    - set to inputLen on success
+// maxOutputLen - capacity of |output| in bytes
+// input        - plaintext, inputLen bytes
+// nonce        - nonce, nonceLen must be 12
+// ad           - additional authenticated data, adLen bytes
+// outTag       - receives the tag
+//
+// Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce or
+// input length) or SEC_ERROR_OUTPUT_LEN (|output| too small). Always returns
+// SECFailure when NSS_DISABLE_CHACHAPOLY is defined.
+//
+// Every implementation branch ends in "goto finish", so the branches are
+// plain "if" statements with no "else" chaining across the preprocessor
+// conditionals. A bare "else" placed after an #endif has no matching "if"
+// when both NSS_DISABLE_AVX2 and NSS_DISABLE_SSE3 are defined on x64.
+SECStatus
+ChaCha20Poly1305_Encrypt(const ChaCha20Poly1305Context *ctx,
+                         unsigned char *output, unsigned int *outputLen,
+                         unsigned int maxOutputLen, const unsigned char *input,
+                         unsigned int inputLen, const unsigned char *nonce,
+                         unsigned int nonceLen, const unsigned char *ad,
+                         unsigned int adLen, unsigned char *outTag)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+
+    if (nonceLen != 12) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    // ChaCha has a 64 octet block, with a 32-bit block counter.
+    if (sizeof(inputLen) > 4) {
+        unsigned long long inputLen_ull = inputLen;
+        if (inputLen_ull >= (1ULL << (6 + 32))) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+#ifdef NSS_X64
+#ifndef NSS_DISABLE_AVX2
+    if (avx2_support()) {
+        Hacl_Chacha20Poly1305_256_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, outTag);
+        goto finish;
+    }
+#endif
+
+#ifndef NSS_DISABLE_SSE3
+    if (ssse3_support() && sse4_1_support() && avx_support()) {
+        Hacl_Chacha20Poly1305_128_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, outTag);
+        goto finish;
+    }
+#endif
+
+#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \
+    !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX)
+    if (ppc_crypto_support()) {
+        Chacha20Poly1305_vsx_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, outTag);
+        goto finish;
+    }
+#endif
+    {
+        // Portable fallback. The explicit goto keeps the label referenced in
+        // builds where no accelerated branch is compiled in.
+        Hacl_Chacha20Poly1305_32_aead_encrypt(
+            (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen,
+            (uint8_t *)input, output, outTag);
+        goto finish;
+    }
+
+finish:
+    *outputLen = inputLen;
+    return SECSuccess;
+#endif
+}
+
+// Verifies |tagIn| and decrypts |input| into |output|; the tag is supplied
+// in a separate buffer rather than appended to the ciphertext.
+//
+// ctx          - context holding the key
+// output       - receives inputLen bytes of plaintext
+// outputLen    - set to inputLen on success
+// maxOutputLen - capacity of |output| in bytes
+// input        - ciphertext, inputLen bytes
+// nonce        - nonce, nonceLen must be 12
+// ad           - additional authenticated data, adLen bytes
+// tagIn        - authentication tag to verify
+//
+// Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce or
+// input length), SEC_ERROR_OUTPUT_LEN (|output| too small) or
+// SEC_ERROR_BAD_DATA (the decrypt routine reported failure). Always returns
+// SECFailure when NSS_DISABLE_CHACHAPOLY is defined.
+SECStatus
+ChaCha20Poly1305_Decrypt(const ChaCha20Poly1305Context *ctx,
+                         unsigned char *output, unsigned int *outputLen,
+                         unsigned int maxOutputLen, const unsigned char *input,
+                         unsigned int inputLen, const unsigned char *nonce,
+                         unsigned int nonceLen, const unsigned char *ad,
+                         unsigned int adLen, const unsigned char *tagIn)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    const unsigned int bodyLen = inputLen;
+
+    if (nonceLen != 12) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < bodyLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    // ChaCha has a 64 octet block, with a 32-bit block counter. The bound
+    // is only reachable when unsigned int is wider than 32 bits.
+    if (sizeof(inputLen) > 4 &&
+        (unsigned long long)inputLen >= (1ULL << (6 + 32))) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    // The HACL*/VSX entry points take non-const byte pointers.
+    uint8_t *keyBytes = (uint8_t *)ctx->key;
+    uint8_t *nonceBytes = (uint8_t *)nonce;
+    uint8_t *adBytes = (uint8_t *)ad;
+    uint8_t *plainBytes = (uint8_t *)output;
+    uint8_t *cipherBytes = (uint8_t *)input;
+    uint8_t *tagBytes = (uint8_t *)tagIn;
+    uint32_t authFailed = 1;
+
+#ifdef NSS_X64
+#ifndef NSS_DISABLE_AVX2
+    if (avx2_support()) {
+        authFailed = Hacl_Chacha20Poly1305_256_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+
+#ifndef NSS_DISABLE_SSE3
+    if (ssse3_support() && sse4_1_support() && avx_support()) {
+        authFailed = Hacl_Chacha20Poly1305_128_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+
+#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \
+    !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX)
+    if (ppc_crypto_support()) {
+        authFailed = Chacha20Poly1305_vsx_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+#endif
+    {
+        // Portable fallback.
+        authFailed = Hacl_Chacha20Poly1305_32_aead_decrypt(
+            keyBytes, nonceBytes, adLen, adBytes, bodyLen,
+            plainBytes, cipherBytes, tagBytes);
+        goto finish;
+    }
+
+finish:
+    if (authFailed) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    *outputLen = bodyLen;
+    return SECSuccess;
+#endif
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.h b/security/nss/lib/freebl/chacha20poly1305.h
new file mode 100644
index 0000000000..fff528af39
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CHACHA20_POLY1305_H_
+#define _CHACHA20_POLY1305_H_ 1
+
+/* ChaCha20Poly1305ContextStr saves the key and tag length for a
+ * ChaCha20+Poly1305 AEAD operation. */
+struct ChaCha20Poly1305ContextStr {
+ /* Raw 32-byte key. */
+ unsigned char key[32];
+ /* Tag length in bytes. */
+ unsigned char tagLen;
+};
+
+/* ChaCha20ContextStr saves the key, nonce and block counter for a plain
+ * ChaCha20 operation (no authentication). */
+struct ChaCha20ContextStr {
+ /* Raw 32-byte key. */
+ unsigned char key[32];
+ /* 12-byte nonce. */
+ unsigned char nonce[12];
+ /* 32-bit block counter value. */
+ PRUint32 counter;
+};
+
+#endif /* _CHACHA20_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/cmac.c b/security/nss/lib/freebl/cmac.c
new file mode 100644
index 0000000000..222cef1b4c
--- /dev/null
+++ b/security/nss/lib/freebl/cmac.c
@@ -0,0 +1,323 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "rijndael.h"
+#include "blapi.h"
+#include "cmac.h"
+#include "secerr.h"
+#include "nspr.h"
+
+/* State of one CMAC computation: the underlying block cipher, the two
+ * derived subkeys, the buffered partial block and the running chaining
+ * value. */
+struct CMACContextStr {
+ /* Information about the block cipher to use internally. The cipher should
+ * be placed in ECB mode so that we can use it to directly encrypt blocks.
+ *
+ *
+ * To add a new cipher, add an entry to CMACCipher, update CMAC_Init,
+ * cmac_Encrypt, and CMAC_Destroy methods to handle the new cipher, and
+ * add a new Context pointer to the cipher union with the correct type. */
+ CMACCipher cipherType;
+ union {
+ AESContext *aes;
+ } cipher;
+ /* Block size of the selected cipher in bytes; must not exceed
+ * MAX_BLOCK_SIZE because the arrays below are sized with it. */
+ unsigned int blockSize;
+
+ /* Internal keys which are conditionally used by the algorithm. Derived
+ * from encrypting the NULL block. We leave the storing of (and the
+ * cleanup of) the CMAC key to the underlying block cipher. */
+ unsigned char k1[MAX_BLOCK_SIZE];
+ unsigned char k2[MAX_BLOCK_SIZE];
+
+ /* When Update is called with data which isn't a multiple of the block
+ * size, we need a place to put it. HMAC handles this by passing it to
+ * the underlying hash function right away; we can't do that as the
+ * contract on the cipher object is different.
+ *
+ * partialIndex is the number of bytes currently held in partialBlock. */
+ unsigned int partialIndex;
+ unsigned char partialBlock[MAX_BLOCK_SIZE];
+
+ /* Last encrypted block. This gets xor-ed with partialBlock prior to
+ * encrypting it. NIST defines this to be the empty string to begin. */
+ unsigned char lastBlock[MAX_BLOCK_SIZE];
+};
+
+/* Shifts the |length|-byte big-endian bit string |in| left by one bit and
+ * stores the result in |out|. The most significant bit of in[0] is dropped
+ * and a zero bit is shifted into the last byte. */
+static void
+cmac_ShiftLeftOne(unsigned char *out, const unsigned char *in, int length)
+{
+    const int last = length - 1;
+    int pos;
+
+    /* Every byte but the last takes its new low bit from the top bit of the
+     * byte that follows it. */
+    for (pos = 0; pos < last; pos++) {
+        out[pos] = (unsigned char)((in[pos] << 1) | (in[pos + 1] >> 7));
+    }
+
+    /* The final byte has no successor, so a zero is shifted in. */
+    out[pos] = (unsigned char)(in[pos] << 1);
+}
+
+/* Encrypts |inputLen| bytes of |input| into |output| with the block cipher
+ * selected by ctx->cipherType.
+ *
+ * Returns the status reported by the cipher, or SECFailure when the cipher
+ * type is not one this file knows how to drive. */
+static SECStatus
+cmac_Encrypt(CMACContext *ctx, unsigned char *output,
+             const unsigned char *input,
+             unsigned int inputLen)
+{
+    /* Initialised so that nothing indeterminate is ever read, even if the
+     * cipher fails before storing a length. */
+    unsigned int tmpOutputLen = 0;
+    SECStatus rv;
+
+    if (ctx->cipherType != CMAC_AES) {
+        return SECFailure;
+    }
+
+    rv = AES_Encrypt(ctx->cipher.aes, output, &tmpOutputLen,
+                     ctx->blockSize, input, inputLen);
+
+    /* Assumption: AES_Encrypt (when in ECB mode) always returns an
+     * output of length equal to blockSize (what was passed as the value
+     * of the maxOutputLen parameter). The length is only meaningful when
+     * the call succeeded, so a failed encryption must not trip the
+     * assertion. */
+    PORT_Assert(rv != SECSuccess || tmpOutputLen == ctx->blockSize);
+    return rv;
+}
+
+/* NIST SP.800-38B, 6.1 Subkey Generation
+ *
+ * Derives ctx->k1 and ctx->k2 from the encryption of an all-zero block.
+ * Returns SECFailure when that encryption fails, SECSuccess otherwise. */
+static SECStatus
+cmac_GenerateSubkeys(CMACContext *ctx)
+{
+    unsigned char zeroBlock[MAX_BLOCK_SIZE] = { 0 };
+    unsigned char L[MAX_BLOCK_SIZE];
+    unsigned char msbMask;
+    unsigned char shift;
+
+    /* Step 1: L = AES(key, null_block) */
+    if (cmac_Encrypt(ctx, L, zeroBlock, ctx->blockSize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* In the following, some effort has been made to be constant time.
+     * Instead of branching on the MSB (of L or K1), the bit is smeared
+     * across a whole byte (shifts of 1, 2 and 4) and that byte is used as a
+     * mask for the conditional constant. */
+
+    /* Step 2: If MSB(L) = 0, K1 = L << 1. Else, K1 = (L << 1) ^ R_b. */
+    cmac_ShiftLeftOne(ctx->k1, L, ctx->blockSize);
+    msbMask = L[0] >> 7;
+    for (shift = 1; shift < 8; shift <<= 1) {
+        msbMask |= (msbMask << shift);
+    }
+    ctx->k1[ctx->blockSize - 1] ^= (0x87 & msbMask);
+
+    /* Step 3: If MSB(K1) = 0, K2 = K1 << 1. Else, K2 = (K1 << 1) ^ R_b. */
+    cmac_ShiftLeftOne(ctx->k2, ctx->k1, ctx->blockSize);
+    msbMask = ctx->k1[0] >> 7;
+    for (shift = 1; shift < 8; shift <<= 1) {
+        msbMask |= (msbMask << shift);
+    }
+    ctx->k2[ctx->blockSize - 1] ^= (0x87 & msbMask);
+
+    /* Any intermediate value in the computation of the subkey shall be
+     * secret. */
+    PORT_Memset(zeroBlock, 0, MAX_BLOCK_SIZE);
+    PORT_Memset(L, 0, MAX_BLOCK_SIZE);
+
+    /* Step 4: Return the values. */
+    return SECSuccess;
+}
+
+/* NIST SP.800-38B, 6.2 MAC Generation step 6
+ *
+ * Folds the full block buffered in ctx->partialBlock into the chaining
+ * value ctx->lastBlock. Fails with SEC_ERROR_INVALID_ARGS when |ctx| is NULL
+ * or the buffer does not hold exactly one block. */
+static SECStatus
+cmac_UpdateState(CMACContext *ctx)
+{
+    unsigned int pos;
+
+    if (ctx == NULL || ctx->partialIndex != ctx->blockSize) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Step 6: C_i = CIPHER(key, C_{i-1} ^ M_i) for 1 <= i <= n, and
+     * C_0 is defined as the empty string. The XOR is done in place in the
+     * partial block, which then serves as the cipher input. */
+    for (pos = 0; pos < ctx->blockSize; pos++) {
+        ctx->partialBlock[pos] ^= ctx->lastBlock[pos];
+    }
+
+    return cmac_Encrypt(ctx, ctx->lastBlock, ctx->partialBlock, ctx->blockSize);
+}
+
+/* Initialises an existing CMACContext for the given cipher and key.
+ *
+ * Fails with SEC_ERROR_NO_MEMORY when |ctx| is NULL and with
+ * SEC_ERROR_INVALID_ARGS when |type| is not CMAC_AES. Otherwise the context
+ * is zeroed, an AES context is created from |key|, and the result of
+ * CMAC_Begin is returned. */
+SECStatus
+CMAC_Init(CMACContext *ctx, CMACCipher type,
+          const unsigned char *key, unsigned int key_len)
+{
+    if (ctx == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    /* We only currently support AES-CMAC. */
+    if (type != CMAC_AES) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Start from a clean slate, then record the cipher parameters. */
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+    ctx->cipherType = CMAC_AES;
+    ctx->blockSize = AES_BLOCK_SIZE;
+
+    ctx->cipher.aes = AES_CreateContext(key, NULL, NSS_AES, 1, key_len,
+                                        ctx->blockSize);
+    if (ctx->cipher.aes != NULL) {
+        return CMAC_Begin(ctx);
+    }
+
+    return SECFailure;
+}
+
+/* Allocates a CMACContext and initialises it with CMAC_Init.
+ *
+ * Returns the new context, or NULL when allocation or initialisation fails;
+ * in the latter case the context is destroyed and freed before returning. */
+CMACContext *
+CMAC_Create(CMACCipher type, const unsigned char *key,
+            unsigned int key_len)
+{
+    CMACContext *result = PORT_New(CMACContext);
+
+    /* CMAC_Init can fail before it clears the context (for instance on an
+     * unsupported cipher type). Zero the fresh allocation here so that the
+     * CMAC_Destroy call below never inspects an uninitialised cipher type or
+     * cipher pointer. */
+    if (result != NULL) {
+        PORT_Memset(result, 0, sizeof(*result));
+    }
+
+    /* A NULL result is passed on deliberately: CMAC_Init reports the error
+     * and CMAC_Destroy ignores NULL. */
+    if (CMAC_Init(result, type, key, key_len) != SECSuccess) {
+        CMAC_Destroy(result, PR_TRUE);
+        return NULL;
+    }
+
+    return result;
+}
+
+/* (Re)starts a CMAC computation on an initialised context: regenerates the
+ * subkeys, discards any buffered input and resets the chaining value.
+ *
+ * Returns SECFailure when |ctx| is NULL or subkey generation fails. */
+SECStatus
+CMAC_Begin(CMACContext *ctx)
+{
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+
+    /* Ensure that our blockSize is less than the maximum. When this fails,
+     * a cipher with a larger block size was added and MAX_BLOCK_SIZE needs
+     * to be updated accordingly. */
+    PORT_Assert(ctx->blockSize <= MAX_BLOCK_SIZE);
+
+    if (cmac_GenerateSubkeys(ctx) == SECSuccess) {
+        /* Set the index to write partial blocks at to zero. This saves us
+         * from having to clear ctx->partialBlock. */
+        ctx->partialIndex = 0;
+
+        /* Step 5: Let C_0 = 0^b. */
+        PORT_Memset(ctx->lastBlock, 0, ctx->blockSize);
+
+        return SECSuccess;
+    }
+
+    return SECFailure;
+}
+
+/* NIST SP.800-38B, 6.2 MAC Generation
+ *
+ * Absorbs |data_len| bytes of |data| into the CMAC state. A NULL or empty
+ * input is accepted and changes nothing. Fails with SEC_ERROR_INVALID_ARGS
+ * when |ctx| is NULL, or with SECFailure when processing a block fails. */
+SECStatus
+CMAC_Update(CMACContext *ctx, const unsigned char *data,
+            unsigned int data_len)
+{
+    const unsigned char *src = data;
+    unsigned int remaining = data_len;
+
+    if (ctx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (data == NULL || data_len == 0) {
+        return SECSuccess;
+    }
+
+    /* Move the input into ctx->partialBlock one chunk at a time, each chunk
+     * being the smaller of what is left and what still fits in the block.
+     *
+     * A full buffered block is only encrypted once more data arrives
+     * (encrypt *then* copy) because the last block is treated differently:
+     * if the message ends on a block boundary it has to be XOR-ed with K1,
+     * and we won't know it was the last block until CMAC_Finish is called
+     * (by which time CMAC_Update has already returned). */
+    while (remaining > 0) {
+        unsigned int room;
+        unsigned int chunk;
+
+        if (ctx->partialIndex == ctx->blockSize) {
+            if (cmac_UpdateState(ctx) != SECSuccess) {
+                return SECFailure;
+            }
+
+            ctx->partialIndex = 0;
+        }
+
+        room = ctx->blockSize - ctx->partialIndex;
+        chunk = (remaining > room) ? room : remaining;
+
+        PORT_Memcpy(ctx->partialBlock + ctx->partialIndex, src, chunk);
+        src += chunk;
+        remaining -= chunk;
+        ctx->partialIndex += chunk;
+    }
+
+    return SECSuccess;
+}
+
+/* NIST SP.800-38B, 6.2 MAC Generation
+ *
+ * Processes the final block and copies the MAC to |result|.
+ *
+ * result         - receives min(max_result_len, blockSize) bytes of MAC
+ * result_len     - optional; set to the number of bytes written
+ * max_result_len - capacity of |result|; must be non-zero
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS on a NULL |ctx| or |result| or a zero
+ * |max_result_len|, and with SECFailure when the final encryption fails. */
+SECStatus
+CMAC_Finish(CMACContext *ctx, unsigned char *result,
+            unsigned int *result_len,
+            unsigned int max_result_len)
+{
+    const unsigned char *subkey;
+    unsigned int pos;
+
+    if (ctx == NULL || result == NULL || max_result_len == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (max_result_len > ctx->blockSize) {
+        /* This is a weird situation. The PKCS #11 soft tokencode passes
+         * sizeof(result) here, which is hard-coded as SFTK_MAX_MAC_LENGTH.
+         * This later gets truncated to min(SFTK_MAX_MAC_LENGTH, requested). */
+        max_result_len = ctx->blockSize;
+    }
+
+    /* Step 4: If M_n* is a complete block, M_n = K1 ^ M_n*. Else,
+     * M_n = K2 ^ (M_n* || 10^j). */
+    if (ctx->partialIndex == ctx->blockSize) {
+        subkey = ctx->k1;
+    } else {
+        /* Use 10* padding on the partial block. */
+        ctx->partialBlock[ctx->partialIndex++] = 0x80;
+        PORT_Memset(ctx->partialBlock + ctx->partialIndex, 0,
+                    ctx->blockSize - ctx->partialIndex);
+        ctx->partialIndex = ctx->blockSize;
+        subkey = ctx->k2;
+    }
+
+    /* XOR in the subkey selected above. */
+    for (pos = 0; pos < ctx->blockSize; pos++) {
+        ctx->partialBlock[pos] ^= subkey[pos];
+    }
+
+    /* Encrypt the block. */
+    if (cmac_UpdateState(ctx) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Step 7 & 8: T = MSB_tlen(C_n); return T. */
+    PORT_Memcpy(result, ctx->lastBlock, max_result_len);
+    if (result_len != NULL) {
+        *result_len = max_result_len;
+    }
+    return SECSuccess;
+}
+
+/* Releases the cipher owned by |ctx|, wipes the context and, when |free_it|
+ * is PR_TRUE, frees the context itself. A NULL |ctx| is ignored. */
+void
+CMAC_Destroy(CMACContext *ctx, PRBool free_it)
+{
+    if (ctx != NULL) {
+        switch (ctx->cipherType) {
+            case CMAC_AES:
+                if (ctx->cipher.aes != NULL) {
+                    AES_DestroyContext(ctx->cipher.aes, PR_TRUE);
+                }
+                break;
+            default:
+                break;
+        }
+
+        /* Destroy everything in the context. This includes sensitive data
+         * in K1, K2, and lastBlock. */
+        PORT_Memset(ctx, 0, sizeof(*ctx));
+
+        if (free_it == PR_TRUE) {
+            PORT_Free(ctx);
+        }
+    }
+}
diff --git a/security/nss/lib/freebl/cmac.h b/security/nss/lib/freebl/cmac.h
new file mode 100644
index 0000000000..6a6f42c797
--- /dev/null
+++ b/security/nss/lib/freebl/cmac.h
@@ -0,0 +1,47 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CMAC_H_
+#define _CMAC_H_
+
+/* Opaque CMAC state; the structure itself is not defined in this header. */
+typedef struct CMACContextStr CMACContext;
+
+SEC_BEGIN_PROTOS
+
+/* Enum for identifying the underlying block cipher we're using internally. */
+typedef enum {
+ CMAC_AES = 0
+} CMACCipher;
+
+/* Initialize an existing CMACContext struct. */
+SECStatus CMAC_Init(CMACContext *ctx, CMACCipher type,
+ const unsigned char *key, unsigned int key_len);
+
+/* Allocate and initialize a new CMAC context with the specified cipher and
+ * key. */
+CMACContext *CMAC_Create(CMACCipher type, const unsigned char *key,
+ unsigned int key_len);
+
+/* Called automatically by CMAC_*{Create,Init}(...). Only useful for restarting
+ * an already-started CMAC instance. */
+SECStatus CMAC_Begin(CMACContext *ctx);
+
+/* Add the specified bytes into the CMAC state. */
+SECStatus CMAC_Update(CMACContext *ctx, const unsigned char *data,
+ unsigned int data_len);
+
+/* Finalize the CMAC state and return the result.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when ctx or result is NULL or when
+ * max_result_len is 0. At most one cipher block of MAC is written: a
+ * max_result_len larger than the block size is clamped to the block size.
+ * result_len may be NULL; when it is not, it receives the number of bytes
+ * written to result. */
+SECStatus CMAC_Finish(CMACContext *ctx, unsigned char *result,
+ unsigned int *result_len,
+ unsigned int max_result_len);
+
+/* Note: CMAC_Clone isn't implemented here because AES doesn't expose a
+ * context-cloning operation. */
+
+/* Destroy a CMAC context, optionally freeing it.
+ *
+ * A NULL ctx is ignored. The context contents (including K1, K2 and the last
+ * block) are zeroed; the memory is released only when free_it is PR_TRUE. */
+void CMAC_Destroy(CMACContext *ctx, PRBool free_it);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk
new file mode 100644
index 0000000000..a4182a4186
--- /dev/null
+++ b/security/nss/lib/freebl/config.mk
@@ -0,0 +1,93 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Only do this in the outermost freebl build, i.e. when FREEBL_CHILD_BUILD
+# is not defined.
+ifndef FREEBL_CHILD_BUILD
+
+# We're going to change this build so that it builds libfreebl.a with
+# just loader.c. Then we have to build this directory twice again to
+# build the two DSOs.
+# To build libfreebl.a with just loader.c, we must now override many
+# of the make variables set up by the prior inclusion of CORECONF's config.mk
+
+CSRCS = loader.c
+SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX))
+OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+ $(NOSUCHFILE) so_locations
+
+# This is not a recursive child make. We make a static library (archive).
+
+# Override the values defined in coreconf's ruleset.mk.
+#
+# - (1) LIBRARY: a static (archival) library
+# - (2) SHARED_LIBRARY: a shared (dynamic link) library
+# - (3) IMPORT_LIBRARY: an import library, used only on Windows
+# - (4) PROGRAM: an executable binary
+#
+# override these variables to prevent building a DSO/DLL.
+ TARGETS = $(LIBRARY)
+ SHARED_LIBRARY =
+ IMPORT_LIBRARY =
+ PROGRAM =
+
+else
+
+# This is a recursive child make. We build the shared lib.
+
+TARGETS = $(SHARED_LIBRARY)
+LIBRARY =
+IMPORT_LIBRARY =
+PROGRAM =
+
+ifeq ($(OS_TARGET), SunOS)
+OS_LIBS += -lkstat
+endif
+
+# filter-out removes every word matching WIN%, so the result is empty (and
+# this branch is taken) exactly when OS_TARGET starts with "WIN".
+ifeq (,$(filter-out WIN%,$(OS_TARGET)))
+
+RESNAME = freebl.rc
+
+# Link advapi32, using the library syntax of the toolchain in use.
+ifdef NS_USE_GCC
+OS_LIBS += -ladvapi32
+else
+OS_LIBS += advapi32.lib
+endif
+
+ifdef NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+ -L$(DIST)/lib \
+ -L$(NSSUTIL_LIB_DIR) \
+ -lnssutil3 \
+ -L$(NSPR_LIB_DIR) \
+ -lnspr4 \
+ $(NULL)
+else # ! NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+ $(DIST)/lib/nssutil3.lib \
+ $(NSPR_LIB_DIR)/$(NSPR31_LIB_PREFIX)nspr4.lib \
+ $(NULL)
+endif # NS_USE_GCC
+
+else
+
+# Non-Windows targets: with FREEBL_NO_DEPEND=1 the shared library is linked
+# without nssutil3/nspr4 (and without pthreads); otherwise both are added.
+ifeq ($(FREEBL_NO_DEPEND),1)
+#drop pthreads as well
+OS_PTHREAD=
+else
+EXTRA_SHARED_LIBS += \
+ -L$(DIST)/lib \
+ -L$(NSSUTIL_LIB_DIR) \
+ -lnssutil3 \
+ -L$(NSPR_LIB_DIR) \
+ -lnspr4 \
+ $(NULL)
+endif
+endif
+
+# NOTE(review): -dylib_file appears to point the linker at the $(DIST)/lib
+# copies of libplc4/libplds4 for their @executable_path install names --
+# confirm against the Darwin ld documentation.
+ifeq ($(OS_ARCH), Darwin)
+EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib
+endif
+
+endif
diff --git a/security/nss/lib/freebl/crypto_primitives.c b/security/nss/lib/freebl/crypto_primitives.c
new file mode 100644
index 0000000000..425f9fcc80
--- /dev/null
+++ b/security/nss/lib/freebl/crypto_primitives.c
@@ -0,0 +1,36 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+/* This file holds useful functions and macros for crypto code. */
+#include "crypto_primitives.h"
+
+/*
+ * FREEBL_HTONLL(x): swap bytes in a 64-bit integer.
+ */
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64))
+
+/* Reverse the byte order of a 64-bit value with the x86-64 bswapq
+ * instruction. The "+r" constraint makes `value` both the input and the
+ * output register operand, so the swap happens in place. */
+__inline__ PRUint64
+swap8b(PRUint64 value)
+{
+ __asm__("bswapq %0"
+ : "+r"(value));
+ return (value);
+}
+
+#elif defined(IS_LITTLE_ENDIAN) && !defined(_MSC_VER) && !__has_builtin(__builtin_bswap64) && !((defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))))
+
+/* Portable 64-bit byte reversal done in three passes: swap adjacent bytes,
+ * then adjacent 16-bit halves, then the two 32-bit words. */
+PRUint64
+swap8b(PRUint64 x)
+{
+    const PRUint64 bytesSwapped =
+        ((x & SHA_MASK8) << 8) | ((x >> 8) & SHA_MASK8);
+    const PRUint64 halvesSwapped =
+        ((bytesSwapped & SHA_MASK16) << 16) |
+        ((bytesSwapped >> 16) & SHA_MASK16);
+
+    return (halvesSwapped << 32) | (halvesSwapped >> 32);
+}
+
+#endif
diff --git a/security/nss/lib/freebl/crypto_primitives.h b/security/nss/lib/freebl/crypto_primitives.h
new file mode 100644
index 0000000000..86a6927ad6
--- /dev/null
+++ b/security/nss/lib/freebl/crypto_primitives.h
@@ -0,0 +1,66 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file holds useful functions and macros for crypto code. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <stdlib.h>
+#include "prtypes.h"
+
+/* For non-clang platform */
+#ifndef __has_builtin
+#define __has_builtin(x) 0
+#endif
+
+/* Unfortunately this isn't always set when it should be. */
+#if defined(HAVE_LONG_LONG)
+
+/*
+ * ROTR64/ROTL64(x, n): rotate a 64-bit integer x by n bits to the right/left.
+ *
+ * x must be an unsigned 64-bit value and n must be in the range 0..63.
+ * The complementary shift count is reduced modulo 64 so that n == 0 yields
+ * x unchanged instead of shifting by the full width of the type, which is
+ * undefined behaviour in C. Both arguments are evaluated more than once, so
+ * do not pass expressions with side effects.
+ */
+#if defined(_MSC_VER)
+#pragma intrinsic(_rotr64, _rotl64)
+#define ROTR64(x, n) _rotr64((x), (n))
+#define ROTL64(x, n) _rotl64((x), (n))
+#else
+#define ROTR64(x, n) (((x) >> (n)) | ((x) << ((64 - (n)) & 63)))
+#define ROTL64(x, n) (((x) << (n)) | ((x) >> ((64 - (n)) & 63)))
+#endif
+
+/*
+ * FREEBL_HTONLL(x): swap bytes in a 64-bit integer.
+ *
+ * On little-endian builds this reverses the byte order using the first
+ * available mechanism, in this order: the MSVC intrinsic, the
+ * __builtin_bswap64 builtin, an out-of-line swap8b() for GCC on x86-64, and
+ * finally an out-of-line portable swap8b(). When IS_LITTLE_ENDIAN is not
+ * defined the macro is the identity.
+ */
+#if defined(IS_LITTLE_ENDIAN)
+#if defined(_MSC_VER)
+
+#pragma intrinsic(_byteswap_uint64)
+#define FREEBL_HTONLL(x) _byteswap_uint64(x)
+
+/* gcc doesn't have __has_builtin, but it does have __builtin_bswap64 */
+#elif __has_builtin(__builtin_bswap64) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+
+#define FREEBL_HTONLL(x) __builtin_bswap64(x)
+
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64))
+
+/* swap8b is defined in crypto_primitives.c (inline-asm bswapq variant). */
+PRUint64 swap8b(PRUint64 value);
+#define FREEBL_HTONLL(x) swap8b(x)
+
+#else
+
+/* Masks used by the portable swap8b in crypto_primitives.c: SHA_MASK8 selects
+ * the low byte of every 16-bit lane, SHA_MASK16 the low half of every 32-bit
+ * lane. */
+#define SHA_MASK16 0x0000FFFF0000FFFFULL
+#define SHA_MASK8 0x00FF00FF00FF00FFULL
+PRUint64 swap8b(PRUint64 x);
+#define FREEBL_HTONLL(x) swap8b(x)
+
+#endif /* _MSC_VER */
+
+#else /* IS_LITTLE_ENDIAN */
+#define FREEBL_HTONLL(x) (x)
+#endif
+
+#endif /* HAVE_LONG_LONG */
diff --git a/security/nss/lib/freebl/ctr.c b/security/nss/lib/freebl/ctr.c
new file mode 100644
index 0000000000..239a60da24
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.c
@@ -0,0 +1,276 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "blapit.h"
+#include "blapii.h"
+#include "ctr.h"
+#include "pkcs11t.h"
+#include "secerr.h"
+
+#ifdef USE_HW_AES
+#ifdef NSS_X86_OR_X64
+#include "intel-aes.h"
+#endif
+#include "rijndael.h"
+#endif
+
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
+/*
+ * Initialise an already-allocated CTRContext.
+ *
+ * param is interpreted as a CK_AES_CTR_PARAMS: ulCounterBits must be between
+ * 1 and the number of bits in an AES block, otherwise the call fails with
+ * SEC_ERROR_INVALID_ARGS. The initial counter block is copied from cb. When
+ * fewer than 64 counter bits are used, the initial counter is also saved in
+ * counterFirst and wrap checking is switched on.
+ */
+SECStatus
+CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher,
+                const unsigned char *param)
+{
+    const CK_AES_CTR_PARAMS *aesCtrParams = (const CK_AES_CTR_PARAMS *)param;
+    const PRBool bitsInRange =
+        (aesCtrParams->ulCounterBits != 0 &&
+         aesCtrParams->ulCounterBits <= AES_BLOCK_SIZE * PR_BITS_PER_BYTE);
+
+    if (!bitsInRange) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Invariant: 0 < ctr->bufPtr <= AES_BLOCK_SIZE */
+    ctr->context = context;
+    ctr->cipher = cipher;
+    ctr->counterBits = aesCtrParams->ulCounterBits;
+    ctr->bufPtr = AES_BLOCK_SIZE; /* no unused data in the buffer */
+    ctr->checkWrap = PR_FALSE;
+
+    /* Sanity check that both counter arrays can hold a full AES block. */
+    if (sizeof(ctr->counter) < AES_BLOCK_SIZE ||
+        sizeof(aesCtrParams->cb) < AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    PORT_Memcpy(ctr->counter, aesCtrParams->cb, AES_BLOCK_SIZE);
+    if (ctr->counterBits < 64) {
+        /* Remember the starting value so CTR_Update can detect a wrap. */
+        PORT_Memcpy(ctr->counterFirst, ctr->counter, AES_BLOCK_SIZE);
+        ctr->checkWrap = PR_TRUE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zeroed CTRContext and initialise it with CTR_InitContext.
+ * Returns NULL if the allocation fails or the parameters are rejected; in
+ * the latter case the partially built context is destroyed and freed.
+ */
+CTRContext *
+CTR_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *param)
+{
+    CTRContext *newCtx = PORT_ZNew(CTRContext);
+
+    if (newCtx == NULL) {
+        return NULL;
+    }
+    if (CTR_InitContext(newCtx, context, cipher, param) == SECSuccess) {
+        return newCtx;
+    }
+    CTR_DestroyContext(newCtx, PR_TRUE);
+    return NULL;
+}
+
+/*
+ * Wipe a CTRContext (counter, buffered keystream and saved initial counter)
+ * and, when freeit is true, release its memory. The inner cipher context is
+ * not touched here. A NULL ctr is ignored, matching CMAC_Destroy, so that
+ * error-path cleanup never passes a NULL pointer to memset.
+ */
+void
+CTR_DestroyContext(CTRContext *ctr, PRBool freeit)
+{
+    if (ctr == NULL) {
+        return;
+    }
+    PORT_Memset(ctr, 0, sizeof(CTRContext));
+    if (freeit) {
+        PORT_Free(ctr);
+    }
+}
+
+/*
+ * Used by counter mode. Increment the counter block. Not all bits in the
+ * counter block are part of the counter, counterBits tells how many bits
+ * are part of the counter. The counter block is blocksize long. It's a
+ * big endian value.
+ *
+ * Only the low counterBits bits are ever modified: the counter field is
+ * incremented modulo 2^counterBits and every other bit of the block is left
+ * untouched. When the field overflows it silently wraps to zero; rollover is
+ * not reported here, callers detect it by comparing against the initial
+ * counter.
+ */
+static void
+ctr_GetNextCtr(unsigned char *counter, unsigned int counterBits,
+               unsigned int blocksize)
+{
+    unsigned char *counterPtr = counter + blocksize - 1;
+    unsigned char mask, original;
+
+    PORT_Assert(counterBits <= blocksize * PR_BITS_PER_BYTE);
+    /* Whole bytes of the counter field: propagate the carry leftwards and
+     * stop as soon as a byte does not wrap to zero. */
+    while (counterBits >= PR_BITS_PER_BYTE) {
+        if (++(*(counterPtr--))) {
+            return;
+        }
+        counterBits -= PR_BITS_PER_BYTE;
+    }
+    if (counterBits == 0) {
+        return;
+    }
+    /* Increment the final partial byte. The sum is computed from a saved
+     * copy and masked before it is merged back, so a carry out of the
+     * counter bits cannot spill into the non-counter bits of this byte. */
+    mask = (1 << counterBits) - 1;
+    original = *counterPtr;
+    *counterPtr = (original & ~mask) | ((original + 1) & mask);
+}
+
+/*
+ * target[k] = x[k] ^ y[k] for k in [0, count). With NEON available, whole
+ * 16-byte groups are processed with vector loads/stores first and the
+ * remainder is handled byte by byte.
+ */
+static void
+ctr_xor(unsigned char *target, const unsigned char *x,
+        const unsigned char *y, unsigned int count)
+{
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+    for (; count >= 16; count -= 16, target += 16, x += 16, y += 16) {
+        vst1q_u8(target, veorq_u8(vld1q_u8(x), vld1q_u8(y)));
+    }
+#endif
+    while (count > 0) {
+        *target++ = *x++ ^ *y++;
+        count--;
+    }
+}
+
+/*
+ * Generate the next keystream block: encrypt the current counter block into
+ * ctr->buffer and advance the counter. If wrap checking is enabled and the
+ * advanced counter equals the initial counter, fail with
+ * SEC_ERROR_INVALID_ARGS. The counter is advanced even when the cipher call
+ * itself failed; in that case SECFailure is returned.
+ */
+static SECStatus
+ctr_NextKeystreamBlock(CTRContext *ctr, unsigned int blocksize)
+{
+    unsigned int tmp;
+    SECStatus rv;
+
+    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                        ctr->counter, blocksize, blocksize);
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+    if (ctr->checkWrap &&
+        PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return (rv == SECSuccess) ? SECSuccess : SECFailure;
+}
+
+/*
+ * Encrypt or decrypt inlen bytes in counter mode (the operation is its own
+ * inverse).
+ *
+ * Keystream left over from a previous call is consumed first, then whole
+ * blocks, then a final partial block whose unused keystream stays buffered
+ * for the next call.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when inlen exceeds (2^counterBits - 2)
+ * blocks, and with SEC_ERROR_OUTPUT_LEN (after storing the required size in
+ * *outlen) when maxout < inlen. On success *outlen is the number of bytes
+ * written, which equals inlen.
+ */
+SECStatus
+CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout,
+           const unsigned char *inbuf, unsigned int inlen,
+           unsigned int blocksize)
+{
+    SECStatus rv;
+
+    /* Limit block count to 2^counterBits - 2. The bound is computed in
+     * 64-bit unsigned arithmetic: with plain int, 1 << 31 is undefined and
+     * the multiplication by AES_BLOCK_SIZE overflows from 28 counter bits
+     * upwards. */
+    if (ctr->counterBits < (sizeof(unsigned int) * 8) &&
+        inlen > (((PRUint64)1 << ctr->counterBits) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+
+    /* Use up keystream bytes buffered by an earlier call. */
+    if (ctr->bufPtr != blocksize) {
+        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    /* Whole blocks. */
+    while (inlen >= blocksize) {
+        rv = ctr_NextKeystreamBlock(ctr, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        ctr_xor(outbuf, inbuf, ctr->buffer, blocksize);
+        outbuf += blocksize;
+        inbuf += blocksize;
+        *outlen += blocksize;
+        inlen -= blocksize;
+    }
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+
+    /* Trailing partial block: keep the unused keystream for the next call. */
+    rv = ctr_NextKeystreamBlock(ctr, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen;
+    *outlen += inlen;
+    return SECSuccess;
+}
+
+#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)
+/*
+ * Counter-mode update that hands all whole blocks to the worker selected by
+ * intel_aes_ctr_worker() for the AES context's round count. Leftover
+ * keystream from a previous call and the trailing partial block are handled
+ * in the same way as in CTR_Update.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when inlen exceeds (2^counterBits - 2)
+ * blocks, and with SEC_ERROR_OUTPUT_LEN (after storing the required size in
+ * *outlen) when maxout < inlen.
+ *
+ * NOTE(review): unlike CTR_Update, this function does not consult
+ * ctr->checkWrap after advancing the counter -- confirm whether wrap
+ * detection is expected on this path.
+ */
+SECStatus
+CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned int fullblocks;
+    unsigned int tmp;
+    SECStatus rv;
+
+    /* Limit block count to 2^counterBits - 2. The bound is computed in
+     * 64-bit unsigned arithmetic: with plain int, 1 << 31 is undefined and
+     * the multiplication by AES_BLOCK_SIZE overflows from 28 counter bits
+     * upwards. */
+    if (ctr->counterBits < (sizeof(unsigned int) * 8) &&
+        inlen > (((PRUint64)1 << ctr->counterBits) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+
+    /* Use up keystream bytes buffered by an earlier call. */
+    if (ctr->bufPtr != blocksize) {
+        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    /* Whole blocks go to the hardware worker; this function accounts for
+     * the bytes it consumed afterwards. */
+    if (inlen >= blocksize) {
+        rv = intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)(
+            ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        fullblocks = (inlen / blocksize) * blocksize;
+        *outlen += fullblocks;
+        outbuf += fullblocks;
+        inbuf += fullblocks;
+        inlen -= fullblocks;
+    }
+
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+
+    /* Trailing partial block: keep the unused keystream for the next call. */
+    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                        ctr->counter, blocksize, blocksize);
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen;
+    *outlen += inlen;
+    return SECSuccess;
+}
+#endif
diff --git a/security/nss/lib/freebl/ctr.h b/security/nss/lib/freebl/ctr.h
new file mode 100644
index 0000000000..a397e690e6
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.h
@@ -0,0 +1,52 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTR_H
+#define CTR_H 1
+
+#include "blapii.h"
+
+/* This structure is defined in this header because both ctr.c and gcm.c
+ * need it. */
+struct CTRContextStr {
+ freeblCipherFunc cipher; /* block cipher used to encrypt counter blocks */
+ void *context; /* inner cipher context passed to cipher; not owned */
+ unsigned char counter[MAX_BLOCK_SIZE]; /* current counter block */
+ unsigned char buffer[MAX_BLOCK_SIZE]; /* most recent keystream block */
+ unsigned char counterFirst[MAX_BLOCK_SIZE]; /* counter overflow value */
+ PRBool checkWrap; /*check for counter overflow*/
+ unsigned long counterBits; /* number of bits of counter that are incremented */
+ unsigned int bufPtr; /* offset of the first unused byte in buffer */
+};
+
+typedef struct CTRContextStr CTRContext;
+
+/*
+ * Initialize an existing CTRContext. param is read as a CK_AES_CTR_PARAMS
+ * (counter width in bits plus the initial counter block). Fails with
+ * SEC_ERROR_INVALID_ARGS when the counter width is 0 or larger than an AES
+ * block.
+ */
+SECStatus CTR_InitContext(CTRContext *ctr, void *context,
+ freeblCipherFunc cipher, const unsigned char *param);
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTRContext does not own context. context needs to remain valid for as long
+ * as the CTRContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ */
+CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *param);
+
+/* Zero the CTRContext and, when freeit is true, free it. The inner cipher
+ * context is left alone. */
+void CTR_DestroyContext(CTRContext *ctr, PRBool freeit);
+
+/*
+ * XOR inlen bytes of inbuf with the counter-mode keystream into outbuf.
+ * maxout must be at least inlen, otherwise *outlen is set to inlen and the
+ * call fails with SEC_ERROR_OUTPUT_LEN. On success *outlen equals inlen.
+ */
+SECStatus CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+#ifdef USE_HW_AES
+/* Variant of CTR_Update that hands whole blocks to a hardware AES worker. */
+SECStatus CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/cts.c b/security/nss/lib/freebl/cts.c
new file mode 100644
index 0000000000..774294b7a1
--- /dev/null
+++ b/security/nss/lib/freebl/cts.c
@@ -0,0 +1,303 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapit.h"
+#include "blapii.h"
+#include "cts.h"
+#include "secerr.h"
+
+/* State for ciphertext-stealing (CTS) operations layered on a CBC cipher. */
+struct CTSContextStr {
+ freeblCipherFunc cipher; /* inner cipher function (CBC mode per cts.h) */
+ void *context; /* inner cipher context handed to cipher; not freed here */
+ /* iv stores the last ciphertext block of the previous message.
+ * Only used by decrypt. */
+ unsigned char iv[MAX_BLOCK_SIZE];
+};
+
+/*
+ * Allocate a zeroed CTSContext that wraps the given inner cipher and context
+ * and copies MAX_BLOCK_SIZE bytes of iv into it. Returns NULL if the
+ * allocation fails.
+ */
+CTSContext *
+CTS_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *iv)
+{
+    CTSContext *newCtx = PORT_ZNew(CTSContext);
+
+    if (newCtx != NULL) {
+        PORT_Memcpy(newCtx->iv, iv, MAX_BLOCK_SIZE);
+        newCtx->cipher = cipher;
+        newCtx->context = context;
+    }
+    return newCtx;
+}
+
+/*
+ * Wipe a CTSContext (saved chaining block and inner cipher pointers) and,
+ * when freeit is true, release its memory. The inner cipher context is not
+ * touched here. A NULL cts is ignored. Clearing the structure mirrors
+ * CTR_DestroyContext so that no state lingers in released or reused memory.
+ */
+void
+CTS_DestroyContext(CTSContext *cts, PRBool freeit)
+{
+    if (cts == NULL) {
+        return;
+    }
+    PORT_Memset(cts, 0, sizeof(*cts));
+    if (freeit) {
+        PORT_Free(cts);
+    }
+}
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Generically handle cipher text stealing. Basically this is doing CBC
+ * operations except someone can pass us a partial block.
+ *
+ * Output Order:
+ * CS-1: C1||C2||C3..Cn-1(could be partial)||Cn (NIST)
+ * CS-2: pad == 0 C1||C2||C3...Cn-1(is full)||Cn (Schneier)
+ * CS-2: pad != 0 C1||C2||C3...Cn||Cn-1(is partial)(Schneier)
+ * CS-3: C1||C2||C3...Cn||Cn-1(could be partial) (Kerberos)
+ *
+ * The characteristics of these three options:
+ * - NIST & Schneier (CS-1 & CS-2) are identical to CBC if there are no
+ * partial blocks on input.
+ * - Schneier and Kerberos (CS-2 and CS-3) have no embedded partial blocks,
+ * which make decoding easier.
+ * - NIST & Kerberos (CS-1 and CS-3) have consistent block order independent
+ * of padding.
+ *
+ * PKCS #11 did not specify which version to implement, but points to the NIST
+ * spec, so this code implements CTS-CS-1 from NIST.
+ *
+ * To convert the returned buffer to:
+ * CS-2 (Schneier): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = *outlen % blocksize;
+ * if (pad) {
+ * memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ * }
+ * CS-3 (Kerberos): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = *outlen % blocksize;
+ * if (pad == 0) {
+ * pad = blocksize;
+ * }
+ * memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ */
+/*
+ * CBC encrypt with ciphertext stealing, producing CS-1 ordered output of
+ * exactly inlen bytes.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when blocksize is 0 or larger than
+ * MAX_BLOCK_SIZE, with SEC_ERROR_INPUT_LEN when inlen < blocksize, and with
+ * SEC_ERROR_OUTPUT_LEN (after storing the required size in *outlen) when
+ * maxout < inlen. If encrypting the final padded block fails, the output
+ * written so far is zeroed.
+ */
+SECStatus
+CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char lastBlock[MAX_BLOCK_SIZE];
+    unsigned int tmp;
+    /* Byte counts are unsigned, like every length they are combined with. */
+    unsigned int fullblocks;
+    unsigned int written;
+    unsigned char *saveout = outbuf;
+    SECStatus rv;
+
+    /* blocksize is used as a divisor and as the number of bytes stored in
+     * the fixed-size lastBlock buffer, so reject values that cannot work. */
+    if (blocksize == 0 || blocksize > MAX_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (inlen < blocksize) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    fullblocks = (inlen / blocksize) * blocksize;
+    rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+                        fullblocks, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    *outlen = fullblocks; /* AES low level doesn't set outlen */
+    inbuf += fullblocks;
+    inlen -= fullblocks;
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    /* The final block is written so that it overlaps the tail of Cn-1,
+     * leaving only the first inlen bytes of Cn-1 in the output. */
+    written = *outlen - (blocksize - inlen);
+    outbuf += written;
+    maxout -= written;
+
+    /*
+     * Here's the CTS magic: we pad our final block with zeros, then do a
+     * CBC encrypt. CBC will xor our plain text with the previous block
+     * (Cn-1), capturing part of that block (Cn-1**) as it xors with the
+     * zero pad. We then write this full block, overwriting (Cn-1**) in our
+     * buffer. This allows us to have input data == output data since Cn
+     * contains enough information to recover Cn-1** when we decrypt (at
+     * the cost of some complexity, as you can see in decrypt below).
+     */
+    PORT_Memcpy(lastBlock, inbuf, inlen);
+    PORT_Memset(lastBlock + inlen, 0, blocksize - inlen);
+    rv = (*cts->cipher)(cts->context, outbuf, &tmp, maxout, lastBlock,
+                        blocksize, blocksize);
+    /* lastBlock held plaintext; clear it whatever the outcome. */
+    PORT_Memset(lastBlock, 0, blocksize);
+    if (rv == SECSuccess) {
+        *outlen = written + blocksize;
+    } else {
+        PORT_Memset(saveout, 0, written + blocksize);
+    }
+    return rv;
+}
+
+/*
+ * XOR the first count bytes of y into x, in place. The macro uses an
+ * integer variable named i that must be declared in the enclosing scope.
+ * Arguments are parenthesised so expressions such as `buf + off` work, and
+ * the body is wrapped in do/while(0) so the macro behaves as one statement.
+ */
+#define XOR_BLOCK(x, y, count)           \
+    do {                                 \
+        for (i = 0; i < (count); i++) {  \
+            (x)[i] = (x)[i] ^ (y)[i];    \
+        }                                \
+    } while (0)
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Decrypt, Expect CS-1: input. See the comment on the encrypt side
+ * to understand what CS-2 and CS-3 mean.
+ *
+ * To convert the input buffer to CS-1 from ...
+ * CS-2 (Schneier): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = inlen % blocksize;
+ * if (pad) {
+ * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ * memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ * }
+ * CS-3 (Kerberos): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = inlen % blocksize;
+ * if (pad == 0) {
+ * pad = blocksize;
+ * }
+ * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ * memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ */
+/*
+ * CBC decrypt of CS-1 ordered ciphertext-stealing input, producing exactly
+ * inlen bytes of plaintext.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when inlen < blocksize and with
+ * SEC_ERROR_OUTPUT_LEN (after storing the required size in *outlen) when
+ * maxout < inlen. When the input has a partial block and inbuf != outbuf,
+ * the input is first copied into outbuf and rearranged there.
+ * NOTE(review): that copy uses memcpy, so partially overlapping inbuf/outbuf
+ * look unsupported -- confirm against callers.
+ */
+SECStatus
+CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize)
+{
+ unsigned char *Pn;
+ unsigned char Cn_2[MAX_BLOCK_SIZE]; /* block Cn-2 */
+ unsigned char Cn_1[MAX_BLOCK_SIZE]; /* block Cn-1 */
+ unsigned char Cn[MAX_BLOCK_SIZE]; /* block Cn */
+ unsigned char lastBlock[MAX_BLOCK_SIZE];
+ const unsigned char *tmp;
+ unsigned char *saveout = outbuf;
+ unsigned int tmpLen;
+ unsigned int fullblocks, pad;
+ unsigned int i;
+ SECStatus rv;
+
+ if (inlen < blocksize) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ if (maxout < inlen) {
+ *outlen = inlen;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ fullblocks = (inlen / blocksize) * blocksize;
+
+ /* even though we expect the input to be CS-1, CS-2 is easier to parse,
+ * so convert to CS-2 immediately. NOTE: this is the same code as in
+ * the comment for encrypt. NOTE2: since we can't modify inbuf unless
+ * inbuf and outbuf overlap, just copy inbuf to outbuf and modify it there
+ */
+ pad = inlen - fullblocks;
+ if (pad != 0) {
+ if (inbuf != outbuf) {
+ memcpy(outbuf, inbuf, inlen);
+ /* keep the names so we logically know how we are using the
+ * buffers */
+ inbuf = outbuf;
+ }
+ memcpy(lastBlock, inbuf + inlen - blocksize, blocksize);
+ /* we know inbuf == outbuf now, inbuf is declared const and can't
+ * be the target, so use outbuf for the target here */
+ memcpy(outbuf + inlen - pad, inbuf + inlen - blocksize - pad, pad);
+ memcpy(outbuf + inlen - blocksize - pad, lastBlock, blocksize);
+ }
+ /* save the previous to last block so we can undo the misordered
+ * chaining. With fewer than two full blocks the chaining value is the
+ * IV kept in the context. */
+ tmp = (fullblocks < blocksize * 2) ? cts->iv : inbuf + fullblocks - blocksize * 2;
+ PORT_Memcpy(Cn_2, tmp, blocksize);
+ PORT_Memcpy(Cn, inbuf + fullblocks - blocksize, blocksize);
+ rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+ fullblocks, blocksize);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+ *outlen = fullblocks; /* AES low level doesn't set outlen */
+ inbuf += fullblocks;
+ inlen -= fullblocks;
+ if (inlen == 0) {
+ return SECSuccess;
+ }
+ outbuf += fullblocks;
+
+ /* recover the stolen text */
+ PORT_Memset(lastBlock, 0, blocksize);
+ PORT_Memcpy(lastBlock, inbuf, inlen);
+ PORT_Memcpy(Cn_1, inbuf, inlen);
+ Pn = outbuf - blocksize;
+ /* inbuf points to Cn-1* in the input buffer */
+ /* NOTE: below there are 2 sections marked "make up for the out of order
+ * cbc decryption". You may ask, what is going on here.
+ * Short answer: CBC automatically xors the plain text with the previous
+ * encrypted block. We are decrypting the last 2 blocks out of order, so
+ * we have to 'back out' the decrypt xor and 'add back' the encrypt xor.
+ * Long answer: When we encrypted, we encrypted as follows:
+ * Pn-2, Pn-1, (Pn || 0), but on decryption we can't
+ * decrypt Cn-1 until we decrypt Cn because part of Cn-1 is stored in
+ * Cn (see below). So above we decrypted all the full blocks:
+ * Cn-2, Cn,
+ * to get:
+ * Pn-2, Pn, Except that Pn is not yet correct. On encrypt, we
+ * xor'd Pn || 0 with Cn-1, but on decrypt we xor'd it with Cn-2
+ * To recover Pn, we xor the block with Cn-1* || 0 (in last block) and
+ * Cn-2 to get Pn || Cn-1**. Pn can then be written to the output buffer
+ * and we can now reunite Cn-1. With the full Cn-1 we can decrypt it,
+ * but now decrypt is going to xor the decrypted data with Cn instead of
+ * Cn-2. xoring Cn and Cn-2 restores the original Pn-1 and we can now
+ * write that out to the buffer */
+
+ /* make up for the out of order CBC decryption */
+ XOR_BLOCK(lastBlock, Cn_2, blocksize);
+ XOR_BLOCK(lastBlock, Pn, blocksize);
+ /* last buf now has Pn || Cn-1**, copy out Pn */
+ PORT_Memcpy(outbuf, lastBlock, inlen);
+ *outlen += inlen;
+ /* copy Cn-1* into last buf to recover Cn-1 */
+ PORT_Memcpy(lastBlock, Cn_1, inlen);
+ /* note: because Cn and Cn-1 were out of order, our pointer to Pn also
+ * points to where Pn-1 needs to reside. From here on out read Pn in
+ * the code as really Pn-1. */
+ rv = (*cts->cipher)(cts->context, Pn, &tmpLen, blocksize, lastBlock,
+ blocksize, blocksize);
+ if (rv != SECSuccess) {
+ /* do not leave partially recovered plaintext behind on failure */
+ PORT_Memset(lastBlock, 0, blocksize);
+ PORT_Memset(saveout, 0, *outlen);
+ return SECFailure;
+ }
+ /* make up for the out of order CBC decryption */
+ XOR_BLOCK(Pn, Cn_2, blocksize);
+ XOR_BLOCK(Pn, Cn, blocksize);
+ /* reset iv to Cn */
+ PORT_Memcpy(cts->iv, Cn, blocksize);
+ /* This makes Cn the last block for the next decrypt operation, which
+ * matches the encrypt. We don't care about the contents of last block,
+ * only the side effect of setting the internal IV */
+ (void)(*cts->cipher)(cts->context, lastBlock, &tmpLen, blocksize, Cn,
+ blocksize, blocksize);
+ /* clear last block. At this point last block contains Pn xor Cn_1 xor
+ * Cn_2, both of which an attacker would know, so we need to clear this
+ * buffer out */
+ PORT_Memset(lastBlock, 0, blocksize);
+ /* Cn, Cn_1, and Cn_2 have encrypted data, so no need to clear them */
+ return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/cts.h b/security/nss/lib/freebl/cts.h
new file mode 100644
index 0000000000..ddd56197f6
--- /dev/null
+++ b/security/nss/lib/freebl/cts.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTS_H
+#define CTS_H 1
+
+#include "blapii.h"
+
+/* Opaque CTS state; the structure is defined in cts.c. */
+typedef struct CTSContextStr CTSContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTSContext does not own context. context needs to remain valid for as long
+ * as the CTSContext is valid.
+ *
+ * The cipher argument is a block cipher in the CBC mode.
+ *
+ * MAX_BLOCK_SIZE bytes are copied from iv. Returns NULL if the allocation
+ * fails.
+ */
+CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *iv);
+
+/* Frees cts when freeit is true. The inner cipher context is not freed. */
+void CTS_DestroyContext(CTSContext *cts, PRBool freeit);
+
+/*
+ * Encrypt/decrypt with ciphertext stealing; output and input use the NIST
+ * CS-1 block ordering. Both calls require inlen >= blocksize
+ * (SEC_ERROR_INPUT_LEN otherwise) and maxout >= inlen (otherwise *outlen is
+ * set to inlen and the call fails with SEC_ERROR_OUTPUT_LEN).
+ */
+SECStatus CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+SECStatus CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+#endif
diff --git a/security/nss/lib/freebl/deprecated/alg2268.c b/security/nss/lib/freebl/deprecated/alg2268.c
new file mode 100644
index 0000000000..ac97363099
--- /dev/null
+++ b/security/nss/lib/freebl/deprecated/alg2268.c
@@ -0,0 +1,509 @@
+/*
+ * alg2268.c - implementation of the algorithm in RFC 2268
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "../blapi.h"
+#include "../blapii.h"
+#include "secerr.h"
+#ifdef XP_UNIX_XXX
+#include <stddef.h> /* for ptrdiff_t */
+#endif
+
+/*
+** RC2 symmetric block cypher
+*/
+
+typedef SECStatus(rc2Func)(RC2Context *cx, unsigned char *output,
+ const unsigned char *input, unsigned int inputLen); /* common signature of the four bulk workers declared below */
+
+/* forward declarations: the workers are defined later in this file, but
+ * RC2_InitContext stores their addresses in the context before that point */
+static rc2Func rc2_EncryptECB;
+static rc2Func rc2_DecryptECB;
+static rc2Func rc2_EncryptCBC;
+static rc2Func rc2_DecryptCBC;
+
+typedef union { /* one 8-byte RC2 block, viewable at three granularities */
+ PRUint32 l[2]; /* 32-bit view; the CBC workers XOR the chaining value through it */
+ PRUint16 s[4]; /* 16-bit view; the cipher rounds and LOAD/STORE operate on it */
+ PRUint8 b[8]; /* byte view */
+} RC2Block;
+
+struct RC2ContextStr { /* per-key RC2 state; zeroed by RC2_DestroyContext */
+ union { /* expanded key table, filled by RC2_InitContext */
+ PRUint8 Kb[128]; /* byte view (macro B), used while expanding the key */
+ PRUint16 Kw[64]; /* 16-bit view (macro K), read by the MIX/MASH round macros */
+ } u;
+ RC2Block iv; /* CBC chaining value; loaded from the caller's IV in NSS_RC2_CBC mode */
+ rc2Func *enc; /* encrypt worker chosen by mode (ECB or CBC) */
+ rc2Func *dec; /* decrypt worker chosen by mode (ECB or CBC) */
+};
+
+#define B u.Kb /* cx->B: expanded key as 128 bytes */
+#define K u.Kw /* cx->K: expanded key as 64 16-bit words */
+#define BYTESWAP(x) ((x) << 8 | (x) >> 8) /* swap the two bytes of a 16-bit value; only the low 16 bits of the result are meaningful */
+#define SWAPK(i) cx->K[i] = (tmpS = cx->K[i], BYTESWAP(tmpS)) /* byte-swap key word i in place; needs cx and a PRUint16 tmpS in scope */
+#define RC2_BLOCK_SIZE 8 /* cipher block size in bytes */
+
+/*
+ * Helpers that move one 8-byte block between a byte buffer and four 16-bit
+ * words R[0..3]. They are textual macros: LOAD* read through a pointer named
+ * "input" and STORE* write through a pointer named "output", both of which
+ * must be in scope at the point of use.
+ *
+ * *_HARD assemble/split each word byte by byte (low byte first in the
+ * buffer), so they work for any alignment and any host byte order.
+ * *_EASY access the buffer directly as PRUint16; that yields the same bytes
+ * only on a little-endian host and performs 16-bit accesses on the buffer.
+ */
+#define LOAD_HARD(R) \
+    R[0] = (PRUint16)input[1] << 8 | input[0]; \
+    R[1] = (PRUint16)input[3] << 8 | input[2]; \
+    R[2] = (PRUint16)input[5] << 8 | input[4]; \
+    R[3] = (PRUint16)input[7] << 8 | input[6];
+#define LOAD_EASY(R) \
+    R[0] = ((PRUint16 *)input)[0]; \
+    R[1] = ((PRUint16 *)input)[1]; \
+    R[2] = ((PRUint16 *)input)[2]; \
+    R[3] = ((PRUint16 *)input)[3];
+#define STORE_HARD(R) \
+    output[0] = (PRUint8)(R[0]); \
+    output[1] = (PRUint8)(R[0] >> 8); \
+    output[2] = (PRUint8)(R[1]); \
+    output[3] = (PRUint8)(R[1] >> 8); \
+    output[4] = (PRUint8)(R[2]); \
+    output[5] = (PRUint8)(R[2] >> 8); \
+    output[6] = (PRUint8)(R[3]); \
+    output[7] = (PRUint8)(R[3] >> 8);
+#define STORE_EASY(R) \
+    ((PRUint16 *)output)[0] = R[0]; \
+    ((PRUint16 *)output)[1] = R[1]; \
+    ((PRUint16 *)output)[2] = R[2]; \
+    ((PRUint16 *)output)[3] = R[3];
+
+/*
+ * Variant selection:
+ *  - NSS_X86_OR_X64: the direct form is used unconditionally.
+ *  - hosts without IS_LITTLE_ENDIAN: the byte-wise form is used
+ *    unconditionally.
+ *  - other little-endian hosts: the direct form is used only when the
+ *    buffer being accessed is 2-byte aligned. LOAD tests "input"; STORE
+ *    tests "output", the pointer it actually writes through, so an odd
+ *    output address can no longer reach the 16-bit store path just because
+ *    the input happened to be aligned.
+ * The macros expand to bare statements and are invoked without a trailing
+ * semicolon in this file, so they are deliberately not wrapped in
+ * do { } while (0).
+ */
+#if defined(NSS_X86_OR_X64)
+#define LOAD(R) LOAD_EASY(R)
+#define STORE(R) STORE_EASY(R)
+#elif !defined(IS_LITTLE_ENDIAN)
+#define LOAD(R) LOAD_HARD(R)
+#define STORE(R) STORE_HARD(R)
+#else
+#define LOAD(R) \
+    if ((ptrdiff_t)input & 1) { \
+        LOAD_HARD(R) \
+    } else { \
+        LOAD_EASY(R) \
+    }
+#define STORE(R) \
+    if ((ptrdiff_t)output & 1) { \
+        STORE_HARD(R) \
+    } else { \
+        STORE_EASY(R) \
+    }
+#endif
+
+static const PRUint8 S[256] = { /* byte substitution table used by the key expansion in RC2_InitContext; entries are octal. NOTE(review): presumably the PITABLE of RFC 2268 - confirm against the RFC */
+ 0331, 0170, 0371, 0304, 0031, 0335, 0265, 0355, 0050, 0351, 0375, 0171, 0112, 0240, 0330, 0235,
+ 0306, 0176, 0067, 0203, 0053, 0166, 0123, 0216, 0142, 0114, 0144, 0210, 0104, 0213, 0373, 0242,
+ 0027, 0232, 0131, 0365, 0207, 0263, 0117, 0023, 0141, 0105, 0155, 0215, 0011, 0201, 0175, 0062,
+ 0275, 0217, 0100, 0353, 0206, 0267, 0173, 0013, 0360, 0225, 0041, 0042, 0134, 0153, 0116, 0202,
+ 0124, 0326, 0145, 0223, 0316, 0140, 0262, 0034, 0163, 0126, 0300, 0024, 0247, 0214, 0361, 0334,
+ 0022, 0165, 0312, 0037, 0073, 0276, 0344, 0321, 0102, 0075, 0324, 0060, 0243, 0074, 0266, 0046,
+ 0157, 0277, 0016, 0332, 0106, 0151, 0007, 0127, 0047, 0362, 0035, 0233, 0274, 0224, 0103, 0003,
+ 0370, 0021, 0307, 0366, 0220, 0357, 0076, 0347, 0006, 0303, 0325, 0057, 0310, 0146, 0036, 0327,
+ 0010, 0350, 0352, 0336, 0200, 0122, 0356, 0367, 0204, 0252, 0162, 0254, 0065, 0115, 0152, 0052,
+ 0226, 0032, 0322, 0161, 0132, 0025, 0111, 0164, 0113, 0237, 0320, 0136, 0004, 0030, 0244, 0354,
+ 0302, 0340, 0101, 0156, 0017, 0121, 0313, 0314, 0044, 0221, 0257, 0120, 0241, 0364, 0160, 0071,
+ 0231, 0174, 0072, 0205, 0043, 0270, 0264, 0172, 0374, 0002, 0066, 0133, 0045, 0125, 0227, 0061,
+ 0055, 0135, 0372, 0230, 0343, 0212, 0222, 0256, 0005, 0337, 0051, 0020, 0147, 0154, 0272, 0311,
+ 0323, 0000, 0346, 0317, 0341, 0236, 0250, 0054, 0143, 0026, 0001, 0077, 0130, 0342, 0211, 0251,
+ 0015, 0070, 0064, 0033, 0253, 0063, 0377, 0260, 0273, 0110, 0014, 0137, 0271, 0261, 0315, 0056,
+ 0305, 0363, 0333, 0107, 0345, 0245, 0234, 0167, 0012, 0246, 0040, 0150, 0376, 0177, 0301, 0255
+};
+
+RC2Context *
+RC2_AllocateContext(void)
+{
+    /* Hand back whatever PORT_ZNew produces for one RC2Context. */
+    RC2Context *fresh = PORT_ZNew(RC2Context);
+
+    return fresh;
+}
+/*
+** Initialize an RC2 context: validate the arguments, select the ECB or CBC
+** workers, and expand the key into the 128-byte key table.
+**  "cx" the context to initialize
+**  "key" raw key data
+**  "len" the number of bytes of key data, 1..128
+**  "input" the 8-byte CBC initialization vector; required when mode is
+**      NSS_RC2_CBC, not read when mode is NSS_RC2
+**  "mode" one of NSS_RC2 or NSS_RC2_CBC
+**  "efLen8" effective key length in bytes, 1..128
+**  "unused" ignored
+** Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set.
+*/
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *input, int mode, unsigned int efLen8,
+                unsigned int unused)
+{
+    PRUint8 *L, *L2;
+    int i;
+#if !defined(IS_LITTLE_ENDIAN)
+    PRUint16 tmpS;
+#endif
+    PRUint8 tmpB;
+
+    /* efLen8 == 0 has to be refused: step 2 below would otherwise index
+     * cx->B[sizeof cx->B], one byte past the end of the key table. */
+    if (!key || !cx || !len || len > (sizeof cx->B) ||
+        !efLen8 || efLen8 > (sizeof cx->B)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode == NSS_RC2) {
+        cx->enc = &rc2_EncryptECB;
+        cx->dec = &rc2_DecryptECB;
+    } else if (mode == NSS_RC2_CBC) {
+        if (!input) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        cx->enc = &rc2_EncryptCBC;
+        cx->dec = &rc2_DecryptCBC;
+        /* LOAD reads the 8 IV bytes through the parameter named "input". */
+        LOAD(cx->iv.s);
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Step 0. Copy key into table. */
+    memcpy(cx->B, key, len);
+
+    /* Step 1. Compute all values to the right of the key. */
+    L2 = cx->B;
+    L = L2 + len;
+    tmpB = L[-1];
+    for (i = (sizeof cx->B) - len; i > 0; --i) {
+        *L++ = tmpB = S[(PRUint8)(tmpB + *L2++)];
+    }
+
+    /* Step 2. Adjust left most byte of effective key. */
+    i = (sizeof cx->B) - efLen8;
+    cx->B[i] = tmpB = S[cx->B[i]]; /* mask is always 0xff */
+
+    /* Step 3. Recompute all values to the left of effective key.
+     * Index form of the original pointer walk; it never forms a pointer
+     * in front of cx->B. */
+    for (--i; i >= 0; --i) {
+        cx->B[i] = tmpB = S[tmpB ^ cx->B[i + efLen8]];
+    }
+
+#if !defined(IS_LITTLE_ENDIAN)
+    /* The table was built byte by byte; swap each 16-bit word so that
+     * cx->K[] reads the intended values on this host. */
+    for (i = 63; i >= 0; --i) {
+        SWAPK(i); /* candidate for unrolling */
+    }
+#endif
+    return SECSuccess;
+}
+
+/*
+** Allocate and initialize an RC2 context for encryption/decryption.
+**  "key" raw key data
+**  "len" the number of bytes of key data
+**  "iv" the CBC initialization vector (only needed for NSS_RC2_CBC)
+**  "mode" one of NSS_RC2 or NSS_RC2_CBC
+**  "efLen8" effective key length in bytes, not bits
+**
+** Returns the new context, or NULL when allocation or initialization
+** fails (a partially built context is destroyed before returning).
+*/
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned efLen8)
+{
+    RC2Context *ctx = PORT_ZNew(RC2Context);
+
+    if (!ctx) {
+        return NULL;
+    }
+    if (RC2_InitContext(ctx, key, len, iv, mode, efLen8, 0) != SECSuccess) {
+        RC2_DestroyContext(ctx, PR_TRUE);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+** Wipe an RC2 context and optionally release it.
+**  "cx" the context; NULL is accepted and ignored
+**  "freeit" if PR_TRUE the zeroed context is also passed to PORT_Free
+*/
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+
+    /* Clear key table, IV and function pointers before letting go. */
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+#define ROL(x, k) (x << k | x >> (16 - k)) /* 16-bit rotate left; correct only because the result is stored back into a PRUint16 */
+#define MIX(j) /* mixing round j: updates R0..R3 in turn with key words K[4j]..K[4j+3]; needs R0..R3 and cx in scope */ \
+ R0 = R0 + cx->K[4 * j + 0] + (R3 & R2) + (~R3 & R1); \
+ R0 = ROL(R0, 1); \
+ R1 = R1 + cx->K[4 * j + 1] + (R0 & R3) + (~R0 & R2); \
+ R1 = ROL(R1, 2); \
+ R2 = R2 + cx->K[4 * j + 2] + (R1 & R0) + (~R1 & R3); \
+ R2 = ROL(R2, 3); \
+ R3 = R3 + cx->K[4 * j + 3] + (R2 & R1) + (~R2 & R0); \
+ R3 = ROL(R3, 5)
+#define MASH /* mashing round: each word adds the key word selected by the low 6 bits of its neighbour */ \
+ R0 = R0 + cx->K[R3 & 63]; \
+ R1 = R1 + cx->K[R0 & 63]; \
+ R2 = R2 + cx->K[R1 & 63]; \
+ R3 = R3 + cx->K[R2 & 63]
+
+/* Encrypt one block
+ *
+ * Runs 5 + 6 + 5 mixing rounds with a mashing round between the groups on
+ * the four 16-bit words of *input, using the expanded key in cx, and writes
+ * the result to *output. All four input words are read before anything is
+ * written, so input and output may be the same block (the ECB/CBC loops
+ * call it that way). */
+static void
+rc2_Encrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+ register PRUint16 R0, R1, R2, R3;
+
+ /* step 1. Initialize input. */
+ R0 = input->s[0];
+ R1 = input->s[1];
+ R2 = input->s[2];
+ R3 = input->s[3];
+
+ /* step 2. Expand Key (already done, in context) */
+ /* step 3. j = 0 */
+ /* step 4. Perform 5 mixing rounds. */
+
+ MIX(0);
+ MIX(1);
+ MIX(2);
+ MIX(3);
+ MIX(4);
+
+ /* step 5. Perform 1 mashing round. */
+ MASH;
+
+ /* step 6. Perform 6 mixing rounds. */
+
+ MIX(5);
+ MIX(6);
+ MIX(7);
+ MIX(8);
+ MIX(9);
+ MIX(10);
+
+ /* step 7. Perform 1 mashing round. */
+ MASH;
+
+ /* step 8. Perform 5 mixing rounds. */
+
+ MIX(11);
+ MIX(12);
+ MIX(13);
+ MIX(14);
+ MIX(15);
+
+ /* output results */
+ output->s[0] = R0;
+ output->s[1] = R1;
+ output->s[2] = R2;
+ output->s[3] = R3;
+}
+
+#define ROR(x, k) (x >> k | x << (16 - k)) /* 16-bit rotate right; correct only because the result is stored back into a PRUint16 */
+#define R_MIX(j) /* inverse of MIX(j): undoes the rotations and additions in the opposite order, R3 first */ \
+ R3 = ROR(R3, 5); \
+ R3 = R3 - cx->K[4 * j + 3] - (R2 & R1) - (~R2 & R0); \
+ R2 = ROR(R2, 3); \
+ R2 = R2 - cx->K[4 * j + 2] - (R1 & R0) - (~R1 & R3); \
+ R1 = ROR(R1, 2); \
+ R1 = R1 - cx->K[4 * j + 1] - (R0 & R3) - (~R0 & R2); \
+ R0 = ROR(R0, 1); \
+ R0 = R0 - cx->K[4 * j + 0] - (R3 & R2) - (~R3 & R1)
+#define R_MASH /* inverse of MASH: subtracts the selected key words, R3 first */ \
+ R3 = R3 - cx->K[R2 & 63]; \
+ R2 = R2 - cx->K[R1 & 63]; \
+ R1 = R1 - cx->K[R0 & 63]; \
+ R0 = R0 - cx->K[R3 & 63]
+
+/* Decrypt one block
+ *
+ * Applies the inverse rounds in reverse order (R_MIX 15..11, R_MASH,
+ * R_MIX 10..5, R_MASH, R_MIX 4..0) to the four 16-bit words of *input and
+ * writes the result to *output. All four input words are read before
+ * anything is written, so input and output may be the same block. */
+static void
+rc2_Decrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+ register PRUint16 R0, R1, R2, R3;
+
+ /* step 1. Initialize input. */
+ R0 = input->s[0];
+ R1 = input->s[1];
+ R2 = input->s[2];
+ R3 = input->s[3];
+
+ /* step 2. Expand Key (already done, in context) */
+ /* step 3. j = 63 */
+ /* step 4. Perform 5 r_mixing rounds. */
+ R_MIX(15);
+ R_MIX(14);
+ R_MIX(13);
+ R_MIX(12);
+ R_MIX(11);
+
+ /* step 5. Perform 1 r_mashing round. */
+ R_MASH;
+
+ /* step 6. Perform 6 r_mixing rounds. */
+ R_MIX(10);
+ R_MIX(9);
+ R_MIX(8);
+ R_MIX(7);
+ R_MIX(6);
+ R_MIX(5);
+
+ /* step 7. Perform 1 r_mashing round. */
+ R_MASH;
+
+ /* step 8. Perform 5 r_mixing rounds. */
+ R_MIX(4);
+ R_MIX(3);
+ R_MIX(2);
+ R_MIX(1);
+ R_MIX(0);
+
+ /* output results */
+ output->s[0] = R0;
+ output->s[1] = R1;
+ output->s[2] = R2;
+ output->s[3] = R3;
+}
+
+/* ECB encryption worker: encrypts inputLen bytes, one independent 8-byte
+ * block at a time. inputLen must be a multiple of RC2_BLOCK_SIZE
+ * (RC2_Encrypt checks this before dispatching here). */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* LOAD/STORE are macros that use "input"/"output" by name. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* ECB decryption worker: decrypts inputLen bytes, one independent 8-byte
+ * block at a time. inputLen must be a multiple of RC2_BLOCK_SIZE
+ * (RC2_Decrypt checks this before dispatching here). */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* LOAD/STORE are macros that use "input"/"output" by name. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Decrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* CBC encryption worker: each plaintext block is XORed with the chaining
+ * value in cx->iv, encrypted, and the resulting ciphertext block becomes
+ * the new cx->iv. inputLen must be a multiple of RC2_BLOCK_SIZE. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    /* LOAD/STORE are macros that use "input"/"output" by name. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        blk.l[0] ^= cx->iv.l[0];
+        blk.l[1] ^= cx->iv.l[1];
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        cx->iv = blk; /* chain: ciphertext feeds the next block */
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* CBC decryption worker: each ciphertext block is decrypted, XORed with the
+ * chaining value in cx->iv, and then itself becomes the new cx->iv. The
+ * ciphertext is held in a local copy, so the chaining value survives even
+ * when output overwrites input. inputLen must be a multiple of
+ * RC2_BLOCK_SIZE. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block cipherBlk;
+    RC2Block plainBlk;
+
+    /* LOAD/STORE are macros that use "input"/"output" by name. */
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(cipherBlk.s)
+        rc2_Decrypt1Block(cx, &plainBlk, &cipherBlk);
+        plainBlk.l[0] ^= cx->iv.l[0];
+        plainBlk.l[1] ^= cx->iv.l[1];
+        cx->iv = cipherBlk; /* chain: ciphertext feeds the next block */
+        STORE(plainBlk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+** Encrypt with RC2 using the worker selected when the context was set up.
+**  "cx" the context
+**  "output" buffer receiving the encrypted data
+**  "outputLen" set to the number of bytes stored in "output"; written only
+**      on success
+**  "maxOutputLen" capacity of "output" in bytes
+**  "input" the data to encrypt
+**  "inputLen" number of input bytes; must be a multiple of RC2_BLOCK_SIZE
+** Fails with SEC_ERROR_INPUT_LEN for a ragged input length and with
+** SEC_ERROR_OUTPUT_LEN when "output" is too small. An empty input succeeds
+** and reports zero bytes without touching the context.
+*/
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    if (inputLen == 0) {
+        *outputLen = 0;
+        return SECSuccess;
+    }
+    if ((inputLen % RC2_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    status = (*cx->enc)(cx, output, input, inputLen);
+    if (status == SECSuccess) {
+        *outputLen = inputLen;
+    }
+    return status;
+}
+
+/*
+** Decrypt with RC2 using the worker selected when the context was set up.
+**  "cx" the context
+**  "output" buffer receiving the decrypted data
+**  "outputLen" set to the number of bytes stored in "output"; written only
+**      on success
+**  "maxOutputLen" capacity of "output" in bytes
+**  "input" the data to decrypt
+**  "inputLen" number of input bytes; must be a multiple of RC2_BLOCK_SIZE
+** Fails with SEC_ERROR_INPUT_LEN for a ragged input length and with
+** SEC_ERROR_OUTPUT_LEN when "output" is too small. An empty input succeeds
+** and reports zero bytes without touching the context.
+*/
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    if (inputLen == 0) {
+        *outputLen = 0;
+        return SECSuccess;
+    }
+    if ((inputLen % RC2_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    status = (*cx->dec)(cx, output, input, inputLen);
+    if (status == SECSuccess) {
+        *outputLen = inputLen;
+    }
+    return status;
+}
diff --git a/security/nss/lib/freebl/deprecated/seed.c b/security/nss/lib/freebl/deprecated/seed.c
new file mode 100644
index 0000000000..fd27bbd0e1
--- /dev/null
+++ b/security/nss/lib/freebl/deprecated/seed.c
@@ -0,0 +1,671 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#ifdef WIN32
+#include <memory.h>
+#endif
+
+#include "seed.h"
+#include "secerr.h"
+
+static const seed_word SS[4][256] = {
+ { 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0,
+ 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124,
+ 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c,
+ 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360,
+ 0x28082028, 0x04444044, 0x20002020, 0x1d8d919c,
+ 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314,
+ 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378,
+ 0x3b8bb3b8, 0x13031310, 0x12c2d2d0, 0x2ecee2ec,
+ 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8,
+ 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074,
+ 0x2ccce0ec, 0x15859194, 0x0b0b0308, 0x17475354,
+ 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100,
+ 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098,
+ 0x10001010, 0x0cccc0cc, 0x32c2f2f0, 0x19c9d1d8,
+ 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380,
+ 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8,
+ 0x20406060, 0x10405050, 0x2383a3a0, 0x2bcbe3e8,
+ 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c,
+ 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078,
+ 0x2686a2a4, 0x12021210, 0x2f8fa3ac, 0x15c5d1d4,
+ 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140,
+ 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008,
+ 0x1f0f131c, 0x19899198, 0x00000000, 0x19091118,
+ 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0,
+ 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324,
+ 0x3080b0b0, 0x0b8b8388, 0x0e0e020c, 0x2b8ba3a8,
+ 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c,
+ 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208,
+ 0x3f8fb3bc, 0x2fcfe3ec, 0x33c3f3f0, 0x05c5c1c4,
+ 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064,
+ 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218,
+ 0x06060204, 0x21012120, 0x2b4b6368, 0x26466264,
+ 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288,
+ 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0,
+ 0x3a4a7278, 0x07474344, 0x16869294, 0x25c5e1e4,
+ 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc,
+ 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac,
+ 0x36063234, 0x15051114, 0x22022220, 0x38083038,
+ 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c,
+ 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394,
+ 0x35053134, 0x0bcbc3c8, 0x0ecec2cc, 0x3c0c303c,
+ 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188,
+ 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8,
+ 0x14849094, 0x19495158, 0x02828280, 0x04c4c0c4,
+ 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364,
+ 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8,
+ 0x0f0f030c, 0x0e8e828c, 0x02424240, 0x23032320,
+ 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4,
+ 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0,
+ 0x2f4f636c, 0x3d0d313c, 0x2d0d212c, 0x00404040,
+ 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0,
+ 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154,
+ 0x3b0b3338, 0x1cccd0dc, 0x28486068, 0x3f4f737c,
+ 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254,
+ 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244,
+ 0x3585b1b4, 0x2b0b2328, 0x25456164, 0x3acaf2f8,
+ 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c,
+ 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0,
+ 0x31013130, 0x2acae2e8, 0x2d4d616c, 0x1f4f535c,
+ 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088,
+ 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4,
+ 0x22426260, 0x29092128, 0x07070304, 0x33033330,
+ 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178,
+ 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298 },
+ { 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2,
+ 0xcc0fcfc3, 0xdc1eced2, 0xb03383b3, 0xb83888b0,
+ 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3,
+ 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53,
+ 0xc003c3c3, 0x60224262, 0x30330333, 0xb43585b1,
+ 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3,
+ 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602,
+ 0x1c1c0c10, 0xbc3c8cb0, 0x34360632, 0x480b4b43,
+ 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0,
+ 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0,
+ 0xc002c2c2, 0x44054541, 0xe021c1e1, 0xd416c6d2,
+ 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890,
+ 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32,
+ 0xa42585a1, 0xf839c9f1, 0x0c0d0d01, 0xdc1fcfd3,
+ 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72,
+ 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272,
+ 0x40024242, 0xd414c4d0, 0x40014141, 0xc000c0c0,
+ 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83,
+ 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13,
+ 0xc80acac2, 0x2c2c0c20, 0xa82a8aa2, 0x34340430,
+ 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1,
+ 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0,
+ 0x54174753, 0xac2e8ea2, 0x08080800, 0xc405c5c1,
+ 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1,
+ 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131,
+ 0xf435c5f1, 0x880a8a82, 0x682a4a62, 0xb03181b1,
+ 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202,
+ 0x20220222, 0x04040400, 0x68284860, 0x70314171,
+ 0x04070703, 0xd81bcbd3, 0x9c1d8d91, 0x98198991,
+ 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951,
+ 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0,
+ 0x981a8a92, 0xa02383a3, 0xa82b8ba3, 0xd010c0d0,
+ 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12,
+ 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3,
+ 0x94168692, 0x783b4b73, 0x5c1c4c50, 0xa02282a2,
+ 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41,
+ 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32,
+ 0x0c0c0c00, 0x2c2e0e22, 0xb83a8ab2, 0x6c2e4e62,
+ 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292,
+ 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0,
+ 0x14150511, 0xf83bcbf3, 0x70304070, 0x74354571,
+ 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303,
+ 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470,
+ 0xd415c5d1, 0xb43484b0, 0xe82acae2, 0x08090901,
+ 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040,
+ 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501,
+ 0xf83acaf2, 0x00010101, 0xf030c0f0, 0x282a0a22,
+ 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343,
+ 0x84058581, 0x14140410, 0x88098981, 0x981b8b93,
+ 0xb03080b0, 0xe425c5e1, 0x48084840, 0x78394971,
+ 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282,
+ 0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53,
+ 0x74374773, 0x54144450, 0xb03282b2, 0x1c1d0d11,
+ 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642,
+ 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3,
+ 0x7c3e4e72, 0xd81acad2, 0xc809c9c1, 0xfc3dcdf1,
+ 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30,
+ 0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70,
+ 0x0c0e0e02, 0x50104050, 0x38390931, 0x24260622,
+ 0x30320232, 0x84048480, 0x68294961, 0x90138393,
+ 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0,
+ 0xc80bcbc3, 0x50134353, 0x080a0a02, 0x84078783,
+ 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83,
+ 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3 },
+ { 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3,
+ 0x50541444, 0x111c1d0d, 0xa0ac2c8c, 0x21242505,
+ 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e,
+ 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343,
+ 0x20282808, 0x40440444, 0x20202000, 0x919c1d8d,
+ 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707,
+ 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b,
+ 0xb3b83b8b, 0x13101303, 0xd2d012c2, 0xe2ec2ece,
+ 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888,
+ 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444,
+ 0xe0ec2ccc, 0x91941585, 0x03080b0b, 0x53541747,
+ 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101,
+ 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888,
+ 0x10101000, 0xc0cc0ccc, 0xf2f032c2, 0xd1d819c9,
+ 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383,
+ 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9,
+ 0x60602040, 0x50501040, 0xa3a02383, 0xe3e82bcb,
+ 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f,
+ 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848,
+ 0xa2a42686, 0x12101202, 0xa3ac2f8f, 0xd1d415c5,
+ 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141,
+ 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808,
+ 0x131c1f0f, 0x91981989, 0x00000000, 0x11181909,
+ 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1,
+ 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707,
+ 0xb0b03080, 0x83880b8b, 0x020c0e0e, 0xa3a82b8b,
+ 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d,
+ 0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a,
+ 0xb3bc3f8f, 0xe3ec2fcf, 0xf3f033c3, 0xc1c405c5,
+ 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444,
+ 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a,
+ 0x02040606, 0x21202101, 0x63682b4b, 0x62642646,
+ 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a,
+ 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0,
+ 0x72783a4a, 0x43440747, 0x92941686, 0xe1e425c5,
+ 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf,
+ 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e,
+ 0x32343606, 0x11141505, 0x22202202, 0x30383808,
+ 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c,
+ 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787,
+ 0x31343505, 0xc3c80bcb, 0xc2cc0ece, 0x303c3c0c,
+ 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989,
+ 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8,
+ 0x90941484, 0x51581949, 0x82800282, 0xc0c404c4,
+ 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747,
+ 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888,
+ 0x030c0f0f, 0x828c0e8e, 0x42400242, 0x23202303,
+ 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484,
+ 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2,
+ 0x636c2f4f, 0x313c3d0d, 0x212c2d0d, 0x40400040,
+ 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1,
+ 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545,
+ 0x33383b0b, 0xd0dc1ccc, 0x60682848, 0x737c3f4f,
+ 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646,
+ 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646,
+ 0xb1b43585, 0x23282b0b, 0x61642545, 0xf2f83aca,
+ 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f,
+ 0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282,
+ 0x31303101, 0xe2e82aca, 0x616c2d4d, 0x535c1f4f,
+ 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888,
+ 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4,
+ 0x62602242, 0x21282909, 0x03040707, 0x33303303,
+ 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949,
+ 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a },
+ { 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426,
+ 0xcfc3cc0f, 0xced2dc1e, 0x83b3b033, 0x88b0b838,
+ 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407,
+ 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b,
+ 0xc3c3c003, 0x42626022, 0x03333033, 0x85b1b435,
+ 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427,
+ 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406,
+ 0x0c101c1c, 0x8cb0bc3c, 0x06323436, 0x4b43480b,
+ 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828,
+ 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434,
+ 0xc2c2c002, 0x45414405, 0xc1e1e021, 0xc6d2d416,
+ 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818,
+ 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e,
+ 0x85a1a425, 0xc9f1f839, 0x0d010c0d, 0xcfd3dc1f,
+ 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a,
+ 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032,
+ 0x42424002, 0xc4d0d414, 0x41414001, 0xc0c0c000,
+ 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b,
+ 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f,
+ 0xcac2c80a, 0x0c202c2c, 0x8aa2a82a, 0x04303434,
+ 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829,
+ 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838,
+ 0x47535417, 0x8ea2ac2e, 0x08000808, 0xc5c1c405,
+ 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839,
+ 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031,
+ 0xc5f1f435, 0x8a82880a, 0x4a62682a, 0x81b1b031,
+ 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002,
+ 0x02222022, 0x04000404, 0x48606828, 0x41717031,
+ 0x07030407, 0xcbd3d81b, 0x8d919c1d, 0x89919819,
+ 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819,
+ 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c,
+ 0x8a92981a, 0x83a3a023, 0x8ba3a82b, 0xc0d0d010,
+ 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a,
+ 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f,
+ 0x86929416, 0x4b73783b, 0x4c505c1c, 0x82a2a022,
+ 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d,
+ 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a,
+ 0x0c000c0c, 0x0e222c2e, 0x8ab2b83a, 0x4e626c2e,
+ 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012,
+ 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c,
+ 0x05111415, 0xcbf3f83b, 0x40707030, 0x45717435,
+ 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003,
+ 0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434,
+ 0xc5d1d415, 0x84b0b434, 0xcae2e82a, 0x09010809,
+ 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000,
+ 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405,
+ 0xcaf2f83a, 0x01010001, 0xc0f0f030, 0x0a22282a,
+ 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003,
+ 0x85818405, 0x04101414, 0x89818809, 0x8b93981b,
+ 0x80b0b030, 0xc5e1e425, 0x48404808, 0x49717839,
+ 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002,
+ 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f,
+ 0x47737437, 0x44505414, 0x82b2b032, 0x0d111c1d,
+ 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406,
+ 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b,
+ 0x4e727c3e, 0xcad2d81a, 0xc9c1c809, 0xcdf1fc3d,
+ 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c,
+ 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c,
+ 0x0e020c0e, 0x40505010, 0x09313839, 0x06222426,
+ 0x02323032, 0x84808404, 0x49616829, 0x83939013,
+ 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424,
+ 0xcbc3c80b, 0x43535013, 0x0a02080a, 0x87838407,
+ 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f,
+ 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437 }
+};
+
+/* key schedule constants - golden ratio
+ *
+ * KC0 is the base value; every following KCi is KC(i-1) rotated left by one
+ * bit within 32 bits, i.e. KCi is KC0 rotated left by i bits.
+ * SEED_set_key uses them in order, one constant per pair of round-key
+ * words. */
+#define KC0 0x9e3779b9
+#define KC1 0x3c6ef373
+#define KC2 0x78dde6e6
+#define KC3 0xf1bbcdcc
+#define KC4 0xe3779b99
+#define KC5 0xc6ef3733
+#define KC6 0x8dde6e67
+#define KC7 0x1bbcdccf
+#define KC8 0x3779b99e
+#define KC9 0x6ef3733c
+#define KC10 0xdde6e678
+#define KC11 0xbbcdccf1
+#define KC12 0x779b99e3
+#define KC13 0xef3733c6
+#define KC14 0xde6e678d
+#define KC15 0xbcdccf1b
+
+void
+SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+ SEED_KEY_SCHEDULE *ks)
+{ /* Expand the 16-byte raw key into the 32 round-key words ks->data[0..31].
+ * Sixteen steps each emit two words (KEYUPDATE_TEMP) from t0/t1; between
+ * steps the key halves are rotated by 8 bits, alternating between the
+ * K0:K1 pair (KEYSCHEDULE_UPDATE1) and the K2:K3 pair
+ * (KEYSCHEDULE_UPDATE0), and t0/t1 are recomputed with the next KC
+ * constant. No argument checking is done here. */
+ seed_word K0, K1, K2, K3;
+ seed_word t0, t1;
+
+ char2word(rawkey, K0); /* key bytes are read as four big-endian 32-bit words */
+ char2word(rawkey + 4, K1);
+ char2word(rawkey + 8, K2);
+ char2word(rawkey + 12, K3);
+
+ t0 = (K0 + K2 - KC0); /* step 0 uses the key words before any rotation */
+ t1 = (K1 - K3 + KC0);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[0]); /* data[0], data[1] = G_FUNC(t0), G_FUNC(t1) */
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC1); /* rotate K0:K1, refresh t0/t1 with KC1 */
+ KEYUPDATE_TEMP(t0, t1, &ks->data[2]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC2); /* rotate K2:K3, refresh t0/t1 with KC2 */
+ KEYUPDATE_TEMP(t0, t1, &ks->data[4]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC3);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[6]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC4);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[8]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC5);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[10]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC6);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[12]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC7);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[14]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC8);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[16]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC9);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[18]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC10);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[20]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC11);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[22]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC12);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[24]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC13);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[26]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC14);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[28]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC15);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[30]); /* last pair: data[30], data[31] */
+}
+
+void
+SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks)
+{ /* Encrypt the single 16-byte block at s into d with key schedule ks.
+ * s is read completely before d is written, so s and d may be the same
+ * buffer (SEED_cbc_encrypt calls it that way). */
+ seed_word L0, L1, R0, R1;
+ seed_word t0, t1;
+
+ char2word(s, L0); /* block is read as four big-endian 32-bit words */
+ char2word(s + 4, L1);
+ char2word(s + 8, R0);
+ char2word(s + 12, R1);
+
+ E_SEED(t0, t1, L0, L1, R0, R1, 0); /* 16 rounds, L/R operands swapped each round, last argument stepping 0..30 by 2. NOTE(review): E_SEED is defined outside this view; presumably it keys the round with ks->data[n] and ks->data[n+1] - confirm in seed.h */
+ E_SEED(t0, t1, R0, R1, L0, L1, 2);
+ E_SEED(t0, t1, L0, L1, R0, R1, 4);
+ E_SEED(t0, t1, R0, R1, L0, L1, 6);
+ E_SEED(t0, t1, L0, L1, R0, R1, 8);
+ E_SEED(t0, t1, R0, R1, L0, L1, 10);
+ E_SEED(t0, t1, L0, L1, R0, R1, 12);
+ E_SEED(t0, t1, R0, R1, L0, L1, 14);
+ E_SEED(t0, t1, L0, L1, R0, R1, 16);
+ E_SEED(t0, t1, R0, R1, L0, L1, 18);
+ E_SEED(t0, t1, L0, L1, R0, R1, 20);
+ E_SEED(t0, t1, R0, R1, L0, L1, 22);
+ E_SEED(t0, t1, L0, L1, R0, R1, 24);
+ E_SEED(t0, t1, R0, R1, L0, L1, 26);
+ E_SEED(t0, t1, L0, L1, R0, R1, 28);
+ E_SEED(t0, t1, R0, R1, L0, L1, 30);
+
+ word2char(R0, d); /* output is written with the two halves exchanged: R first, then L */
+ word2char(R1, d + 4);
+ word2char(L0, d + 8);
+ word2char(L1, d + 12);
+}
+
+void
+SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks)
+{ /* Decrypt the single 16-byte block at s into d with key schedule ks.
+ * Same structure as SEED_encrypt, but the round index runs from 30 down
+ * to 0. s is read completely before d is written, so s and d may be the
+ * same buffer. */
+ seed_word L0, L1, R0, R1;
+ seed_word t0, t1;
+
+ char2word(s, L0); /* block is read as four big-endian 32-bit words */
+ char2word(s + 4, L1);
+ char2word(s + 8, R0);
+ char2word(s + 12, R1);
+
+ E_SEED(t0, t1, L0, L1, R0, R1, 30); /* 16 rounds in reverse key order. NOTE(review): E_SEED is defined outside this view - confirm its use of ks in seed.h */
+ E_SEED(t0, t1, R0, R1, L0, L1, 28);
+ E_SEED(t0, t1, L0, L1, R0, R1, 26);
+ E_SEED(t0, t1, R0, R1, L0, L1, 24);
+ E_SEED(t0, t1, L0, L1, R0, R1, 22);
+ E_SEED(t0, t1, R0, R1, L0, L1, 20);
+ E_SEED(t0, t1, L0, L1, R0, R1, 18);
+ E_SEED(t0, t1, R0, R1, L0, L1, 16);
+ E_SEED(t0, t1, L0, L1, R0, R1, 14);
+ E_SEED(t0, t1, R0, R1, L0, L1, 12);
+ E_SEED(t0, t1, L0, L1, R0, R1, 10);
+ E_SEED(t0, t1, R0, R1, L0, L1, 8);
+ E_SEED(t0, t1, L0, L1, R0, R1, 6);
+ E_SEED(t0, t1, R0, R1, L0, L1, 4);
+ E_SEED(t0, t1, L0, L1, R0, R1, 2);
+ E_SEED(t0, t1, R0, R1, L0, L1, 0);
+
+ word2char(R0, d); /* output is written with the two halves exchanged: R first, then L */
+ word2char(R1, d + 4);
+ word2char(L0, d + 8);
+ word2char(L1, d + 12);
+}
+
+/*
+ * ECB-mode SEED: transform whole 16-byte blocks from "in" to "out".
+ *  "in"/"out" input and output buffers; may be the same buffer
+ *  "inLen" number of input bytes; only complete blocks are processed
+ *  "ks" expanded key from SEED_set_key
+ *  "enc" non-zero to encrypt, zero to decrypt
+ *
+ * The loop runs only while a full block remains. The previous
+ * "inLen > 0" test let a length that is not a multiple of SEED_BLOCK_SIZE
+ * wrap the unsigned counter around and run far past both buffers; now any
+ * trailing partial block is simply left untouched.
+ */
+void
+SEED_ecb_encrypt(const unsigned char *in,
+                 unsigned char *out,
+                 size_t inLen,
+                 const SEED_KEY_SCHEDULE *ks, int enc)
+{
+    for (; inLen >= SEED_BLOCK_SIZE; inLen -= SEED_BLOCK_SIZE) {
+        if (enc) {
+            SEED_encrypt(in, out, ks);
+        } else {
+            SEED_decrypt(in, out, ks);
+        }
+        in += SEED_BLOCK_SIZE;
+        out += SEED_BLOCK_SIZE;
+    }
+}
+
+/*
+ * CBC-mode SEED over "len" bytes from "in" to "out".
+ *  "in"/"out" input and output buffers; may be the same buffer
+ *  "len" number of bytes to process
+ *  "ks" expanded key from SEED_set_key
+ *  "ivec" chaining value; read at entry and updated on return to the last
+ *      ciphertext block, so consecutive calls continue the same stream
+ *  "enc" non-zero to encrypt, zero to decrypt
+ *
+ * A trailing partial block (len not a multiple of SEED_BLOCK_SIZE) is
+ * handled as follows. Encryption fills the missing bytes with the
+ * corresponding bytes of the chaining value and writes a full block to
+ * "out". Decryption reads a full block from "in" but produces only the
+ * remaining "len" bytes.
+ *
+ * In every decrypt path "ivec" ends up holding ciphertext. The in-place
+ * tail used to store the decrypted block there instead, which disagreed
+ * with the out-of-place path and with CBC chaining.
+ */
+void
+SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                 size_t len, const SEED_KEY_SCHEDULE *ks,
+                 unsigned char ivec[SEED_BLOCK_SIZE], int enc)
+{
+    size_t n;
+    unsigned char tmp[SEED_BLOCK_SIZE];
+    unsigned char cipher[SEED_BLOCK_SIZE];
+    const unsigned char *iv = ivec;
+
+    if (enc) {
+        while (len >= SEED_BLOCK_SIZE) {
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n) {
+                out[n] = in[n] ^ iv[n];
+            }
+
+            SEED_encrypt(out, out, ks);
+            iv = out; /* the block just produced chains into the next one */
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            for (n = 0; n < len; ++n) {
+                out[n] = in[n] ^ iv[n];
+            }
+
+            /* pad the short block with the chaining value itself */
+            for (n = len; n < SEED_BLOCK_SIZE; ++n) {
+                out[n] = iv[n];
+            }
+
+            SEED_encrypt(out, out, ks);
+            iv = out;
+        }
+
+        memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else if (in != out) {
+        /* Distinct buffers: the ciphertext stays intact in "in", so the
+         * chaining value can simply point at the previous input block. */
+        while (len >= SEED_BLOCK_SIZE) {
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n) {
+                out[n] ^= iv[n];
+            }
+
+            iv = in;
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            SEED_decrypt(in, tmp, ks);
+
+            for (n = 0; n < len; ++n) {
+                out[n] = tmp[n] ^ iv[n];
+            }
+
+            iv = in;
+        }
+
+        memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else {
+        /* In place: each ciphertext block is overwritten by its plaintext,
+         * so keep a private copy to use as the next chaining value. */
+        while (len >= SEED_BLOCK_SIZE) {
+            memcpy(cipher, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n) {
+                out[n] ^= ivec[n];
+            }
+
+            memcpy(ivec, cipher, SEED_BLOCK_SIZE);
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            memcpy(cipher, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(cipher, tmp, ks);
+
+            for (n = 0; n < len; ++n) {
+                out[n] = tmp[n] ^ ivec[n];
+            }
+
+            /* chain on the ciphertext, not on the decrypted block */
+            memcpy(ivec, cipher, SEED_BLOCK_SIZE);
+        }
+    }
+}
+
+SEEDContext *
+SEED_AllocateContext(void)
+{
+    /* Hand back whatever PORT_ZNew produces for one SEEDContext. */
+    SEEDContext *fresh = PORT_ZNew(SEEDContext);
+
+    return fresh;
+}
+
+/*
+ * Initialize a SEED context.
+ *  "cx" the context to initialize
+ *  "key" raw key; the first SEED_KEY_LENGTH bytes are used
+ *  "keylen" number of key bytes available; must be at least
+ *      SEED_KEY_LENGTH because SEED_set_key always reads that many
+ *  "iv" SEED_BLOCK_SIZE-byte initialization vector; required for
+ *      NSS_SEED_CBC, not read for NSS_SEED
+ *  "mode" NSS_SEED (ECB) or NSS_SEED_CBC
+ *  "encrypt" direction flag stored in the context: SEED_Encrypt accepts
+ *      only contexts where it is non-zero, SEED_Decrypt only where it is
+ *      zero
+ *  "unused" ignored
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set. The
+ * context is not modified when validation fails.
+ */
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv,
+                 int mode, unsigned int encrypt, unsigned int unused)
+{
+    /* A NULL or short key would be dereferenced / over-read by
+     * SEED_set_key, so refuse it up front. */
+    if (!cx || !key || keylen < SEED_KEY_LENGTH) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    switch (mode) {
+        case NSS_SEED:
+            break;
+
+        case NSS_SEED_CBC:
+            if (!iv) {
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+            memcpy(cx->iv, iv, SEED_BLOCK_SIZE);
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    SEED_set_key(key, &cx->ks);
+    cx->mode = mode;
+    cx->encrypt = encrypt;
+
+    return SECSuccess;
+}
+
+/*
+ * Allocate and initialize a SEED context.
+ *  "key" SEED_KEY_LENGTH bytes of key material
+ *  "iv" initialization vector (needed for NSS_SEED_CBC)
+ *  "mode" NSS_SEED or NSS_SEED_CBC
+ *  "encrypt" direction flag, see SEED_InitContext
+ * Returns the new context, or NULL on failure. When the allocation itself
+ * fails the function returns immediately, so the error reported is the one
+ * left by the allocator rather than SEC_ERROR_INVALID_ARGS from
+ * initializing a NULL context.
+ */
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    SEEDContext *cx = PORT_ZNew(SEEDContext);
+
+    if (!cx) {
+        return NULL;
+    }
+
+    if (SEED_InitContext(cx, key, SEED_KEY_LENGTH, iv, mode,
+                         encrypt, 0) != SECSuccess) {
+        /* wipe before releasing; the key schedule may be partly filled */
+        PORT_ZFree(cx, sizeof *cx);
+        return NULL;
+    }
+
+    return cx;
+}
+
+/*
+ * Wipe a SEED context and optionally release it.
+ *  "cx" the context; NULL is accepted and ignored
+ *  "freeit" when true the zeroed context is also passed to PORT_Free
+ */
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+
+    /* Clear key schedule and IV before letting go. */
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Encrypt "inLen" bytes from "in" into "out" with the mode stored in "cx".
+ * "inLen" must be a multiple of SEED_BLOCK_SIZE; "maxOutLen" must be at
+ * least SEED_BLOCK_SIZE and at least "inLen"; the context must have been
+ * set up for encryption. Every rejection reports SEC_ERROR_INVALID_ARGS.
+ * On success *outLen is set to "inLen".
+ */
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    /* All argument problems share one error code, so test them together;
+     * short-circuit evaluation keeps cx from being read when it is NULL. */
+    if (!cx ||
+        (inLen % SEED_BLOCK_SIZE) != 0 ||
+        maxOutLen < SEED_BLOCK_SIZE ||
+        maxOutLen < inLen ||
+        !cx->encrypt) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (cx->mode == NSS_SEED) {
+        SEED_ecb_encrypt(in, out, inLen, &cx->ks, 1);
+    } else if (cx->mode == NSS_SEED_CBC) {
+        SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 1);
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    *outLen = inLen;
+    return SECSuccess;
+}
+
+/*
+ * Decrypt "inLen" bytes from "in" into "out" with the mode stored in "cx".
+ * "inLen" must be a multiple of SEED_BLOCK_SIZE; "maxOutLen" must be at
+ * least SEED_BLOCK_SIZE and at least "inLen"; the context must have been
+ * set up for decryption. Every rejection reports SEC_ERROR_INVALID_ARGS.
+ * On success *outLen is set to "inLen".
+ */
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    /* All argument problems share one error code, so test them together;
+     * short-circuit evaluation keeps cx from being read when it is NULL. */
+    if (!cx ||
+        (inLen % SEED_BLOCK_SIZE) != 0 ||
+        maxOutLen < SEED_BLOCK_SIZE ||
+        maxOutLen < inLen ||
+        cx->encrypt) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (cx->mode == NSS_SEED) {
+        SEED_ecb_encrypt(in, out, inLen, &cx->ks, 0);
+    } else if (cx->mode == NSS_SEED_CBC) {
+        SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 0);
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    *outLen = inLen;
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/deprecated/seed.h b/security/nss/lib/freebl/deprecated/seed.h
new file mode 100644
index 0000000000..717a1e74ed
--- /dev/null
+++ b/security/nss/lib/freebl/deprecated/seed.h
@@ -0,0 +1,125 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef HEADER_SEED_H
+#define HEADER_SEED_H
+
+#include <string.h>
+#include "blapi.h"
+
+#if !defined(NO_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+typedef PRUint32 seed_word;
+/* G_FUNC: XOR of four SS table lookups, one per byte of v (low byte indexes SS[0]); parenthesized so it is safe inside larger expressions. */
+#define G_FUNC(v) \
+ (SS[0][((v)&0xff)] ^ \
+ SS[1][((v) >> 8 & 0xff)] ^ \
+ SS[2][((v) >> 16 & 0xff)] ^ \
+ SS[3][((v) >> 24 & 0xff)])
+/* char2word: assemble the bytes c[0..3], most significant first, into the lvalue i. */
+#define char2word(c, i) \
+ (i) = ((((seed_word)((c)[0])) << 24) | \
+ (((seed_word)((c)[1])) << 16) | \
+ (((seed_word)((c)[2])) << 8) | \
+ ((seed_word)((c)[3])))
+/* word2char: store l into c[0..3], most significant byte first; expands to four statements, so use it only as a full statement. */
+#define word2char(l, c) \
+ *((c) + 0) = (unsigned char)((l) >> 24); \
+ *((c) + 1) = (unsigned char)((l) >> 16); \
+ *((c) + 2) = (unsigned char)((l) >> 8); \
+ *((c) + 3) = (unsigned char)((l))
+/* KEYSCHEDULE_UPDATE0: rotate the K2:K3 pair left by 8 bits (32-bit words assumed), then set T0 = K0 + K2 - KC and T1 = K1 + KC - K3. */
+#define KEYSCHEDULE_UPDATE0(T0, T1, K0, K1, K2, K3, KC) \
+ (T0) = (K2); \
+ (K2) = (((K2) << 8) ^ ((K3) >> 24)); \
+ (K3) = (((K3) << 8) ^ ((T0) >> 24)); \
+ (T0) = ((K0) + (K2) - (KC)); \
+ (T1) = ((K1) + (KC) - (K3))
+/* KEYSCHEDULE_UPDATE1: rotate the K0:K1 pair right by 8 bits (32-bit words assumed), then set T0 = K0 + K2 - KC and T1 = K1 + KC - K3. */
+#define KEYSCHEDULE_UPDATE1(T0, T1, K0, K1, K2, K3, KC) \
+ (T0) = (K0); \
+ (K0) = (((K0) >> 8) ^ ((K1) << 24)); \
+ (K1) = (((K1) >> 8) ^ ((T0) << 24)); \
+ (T0) = ((K0) + (K2) - (KC)); \
+ (T1) = ((K1) + (KC) - (K3))
+/* KEYUPDATE_TEMP: store G_FUNC(T0) in K[0] and G_FUNC(T1) in K[1]. */
+#define KEYUPDATE_TEMP(T0, T1, K) \
+ (K)[0] = G_FUNC((T0)); \
+ (K)[1] = G_FUNC((T1))
+/* XOR_SEEDBLOCK: XOR the four elements SRC[0..3] into DST[0..3]. */
+#define XOR_SEEDBLOCK(DST, SRC) \
+ (DST)[0] ^= (SRC)[0]; \
+ (DST)[1] ^= (SRC)[1]; \
+ (DST)[2] ^= (SRC)[2]; \
+ (DST)[3] ^= (SRC)[3]
+/* MOV_SEEDBLOCK: copy the four elements SRC[0..3] to DST[0..3]. */
+#define MOV_SEEDBLOCK(DST, SRC) \
+ (DST)[0] = (SRC)[0]; \
+ (DST)[1] = (SRC)[1]; \
+ (DST)[2] = (SRC)[2]; \
+ (DST)[3] = (SRC)[3]
+/* CHAR2WORD: load the 16 bytes at C into the four words I[0..3], four bytes per word via char2word. */
+#define CHAR2WORD(C, I) \
+ char2word((C), (I)[0]); \
+ char2word((C) + 4, (I)[1]); \
+ char2word((C) + 8, (I)[2]); \
+ char2word((C) + 12, (I)[3])
+/* WORD2CHAR: store the four words I[0..3] as 16 bytes starting at C via word2char; C is parenthesized so any pointer expression is safe. */
+#define WORD2CHAR(I, C) \
+ word2char((I)[0], (C)); \
+ word2char((I)[1], ((C) + 4)); \
+ word2char((I)[2], ((C) + 8)); \
+ word2char((I)[3], ((C) + 12))
+/* E_SEED: mix X3/X4 with ks->data[rbase] and ks->data[rbase + 1] through three G_FUNC steps and XOR the results into X1/X2; needs a `ks` in scope. */
+#define E_SEED(T0, T1, X1, X2, X3, X4, rbase) \
+ (T0) = (X3) ^ (ks->data)[(rbase)]; \
+ (T1) = (X4) ^ (ks->data)[(rbase) + 1]; \
+ (T1) ^= (T0); \
+ (T1) = G_FUNC(T1); \
+ (T0) += (T1); \
+ (T0) = G_FUNC(T0); \
+ (T1) += (T0); \
+ (T1) = G_FUNC(T1); \
+ (T0) += (T1); \
+ (X1) ^= (T0); \
+ (X2) ^= (T1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Key schedule storage: 32 words, read by E_SEED as ks->data[rbase] and ks->data[rbase + 1]. */
+typedef struct seed_key_st {
+ PRUint32 data[32];
+} SEED_KEY_SCHEDULE;
+/* SEED cipher state; presumably the struct behind SEEDContext (typedef not shown here) -- confirm. */
+struct SEEDContextStr {
+ unsigned char iv[SEED_BLOCK_SIZE]; /* handed to SEED_cbc_encrypt as ivec */
+ SEED_KEY_SCHEDULE ks; /* handed to the ecb/cbc routines */
+ int mode; /* NSS_SEED or NSS_SEED_CBC; anything else is rejected */
+ unsigned int encrypt; /* non-zero: only SEED_Encrypt accepts it; zero: only SEED_Decrypt */
+};
+/* Presumably fills *ks from the SEED_KEY_LENGTH-byte rawkey (definition not shown here). */
+void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+ SEED_KEY_SCHEDULE *ks);
+/* Single-block routines: s is a read-only SEED_BLOCK_SIZE-byte block, d a writable one, ks a read-only schedule. */
+void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks);
+void SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks);
+/* Multi-block ECB/CBC routines; SEED_Encrypt passes enc = 1 and SEED_Decrypt passes enc = 0. */
+void SEED_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ size_t inLen, const SEED_KEY_SCHEDULE *ks, int enc);
+void SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const SEED_KEY_SCHEDULE *ks,
+ unsigned char ivec[SEED_BLOCK_SIZE], int enc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_SEED_H */
diff --git a/security/nss/lib/freebl/des.c b/security/nss/lib/freebl/des.c
new file mode 100644
index 0000000000..fd433bbb2f
--- /dev/null
+++ b/security/nss/lib/freebl/des.c
@@ -0,0 +1,676 @@
+/*
+ * des.c
+ *
+ * core source file for DES-150 library
+ * Make key schedule from DES key.
+ * Encrypt/Decrypt one 8-byte block.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h> /* for ptrdiff_t */
+/* #define USE_INDEXING 1 */
+
+/*
+ * The tables below are the 8 sbox functions, with the 6-bit input permutation
+ * and the 32-bit output permutation pre-computed.
+ * They are shifted circularly to the left 3 bits, which removes 2 shifts
+ * and an or from each round by reducing the number of sboxes whose
+ * indices cross word boundaries from 2 to 1.
+ */
+
+static const HALF SP[8][64] = {
+ /* Box S1 */
+ { 0x04041000, 0x00000000, 0x00040000, 0x04041010,
+ 0x04040010, 0x00041010, 0x00000010, 0x00040000,
+ 0x00001000, 0x04041000, 0x04041010, 0x00001000,
+ 0x04001010, 0x04040010, 0x04000000, 0x00000010,
+ 0x00001010, 0x04001000, 0x04001000, 0x00041000,
+ 0x00041000, 0x04040000, 0x04040000, 0x04001010,
+ 0x00040010, 0x04000010, 0x04000010, 0x00040010,
+ 0x00000000, 0x00001010, 0x00041010, 0x04000000,
+ 0x00040000, 0x04041010, 0x00000010, 0x04040000,
+ 0x04041000, 0x04000000, 0x04000000, 0x00001000,
+ 0x04040010, 0x00040000, 0x00041000, 0x04000010,
+ 0x00001000, 0x00000010, 0x04001010, 0x00041010,
+ 0x04041010, 0x00040010, 0x04040000, 0x04001010,
+ 0x04000010, 0x00001010, 0x00041010, 0x04041000,
+ 0x00001010, 0x04001000, 0x04001000, 0x00000000,
+ 0x00040010, 0x00041000, 0x00000000, 0x04040010 },
+ /* Box S2 */
+ { 0x00420082, 0x00020002, 0x00020000, 0x00420080,
+ 0x00400000, 0x00000080, 0x00400082, 0x00020082,
+ 0x00000082, 0x00420082, 0x00420002, 0x00000002,
+ 0x00020002, 0x00400000, 0x00000080, 0x00400082,
+ 0x00420000, 0x00400080, 0x00020082, 0x00000000,
+ 0x00000002, 0x00020000, 0x00420080, 0x00400002,
+ 0x00400080, 0x00000082, 0x00000000, 0x00420000,
+ 0x00020080, 0x00420002, 0x00400002, 0x00020080,
+ 0x00000000, 0x00420080, 0x00400082, 0x00400000,
+ 0x00020082, 0x00400002, 0x00420002, 0x00020000,
+ 0x00400002, 0x00020002, 0x00000080, 0x00420082,
+ 0x00420080, 0x00000080, 0x00020000, 0x00000002,
+ 0x00020080, 0x00420002, 0x00400000, 0x00000082,
+ 0x00400080, 0x00020082, 0x00000082, 0x00400080,
+ 0x00420000, 0x00000000, 0x00020002, 0x00020080,
+ 0x00000002, 0x00400082, 0x00420082, 0x00420000 },
+ /* Box S3 */
+ { 0x00000820, 0x20080800, 0x00000000, 0x20080020,
+ 0x20000800, 0x00000000, 0x00080820, 0x20000800,
+ 0x00080020, 0x20000020, 0x20000020, 0x00080000,
+ 0x20080820, 0x00080020, 0x20080000, 0x00000820,
+ 0x20000000, 0x00000020, 0x20080800, 0x00000800,
+ 0x00080800, 0x20080000, 0x20080020, 0x00080820,
+ 0x20000820, 0x00080800, 0x00080000, 0x20000820,
+ 0x00000020, 0x20080820, 0x00000800, 0x20000000,
+ 0x20080800, 0x20000000, 0x00080020, 0x00000820,
+ 0x00080000, 0x20080800, 0x20000800, 0x00000000,
+ 0x00000800, 0x00080020, 0x20080820, 0x20000800,
+ 0x20000020, 0x00000800, 0x00000000, 0x20080020,
+ 0x20000820, 0x00080000, 0x20000000, 0x20080820,
+ 0x00000020, 0x00080820, 0x00080800, 0x20000020,
+ 0x20080000, 0x20000820, 0x00000820, 0x20080000,
+ 0x00080820, 0x00000020, 0x20080020, 0x00080800 },
+ /* Box S4 */
+ { 0x02008004, 0x00008204, 0x00008204, 0x00000200,
+ 0x02008200, 0x02000204, 0x02000004, 0x00008004,
+ 0x00000000, 0x02008000, 0x02008000, 0x02008204,
+ 0x00000204, 0x00000000, 0x02000200, 0x02000004,
+ 0x00000004, 0x00008000, 0x02000000, 0x02008004,
+ 0x00000200, 0x02000000, 0x00008004, 0x00008200,
+ 0x02000204, 0x00000004, 0x00008200, 0x02000200,
+ 0x00008000, 0x02008200, 0x02008204, 0x00000204,
+ 0x02000200, 0x02000004, 0x02008000, 0x02008204,
+ 0x00000204, 0x00000000, 0x00000000, 0x02008000,
+ 0x00008200, 0x02000200, 0x02000204, 0x00000004,
+ 0x02008004, 0x00008204, 0x00008204, 0x00000200,
+ 0x02008204, 0x00000204, 0x00000004, 0x00008000,
+ 0x02000004, 0x00008004, 0x02008200, 0x02000204,
+ 0x00008004, 0x00008200, 0x02000000, 0x02008004,
+ 0x00000200, 0x02000000, 0x00008000, 0x02008200 },
+ /* Box S5 */
+ { 0x00000400, 0x08200400, 0x08200000, 0x08000401,
+ 0x00200000, 0x00000400, 0x00000001, 0x08200000,
+ 0x00200401, 0x00200000, 0x08000400, 0x00200401,
+ 0x08000401, 0x08200001, 0x00200400, 0x00000001,
+ 0x08000000, 0x00200001, 0x00200001, 0x00000000,
+ 0x00000401, 0x08200401, 0x08200401, 0x08000400,
+ 0x08200001, 0x00000401, 0x00000000, 0x08000001,
+ 0x08200400, 0x08000000, 0x08000001, 0x00200400,
+ 0x00200000, 0x08000401, 0x00000400, 0x08000000,
+ 0x00000001, 0x08200000, 0x08000401, 0x00200401,
+ 0x08000400, 0x00000001, 0x08200001, 0x08200400,
+ 0x00200401, 0x00000400, 0x08000000, 0x08200001,
+ 0x08200401, 0x00200400, 0x08000001, 0x08200401,
+ 0x08200000, 0x00000000, 0x00200001, 0x08000001,
+ 0x00200400, 0x08000400, 0x00000401, 0x00200000,
+ 0x00000000, 0x00200001, 0x08200400, 0x00000401 },
+ /* Box S6 */
+ { 0x80000040, 0x81000000, 0x00010000, 0x81010040,
+ 0x81000000, 0x00000040, 0x81010040, 0x01000000,
+ 0x80010000, 0x01010040, 0x01000000, 0x80000040,
+ 0x01000040, 0x80010000, 0x80000000, 0x00010040,
+ 0x00000000, 0x01000040, 0x80010040, 0x00010000,
+ 0x01010000, 0x80010040, 0x00000040, 0x81000040,
+ 0x81000040, 0x00000000, 0x01010040, 0x81010000,
+ 0x00010040, 0x01010000, 0x81010000, 0x80000000,
+ 0x80010000, 0x00000040, 0x81000040, 0x01010000,
+ 0x81010040, 0x01000000, 0x00010040, 0x80000040,
+ 0x01000000, 0x80010000, 0x80000000, 0x00010040,
+ 0x80000040, 0x81010040, 0x01010000, 0x81000000,
+ 0x01010040, 0x81010000, 0x00000000, 0x81000040,
+ 0x00000040, 0x00010000, 0x81000000, 0x01010040,
+ 0x00010000, 0x01000040, 0x80010040, 0x00000000,
+ 0x81010000, 0x80000000, 0x01000040, 0x80010040 },
+ /* Box S7 */
+ { 0x00800000, 0x10800008, 0x10002008, 0x00000000,
+ 0x00002000, 0x10002008, 0x00802008, 0x10802000,
+ 0x10802008, 0x00800000, 0x00000000, 0x10000008,
+ 0x00000008, 0x10000000, 0x10800008, 0x00002008,
+ 0x10002000, 0x00802008, 0x00800008, 0x10002000,
+ 0x10000008, 0x10800000, 0x10802000, 0x00800008,
+ 0x10800000, 0x00002000, 0x00002008, 0x10802008,
+ 0x00802000, 0x00000008, 0x10000000, 0x00802000,
+ 0x10000000, 0x00802000, 0x00800000, 0x10002008,
+ 0x10002008, 0x10800008, 0x10800008, 0x00000008,
+ 0x00800008, 0x10000000, 0x10002000, 0x00800000,
+ 0x10802000, 0x00002008, 0x00802008, 0x10802000,
+ 0x00002008, 0x10000008, 0x10802008, 0x10800000,
+ 0x00802000, 0x00000000, 0x00000008, 0x10802008,
+ 0x00000000, 0x00802008, 0x10800000, 0x00002000,
+ 0x10000008, 0x10002000, 0x00002000, 0x00800008 },
+ /* Box S8 */
+ { 0x40004100, 0x00004000, 0x00100000, 0x40104100,
+ 0x40000000, 0x40004100, 0x00000100, 0x40000000,
+ 0x00100100, 0x40100000, 0x40104100, 0x00104000,
+ 0x40104000, 0x00104100, 0x00004000, 0x00000100,
+ 0x40100000, 0x40000100, 0x40004000, 0x00004100,
+ 0x00104000, 0x00100100, 0x40100100, 0x40104000,
+ 0x00004100, 0x00000000, 0x00000000, 0x40100100,
+ 0x40000100, 0x40004000, 0x00104100, 0x00100000,
+ 0x00104100, 0x00100000, 0x40104000, 0x00004000,
+ 0x00000100, 0x40100100, 0x00004000, 0x00104100,
+ 0x40004000, 0x00000100, 0x40000100, 0x40100000,
+ 0x40100100, 0x40000000, 0x00100000, 0x40004100,
+ 0x00000000, 0x40104100, 0x00100100, 0x40000100,
+ 0x40100000, 0x40004000, 0x40004100, 0x00000000,
+ 0x40104100, 0x00104000, 0x00104000, 0x00004100,
+ 0x00004100, 0x00100100, 0x40000000, 0x40104000 }
+};
+
+static const HALF PC2[8][64] = {
+ /* table 0 */
+ { 0x00000000, 0x00001000, 0x04000000, 0x04001000,
+ 0x00100000, 0x00101000, 0x04100000, 0x04101000,
+ 0x00008000, 0x00009000, 0x04008000, 0x04009000,
+ 0x00108000, 0x00109000, 0x04108000, 0x04109000,
+ 0x00000004, 0x00001004, 0x04000004, 0x04001004,
+ 0x00100004, 0x00101004, 0x04100004, 0x04101004,
+ 0x00008004, 0x00009004, 0x04008004, 0x04009004,
+ 0x00108004, 0x00109004, 0x04108004, 0x04109004,
+ 0x08000000, 0x08001000, 0x0c000000, 0x0c001000,
+ 0x08100000, 0x08101000, 0x0c100000, 0x0c101000,
+ 0x08008000, 0x08009000, 0x0c008000, 0x0c009000,
+ 0x08108000, 0x08109000, 0x0c108000, 0x0c109000,
+ 0x08000004, 0x08001004, 0x0c000004, 0x0c001004,
+ 0x08100004, 0x08101004, 0x0c100004, 0x0c101004,
+ 0x08008004, 0x08009004, 0x0c008004, 0x0c009004,
+ 0x08108004, 0x08109004, 0x0c108004, 0x0c109004 },
+ /* table 1 */
+ { 0x00000000, 0x00002000, 0x80000000, 0x80002000,
+ 0x00000008, 0x00002008, 0x80000008, 0x80002008,
+ 0x00200000, 0x00202000, 0x80200000, 0x80202000,
+ 0x00200008, 0x00202008, 0x80200008, 0x80202008,
+ 0x20000000, 0x20002000, 0xa0000000, 0xa0002000,
+ 0x20000008, 0x20002008, 0xa0000008, 0xa0002008,
+ 0x20200000, 0x20202000, 0xa0200000, 0xa0202000,
+ 0x20200008, 0x20202008, 0xa0200008, 0xa0202008,
+ 0x00000400, 0x00002400, 0x80000400, 0x80002400,
+ 0x00000408, 0x00002408, 0x80000408, 0x80002408,
+ 0x00200400, 0x00202400, 0x80200400, 0x80202400,
+ 0x00200408, 0x00202408, 0x80200408, 0x80202408,
+ 0x20000400, 0x20002400, 0xa0000400, 0xa0002400,
+ 0x20000408, 0x20002408, 0xa0000408, 0xa0002408,
+ 0x20200400, 0x20202400, 0xa0200400, 0xa0202400,
+ 0x20200408, 0x20202408, 0xa0200408, 0xa0202408 },
+ /* table 2 */
+ { 0x00000000, 0x00004000, 0x00000020, 0x00004020,
+ 0x00080000, 0x00084000, 0x00080020, 0x00084020,
+ 0x00000800, 0x00004800, 0x00000820, 0x00004820,
+ 0x00080800, 0x00084800, 0x00080820, 0x00084820,
+ 0x00000010, 0x00004010, 0x00000030, 0x00004030,
+ 0x00080010, 0x00084010, 0x00080030, 0x00084030,
+ 0x00000810, 0x00004810, 0x00000830, 0x00004830,
+ 0x00080810, 0x00084810, 0x00080830, 0x00084830,
+ 0x00400000, 0x00404000, 0x00400020, 0x00404020,
+ 0x00480000, 0x00484000, 0x00480020, 0x00484020,
+ 0x00400800, 0x00404800, 0x00400820, 0x00404820,
+ 0x00480800, 0x00484800, 0x00480820, 0x00484820,
+ 0x00400010, 0x00404010, 0x00400030, 0x00404030,
+ 0x00480010, 0x00484010, 0x00480030, 0x00484030,
+ 0x00400810, 0x00404810, 0x00400830, 0x00404830,
+ 0x00480810, 0x00484810, 0x00480830, 0x00484830 },
+ /* table 3 */
+ { 0x00000000, 0x40000000, 0x00000080, 0x40000080,
+ 0x00040000, 0x40040000, 0x00040080, 0x40040080,
+ 0x00000040, 0x40000040, 0x000000c0, 0x400000c0,
+ 0x00040040, 0x40040040, 0x000400c0, 0x400400c0,
+ 0x10000000, 0x50000000, 0x10000080, 0x50000080,
+ 0x10040000, 0x50040000, 0x10040080, 0x50040080,
+ 0x10000040, 0x50000040, 0x100000c0, 0x500000c0,
+ 0x10040040, 0x50040040, 0x100400c0, 0x500400c0,
+ 0x00800000, 0x40800000, 0x00800080, 0x40800080,
+ 0x00840000, 0x40840000, 0x00840080, 0x40840080,
+ 0x00800040, 0x40800040, 0x008000c0, 0x408000c0,
+ 0x00840040, 0x40840040, 0x008400c0, 0x408400c0,
+ 0x10800000, 0x50800000, 0x10800080, 0x50800080,
+ 0x10840000, 0x50840000, 0x10840080, 0x50840080,
+ 0x10800040, 0x50800040, 0x108000c0, 0x508000c0,
+ 0x10840040, 0x50840040, 0x108400c0, 0x508400c0 },
+ /* table 4 */
+ { 0x00000000, 0x00000008, 0x08000000, 0x08000008,
+ 0x00040000, 0x00040008, 0x08040000, 0x08040008,
+ 0x00002000, 0x00002008, 0x08002000, 0x08002008,
+ 0x00042000, 0x00042008, 0x08042000, 0x08042008,
+ 0x80000000, 0x80000008, 0x88000000, 0x88000008,
+ 0x80040000, 0x80040008, 0x88040000, 0x88040008,
+ 0x80002000, 0x80002008, 0x88002000, 0x88002008,
+ 0x80042000, 0x80042008, 0x88042000, 0x88042008,
+ 0x00080000, 0x00080008, 0x08080000, 0x08080008,
+ 0x000c0000, 0x000c0008, 0x080c0000, 0x080c0008,
+ 0x00082000, 0x00082008, 0x08082000, 0x08082008,
+ 0x000c2000, 0x000c2008, 0x080c2000, 0x080c2008,
+ 0x80080000, 0x80080008, 0x88080000, 0x88080008,
+ 0x800c0000, 0x800c0008, 0x880c0000, 0x880c0008,
+ 0x80082000, 0x80082008, 0x88082000, 0x88082008,
+ 0x800c2000, 0x800c2008, 0x880c2000, 0x880c2008 },
+ /* table 5 */
+ { 0x00000000, 0x00400000, 0x00008000, 0x00408000,
+ 0x40000000, 0x40400000, 0x40008000, 0x40408000,
+ 0x00000020, 0x00400020, 0x00008020, 0x00408020,
+ 0x40000020, 0x40400020, 0x40008020, 0x40408020,
+ 0x00001000, 0x00401000, 0x00009000, 0x00409000,
+ 0x40001000, 0x40401000, 0x40009000, 0x40409000,
+ 0x00001020, 0x00401020, 0x00009020, 0x00409020,
+ 0x40001020, 0x40401020, 0x40009020, 0x40409020,
+ 0x00100000, 0x00500000, 0x00108000, 0x00508000,
+ 0x40100000, 0x40500000, 0x40108000, 0x40508000,
+ 0x00100020, 0x00500020, 0x00108020, 0x00508020,
+ 0x40100020, 0x40500020, 0x40108020, 0x40508020,
+ 0x00101000, 0x00501000, 0x00109000, 0x00509000,
+ 0x40101000, 0x40501000, 0x40109000, 0x40509000,
+ 0x00101020, 0x00501020, 0x00109020, 0x00509020,
+ 0x40101020, 0x40501020, 0x40109020, 0x40509020 },
+ /* table 6 */
+ { 0x00000000, 0x00000040, 0x04000000, 0x04000040,
+ 0x00000800, 0x00000840, 0x04000800, 0x04000840,
+ 0x00800000, 0x00800040, 0x04800000, 0x04800040,
+ 0x00800800, 0x00800840, 0x04800800, 0x04800840,
+ 0x10000000, 0x10000040, 0x14000000, 0x14000040,
+ 0x10000800, 0x10000840, 0x14000800, 0x14000840,
+ 0x10800000, 0x10800040, 0x14800000, 0x14800040,
+ 0x10800800, 0x10800840, 0x14800800, 0x14800840,
+ 0x00000080, 0x000000c0, 0x04000080, 0x040000c0,
+ 0x00000880, 0x000008c0, 0x04000880, 0x040008c0,
+ 0x00800080, 0x008000c0, 0x04800080, 0x048000c0,
+ 0x00800880, 0x008008c0, 0x04800880, 0x048008c0,
+ 0x10000080, 0x100000c0, 0x14000080, 0x140000c0,
+ 0x10000880, 0x100008c0, 0x14000880, 0x140008c0,
+ 0x10800080, 0x108000c0, 0x14800080, 0x148000c0,
+ 0x10800880, 0x108008c0, 0x14800880, 0x148008c0 },
+ /* table 7 */
+ { 0x00000000, 0x00000010, 0x00000400, 0x00000410,
+ 0x00000004, 0x00000014, 0x00000404, 0x00000414,
+ 0x00004000, 0x00004010, 0x00004400, 0x00004410,
+ 0x00004004, 0x00004014, 0x00004404, 0x00004414,
+ 0x20000000, 0x20000010, 0x20000400, 0x20000410,
+ 0x20000004, 0x20000014, 0x20000404, 0x20000414,
+ 0x20004000, 0x20004010, 0x20004400, 0x20004410,
+ 0x20004004, 0x20004014, 0x20004404, 0x20004414,
+ 0x00200000, 0x00200010, 0x00200400, 0x00200410,
+ 0x00200004, 0x00200014, 0x00200404, 0x00200414,
+ 0x00204000, 0x00204010, 0x00204400, 0x00204410,
+ 0x00204004, 0x00204014, 0x00204404, 0x00204414,
+ 0x20200000, 0x20200010, 0x20200400, 0x20200410,
+ 0x20200004, 0x20200014, 0x20200404, 0x20200414,
+ 0x20204000, 0x20204010, 0x20204400, 0x20204410,
+ 0x20204004, 0x20204014, 0x20204404, 0x20204414 }
+};
+
+/*
+ * The PC-1 Permutation
+ * If we number the bits of the 8 bytes of key input like this (in octal):
+ * 00 01 02 03 04 05 06 07
+ * 10 11 12 13 14 15 16 17
+ * 20 21 22 23 24 25 26 27
+ * 30 31 32 33 34 35 36 37
+ * 40 41 42 43 44 45 46 47
+ * 50 51 52 53 54 55 56 57
+ * 60 61 62 63 64 65 66 67
+ * 70 71 72 73 74 75 76 77
+ * then after the PC-1 permutation,
+ * C0 is
+ * 70 60 50 40 30 20 10 00
+ * 71 61 51 41 31 21 11 01
+ * 72 62 52 42 32 22 12 02
+ * 73 63 53 43
+ * D0 is
+ * 76 66 56 46 36 26 16 06
+ * 75 65 55 45 35 25 15 05
+ * 74 64 54 44 34 24 14 04
+ * 33 23 13 03
+ * and these parity bits have been discarded:
+ * 77 67 57 47 37 27 17 07
+ *
+ * We achieve this by flipping the input matrix about the diagonal from 70-07,
+ * getting left =
+ * 77 67 57 47 37 27 17 07 (these are the parity bits)
+ * 76 66 56 46 36 26 16 06
+ * 75 65 55 45 35 25 15 05
+ * 74 64 54 44 34 24 14 04
+ * right =
+ * 73 63 53 43 33 23 13 03
+ * 72 62 52 42 32 22 12 02
+ * 71 61 51 41 31 21 11 01
+ * 70 60 50 40 30 20 10 00
+ * then byte swap right, ala htonl() on a little endian machine.
+ * right =
+ * 70 60 50 40 30 20 10 00
+ * 71 61 51 41 31 21 11 01
+ * 72 62 52 42 32 22 12 02
+ * 73 63 53 43 33 23 13 03
+ * then
+ * c0 = right >> 4;
+ * d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
+*/
+/* FLIP_RIGHT_DIAGONAL: two masked bit-exchange steps on word with temp as scratch (the diagonal flip described in the PC-1 comment). */
+#define FLIP_RIGHT_DIAGONAL(word, temp) \
+ temp = (word ^ (word >> 18)) & 0x00003333; \
+ word ^= temp | (temp << 18); \
+ temp = (word ^ (word >> 9)) & 0x00550055; \
+ word ^= temp | (temp << 9);
+/* BYTESWAP: reverse the byte order of word using bswap, _byteswap_ulong or rev when selected, else shifts and masks through temp. */
+#if defined(__GNUC__) && defined(NSS_X86_OR_X64)
+#define BYTESWAP(word, temp) \
+ __asm("bswap %0" \
+ : "+r"(word));
+#elif (_MSC_VER >= 1300) && defined(NSS_X86_OR_X64)
+#include <stdlib.h>
+#pragma intrinsic(_byteswap_ulong)
+#define BYTESWAP(word, temp) \
+ word = _byteswap_ulong(word);
+#elif defined(__GNUC__) && (defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__))))
+#define BYTESWAP(word, temp) \
+ __asm("rev %0, %0" \
+ : "+r"(word));
+#else
+#define BYTESWAP(word, temp) \
+ word = (word >> 16) | (word << 16); \
+ temp = 0x00ff00ff; \
+ word = ((word & temp) << 8) | ((word >> 8) & temp);
+#endif
+/* PC1: derive the 28-bit values c0 and d0 from the key words left/right as the PC-1 comment describes; modifies left, right and temp. */
+#define PC1(left, right, c0, d0, temp) \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4; \
+ FLIP_RIGHT_DIAGONAL(left, temp); \
+ FLIP_RIGHT_DIAGONAL(right, temp); \
+ BYTESWAP(right, temp); \
+ c0 = right >> 4; \
+ d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
+/* Rotate a 28-bit value left by one or two bits; the mask keeps the result within 28 bits. */
+#define LEFT_SHIFT_1(reg) (((reg << 1) | (reg >> 27)) & 0x0FFFFFFF)
+#define LEFT_SHIFT_2(reg) (((reg << 2) | (reg >> 26)) & 0x0FFFFFFF)
+
+/*
+ * Fill ks[0..31] (16 rounds x 2 HALFs) from the 8-byte key; DES_ENCRYPT stores the rounds forwards, any other direction backwards from ks[30].
+ */
+
+void
+DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction)
+{
+ register HALF left, right;
+ register HALF c0, d0;
+ register HALF temp;
+ int delta;
+ unsigned int ls;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ left = HALFPTR(key)[0];
+ right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+#else
+ if (((ptrdiff_t)key & 0x03) == 0) { /* key is 4-byte aligned: load whole words */
+ left = HALFPTR(key)[0];
+ right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ } else { /* unaligned: assemble the two words byte by byte, first byte most significant */
+ left = ((HALF)key[0] << 24) | ((HALF)key[1] << 16) |
+ ((HALF)key[2] << 8) | key[3];
+ right = ((HALF)key[4] << 24) | ((HALF)key[5] << 16) |
+ ((HALF)key[6] << 8) | key[7];
+ }
+#endif
+
+ PC1(left, right, c0, d0, temp);
+
+ if (direction == DES_ENCRYPT) {
+ delta = 2 * (int)sizeof(HALF);
+ } else {
+ ks += 30; /* start at the last round's pair and step backwards */
+ delta = (-2) * (int)sizeof(HALF);
+ }
+
+ for (ls = 0x8103; ls; ls >>= 1) { /* 16 bits, consumed low bit first: 1 = rotate by one, 0 = rotate by two */
+ if (ls & 1) {
+ c0 = LEFT_SHIFT_1(c0);
+ d0 = LEFT_SHIFT_1(d0);
+ } else {
+ c0 = LEFT_SHIFT_2(c0);
+ d0 = LEFT_SHIFT_2(d0);
+ }
+
+#ifdef USE_INDEXING
+#define PC2LOOKUP(b, c) PC2[b][c]
+
+ left = PC2LOOKUP(0, ((c0 >> 22) & 0x3F));
+ left |= PC2LOOKUP(1, ((c0 >> 13) & 0x3F));
+ left |= PC2LOOKUP(2, ((c0 >> 4) & 0x38) | (c0 & 0x7));
+ left |= PC2LOOKUP(3, ((c0 >> 18) & 0xC) | ((c0 >> 11) & 0x3) | (c0 & 0x30));
+
+ right = PC2LOOKUP(4, ((d0 >> 22) & 0x3F));
+ right |= PC2LOOKUP(5, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf));
+ right |= PC2LOOKUP(6, ((d0 >> 7) & 0x3F));
+ right |= PC2LOOKUP(7, ((d0 >> 1) & 0x3C) | (d0 & 0x3));
+#else
+#define PC2LOOKUP(b, c) *(HALF *)((BYTE *)&PC2[b][0] + (c))
+
+ left = PC2LOOKUP(0, ((c0 >> 20) & 0xFC));
+ left |= PC2LOOKUP(1, ((c0 >> 11) & 0xFC));
+ left |= PC2LOOKUP(2, ((c0 >> 2) & 0xE0) | ((c0 << 2) & 0x1C));
+ left |= PC2LOOKUP(3, ((c0 >> 16) & 0x30) | ((c0 >> 9) & 0xC) | ((c0 << 2) & 0xC0));
+
+ right = PC2LOOKUP(4, ((d0 >> 20) & 0xFC));
+ right |= PC2LOOKUP(5, ((d0 >> 13) & 0xC0) | ((d0 >> 12) & 0x3C));
+ right |= PC2LOOKUP(6, ((d0 >> 5) & 0xFC));
+ right |= PC2LOOKUP(7, ((d0 << 1) & 0xF0) | ((d0 << 2) & 0x0C));
+#endif
+ /* left contains key bits for S1 S3 S2 S4 */
+ /* right contains key bits for S6 S8 S5 S7 */
+ temp = (left << 16) /* S2 S4 XX XX */
+ | (right >> 16); /* XX XX S6 S8 */
+ ks[0] = temp;
+
+ temp = (left & 0xffff0000) /* S1 S3 XX XX */
+ | (right & 0x0000ffff); /* XX XX S5 S7 */
+ ks[1] = temp;
+
+ ks = (HALF *)((BYTE *)ks + delta); /* delta is a byte count, hence the BYTE * arithmetic */
+ }
+}
+
+/*
+ * The DES Initial Permutation
+ * if we number the bits of the 8 bytes of input like this (in octal):
+ * 00 01 02 03 04 05 06 07
+ * 10 11 12 13 14 15 16 17
+ * 20 21 22 23 24 25 26 27
+ * 30 31 32 33 34 35 36 37
+ * 40 41 42 43 44 45 46 47
+ * 50 51 52 53 54 55 56 57
+ * 60 61 62 63 64 65 66 67
+ * 70 71 72 73 74 75 76 77
+ * then after the initial permutation, they will be in this order.
+ * 71 61 51 41 31 21 11 01
+ * 73 63 53 43 33 23 13 03
+ * 75 65 55 45 35 25 15 05
+ * 77 67 57 47 37 27 17 07
+ * 70 60 50 40 30 20 10 00
+ * 72 62 52 42 32 22 12 02
+ * 74 64 54 44 34 24 14 04
+ * 76 66 56 46 36 26 16 06
+ *
+ * One way to do this is in two steps:
+ * 1. Flip this matrix about the diagonal from 70-07 as done for PC1.
+ * 2. Rearrange the bytes (rows in the matrix above) with the following code.
+ *
+ * #define swapHiLo(word, temp) \
+ * temp = (word ^ (word >> 24)) & 0x000000ff; \
+ * word ^= temp | (temp << 24);
+ *
+ * right ^= temp = ((left << 8) ^ right) & 0xff00ff00;
+ * left ^= temp >> 8;
+ * swapHiLo(left, temp);
+ * swapHiLo(right,temp);
+ *
+ * However, the two steps can be combined, so that the rows are rearranged
+ * while the matrix is being flipped, reducing the number of bit exchange
+ * operations from 8 to 5.
+ *
+ * Initial Permutation: five masked bit-exchange steps on left/right, with temp as scratch. */
+#define IP(left, right, temp) \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4; \
+ right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+ left ^= temp << 16; \
+ right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \
+ left ^= temp >> 2; \
+ right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \
+ left ^= temp >> 8; \
+ right ^= temp = ((left >> 1) ^ right) & 0x55555555; \
+ left ^= temp << 1;
+
+/* The Final (Inverse Initial) permutation is done by reversing the
+** steps of the Initial Permutation: the same five bit-exchange steps as IP, applied in the opposite order.
+*/
+
+#define FP(left, right, temp) \
+ right ^= temp = ((left >> 1) ^ right) & 0x55555555; \
+ left ^= temp << 1; \
+ right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \
+ left ^= temp >> 8; \
+ right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \
+ left ^= temp >> 2; \
+ right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+ left ^= temp << 16; \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4;
+/* Run the 8 bytes at inbuf through IP, 16 rounds keyed by ks[0..31] (two HALFs per round) and FP, and write 8 bytes to outbuf. */
+void NO_SANITIZE_ALIGNMENT
+DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf)
+{
+ register HALF left, right;
+ register HALF temp;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ left = HALFPTR(inbuf)[0];
+ right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+#else
+ if (((ptrdiff_t)inbuf & 0x03) == 0) { /* input is 4-byte aligned: load whole words */
+ left = HALFPTR(inbuf)[0];
+ right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ } else { /* unaligned: assemble the two words byte by byte, first byte most significant */
+ left = ((HALF)inbuf[0] << 24) | ((HALF)inbuf[1] << 16) |
+ ((HALF)inbuf[2] << 8) | inbuf[3];
+ right = ((HALF)inbuf[4] << 24) | ((HALF)inbuf[5] << 16) |
+ ((HALF)inbuf[6] << 8) | inbuf[7];
+ }
+#endif
+
+ IP(left, right, temp);
+
+ /* shift the values left circularly 3 bits. */
+ left = (left << 3) | (left >> 29);
+ right = (right << 3) | (right >> 29);
+
+#ifdef USE_INDEXING
+#define KSLOOKUP(s, b) SP[s][((temp >> (b + 2)) & 0x3f)]
+#else
+#define KSLOOKUP(s, b) *(HALF *)((BYTE *)&SP[s][0] + ((temp >> b) & 0xFC))
+#endif
+#define ROUND(out, in, r) \
+ temp = in ^ ks[2 * r]; \
+ out ^= KSLOOKUP(1, 24); \
+ out ^= KSLOOKUP(3, 16); \
+ out ^= KSLOOKUP(5, 8); \
+ out ^= KSLOOKUP(7, 0); \
+ temp = ((in >> 4) | (in << 28)) ^ ks[2 * r + 1]; \
+ out ^= KSLOOKUP(0, 24); \
+ out ^= KSLOOKUP(2, 16); \
+ out ^= KSLOOKUP(4, 8); \
+ out ^= KSLOOKUP(6, 0);
+
+ /* Do the 16 Feistel rounds */
+ ROUND(left, right, 0)
+ ROUND(right, left, 1)
+ ROUND(left, right, 2)
+ ROUND(right, left, 3)
+ ROUND(left, right, 4)
+ ROUND(right, left, 5)
+ ROUND(left, right, 6)
+ ROUND(right, left, 7)
+ ROUND(left, right, 8)
+ ROUND(right, left, 9)
+ ROUND(left, right, 10)
+ ROUND(right, left, 11)
+ ROUND(left, right, 12)
+ ROUND(right, left, 13)
+ ROUND(left, right, 14)
+ ROUND(right, left, 15)
+
+ /* now shift circularly right 3 bits to undo the shifting done
+ ** above. switch left and right here.
+ */
+ temp = (left >> 3) | (left << 29);
+ left = (right >> 3) | (right << 29);
+ right = temp;
+
+ FP(left, right, temp);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ HALFPTR(outbuf)
+ [0] = left;
+ HALFPTR(outbuf)
+ [1] = right;
+#else
+ if (((ptrdiff_t)outbuf & 0x03) == 0) { /* output is 4-byte aligned: store whole words */
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ HALFPTR(outbuf)
+ [0] = left;
+ HALFPTR(outbuf)
+ [1] = right;
+ } else { /* unaligned: store byte by byte, most significant byte first */
+ outbuf[0] = (BYTE)(left >> 24);
+ outbuf[1] = (BYTE)(left >> 16);
+ outbuf[2] = (BYTE)(left >> 8);
+ outbuf[3] = (BYTE)(left);
+
+ outbuf[4] = (BYTE)(right >> 24);
+ outbuf[5] = (BYTE)(right >> 16);
+ outbuf[6] = (BYTE)(right >> 8);
+ outbuf[7] = (BYTE)(right);
+ }
+#endif
+}
+
+/* Acknowledgements:
+** Two ideas used in this implementation were shown to me by Dennis Ferguson
+** in 1990. He credits them to Richard Outerbridge and Dan Hoey. They were:
+** 1. The method of computing the Initial and Final permutations.
+** 2. Circularly rotating the SP tables and the initial values of left and
+** right to reduce the number of shifts required during the 16 rounds.
+*/
diff --git a/security/nss/lib/freebl/des.h b/security/nss/lib/freebl/des.h
new file mode 100644
index 0000000000..70a17e5108
--- /dev/null
+++ b/security/nss/lib/freebl/des.h
@@ -0,0 +1,43 @@
+/*
+ * des.h
+ *
+ * header file for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _DES_H_
+#define _DES_H_ 1
+
+#include "blapi.h"
+
+typedef unsigned char BYTE; /* one byte of key or data */
+typedef unsigned int HALF; /* half of an 8-byte DES block; the shifts and masks in des.c treat it as 32 bits */
+
+#define HALFPTR(x) ((HALF *)(x)) /* view x as a pointer to HALF */
+#define SHORTPTR(x) ((unsigned short *)(x)) /* view x as a pointer to unsigned short */
+#define BYTEPTR(x) ((BYTE *)(x)) /* view x as a pointer to BYTE */
+/* Direction selector for DES_MakeSchedule: DES_ENCRYPT stores the round keys forwards, any other value backwards. */
+typedef enum {
+ DES_ENCRYPT = 0x5555,
+ DES_DECRYPT = 0xAAAA
+} DESDirection;
+/* Function type of the mode routine stored in DESContextStr.worker: takes the context, an output buffer, an input buffer and a byte count. */
+typedef void DESFunc(struct DESContextStr *cx, BYTE *out, const BYTE *in,
+ unsigned int len);
+/* DES / Triple-DES working state; presumably the struct behind DESContext (typedef not shown here) -- confirm. */
+struct DESContextStr {
+ /* key schedule, 16 internal keys, each with 8 6-bit parts */
+ HALF ks0[32]; /* used by DES_ECB and DES_CBCEn; first pass of DES_EDE3_ECB */
+ HALF ks1[32]; /* second pass of DES_EDE3_ECB */
+ HALF ks2[32]; /* third pass of DES_EDE3_ECB */
+ HALF iv[2]; /* CBC chaining block, updated by DES_CBCEn */
+ DESDirection direction;
+ DESFunc *worker;
+};
+/* Core primitives defined in des.c: key-schedule setup and the single 8-byte block transform. */
+void DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction);
+void DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf);
+
+#endif
diff --git a/security/nss/lib/freebl/desblapi.c b/security/nss/lib/freebl/desblapi.c
new file mode 100644
index 0000000000..c03ab27cce
--- /dev/null
+++ b/security/nss/lib/freebl/desblapi.c
@@ -0,0 +1,256 @@
+/*
+ * desblapi.c
+ *
+ * core source file for DES-150 library
+ * Implement DES Modes of Operation and Triple-DES.
+ * Adapt DES-150 to blapi API.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h>
+#include "secerr.h"
+
+/*
+ * COPY8B(to, from, ptr) copies one 8-byte block from `from` to `to`.
+ * The third argument is not used by either implementation; it is kept so
+ * the COPY8BTOHALF / COPY8BFROMHALF wrappers keep their existing shape.
+ *
+ * The x86 variant is wrapped in do { } while (0) so that it expands to a
+ * single statement and stays correct when used as the body of an unbraced
+ * if/else.  Every use must be followed by a semicolon.
+ */
+#if defined(NSS_X86_OR_X64)
+/* Intel X86 CPUs do unaligned loads and stores without complaint, so the
+ * block is moved as two HALF-sized words through cast pointers. */
+#define COPY8B(to, from, ptr)              \
+    do {                                   \
+        HALFPTR(to)[0] = HALFPTR(from)[0]; \
+        HALFPTR(to)[1] = HALFPTR(from)[1]; \
+    } while (0)
+#else
+/* Elsewhere use memcpy, which has no alignment requirement. */
+#define COPY8B(to, from, ptr) memcpy(to, from, 8)
+#endif
+/* Copy 8 bytes from a (possibly unaligned) byte buffer into a HALF[2]. */
+#define COPY8BTOHALF(to, from) COPY8B(to, from, from)
+/* Copy 8 bytes from a HALF[2] into a (possibly unaligned) byte buffer. */
+#define COPY8BFROMHALF(to, from) COPY8B(to, from, to)
+
+/*
+ * Single-DES ECB worker: runs cx->ks0 over each 8-byte block of `in` and
+ * writes the result to `out`.  len must be a multiple of 8 (DES_Encrypt and
+ * DES_Decrypt enforce this); otherwise the counter never reaches zero.
+ */
+static void
+DES_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    for (; len != 0; len -= 8, in += 8, out += 8) {
+        DES_Do1Block(cx->ks0, in, out);
+    }
+}
+
+/*
+ * Triple-DES ECB worker: for each 8-byte block applies ks0 (in -> out), then
+ * ks1 and ks2 in place on the output block.  len must be a multiple of 8.
+ */
+static void
+DES_EDE3_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    unsigned int remaining = len;
+
+    while (remaining != 0) {
+        DES_Do1Block(cx->ks0, in, out);
+        DES_Do1Block(cx->ks1, out, out);
+        DES_Do1Block(cx->ks2, out, out);
+        in += 8;
+        out += 8;
+        remaining -= 8;
+    }
+}
+
+/*
+ * Single-DES CBC encryption worker.  Each plaintext block is XORed with the
+ * chaining value in cx->iv, encrypted with ks0 straight back into cx->iv,
+ * and that new chaining value is emitted as the ciphertext block.
+ * len must be a multiple of 8.
+ */
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const end = in + len;
+    HALF block[2];
+
+    for (; in != end; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        DES_Do1Block(cx->ks0, (BYTE *)block, (BYTE *)cx->iv);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+/*
+ * Single-DES CBC decryption worker.  The previous chaining value is saved,
+ * the ciphertext block is copied into cx->iv (becoming the next chaining
+ * value) before anything is written to `out`, then the block is decrypted
+ * with ks0 and XORed with the saved value.  len must be a multiple of 8.
+ */
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const end = in + len;
+    HALF prev[2];
+    HALF plain[2];
+
+    while (in != end) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in);
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)plain);
+        plain[0] ^= prev[0];
+        plain[1] ^= prev[1];
+        COPY8BFROMHALF(out, plain);
+        in += 8;
+        out += 8;
+    }
+}
+
+/*
+ * Triple-DES CBC encryption worker.  Each plaintext block is XORed with
+ * cx->iv, passed through ks0, ks1 and ks2 (the last two in place on cx->iv),
+ * and the resulting chaining value is emitted as ciphertext.
+ * len must be a multiple of 8.
+ */
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const end = in + len;
+    BYTE *const chain = (BYTE *)cx->iv;
+    HALF block[2];
+
+    for (; in != end; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        DES_Do1Block(cx->ks0, (BYTE *)block, chain);
+        DES_Do1Block(cx->ks1, chain, chain);
+        DES_Do1Block(cx->ks2, chain, chain);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+/*
+ * Triple-DES CBC decryption worker.  Saves the current chaining value,
+ * stores the incoming ciphertext block in cx->iv, runs it through ks0, ks1
+ * and ks2, and XORs the result with the saved chaining value.
+ * len must be a multiple of 8.
+ */
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const end = in + len;
+    HALF prev[2];
+    HALF plain[2];
+    BYTE *const work = (BYTE *)plain;
+
+    while (in != end) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in);
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, work);
+        DES_Do1Block(cx->ks1, work, work);
+        DES_Do1Block(cx->ks2, work, work);
+        plain[0] ^= prev[0];
+        plain[1] ^= prev[1];
+        COPY8BFROMHALF(out, plain);
+        in += 8;
+        out += 8;
+    }
+}
+
+/* Allocates a DESContext with PORT_ZNew and returns it to the caller
+ * unchanged (including a NULL result). */
+DESContext *
+DES_AllocateContext(void)
+{
+    DESContext *cx = PORT_ZNew(DESContext);
+
+    return cx;
+}
+
+/*
+ * Fill cx->ks0, ks1 and ks2 for Triple-DES from the key material found at
+ * key, key + 8 and key + 16.  The middle schedule is always built for the
+ * opposite direction.  When decrypting, the first and third sub-keys swap
+ * schedules so the workers can always apply ks0, ks1, ks2 in that order.
+ */
+static void
+des_MakeEDE3Schedules(DESContext *cx, const unsigned char *key,
+                      unsigned int encrypt)
+{
+    const DESDirection same = encrypt ? DES_ENCRYPT : DES_DECRYPT;
+    const DESDirection opposite = encrypt ? DES_DECRYPT : DES_ENCRYPT;
+    HALF *first = encrypt ? cx->ks0 : cx->ks2;
+    HALF *last = encrypt ? cx->ks2 : cx->ks0;
+
+    DES_MakeSchedule(first, key, same);
+    DES_MakeSchedule(cx->ks1, key + 8, opposite);
+    DES_MakeSchedule(last, key + 16, same);
+}
+
+/*
+ * Prepare cx for the requested mode and direction.
+ *
+ *   cx      context to initialise (must not be NULL)
+ *   key     key material (must not be NULL); read at offsets 0, 8 and 16
+ *           for the EDE3 modes and at offset 0 otherwise
+ *   keylen  not examined
+ *   iv      8-byte initial chaining value; required for the CBC modes
+ *   mode    NSS_DES, NSS_DES_EDE3, NSS_DES_CBC or NSS_DES_EDE3_CBC
+ *   encrypt non-zero for encryption, zero for decryption
+ *   unused  not examined
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS when a
+ * required pointer is NULL or the mode is not recognised.
+ */
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key, unsigned int keylen,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int unused)
+{
+    const PRBool needsIV = (mode == NSS_DES_CBC || mode == NSS_DES_EDE3_CBC);
+
+    /* Reject NULL inputs up front instead of dereferencing them below. */
+    if (!cx || !key || (needsIV && !iv)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->direction = encrypt ? DES_ENCRYPT : DES_DECRYPT;
+    switch (mode) {
+        case NSS_DES: /* DES ECB */
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            cx->worker = &DES_ECB;
+            break;
+
+        case NSS_DES_EDE3: /* DES EDE ECB */
+            cx->worker = &DES_EDE3_ECB;
+            des_MakeEDE3Schedules(cx, key, encrypt);
+            break;
+
+        case NSS_DES_CBC: /* DES CBC */
+            COPY8BTOHALF(cx->iv, iv);
+            cx->worker = encrypt ? &DES_CBCEn : &DES_CBCDe;
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            break;
+
+        case NSS_DES_EDE3_CBC: /* DES EDE CBC */
+            COPY8BTOHALF(cx->iv, iv);
+            cx->worker = encrypt ? &DES_EDE3CBCEn : &DES_EDE3CBCDe;
+            des_MakeEDE3Schedules(cx, key, encrypt);
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Allocate a context and initialise it with DES_InitContext (keylen and the
+ * trailing argument are passed as 0).  Returns the context on success; on
+ * any failure the context is zeroed and freed and NULL is returned.
+ */
+DESContext *
+DES_CreateContext(const BYTE *key, const BYTE *iv, int mode, PRBool encrypt)
+{
+    DESContext *cx = PORT_ZNew(DESContext);
+
+    if (DES_InitContext(cx, key, 0, iv, mode, encrypt, 0) == SECSuccess) {
+        return cx;
+    }
+    PORT_ZFree(cx, sizeof *cx);
+    return NULL;
+}
+
+/* Wipe the context contents; additionally release the memory when freeit is
+ * true.  A NULL cx is ignored. */
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Encrypt inLen bytes from `in` into `out` with the worker chosen at init
+ * time.  Fails with SEC_ERROR_INVALID_ARGS when inLen is not a multiple of
+ * 8, when the output buffer is smaller than the input, when cx is NULL, or
+ * when cx was not initialised for encryption.  On success *outLen (if
+ * outLen is non-NULL) receives inLen.
+ */
+SECStatus
+DES_Encrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    const PRBool badLength = ((inLen % 8) != 0 || maxOutLen < inLen);
+
+    if (badLength || cx == NULL || cx->direction != DES_ENCRYPT) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL) {
+        *outLen = inLen;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Decrypt inLen bytes from `in` into `out` with the worker chosen at init
+ * time.  Fails with SEC_ERROR_INVALID_ARGS when inLen is not a multiple of
+ * 8, when the output buffer is smaller than the input, when cx is NULL, or
+ * when cx was not initialised for decryption.  On success *outLen (if
+ * outLen is non-NULL) receives inLen.
+ */
+SECStatus
+DES_Decrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    const PRBool badLength = ((inLen % 8) != 0 || maxOutLen < inLen);
+
+    if (badLength || cx == NULL || cx->direction != DES_DECRYPT) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL) {
+        *outLen = inLen;
+    }
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c
new file mode 100644
index 0000000000..f50a3c4446
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.c
@@ -0,0 +1,163 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "blapi.h"
+#include "blapit.h"
+#include "Hacl_Chacha20.h"
+#include "nssilock.h"
+#include "seccomon.h"
+#include "secerr.h"
+#include "prinit.h"
+
+/* Maximum number of seed bytes retained by RNG_RandomUpdate. */
+#define GLOBAL_BYTES_SIZE 100
+/* Current seed; zero-filled by rng_init and replaced by RNG_RandomUpdate. */
+static PRUint8 globalBytes[GLOBAL_BYTES_SIZE];
+/* Number of RNG_GenerateGlobalRandomBytes calls since the last reseed; its
+ * bytes are copied into the ChaCha20 nonce for each call. */
+static unsigned long globalNumCalls = 0;
+/* Guards globalBytes and globalNumCalls; NULL until rng_init has run and
+ * again after RNG_RNGShutdown. */
+static PZLock *rng_lock = NULL;
+/* Call-once control for rng_init. */
+static PRCallOnceType coRNGInit;
+/* Never written: a zero-initialised value used to reset coRNGInit in
+ * RNG_RNGShutdown. */
+static const PRCallOnceType pristineCallOnce;
+
+/*
+ * PR_CallOnce callback: creates the RNG lock and zero-fills the seed buffer.
+ * Returns PR_FAILURE with SEC_ERROR_LIBRARY_FAILURE if the lock cannot be
+ * created.
+ */
+static PRStatus
+rng_init(void)
+{
+    rng_lock = PZ_NewLock(nssILockOther);
+    if (rng_lock == NULL) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return PR_FAILURE;
+    }
+
+    /* --- LOCKED --- */
+    PZ_Lock(rng_lock);
+    memset(globalBytes, 0, sizeof(globalBytes));
+    PZ_Unlock(rng_lock);
+    /* --- UNLOCKED --- */
+
+    return PR_SUCCESS;
+}
+
+/* Runs rng_init at most once (via PR_CallOnce) and maps its PRStatus to a
+ * SECStatus. */
+SECStatus
+RNG_RNGInit(void)
+{
+    /* Allow only one call to initialize the context */
+    PRStatus status = PR_CallOnce(&coRNGInit, rng_init);
+
+    return (status == PR_SUCCESS) ? SECSuccess : SECFailure;
+}
+
+/*
+ * Reset the deterministic RNG: clear the seed buffer and the call counter,
+ * then, if data is non-NULL, copy min(bytes, GLOBAL_BYTES_SIZE) bytes of it
+ * into the seed buffer.  Fails with SEC_ERROR_INVALID_ARGS if the RNG has
+ * not been initialised.
+ */
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    /* Check for a valid RNG lock. */
+    PORT_Assert(rng_lock != NULL);
+    if (!rng_lock) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* --- LOCKED --- */
+    PZ_Lock(rng_lock);
+    memset(globalBytes, 0, sizeof(globalBytes));
+    globalNumCalls = 0;
+    if (data != NULL) {
+        size_t seedLen = PR_MIN(bytes, GLOBAL_BYTES_SIZE);
+
+        memcpy(globalBytes, data, seedLen);
+    }
+    PZ_Unlock(rng_lock);
+    /* --- UNLOCKED --- */
+
+    return SECSuccess;
+}
+
+/*
+ * Deterministic replacement for the global RNG: writes len bytes to dest
+ * that depend only on the current seed (globalBytes) and on how many times
+ * this function has been called since the last RNG_RandomUpdate.
+ * Fails with SEC_ERROR_INVALID_ARGS if the RNG has not been initialised and
+ * with SEC_ERROR_NO_MEMORY if the ChaCha20Poly1305 context cannot be
+ * created.  dest is not checked for NULL.
+ */
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+ /* Fixed all-zero key; the output varies only through nonce and seed. */
+ static const uint8_t key[32] = { 0 };
+ uint8_t nonce[12] = { 0 };
+
+ /* Check for a valid RNG lock. */
+ PORT_Assert(rng_lock != NULL);
+ if (rng_lock == NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ /* --- LOCKED --- */
+ PZ_Lock(rng_lock);
+
+ /* The call counter, in host byte order, forms the leading nonce bytes.
+ * It is incremented before the allocation below, so a failed call still
+ * advances the sequence. */
+ memcpy(nonce, &globalNumCalls, sizeof(globalNumCalls));
+ globalNumCalls++;
+
+ /* NOTE(review): cx is only created and destroyed; the encryption below
+ * does not use it.  Presumably kept for its side effects -- confirm. */
+ ChaCha20Poly1305Context *cx =
+ ChaCha20Poly1305_CreateContext(key, sizeof(key), 16);
+ if (!cx) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ PZ_Unlock(rng_lock);
+ return SECFailure;
+ }
+
+ /* Input to the cipher is the seed, zero-padded (or truncated) to len
+ * bytes; it is transformed in place in dest. */
+ memset(dest, 0, len);
+ memcpy(dest, globalBytes, PR_MIN(len, GLOBAL_BYTES_SIZE));
+ Hacl_Chacha20_chacha20_encrypt(len, (uint8_t *)dest, (uint8_t *)dest,
+ (uint8_t *)key, nonce, 0);
+ ChaCha20Poly1305_DestroyContext(cx, PR_TRUE);
+
+ PZ_Unlock(rng_lock);
+ /* --- UNLOCKED --- */
+
+ return SECSuccess;
+}
+
+/* Destroys the RNG lock (if one exists) and resets the call-once control so
+ * that a later RNG_RNGInit runs rng_init again. */
+void
+RNG_RNGShutdown(void)
+{
+    if (rng_lock != NULL) {
+        PZ_DestroyLock(rng_lock);
+        rng_lock = NULL;
+    }
+
+    coRNGInit = pristineCallOnce;
+}
+
+/* Test functions are not implemented! */
+/* Stub: ignores all arguments and always returns SECFailure. */
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len)
+{
+ return SECFailure;
+}
+
+/* Stub: ignores all arguments and always returns SECFailure. */
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional, unsigned int additional_len)
+{
+ return SECFailure;
+}
+
+/* Stub: ignores all arguments, writes nothing to bytes, and always returns
+ * SECFailure. */
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+ const PRUint8 *additional, unsigned int additional_len)
+{
+ return SECFailure;
+}
+
+/* Stub: not implemented in the deterministic RNG; always returns SECFailure.
+ * Declared with (void) so the definition is a real prototype. */
+SECStatus
+PRNGTEST_Uninstantiate(void)
+{
+    return SECFailure;
+}
+
+/* Stub: not implemented in the deterministic RNG; always returns SECFailure.
+ * Declared with (void) so the definition is a real prototype. */
+SECStatus
+PRNGTEST_RunHealthTests(void)
+{
+    return SECFailure;
+}
+
+/* Stub: ignores all arguments and always returns SECFailure. */
+SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len)
+{
+ return SECFailure;
+}
diff --git a/security/nss/lib/freebl/det_rng.h b/security/nss/lib/freebl/det_rng.h
new file mode 100644
index 0000000000..599d726ca2
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.h
@@ -0,0 +1,12 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __det_rng_h_
+#define __det_rng_h_
+
+/* NOTE(review): this header includes nothing itself, so the including file
+ * must already have SECStatus, PZLock and size_t in scope.  Neither function
+ * is defined in det_rng.c as added by this change; the definitions
+ * presumably live elsewhere -- confirm. */
+/* Presumably resets the deterministic RNG state under rng_lock -- confirm
+ * against the definition. */
+SECStatus prng_ResetForFuzzing(PZLock *rng_lock);
+/* Presumably writes len deterministic bytes to dest under rng_lock --
+ * confirm against the definition. */
+SECStatus prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest,
+ size_t len);
+
+#endif /* __det_rng_h_ */
diff --git a/security/nss/lib/freebl/dh.c b/security/nss/lib/freebl/dh.c
new file mode 100644
index 0000000000..bdd5dd63ae
--- /dev/null
+++ b/security/nss/lib/freebl/dh.c
@@ -0,0 +1,480 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Diffie-Hellman parameter generation, key generation, and secret derivation.
+ * KEA secret generation and verification.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "blapi.h"
+#include "blapii.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "secmpi.h"
+
+#define KEA_DERIVED_SECRET_LEN 128
+
+/*
+ * Map the length of a DH prime to the length of the private exponent to
+ * generate for it.  Both lengths are in bytes.
+ * Based on Table 2 in NIST SP 800-57.
+ */
+static unsigned int
+dh_GetSecretKeyLen(unsigned int primeLen)
+{
+    /* Ordered from the largest threshold down; the first match wins. */
+    static const struct {
+        unsigned int minPrimeLen; /* bytes */
+        unsigned int secretLen;   /* bytes */
+    } strengths[] = {
+        { 1920, 64 }, /* 15360-bit prime -> 512-bit secret */
+        { 960, 48 },  /*  7680-bit prime -> 384-bit secret */
+        { 384, 32 },  /*  3072-bit prime -> 256-bit secret */
+        { 256, 28 },  /*  2048-bit prime -> 224-bit secret */
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof(strengths) / sizeof(strengths[0]); i++) {
+        if (primeLen >= strengths[i].minPrimeLen) {
+            return strengths[i].secretLen;
+        }
+    }
+    return 20; /* 160 bits */
+}
+
+/*
+ * Generate Diffie-Hellman parameters: a prime p of primeLen bytes and a base
+ * a with a**((p-1)/2) mod p != 1.
+ *
+ * On success *params points at a DHParams allocated from its own arena
+ * (stored in (*params)->arena) and SECSuccess is returned.  On failure the
+ * arena is freed, *params is left untouched and SECFailure is returned.
+ */
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+ PLArenaPool *arena;
+ DHParams *dhparams;
+ unsigned char *ab = NULL;
+ /* NOTE(review): h is initialised and cleared but otherwise unused. */
+ mp_int p, q, a, h, psub1, test;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ if (!params || primeLen < 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+ if (!arena) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return SECFailure;
+ }
+ dhparams = (DHParams *)PORT_ArenaZAlloc(arena, sizeof(DHParams));
+ if (!dhparams) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ PORT_FreeArena(arena, PR_TRUE);
+ return SECFailure;
+ }
+ dhparams->arena = arena;
+ /* Zero the digit pointers first so that the mp_clear calls at cleanup
+ * are reached with a known value even if an mp_init fails part-way. */
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&a) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&psub1) = 0;
+ MP_DIGITS(&test) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&a));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&psub1));
+ CHECK_MPI_OK(mp_init(&test));
+ /* generate prime with MPI, uses Miller-Rabin to generate safe prime. */
+ CHECK_SEC_OK(generate_prime(&p, primeLen));
+ /* construct Sophie-Germain prime q = (p-1)/2. */
+ CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+ CHECK_MPI_OK(mp_div_2(&psub1, &q));
+ /* construct a generator from the prime. */
+ ab = PORT_Alloc(primeLen);
+ if (!ab) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* generate a candidate number a in p's field */
+ CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(ab, primeLen));
+ CHECK_MPI_OK(mp_read_unsigned_octets(&a, ab, primeLen));
+ /* force a < p (note that quot(a/p) <= 1) */
+ if (mp_cmp(&a, &p) > 0)
+ CHECK_MPI_OK(mp_sub(&a, &p, &a));
+ do {
+ /* check that a is in the range [2..p-2] */
+ if (mp_cmp_d(&a, 2) < 0 || mp_cmp(&a, &psub1) >= 0) {
+ /* a is outside of the allowed range. Set a=3 and keep going. */
+ mp_set(&a, 3);
+ }
+ /* if a**q mod p != 1 then a is a generator */
+ CHECK_MPI_OK(mp_exptmod(&a, &q, &p, &test));
+ if (mp_cmp_d(&test, 1) != 0)
+ break;
+ /* increment the candidate and try again. */
+ CHECK_MPI_OK(mp_add_d(&a, 1, &a));
+ } while (PR_TRUE);
+ /* Export p and a into arena-owned SECItems. */
+ MPINT_TO_SECITEM(&p, &dhparams->prime, arena);
+ MPINT_TO_SECITEM(&a, &dhparams->base, arena);
+ *params = dhparams;
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&a);
+ mp_clear(&h);
+ mp_clear(&psub1);
+ mp_clear(&test);
+ if (ab) {
+ /* Zero the random candidate bytes before releasing them. */
+ PORT_ZFree(ab, primeLen);
+ }
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv != SECSuccess) {
+ PORT_FreeArena(arena, PR_TRUE);
+ }
+ return rv;
+}
+
+/*
+ * Generate a Diffie-Hellman key pair for the given parameters.
+ *
+ *   params   prime and base to use (copied into the new key)
+ *   privKey  receives the new key, allocated from its own arena
+ *            ((*privKey)->arena); set to NULL on failure
+ *
+ * The private value is dh_GetSecretKeyLen(prime length) random bytes,
+ * reduced mod p; the public value is base ** private mod p.
+ * Returns SECSuccess or SECFailure (with the error code set).
+ */
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    PLArenaPool *arena;
+    DHPrivateKey *key;
+    mp_int g, xa, p, Ya;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    if (!params || !privKey) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DHPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DHPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->arena = arena;
+    /* Zero the digit pointers so cleanup can always call mp_clear. */
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&xa) = 0;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Ya) = 0;
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&xa));
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Ya));
+    /* Set private key's p */
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->prime, &params->prime));
+    SECITEM_TO_MPINT(key->prime, &p);
+    /* Set private key's g */
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->base, &params->base));
+    SECITEM_TO_MPINT(key->base, &g);
+    /* Generate private key xa.  The allocation must be checked: carrying on
+     * after a failure would hand the RNG an item that was never allocated. */
+    if (SECITEM_AllocItem(arena, &key->privateValue,
+                          dh_GetSecretKeyLen(params->prime.len)) == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(key->privateValue.data,
+                                               key->privateValue.len));
+    SECITEM_TO_MPINT(key->privateValue, &xa);
+    /* xa < p */
+    CHECK_MPI_OK(mp_mod(&xa, &p, &xa));
+    /* Compute public key Ya = g ** xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&g, &xa, &p, &Ya));
+    MPINT_TO_SECITEM(&Ya, &key->publicValue, key->arena);
+    *privKey = key;
+cleanup:
+    mp_clear(&g);
+    mp_clear(&xa);
+    mp_clear(&p);
+    mp_clear(&Ya);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        /* The key lives in the arena, so freeing the arena frees it all. */
+        *privKey = NULL;
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+    return rv;
+}
+
+/*
+ * Compute the Diffie-Hellman shared secret ZZ = publicValue ** privateValue
+ * mod prime.
+ *
+ *   publicValue    peer's public value; must satisfy 1 < Yb < p-1
+ *   prime          modulus p
+ *   privateValue   our private exponent
+ *   derivedSecret  output item; its buffer is allocated here and owned by
+ *                  the caller on success
+ *   outBytes       0 to return the secret at its natural length, otherwise
+ *                  the exact output length (left-padded with zeros or
+ *                  truncated at the most-significant end)
+ *
+ * Returns SECSuccess, or SECFailure with the error code set.  On failure
+ * derivedSecret is left with data == NULL and len == 0.
+ */
+SECStatus
+DH_Derive(SECItem *publicValue,
+          SECItem *prime,
+          SECItem *privateValue,
+          SECItem *derivedSecret,
+          unsigned int outBytes)
+{
+    mp_int p, Xa, Yb, ZZ, psub1;
+    mp_err err = MP_OKAY;
+    unsigned int len = 0;
+    unsigned int nb;
+    unsigned char *secret = NULL;
+    if (!publicValue || !publicValue->len || !prime || !prime->len ||
+        !privateValue || !privateValue->len || !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Xa) = 0;
+    MP_DIGITS(&Yb) = 0;
+    MP_DIGITS(&ZZ) = 0;
+    MP_DIGITS(&psub1) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Xa));
+    CHECK_MPI_OK(mp_init(&Yb));
+    CHECK_MPI_OK(mp_init(&ZZ));
+    CHECK_MPI_OK(mp_init(&psub1));
+    SECITEM_TO_MPINT(*publicValue, &Yb);
+    SECITEM_TO_MPINT(*privateValue, &Xa);
+    SECITEM_TO_MPINT(*prime, &p);
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+
+    /* We assume that the modulus, p, is a safe prime. That is, p = 2q+1 where
+     * q is also a prime. Thus the orders of the subgroups are factors of 2q:
+     * namely 1, 2, q and 2q.
+     *
+     * We check that the peer's public value isn't zero (which isn't in the
+     * group), one (subgroup of order one) or p-1 (subgroup of order 2). We
+     * also check that the public value is less than p, to avoid being fooled
+     * by values like p+1 or 2*p-1.
+     *
+     * Thus we must be operating in the subgroup of size q or 2q. */
+    if (mp_cmp_d(&Yb, 1) <= 0 ||
+        mp_cmp(&Yb, &psub1) >= 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* ZZ = (Yb)**Xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&Yb, &Xa, &p, &ZZ));
+    /* number of bytes in the derived secret */
+    len = mp_unsigned_octet_size(&ZZ);
+    if (len == 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /*
+     * We check to make sure that ZZ is not equal to 0, 1 or -1 mod p.
+     * This helps guard against small subgroup attacks, since an attacker
+     * using a subgroup of size N will produce 0, 1 or -1 with probability 1/N.
+     * When the protocol is executed within a properly large subgroup, the
+     * probability of this result will be negligibly small.  For example,
+     * with a safe prime of the form 2q+1, the probability will be 1/q.
+     *
+     * We return MP_BADARG because this is probably the result of a bad
+     * public value or a bad prime having been provided.
+     */
+    if (mp_cmp_d(&ZZ, 0) == 0 || mp_cmp_d(&ZZ, 1) == 0 ||
+        mp_cmp(&ZZ, &psub1) == 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* allocate a buffer which can hold the entire derived secret. */
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* grab the derived secret; stop immediately if the conversion fails
+     * rather than copying from a partly written buffer. */
+    err = mp_to_unsigned_octets(&ZZ, secret, len);
+    if (err < 0) {
+        goto cleanup;
+    }
+    err = MP_OKAY;
+    /*
+    ** if outBytes is 0 take all of the bytes from the derived secret.
+    ** if outBytes is not 0 take exactly outBytes from the derived secret, zero
+    ** pad at the beginning if necessary, and truncate beginning bytes
+    ** if necessary.
+    */
+    nb = (outBytes > 0) ? outBytes : len;
+    if (SECITEM_AllocItem(NULL, derivedSecret, nb) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    if (len < nb) {
+        unsigned int offset = nb - len;
+        memset(derivedSecret->data, 0, offset);
+        memcpy(derivedSecret->data + offset, secret, len);
+    } else {
+        memcpy(derivedSecret->data, secret + len - nb, nb);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Xa);
+    mp_clear(&Yb);
+    mp_clear(&ZZ);
+    mp_clear(&psub1);
+    if (secret) {
+        /* free the buffer allocated for the full secret. */
+        PORT_ZFree(secret, len);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data) {
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+            /* Do not leave the caller holding a freed pointer. */
+            derivedSecret->data = NULL;
+            derivedSecret->len = 0;
+        }
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Derive a KEA secret: w = (public1 ** private1 + public2 ** private2)
+ * mod prime.  The KEA_DERIVED_SECRET_LEN least-significant bytes of w are
+ * returned in derivedSecret, left-padded with zeros when w is shorter.
+ *
+ * derivedSecret's buffer is allocated here and owned by the caller on
+ * success.  Returns SECSuccess, or SECFailure with the error code set; on
+ * failure derivedSecret is left with data == NULL and len == 0.
+ */
+SECStatus
+KEA_Derive(SECItem *prime,
+           SECItem *public1,
+           SECItem *public2,
+           SECItem *private1,
+           SECItem *private2,
+           SECItem *derivedSecret)
+{
+    mp_int p, Y, R, r, x, t, u, w;
+    mp_err err = MP_OKAY;
+    unsigned char *secret = NULL;
+    unsigned int len = 0, offset;
+    if (!prime || !public1 || !public2 || !private1 || !private2 ||
+        !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Y) = 0;
+    MP_DIGITS(&R) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&u) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Y));
+    CHECK_MPI_OK(mp_init(&R));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&u));
+    CHECK_MPI_OK(mp_init(&w));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*public1, &Y);
+    SECITEM_TO_MPINT(*public2, &R);
+    SECITEM_TO_MPINT(*private1, &r);
+    SECITEM_TO_MPINT(*private2, &x);
+    /* t = DH(Y, r, p) = Y ** r mod p */
+    CHECK_MPI_OK(mp_exptmod(&Y, &r, &p, &t));
+    /* u = DH(R, x, p) = R ** x mod p */
+    CHECK_MPI_OK(mp_exptmod(&R, &x, &p, &u));
+    /* w = (t + u) mod p */
+    CHECK_MPI_OK(mp_addmod(&t, &u, &p, &w));
+    /* allocate a buffer for the full derived secret */
+    len = mp_unsigned_octet_size(&w);
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* grab the secret; stop immediately if the conversion fails rather than
+     * copying from a partly written buffer. */
+    err = mp_to_unsigned_octets(&w, secret, len);
+    if (err < 0) {
+        goto cleanup;
+    }
+    err = MP_OKAY;
+    /* allocate output buffer */
+    if (SECITEM_AllocItem(NULL, derivedSecret, KEA_DERIVED_SECRET_LEN) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    memset(derivedSecret->data, 0, derivedSecret->len);
+    /* copy in the 128 lsb of the secret */
+    if (len >= KEA_DERIVED_SECRET_LEN) {
+        memcpy(derivedSecret->data, secret + (len - KEA_DERIVED_SECRET_LEN),
+               KEA_DERIVED_SECRET_LEN);
+    } else {
+        offset = KEA_DERIVED_SECRET_LEN - len;
+        memcpy(derivedSecret->data + offset, secret, len);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Y);
+    mp_clear(&R);
+    mp_clear(&r);
+    mp_clear(&x);
+    mp_clear(&t);
+    mp_clear(&u);
+    mp_clear(&w);
+    if (secret)
+        PORT_ZFree(secret, len);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data) {
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+            /* Do not leave the caller holding a freed pointer. */
+            derivedSecret->data = NULL;
+            derivedSecret->len = 0;
+        }
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Number of primality-test rounds to run, chosen from prime_length.
+ * Test counts based on the fact the prime and subprime were given to us.
+ * NOTE(review): the thresholds look like bit lengths, but KEA_PrimeCheck
+ * passes a SECItem length -- confirm the intended unit.
+ */
+static int
+dh_prime_testcount(int prime_length)
+{
+    int rounds = 64;
+
+    if (prime_length < 1024) {
+        rounds = 50;
+    } else if (prime_length < 2048) {
+        rounds = 40;
+    } else if (prime_length < 3072) {
+        rounds = 56;
+    }
+    return rounds;
+}
+
+/*
+ * Returns PR_TRUE when `prime` passes mpp_pprime_secure with the round count
+ * chosen by dh_prime_testcount, PR_FALSE otherwise (including on any MPI
+ * error).  A NULL prime is rejected with SEC_ERROR_INVALID_ARGS, matching
+ * the argument check in KEA_Verify.
+ */
+PRBool
+KEA_PrimeCheck(SECItem *prime)
+{
+    mp_int p;
+    mp_err err = 0;
+    if (!prime) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return PR_FALSE;
+    }
+    MP_DIGITS(&p) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    SECITEM_TO_MPINT(*prime, &p);
+    CHECK_MPI_OK(mpp_pprime_secure(&p, dh_prime_testcount(prime->len)));
+cleanup:
+    mp_clear(&p);
+    return err ? PR_FALSE : PR_TRUE;
+}
+
+/*
+ * Returns PR_TRUE when Y ** subPrime mod prime == 1, PR_FALSE otherwise.
+ * NULL arguments and MPI errors also yield PR_FALSE (with the error code
+ * set).  Invalid arguments must return PR_FALSE, not SECFailure: this
+ * function returns a PRBool, and a non-zero status code would be read by
+ * callers as "verified".
+ */
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    mp_int p, q, y, r;
+    mp_err err = MP_OKAY;
+    int cmp = 1; /* default is false */
+    if (!Y || !prime || !subPrime) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return PR_FALSE;
+    }
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*subPrime, &q);
+    SECITEM_TO_MPINT(*Y, &y);
+    /* compute r = y**q mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &q, &p, &r));
+    /* compare to 1 */
+    cmp = mp_cmp_d(&r, 1);
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&y);
+    mp_clear(&r);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return PR_FALSE;
+    }
+    return (cmp == 0) ? PR_TRUE : PR_FALSE;
+}
diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c
new file mode 100644
index 0000000000..3ed1751c3e
--- /dev/null
+++ b/security/nss/lib/freebl/drbg.c
@@ -0,0 +1,1024 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "blapii.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "sha_fast.h"
+#include "sha256.h"
+#include "secrng.h" /* for RNG_SystemRNG() */
+#include "secmpi.h"
+
+/* PRNG_SEEDLEN defined in NIST SP 800-90 section 10.1
+ * for SHA-1, SHA-224, and SHA-256 it's 440 bits.
+ * for SHA-384 and SHA-512 it's 888 bits.
+ * This file hashes with SHA-256, so the 440-bit value is used; the macro
+ * expresses it in bytes. */
+#define PRNG_SEEDLEN (440 / PR_BITS_PER_BYTE)
+#define PRNG_MAX_ADDITIONAL_BYTES PR_INT64(0x100000000)
+/* PRNG_MAX_ADDITIONAL_BYTES above is 2^35 bits or 2^32 bytes */
+#define PRNG_MAX_REQUEST_SIZE 0x10000 /* 2^19 bits or 2^16 bytes */
+#define PRNG_ADDITONAL_DATA_CACHE_SIZE (8 * 1024) /* must be less than \
+ * PRNG_MAX_ADDITIONAL_BYTES \
+ */
+#define PRNG_ENTROPY_BLOCK_SIZE SHA256_LENGTH /* bytes fetched per RNG_SystemRNG() call */
+
+/* RESEED_COUNT is how many calls to the prng before we need to reseed
+ * under normal NIST rules, you must return an error. In the NSS case, we
+ * self-reseed with RNG_SystemRNG(). Count can be a large number. For code
+ * simplicity, we specify count with 2 components: RESEED_BYTE (which is
+ * the same as LOG256(RESEED_COUNT)) and RESEED_VALUE (which is the same as
+ * RESEED_COUNT / (256 ^ RESEED_BYTE)). Another way to look at this is
+ * RESEED_COUNT = RESEED_VALUE * (256 ^ RESEED_BYTE). For Hash based DRBG
+ * we use the maximum count value, 2^48, or RESEED_BYTE=6 and RESEED_VALUE=1
+ *
+ * The counter is a byte array of RESEED_BYTE + 1 bytes whose last byte
+ * (index RESEED_BYTE) is the least significant one; the reseed check in
+ * this file compares byte 0 against RESEED_VALUE.
+ *
+ * PRNG_RESET_RESEED_COUNT zeroes the counter and sets its last byte to 1.
+ * It expands to two statements and supplies its own trailing semicolon,
+ * so it is invoked without one and must not be used as the unbraced body
+ * of an if/else.
+ */
+#define RESEED_BYTE 6
+#define RESEED_VALUE 1
+
+#define PRNG_RESET_RESEED_COUNT(rng) \
+ PORT_Memset((rng)->reseed_counter, 0, sizeof(rng)->reseed_counter); \
+ (rng)->reseed_counter[RESEED_BYTE] = 1;
+
+/*
+ * The actual values of this enum are specified in SP 800-90, 10.1.1.*
+ * The spec does not name the types, it only uses bare values.
+ * The selected value is stored in V_type (the byte directly in front of V)
+ * so that it is hashed as a one-byte prefix of V.
+ */
+typedef enum {
+ prngCGenerateType = 0, /* used when creating a new 'C' */
+ prngReseedType = 1, /* used in reseeding */
+ prngAdditionalDataType = 2, /* used in mixing additional data */
+ prngGenerateByteType = 3 /* used when mixing internal state while
+ * generating bytes */
+} prngVTypes;
+
+/*
+ * Global RNG context
+ *
+ * Holds the complete state of one Hash_DRBG instance. The same layout is
+ * used for the global generator (theGlobalRng) and for the separate
+ * self-test context (testContext).
+ */
+struct RNGContextStr {
+ PZLock *lock; /* Lock to serialize access to global rng */
+ /*
+ * NOTE, a number of steps in the drbg algorithm need to hash
+ * V_type || V. The code, therefore, depends on the V array following
+ * immediately after V_type to avoid extra copies. To accomplish this
+ * in a way that compilers can't perturb, we declare V_type and V
+ * as a V_Data array and reference them by macros */
+ PRUint8 V_Data[PRNG_SEEDLEN + 1]; /* internal state variables */
+#define V_type V_Data[0]
+#define V(rng) (((rng)->V_Data) + 1)
+#define VSize(rng) ((sizeof(rng)->V_Data) - 1)
+ PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */
+ /* If we get calls for the PRNG to return less than the length of our
+ * hash, we extend the request for a full hash (since we'll be doing
+ * the full hash anyway). Future requests for random numbers are fulfilled
+ * from the remainder of the bytes we generated. Requests for bytes longer
+ * than the hash size are fulfilled directly from the HashGen function
+ * of the random number generator.
+ * (This paragraph describes the data/dataAvail cache declared further
+ * down, not the reseed counter that follows it.) */
+ PRUint8 reseed_counter[RESEED_BYTE + 1]; /* number of requests since the
+ * last reseed. Need only be
+ * big enough to hold the whole
+ * reseed count */
+ PRUint8 data[SHA256_LENGTH]; /* when we request less than a block
+ * save the rest of the rng output for
+ * another partial block */
+ PRUint8 dataAvail; /* # bytes of output available in our cache,
+ * [0...SHA256_LENGTH] */
+ /* store additional data that has been shovelled off to us by
+ * RNG_RandomUpdate. */
+ PRUint8 additionalDataCache[PRNG_ADDITONAL_DATA_CACHE_SIZE];
+ PRUint32 additionalAvail; /* # bytes currently held in additionalDataCache */
+ PRBool isValid; /* false if RNG reaches an invalid state */
+ PRBool isKatTest; /* true if running NIST PRNG KAT tests */
+ /* for continuous entropy check */
+ PRUint8 previousEntropyHash[SHA256_LENGTH];
+};
+
+typedef struct RNGContextStr RNGContext;
+static RNGContext *globalrng = NULL; /* NULL until rng_init() points it at theGlobalRng */
+static RNGContext theGlobalRng; /* static storage backing globalrng */
+
+/*
+ * The next several functions are derived from the NIST SP 800-90
+ * spec. In these functions, an attempt was made to use names consistent
+ * with the names in the spec, even if they differ from normal NSS usage.
+ */
+
+/*
+ * Hash Derive function defined in NIST SP 800-90 Section 10.4.1.
+ * This function is used in the Instantiate and Reseed functions.
+ *
+ * Each output block is SHA-256(counter || bit_count || input_string_1 ||
+ * input_string_2), where counter is a single byte starting at 1 and
+ * bit_count is the 32-bit value no_of_bytes_to_return * 8 passed through
+ * SHA_HTONL. Blocks are written to requested_bytes until the request has
+ * been filled. The function always returns SECSuccess.
+ *
+ * NOTE: requested_bytes cannot overlap with input_string_1 or input_string_2.
+ * input_string_1 and input_string_2 are logically concatenated.
+ * input_string_1 must be supplied.
+ * if input_string_2 is not supplied, NULL should be passed for this parameter.
+ */
+static SECStatus
+prng_Hash_df(PRUint8 *requested_bytes, unsigned int no_of_bytes_to_return,
+ const PRUint8 *input_string_1, unsigned int input_string_1_len,
+ const PRUint8 *input_string_2, unsigned int input_string_2_len)
+{
+ SHA256Context ctx;
+ PRUint32 tmp; /* requested length in bits, computed once before the loop */
+ PRUint8 counter; /* block counter, hashed as the first byte of every block */
+
+ tmp = SHA_HTONL(no_of_bytes_to_return * 8);
+
+ for (counter = 1; no_of_bytes_to_return > 0; counter++) {
+ unsigned int hash_return_len;
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, &counter, 1);
+ SHA256_Update(&ctx, (unsigned char *)&tmp, sizeof tmp);
+ SHA256_Update(&ctx, input_string_1, input_string_1_len);
+ if (input_string_2) {
+ SHA256_Update(&ctx, input_string_2, input_string_2_len);
+ }
+ SHA256_End(&ctx, requested_bytes, &hash_return_len,
+ no_of_bytes_to_return); /* remaining byte count is the output limit */
+ requested_bytes += hash_return_len;
+ no_of_bytes_to_return -= hash_return_len;
+ }
+ SHA256_DestroyContext(&ctx, PR_FALSE); /* PR_FALSE: ctx lives on the stack */
+ return SECSuccess;
+}
+
+/*
+ * Instantiate function of Hash_DRBG, NIST SP 800-90 section 10.1.1.2.
+ *
+ * bytes/len carry entropy || nonce || personalization_string as a single
+ * buffer; in normal operation NSS gathers all three in one call.  V is
+ * derived from that buffer, C is derived from (prngCGenerateType || V),
+ * and the reseed counter is reset.
+ */
+static SECStatus
+prng_instantiate(RNGContext *rng, const PRUint8 *bytes, unsigned int len)
+{
+    /* A short seed most likely means we failed to collect enough random
+     * data.  NIST SP800-90A does not require this check, so it is not
+     * enforced for known-answer tests. */
+    if (len < PRNG_SEEDLEN && !rng->isKatTest) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    /* V = Hash_df(seed material) */
+    (void)prng_Hash_df(V(rng), VSize(rng), bytes, len, NULL, 0);
+
+    /* C = Hash_df(V_type || V); V_type sits directly in front of V */
+    rng->V_type = prngCGenerateType;
+    (void)prng_Hash_df(rng->C, sizeof(rng->C), rng->V_Data,
+                       sizeof(rng->V_Data), NULL, 0);
+
+    PRNG_RESET_RESEED_COUNT(rng)
+    return SECSuccess;
+}
+
+static PRCallOnceType coRNGInitEntropy; /* PR_CallOnce guard used by
+ * prng_getEntropy() to run prng_initEntropy() */
+
+static PRStatus
+prng_initEntropy(void)
+{
+ size_t length;
+ PRUint8 block[PRNG_ENTROPY_BLOCK_SIZE];
+ SHA256Context ctx;
+
+ /* For FIPS 140-2 4.9.2 continuous random number generator test,
+ * fetch the initial entropy from the system RNG and keep it for
+ * later comparison.
+ * This first block only primes globalrng->previousEntropyHash; it is
+ * wiped below and never handed out as seed material.
+ * Returns PR_FAILURE when RNG_SystemRNG() delivers nothing.
+ * NOTE(review): globalrng is dereferenced here without a NULL check;
+ * confirm every path reaching this has already run rng_init(). */
+ length = RNG_SystemRNG(block, sizeof(block));
+ if (length == 0) {
+ return PR_FAILURE; /* error is already set */
+ }
+ PORT_Assert(length == sizeof(block));
+
+ /* Store the hash of the entropy block rather than the block
+ * itself for backward secrecy. */
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, block, sizeof(block));
+ SHA256_End(&ctx, globalrng->previousEntropyHash, NULL,
+ sizeof(globalrng->previousEntropyHash));
+ PORT_Memset(block, 0, sizeof(block)); /* wipe the raw entropy */
+ SHA256_DestroyContext(&ctx, PR_FALSE);
+ return PR_SUCCESS;
+}
+
+/*
+ * Fill buffer with requestLength bytes taken from RNG_SystemRNG().
+ *
+ * Entropy is fetched in PRNG_ENTROPY_BLOCK_SIZE blocks.  The SHA-256 hash
+ * of every block is compared with the hash of the previous block (kept in
+ * globalrng->previousEntropyHash); an identical hash fails the call with
+ * SEC_ERROR_LIBRARY_FAILURE.  Returns SECFailure as well when the one-time
+ * priming in prng_initEntropy() or a system RNG read fails.
+ *
+ * NOTE(review): globalrng is dereferenced without a NULL check; confirm
+ * every caller runs after rng_init() has set it.
+ */
+static SECStatus
+prng_getEntropy(PRUint8 *buffer, size_t requestLength)
+{
+    size_t total = 0;
+    PRUint8 block[PRNG_ENTROPY_BLOCK_SIZE];
+    PRUint8 hash[SHA256_LENGTH];
+    SHA256Context ctx;
+    SECStatus rv = SECSuccess;
+
+    if (PR_CallOnce(&coRNGInitEntropy, prng_initEntropy) != PR_SUCCESS) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* For FIPS 140-2 4.9.2 continuous random generator test,
+     * iteratively fetch fixed sized blocks from the system and
+     * compare consecutive blocks. */
+    while (total < requestLength) {
+        size_t length = RNG_SystemRNG(block, sizeof(block));
+        if (length == 0) {
+            rv = SECFailure; /* error is already set */
+            goto out;
+        }
+        PORT_Assert(length == sizeof(block));
+
+        /* Store the hash of the entropy block rather than the block
+         * itself for backward secrecy. */
+        SHA256_Begin(&ctx);
+        SHA256_Update(&ctx, block, sizeof(block));
+        SHA256_End(&ctx, hash, NULL, sizeof(hash));
+
+        if (PORT_Memcmp(globalrng->previousEntropyHash, hash, sizeof(hash)) == 0) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            rv = SECFailure;
+            goto out;
+        }
+        PORT_Memcpy(globalrng->previousEntropyHash, hash, sizeof(hash));
+        /* hand out only as much of this block as is still needed */
+        length = PR_MIN(requestLength - total, sizeof(block));
+        PORT_Memcpy(buffer, block, length);
+        total += length;
+        buffer += length;
+    }
+
+out:
+    /* Wipe everything derived from the raw entropy, including the hash
+     * context, as the other SHA-256 users in this file do. */
+    SHA256_DestroyContext(&ctx, PR_FALSE);
+    PORT_Memset(hash, 0, sizeof hash);
+    PORT_Memset(block, 0, sizeof block);
+    return rv;
+}
+
+/*
+ * Update the global random number generator with more seeding
+ * material. Use the Hash_DRBG reseed algorithm from NIST SP-800-90
+ * section 10.1.1.3
+ *
+ * If entropy is NULL, it is fetched from the noise generator.
+ *
+ * The new V is Hash_df(prngReseedType || V || entropy || additional_input).
+ * The first two parts are assembled in one contiguous "noise" buffer:
+ * a copy of V_Data followed by the entropy. C and the reseed counter are
+ * then recomputed exactly as in instantiate.
+ *
+ * Fails with SEC_ERROR_NEED_RANDOM when fewer than 256 bits of entropy
+ * are available, and returns SECFailure when fetching entropy or
+ * allocating the noise buffer fails.
+ */
+static SECStatus
+prng_reseed(RNGContext *rng, const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional_input, unsigned int additional_input_len)
+{
+ PRUint8 noiseData[(sizeof rng->V_Data) + PRNG_SEEDLEN];
+ PRUint8 *noise = &noiseData[0]; /* points at the heap only for oversized test entropy */
+ SECStatus rv;
+
+ /* if entropy wasn't supplied, fetch it. (normal operation case) */
+ if (entropy == NULL) {
+ entropy_len = PRNG_SEEDLEN;
+ rv = prng_getEntropy(&noiseData[sizeof rng->V_Data], entropy_len);
+ if (rv != SECSuccess) {
+ return SECFailure; /* error is already set */
+ }
+ } else {
+ /* NOTE: this code is only available for testing, not to applications */
+ /* if entropy was too big for the stack variable, get it from malloc */
+ if (entropy_len > PRNG_SEEDLEN) {
+ noise = PORT_Alloc(entropy_len + (sizeof rng->V_Data));
+ if (noise == NULL) {
+ return SECFailure;
+ }
+ }
+ PORT_Memcpy(&noise[sizeof rng->V_Data], entropy, entropy_len);
+ }
+
+ if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+ /* noise == &noiseData[0] at this point, so nothing to free */
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ return SECFailure;
+ }
+
+ rng->V_type = prngReseedType;
+ PORT_Memcpy(noise, rng->V_Data, sizeof rng->V_Data); /* old V_type || V goes in front of the entropy */
+ prng_Hash_df(V(rng), VSize(rng), noise, (sizeof rng->V_Data) + entropy_len,
+ additional_input, additional_input_len);
+ /* clear potential CSP */
+ PORT_Memset(noise, 0, (sizeof rng->V_Data) + entropy_len);
+ rng->V_type = prngCGenerateType;
+ prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0);
+ PRNG_RESET_RESEED_COUNT(rng)
+
+ if (noise != &noiseData[0]) {
+ PORT_Free(noise);
+ }
+ return SECSuccess;
+}
+
+/*
+ * Reseed wrapper that first reruns the DRBG health tests, as SP 800-90
+ * requires on reseed.  When the health tests fail, rng is marked invalid
+ * and the call fails; otherwise the request is forwarded unchanged to
+ * prng_reseed().
+ */
+static SECStatus
+prng_reseed_test(RNGContext *rng, const PRUint8 *entropy,
+                 unsigned int entropy_len, const PRUint8 *additional_input,
+                 unsigned int additional_input_len)
+{
+    /* do health checks in FIPS mode */
+    if (PRNGTEST_RunHealthTests() == SECSuccess) {
+        return prng_reseed(rng, entropy, entropy_len,
+                           additional_input, additional_input_len);
+    }
+
+    /* the error code was set by PRNGTEST_RunHealthTests() */
+    rng->isValid = PR_FALSE;
+    return SECFailure;
+}
+
+/*
+ * build some fast inline functions for adding.
+ *
+ * All three macros treat byte arrays as numbers whose last byte is the
+ * least significant one, and all of them modify 'carry', which must be
+ * an assignable integer variable.
+ *
+ * PRNG_ADD_CARRY_ONLY: while carry is non-zero, increment dest[start],
+ * dest[start - 1], ... down to dest[0], stopping as soon as an increment
+ * does not wrap around to zero.
+ */
+#define PRNG_ADD_CARRY_ONLY(dest, start, carry) \
+ { \
+ int k1; \
+ for (k1 = start; carry && k1 >= 0; k1--) { \
+ carry = !(++dest[k1]); \
+ } \
+ }
+
+/*
+ * NOTE: dest must be an array for the following to work.
+ *
+ * PRNG_ADD_BITS: add the len-byte number 'add' into the low-order len
+ * bytes of the dest_len-byte number 'dest'. carry is reset to 0 first and
+ * holds the carry out of those len bytes afterwards; the higher bytes of
+ * dest are not touched.
+ * PRNG_ADD_BITS_AND_CARRY (below): same, then propagate that carry into
+ * the remaining high-order bytes of dest.
+ */
+#define PRNG_ADD_BITS(dest, dest_len, add, len, carry) \
+ carry = 0; \
+ PORT_Assert((dest_len) >= (len)); \
+ { \
+ int k1, k2; \
+ for (k1 = dest_len - 1, k2 = len - 1; k2 >= 0; --k1, --k2) { \
+ carry += dest[k1] + add[k2]; \
+ dest[k1] = (PRUint8)carry; \
+ carry >>= 8; \
+ } \
+ }
+
+#define PRNG_ADD_BITS_AND_CARRY(dest, dest_len, add, len, carry) \
+ PRNG_ADD_BITS(dest, dest_len, add, len, carry) \
+ PRNG_ADD_CARRY_ONLY(dest, dest_len - len - 1, carry)
+
+/*
+ * This function expands the internal state of the prng to fulfill any number
+ * of bytes we need for this request. We only use this call if we need more
+ * than can be supplied by a single call to SHA256_HashBuf.
+ *
+ * This function is specified in NIST SP 800-90 section 10.1.1.4, Hashgen
+ *
+ * It works on a local copy of V and leaves the generator state untouched.
+ * Each output block is SHA-256 of that copy; the copy is incremented by
+ * one between blocks. Both scratch buffers are zeroed before returning.
+ */
+static void
+prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes,
+ unsigned int no_of_returned_bytes)
+{
+ PRUint8 data[VSize(rng)]; /* working copy of V */
+ PRUint8 thisHash[SHA256_LENGTH];
+
+ PORT_Memcpy(data, V(rng), VSize(rng));
+ while (no_of_returned_bytes) {
+ SHA256Context ctx;
+ unsigned int len;
+ unsigned int carry;
+
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, data, sizeof data);
+ SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH);
+ if (no_of_returned_bytes < SHA256_LENGTH) {
+ len = no_of_returned_bytes; /* final, partial block */
+ }
+ PORT_Memcpy(returned_bytes, thisHash, len);
+ returned_bytes += len;
+ no_of_returned_bytes -= len;
+ /* The carry parameter is a bool (increment or not).
+ * This increments data if no_of_returned_bytes is not zero */
+ carry = no_of_returned_bytes;
+ PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry);
+ SHA256_DestroyContext(&ctx, PR_FALSE);
+ }
+ PORT_Memset(data, 0, sizeof data);
+ PORT_Memset(thisHash, 0, sizeof thisHash);
+}
+
+/*
+ * Generates new random bytes and advances the internal prng state.
+ * additional bytes are only used in algorithm testing.
+ *
+ * This function is specified in NIST SP 800-90 section 10.1.1.4
+ *
+ * Steps, in order:
+ * 1. if additional_input is given, V = V + SHA-256(0x02 || V || input)
+ * 2. produce the output from V (one SHA-256 of V for a request of exactly
+ * SHA256_LENGTH bytes, prng_Hashgen otherwise)
+ * 3. V = V + SHA-256(0x03 || V) + C + reseed_counter
+ * 4. reseed_counter = reseed_counter + 1
+ *
+ * Returns SECFailure with SEC_ERROR_LIBRARY_FAILURE when rng is not valid.
+ */
+static SECStatus
+prng_generateNewBytes(RNGContext *rng,
+ PRUint8 *returned_bytes, unsigned int no_of_returned_bytes,
+ const PRUint8 *additional_input,
+ unsigned int additional_input_len)
+{
+ PRUint8 H[SHA256_LENGTH]; /* both H and w since they
+ * aren't used concurrently */
+ unsigned int carry;
+
+ if (!rng->isValid) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* This code only triggers during tests, normal
+ * prng operation does not use additional_input */
+ if (additional_input) {
+ SHA256Context ctx;
+/* NIST SP 800-90 defines two temporaries in their calculations,
+ * w and H. These temporaries are the same lengths, and used
+ * at different times, so we use the following macro to collapse
+ * them to the same variable, but keeping their unique names for
+ * easy comparison to the spec */
+#define w H
+ rng->V_type = prngAdditionalDataType;
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, rng->V_Data, sizeof rng->V_Data);
+ SHA256_Update(&ctx, additional_input, additional_input_len);
+ SHA256_End(&ctx, w, NULL, sizeof w);
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), w, sizeof w, carry) /* V = V + w */
+ PORT_Memset(w, 0, sizeof w);
+ SHA256_DestroyContext(&ctx, PR_FALSE);
+#undef w
+ }
+
+ if (no_of_returned_bytes == SHA256_LENGTH) {
+ /* short_cut to hashbuf and a couple of copies and clears */
+ SHA256_HashBuf(returned_bytes, V(rng), VSize(rng));
+ } else {
+ prng_Hashgen(rng, returned_bytes, no_of_returned_bytes);
+ }
+ /* advance our internal state... */
+ rng->V_type = prngGenerateByteType;
+ SHA256_HashBuf(H, rng->V_Data, sizeof rng->V_Data);
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), H, sizeof H, carry)
+ PRNG_ADD_BITS(V(rng), VSize(rng), rng->C, sizeof rng->C, carry); /* C is as long as V, so no higher bytes to carry into */
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), rng->reseed_counter,
+ sizeof rng->reseed_counter, carry)
+ carry = 1; /* unconditionally add one to the reseed counter */
+ PRNG_ADD_CARRY_ONLY(rng->reseed_counter, (sizeof rng->reseed_counter) - 1, carry);
+
+ /* if the prng failed, don't return any output, signal softoken.
+ * NOTE(review): isValid was already checked on entry and is not
+ * modified inside this function; presumably this guards against it
+ * being cleared elsewhere in the meantime - confirm. */
+ PORT_Memset(H, 0, sizeof H);
+ if (!rng->isValid) {
+ PORT_Memset(returned_bytes, 0, no_of_returned_bytes);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ return SECSuccess;
+}
+
+/* Use NSPR to prevent RNG_RNGInit from being called from separate
+ * threads, creating a race condition.
+ *
+ * rng_init() is the PR_CallOnce callback behind RNG_RNGInit(). It points
+ * globalrng at theGlobalRng, creates its lock, seeds the generator from
+ * prng_getEntropy() (instantiate on first use, reseed if state from an
+ * earlier run is still present), marks it valid and finally calls
+ * RNG_SystemInfoForRNG(). pristineCallOnce is a never-modified
+ * PRCallOnceType used by RNG_RNGShutdown() to reset coRNGInit.
+ */
+static const PRCallOnceType pristineCallOnce;
+static PRCallOnceType coRNGInit;
+static PRStatus
+rng_init(void)
+{
+ PRUint8 bytes[PRNG_SEEDLEN * 2]; /* entropy + nonce */
+ SECStatus rv = SECSuccess;
+
+ if (globalrng == NULL) {
+ /* bytes needs to have enough space to hold
+ * a SHA256 hash value. Blow up at compile time if this isn't true */
+ PR_STATIC_ASSERT(sizeof(bytes) >= SHA256_LENGTH);
+ /* create a new global RNG context */
+ globalrng = &theGlobalRng;
+ PORT_Assert(NULL == globalrng->lock);
+ /* create a lock for it */
+ globalrng->lock = PZ_NewLock(nssILockOther);
+ if (globalrng->lock == NULL) {
+ globalrng = NULL;
+ PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+ return PR_FAILURE;
+ }
+
+ /* Try to get some seed data for the RNG */
+ rv = prng_getEntropy(bytes, sizeof bytes);
+ if (rv == SECSuccess) {
+ /* if this is our first call, instantiate, otherwise reseed
+ * prng_instantiate gets a new clean state, we want to mix
+ * any previous entropy we may have collected */
+ if (V(globalrng)[0] == 0) {
+ rv = prng_instantiate(globalrng, bytes, sizeof bytes);
+ } else {
+ rv = prng_reseed_test(globalrng, bytes, sizeof bytes, NULL, 0);
+ }
+ memset(bytes, 0, sizeof bytes);
+ } else {
+ PZ_DestroyLock(globalrng->lock);
+ globalrng->lock = NULL;
+ globalrng = NULL;
+ return PR_FAILURE;
+ }
+ if (rv != SECSuccess) {
+ return PR_FAILURE; /* NOTE(review): globalrng and its lock stay set on this path, with isValid not yet set - confirm intended */
+ }
+
+ /* the RNG is in a valid state */
+ globalrng->isValid = PR_TRUE;
+ globalrng->isKatTest = PR_FALSE;
+
+ /* fetch one random value and discard it. (This comment used to say it
+ * populates rng->oldV for a continuous random number test, but
+ * RNGContext has no oldV member; the bytes are not used afterwards.) */
+ prng_generateNewBytes(globalrng, bytes, SHA256_LENGTH, NULL, 0);
+
+ /* Fetch more entropy into the PRNG */
+ RNG_SystemInfoForRNG();
+ }
+ return PR_SUCCESS;
+}
+
+/*
+ * Clean up an RNG context.
+ *
+ * Destroys the context's lock (skipped after a fork, see SKIP_AFTER_FORK)
+ * and zeroes the whole structure, except that C and V are replaced by
+ * Hash_df derivations of their old values so that collected entropy
+ * survives until a later re-initialization.
+ */
+static void
+prng_freeRNGContext(RNGContext *rng)
+{
+    PRUint8 inputhash[VSize(rng) + (sizeof rng->C)];
+
+    /* destroy the lock of the context being freed; this used to reach
+     * for globalrng->lock regardless of which context was passed in */
+    SKIP_AFTER_FORK(PZ_DestroyLock(rng->lock));
+
+    /* zero global RNG context except for C & V to preserve entropy */
+    prng_Hash_df(inputhash, sizeof rng->C, rng->C, sizeof rng->C, NULL, 0);
+    prng_Hash_df(&inputhash[sizeof rng->C], VSize(rng), V(rng), VSize(rng),
+                 NULL, 0);
+    memset(rng, 0, sizeof *rng);
+    memcpy(rng->C, inputhash, sizeof rng->C);
+    memcpy(V(rng), &inputhash[sizeof rng->C], VSize(rng));
+
+    memset(inputhash, 0, sizeof inputhash);
+}
+
+/*
+ * Public functions
+ */
+
+/*
+ * Set up the global RNG context and seed it with input taken from the
+ * system.  The call is thread-safe, and the global context is only ever
+ * initialized once.  The seed input is likely small, so it is imperative
+ * that RNG_RandomUpdate() be called with additional seed data before the
+ * generator is used.  Calling RNG_SystemInfoForRNG() is a good way to
+ * supply that extra entropy; note that C_Initialize() does exactly that.
+ */
+SECStatus
+RNG_RNGInit(void)
+{
+    /* PR_CallOnce lets rng_init() run a single time; its status is not
+     * consulted, only whether a context exists afterwards. */
+    (void)PR_CallOnce(&coRNGInit, rng_init);
+
+    if (globalrng == NULL) {
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+** Update the global random number generator with more seeding
+** material.
+**
+** Small inputs are collected in globalrng->additionalDataCache.  Once the
+** cache would fill up, the generator is reseeded with the full cache and
+** the remainder starts a new cache.  Inputs larger than the cache are fed
+** to a reseed directly.  Fails with SEC_ERROR_INVALID_ARGS when the global
+** generator has not been initialized.
+*/
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    SECStatus rv;
+
+    /* Make sure our assumption that size_t is unsigned is true */
+    PR_STATIC_ASSERT(((size_t)-1) > (size_t)1);
+
+#if defined(NS_PTR_GT_32) || (defined(NSS_USE_64) && !defined(NS_PTR_LE_32))
+    /*
+     * NIST 800-90 requires us to verify our inputs. This value can
+     * come from the application, so we need to make sure it's within the
+     * spec. The spec says it must be less than 2^32 bytes (2^35 bits).
+     * This can only happen if size_t is greater than 32 bits (i.e. on
+     * most 64 bit platforms). The 90% case (perhaps 100% case), size_t
+     * is less than or equal to 32 bits if the platform is not 64 bits, and
+     * greater than 32 bits if it is a 64 bit platform. The corner
+     * cases are handled with explicit defines NS_PTR_GT_32 and NS_PTR_LE_32.
+     *
+     * In general, neither NS_PTR_GT_32 nor NS_PTR_LE_32 will need to be
+     * defined. If you trip over the next two size ASSERTS at compile time,
+     * you will need to define them for your platform.
+     *
+     * if 'sizeof(size_t) > 4' is triggered it means that we were expecting
+     * sizeof(size_t) to be greater than 4, but it wasn't. Setting
+     * NS_PTR_LE_32 will correct that mistake.
+     *
+     * if 'sizeof(size_t) <= 4' is triggered, it means that we were expecting
+     * sizeof(size_t) to be less than or equal to 4, but it wasn't. Setting
+     * NS_PTR_GT_32 will correct that mistake.
+     */
+
+    PR_STATIC_ASSERT(sizeof(size_t) > 4);
+
+    /* Clamp to strictly less than 2^32.  Clamping to exactly 2^32 would
+     * become a length of 0 once the count is narrowed to unsigned int
+     * for the reseed call below, silently discarding the input. */
+    if (bytes >= (size_t)PRNG_MAX_ADDITIONAL_BYTES) {
+        bytes = (size_t)PRNG_MAX_ADDITIONAL_BYTES - 1;
+    }
+#else
+    PR_STATIC_ASSERT(sizeof(size_t) <= 4);
+#endif
+
+    /* same guard and error code as prng_GenerateGlobalRandomBytes() */
+    if (globalrng == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    PZ_Lock(globalrng->lock);
+    /* if we're passed more than our additionalDataCache, simply
+     * call reseed with that data */
+    if (bytes > sizeof(globalrng->additionalDataCache)) {
+        rv = prng_reseed_test(globalrng, NULL, 0, data, (unsigned int)bytes);
+        /* if we aren't going to fill or overflow the buffer, just cache it */
+    } else if (bytes < ((sizeof globalrng->additionalDataCache) - globalrng->additionalAvail)) {
+        PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                    data, bytes);
+        globalrng->additionalAvail += (PRUint32)bytes;
+        rv = SECSuccess;
+    } else {
+        /* we are going to fill or overflow the buffer. In this case we will
+         * fill the entropy buffer, reseed with it, start a new buffer with the
+         * remainder. We know the remainder will fit in the buffer because
+         * we already handled the case where bytes > the size of the buffer.
+         */
+        size_t bufRemain = (sizeof globalrng->additionalDataCache) - globalrng->additionalAvail;
+        /* fill the rest of the buffer */
+        if (bufRemain) {
+            PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                        data, bufRemain);
+            data = ((unsigned char *)data) + bufRemain;
+            bytes -= bufRemain;
+        }
+        /* reseed from buffer */
+        rv = prng_reseed_test(globalrng, NULL, 0,
+                              globalrng->additionalDataCache,
+                              sizeof globalrng->additionalDataCache);
+
+        /* copy the rest into the cache */
+        PORT_Memcpy(globalrng->additionalDataCache, data, bytes);
+        globalrng->additionalAvail = (PRUint32)bytes;
+    }
+
+    PZ_Unlock(globalrng->lock);
+    return rv;
+}
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.
+**
+** Requests larger than PRNG_MAX_REQUEST_SIZE and a NULL rng are rejected
+** with SEC_ERROR_INVALID_ARGS. Under rng->lock the request is served in
+** one of three ways: from bytes left over in rng->data, by generating one
+** fresh block into rng->data and caching what is not handed out, or by
+** generating straight into dest. Cached additional data, if any, is mixed
+** in whenever new bytes are generated and the cache is then emptied.
+*/
+static SECStatus
+prng_GenerateGlobalRandomBytes(RNGContext *rng,
+ void *dest, size_t len)
+{
+ SECStatus rv = SECSuccess;
+ PRUint8 *output = dest;
+ /* check for a valid global RNG context */
+ PORT_Assert(rng != NULL);
+ if (rng == NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* FIPS limits the amount of entropy available in a single request */
+ if (len > PRNG_MAX_REQUEST_SIZE) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* --- LOCKED --- */
+ PZ_Lock(rng->lock);
+ /* Check the amount of seed data in the generator. If not enough,
+ * don't produce any data.
+ * The most significant counter byte reaching RESEED_VALUE means the
+ * reseed count has been hit, so reseed before generating. The lock is
+ * released around RNG_SystemInfoForRNG() and re-taken afterwards.
+ */
+ if (rng->reseed_counter[0] >= RESEED_VALUE) {
+ rv = prng_reseed_test(rng, NULL, 0, NULL, 0);
+ PZ_Unlock(rng->lock);
+ if (rv != SECSuccess) {
+ return rv;
+ }
+ RNG_SystemInfoForRNG();
+ PZ_Lock(rng->lock);
+ }
+ /*
+ * see if we have enough bytes to fulfill the request.
+ * Unused bytes sit at the end of rng->data; what is handed out is
+ * zeroed in the cache.
+ */
+ if (len <= rng->dataAvail) {
+ memcpy(output, rng->data + ((sizeof rng->data) - rng->dataAvail), len);
+ memset(rng->data + ((sizeof rng->data) - rng->dataAvail), 0, len);
+ rng->dataAvail -= len;
+ rv = SECSuccess;
+ /* if we are asking for a small number of bytes, cache the rest of
+ * the bytes */
+ } else if (len < sizeof rng->data) {
+ rv = prng_generateNewBytes(rng, rng->data, sizeof rng->data,
+ rng->additionalAvail ? rng->additionalDataCache : NULL,
+ rng->additionalAvail);
+ rng->additionalAvail = 0;
+ if (rv == SECSuccess) {
+ memcpy(output, rng->data, len);
+ memset(rng->data, 0, len);
+ rng->dataAvail = (sizeof rng->data) - len;
+ }
+ /* we are asking for lots of bytes, just ask the generator to pass them */
+ } else {
+ rv = prng_generateNewBytes(rng, output, len,
+ rng->additionalAvail ? rng->additionalDataCache : NULL,
+ rng->additionalAvail);
+ rng->additionalAvail = 0;
+ }
+ PZ_Unlock(rng->lock);
+ /* --- UNLOCKED --- */
+ return rv;
+}
+
+/*
+** Public entry point for random output: fills dest with len bytes drawn
+** from the global random number generator object.
+*/
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+    RNGContext *rng = globalrng;
+
+    return prng_GenerateGlobalRandomBytes(rng, dest, len);
+}
+
+/*
+ * Tear down the global RNG context and re-arm RNG_RNGInit() so that the
+ * generator can be initialized again later.
+ */
+void
+RNG_RNGShutdown(void)
+{
+    /* there must be a global RNG context to shut down */
+    PORT_Assert(globalrng != NULL);
+    if (globalrng != NULL) {
+        /* clear */
+        prng_freeRNGContext(globalrng);
+        globalrng = NULL;
+        /* reset the callonce struct to allow a new call to RNG_RNGInit() */
+        coRNGInit = pristineCallOnce;
+        return;
+    }
+
+    /* Should set a "not initialized" error code. */
+    PORT_SetError(SEC_ERROR_NO_MEMORY);
+}
+
+/*
+ * Test case interface, used by FIPS testing and the power-on self test.
+ */
+/* The test context is kept apart from the global context.  That way the
+ * internal random number generator can be exercised without throwing away
+ * entropy that has already been collected. */
+RNGContext testContext;
+
+/* Instantiate the test context for a known-answer test: flags the context
+ * as a KAT run first, then defers to PRNGTEST_Instantiate(). */
+SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+                         const PRUint8 *nonce, unsigned int nonce_len,
+                         const PRUint8 *personal_string, unsigned int ps_len)
+{
+    SECStatus rv;
+
+    testContext.isKatTest = PR_TRUE;
+    rv = PRNGTEST_Instantiate(entropy, entropy_len,
+                              nonce, nonce_len,
+                              personal_string, ps_len);
+    return rv;
+}
+
+/*
+ * Test vector API. Use NIST SP 800-90 general interface so one of the
+ * other NIST SP 800-90 algorithms may be used in the future.
+ *
+ * Instantiates testContext from entropy || nonce || personal_string and
+ * marks it valid on success.
+ *
+ * Fails with SEC_ERROR_NEED_RANDOM when fewer than 256 bits of entropy are
+ * supplied (or when prng_instantiate() rejects the seed length), with
+ * SEC_ERROR_INVALID_ARGS when a buffer pointer is NULL although its length
+ * is non-zero or when the combined length overflows, and with
+ * SEC_ERROR_NO_MEMORY when the scratch buffer cannot be allocated.
+ */
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len)
+{
+    unsigned int bytes_len;
+    PRUint8 *bytes = NULL;
+    SECStatus rv;
+
+    if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    /* A missing buffer with a non-zero length would either crash the copy
+     * or leave part of the seed buffer uninitialized. */
+    if (entropy == NULL ||
+        (nonce == NULL && nonce_len != 0) ||
+        (personal_string == NULL && ps_len != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* add the three lengths, detecting unsigned wrap-around */
+    bytes_len = entropy_len + nonce_len;
+    if (bytes_len < entropy_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    bytes_len += ps_len;
+    if (bytes_len < ps_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    bytes = PORT_Alloc(bytes_len);
+    if (bytes == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* concatenate the various inputs, internally NSS only instantiates with
+     * a single long string */
+    PORT_Memcpy(bytes, entropy, entropy_len);
+    if (nonce) {
+        PORT_Memcpy(&bytes[entropy_len], nonce, nonce_len);
+    }
+    if (personal_string) {
+        PORT_Memcpy(&bytes[entropy_len + nonce_len], personal_string, ps_len);
+    }
+    rv = prng_instantiate(&testContext, bytes, bytes_len);
+    /* the seed material is sensitive: zero it before releasing it */
+    PORT_ZFree(bytes, bytes_len);
+    if (rv == SECFailure) {
+        return SECFailure;
+    }
+    testContext.isValid = PR_TRUE;
+    return SECSuccess;
+}
+
+/* Reseed the test context.  With all four arguments NULL/0 no reseed is
+ * done; instead the reseed counter is pushed to its limit (see below). */
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len)
+{
+    PRBool forceMaxCount;
+
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* This magic input tells us to set the reseed count to its max count,
+     * so we can simulate PRNGTEST_Generate reaching max reseed count */
+    forceMaxCount = (entropy == NULL) && (entropy_len == 0) &&
+                    (additional == NULL) && (additional_len == 0);
+    if (!forceMaxCount) {
+        return prng_reseed(&testContext, entropy, entropy_len, additional,
+                           additional_len);
+    }
+
+    testContext.reseed_counter[0] = RESEED_VALUE;
+    return SECSuccess;
+}
+
+/* Generate bytes_len bytes from the test context, reseeding first when
+ * the reseed counter has reached its limit. */
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len)
+{
+    SECStatus rv = SECSuccess;
+
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* replicate reseed test from prng_GenerateGlobalRandomBytes */
+    if (testContext.reseed_counter[0] >= RESEED_VALUE) {
+        rv = prng_reseed(&testContext, NULL, 0, NULL, 0);
+    }
+    if (rv == SECSuccess) {
+        rv = prng_generateNewBytes(&testContext, bytes, bytes_len,
+                                   additional, additional_len);
+    }
+    return rv;
+}
+
+/* Wipe the test context.  Fails with SEC_ERROR_LIBRARY_FAILURE when the
+ * context is not currently instantiated. */
+SECStatus
+PRNGTEST_Uninstantiate()
+{
+    if (testContext.isValid) {
+        PORT_Memset(&testContext, 0, sizeof(testContext));
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+SECStatus
+PRNGTEST_RunHealthTests()
+{ /* Known-answer health test of the DRBG, run against testContext.
+ * Checks, in order: instantiate with too little seed fails, generate
+ * after instantiate matches rng_known_result, generate after an explicit
+ * reseed matches rng_reseed_result, generate at the reseed limit does
+ * NOT match rng_no_reseed_result, reseed with too little entropy fails,
+ * and uninstantiate works once but fails the second time.
+ * Returns SECSuccess only if every step behaves as expected. */
+ static const PRUint8 entropy[] = {
+ 0x8e, 0x9c, 0x0d, 0x25, 0x75, 0x22, 0x04, 0xf9,
+ 0xc5, 0x79, 0x10, 0x8b, 0x23, 0x79, 0x37, 0x14,
+ 0x9f, 0x2c, 0xc7, 0x0b, 0x39, 0xf8, 0xee, 0xef,
+ 0x95, 0x0c, 0x97, 0x59, 0xfc, 0x0a, 0x85, 0x41,
+ 0x76, 0x9d, 0x6d, 0x67, 0x00, 0x4e, 0x19, 0x12,
+ 0x02, 0x16, 0x53, 0xea, 0xf2, 0x73, 0xd7, 0xd6,
+ 0x7f, 0x7e, 0xc8, 0xae, 0x9c, 0x09, 0x99, 0x7d,
+ 0xbb, 0x9e, 0x48, 0x7f, 0xbb, 0x96, 0x46, 0xb3,
+ 0x03, 0x75, 0xf8, 0xc8, 0x69, 0x45, 0x3f, 0x97,
+ 0x5e, 0x2e, 0x48, 0xe1, 0x5d, 0x58, 0x97, 0x4c
+ };
+ static const PRUint8 rng_known_result[] = {
+ 0x16, 0xe1, 0x8c, 0x57, 0x21, 0xd8, 0xf1, 0x7e,
+ 0x5a, 0xa0, 0x16, 0x0b, 0x7e, 0xa6, 0x25, 0xb4,
+ 0x24, 0x19, 0xdb, 0x54, 0xfa, 0x35, 0x13, 0x66,
+ 0xbb, 0xaa, 0x2a, 0x1b, 0x22, 0x33, 0x2e, 0x4a,
+ 0x14, 0x07, 0x9d, 0x52, 0xfc, 0x73, 0x61, 0x48,
+ 0xac, 0xc1, 0x22, 0xfc, 0xa4, 0xfc, 0xac, 0xa4,
+ 0xdb, 0xda, 0x5b, 0x27, 0x33, 0xc4, 0xb3
+ };
+ static const PRUint8 reseed_entropy[] = {
+ 0xc6, 0x0b, 0x0a, 0x30, 0x67, 0x07, 0xf4, 0xe2,
+ 0x24, 0xa7, 0x51, 0x6f, 0x5f, 0x85, 0x3e, 0x5d,
+ 0x67, 0x97, 0xb8, 0x3b, 0x30, 0x9c, 0x7a, 0xb1,
+ 0x52, 0xc6, 0x1b, 0xc9, 0x46, 0xa8, 0x62, 0x79
+ };
+ static const PRUint8 additional_input[] = {
+ 0x86, 0x82, 0x28, 0x98, 0xe7, 0xcb, 0x01, 0x14,
+ 0xae, 0x87, 0x4b, 0x1d, 0x99, 0x1b, 0xc7, 0x41,
+ 0x33, 0xff, 0x33, 0x66, 0x40, 0x95, 0x54, 0xc6,
+ 0x67, 0x4d, 0x40, 0x2a, 0x1f, 0xf9, 0xeb, 0x65
+ };
+ static const PRUint8 rng_reseed_result[] = {
+ 0x02, 0x0c, 0xc6, 0x17, 0x86, 0x49, 0xba, 0xc4,
+ 0x7b, 0x71, 0x35, 0x05, 0xf0, 0xdb, 0x4a, 0xc2,
+ 0x2c, 0x38, 0xc1, 0xa4, 0x42, 0xe5, 0x46, 0x4a,
+ 0x7d, 0xf0, 0xbe, 0x47, 0x88, 0xb8, 0x0e, 0xc6,
+ 0x25, 0x2b, 0x1d, 0x13, 0xef, 0xa6, 0x87, 0x96,
+ 0xa3, 0x7d, 0x5b, 0x80, 0xc2, 0x38, 0x76, 0x61,
+ 0xc7, 0x80, 0x5d, 0x0f, 0x05, 0x76, 0x85
+ };
+ static const PRUint8 rng_no_reseed_result[] = {
+ 0xc4, 0x40, 0x41, 0x8c, 0xbf, 0x2f, 0x70, 0x23,
+ 0x88, 0xf2, 0x7b, 0x30, 0xc3, 0xca, 0x1e, 0xf3,
+ 0xef, 0x53, 0x81, 0x5d, 0x30, 0xed, 0x4c, 0xf1,
+ 0xff, 0x89, 0xa5, 0xee, 0x92, 0xf8, 0xc0, 0x0f,
+ 0x88, 0x53, 0xdf, 0xb6, 0x76, 0xf0, 0xaa, 0xd3,
+ 0x2e, 0x1d, 0x64, 0x37, 0x3e, 0xe8, 0x4a, 0x02,
+ 0xff, 0x0a, 0x7f, 0xe5, 0xe9, 0x2b, 0x6d
+ };
+
+ SECStatus rng_status = SECSuccess;
+ PR_STATIC_ASSERT(sizeof(rng_known_result) >= sizeof(rng_reseed_result));
+ PRUint8 result[sizeof(rng_known_result)]; /* sized for the largest expected output, see assert above */
+
+ /********************************************/
+ /* First test instantiate error path. */
+ /* In this case we supply enough entropy, */
+ /* but not enough seed. This will trigger */
+ /* the code that checks for a entropy */
+ /* source failure. */
+ /********************************************/
+ rng_status = PRNGTEST_Instantiate(entropy, 256 / PR_BITS_PER_BYTE,
+ NULL, 0, NULL, 0);
+ if (rng_status == SECSuccess) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* we failed with the proper error code, we can continue */
+
+ /********************************************/
+ /* Generate random bytes with a known seed. */
+ /********************************************/
+ rng_status = PRNGTEST_Instantiate(entropy, sizeof entropy,
+ NULL, 0, NULL, 0);
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Instantiate */
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Generate(result, sizeof rng_known_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ (PORT_Memcmp(result, rng_known_result,
+ sizeof rng_known_result) != 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Reseed(reseed_entropy, sizeof reseed_entropy,
+ additional_input, sizeof additional_input);
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Reseed */
+ PRNGTEST_Uninstantiate();
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ (PORT_Memcmp(result, rng_reseed_result,
+ sizeof rng_reseed_result) != 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* This magic forces the reseed count to its max count, so we can see if
+ * PRNGTEST_Generate will actually reseed when it reaches its count */
+ rng_status = PRNGTEST_Reseed(NULL, 0, NULL, 0);
+ if (rng_status != SECSuccess) {
+ PRNGTEST_Uninstantiate();
+ /* Error set by PRNGTEST_Reseed */
+ return SECFailure;
+ }
+ /* This generate should now reseed */
+ rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ /* NOTE we fail if the result is equal to the no_reseed_result.
+ * no_reseed_result is the value we would have gotten if we didn't
+ * do an automatic reseed in PRNGTEST_Generate */
+ (PORT_Memcmp(result, rng_no_reseed_result,
+ sizeof rng_no_reseed_result) == 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* make sure reseed fails when we don't supply enough entropy */
+ rng_status = PRNGTEST_Reseed(reseed_entropy, 4, NULL, 0);
+ if (rng_status == SECSuccess) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Uninstantiate();
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Uninstantiate */
+ return rng_status;
+ }
+ /* make sure uninstantiate fails if the context is not instantiated (also
+ * tests if the context was cleared in the previous Uninstantiate) */
+ rng_status = PRNGTEST_Uninstantiate();
+ if (rng_status == SECSuccess) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_LIBRARY_FAILURE) {
+ return rng_status; /* still the failure status from the call above */
+ }
+
+ return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c
new file mode 100644
index 0000000000..b81d9a3700
--- /dev/null
+++ b/security/nss/lib/freebl/dsa.c
@@ -0,0 +1,691 @@
+/*
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "blapit.h"
+#include "mpi.h"
+#include "secmpi.h"
+#include "pqg.h"
+
+/*
+ * Reduce a double-width random value modulo the DSA subprime q, as FIPS
+ * 186-2 requires when generating random numbers for DSA.
+ *
+ * Input:  w,  2*qLen bytes
+ *         q,  qLen bytes
+ * Output: xj, qLen bytes, receiving (w mod q)
+ *
+ * Returns SECSuccess, or SECFailure after translating the MPI error into
+ * a PORT error code.
+ */
+static SECStatus
+fips186Change_ReduceModQForDSA(const PRUint8 *w, const PRUint8 *q,
+                               unsigned int qLen, PRUint8 *xj)
+{
+    mp_int wide, modulus, residue;
+    mp_err err;
+
+    /* Null the digit pointers first so mp_clear() is safe on every path. */
+    MP_DIGITS(&wide) = 0;
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&residue) = 0;
+    CHECK_MPI_OK(mp_init(&wide));
+    CHECK_MPI_OK(mp_init(&modulus));
+    CHECK_MPI_OK(mp_init(&residue));
+
+    /* Load the octet-string arguments as MPI integers. */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&wide, w, 2 * qLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&modulus, q, qLen));
+
+    /*
+     * Algorithm 1 of FIPS 186-2 Change Notice 1, Step 3.3
+     *
+     * xj = (w0 || w1) mod q
+     */
+    CHECK_MPI_OK(mp_mod(&wide, &modulus, &residue));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&residue, xj, qLen));
+
+cleanup:
+    mp_clear(&wide);
+    mp_clear(&modulus);
+    mp_clear(&residue);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Exported form of the FIPS 186-2 "reduce mod q" step, fixed to the DSA1
+ * subprime length: w is 2*DSA1_SUBPRIME_LEN bytes, while q and xj are
+ * DSA1_SUBPRIME_LEN bytes each.
+ */
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+                               const unsigned char *q,
+                               unsigned char *xj)
+{
+    const unsigned int qLen = DSA1_SUBPRIME_LEN;
+
+    return fips186Change_ReduceModQForDSA(w, q, qLen, xj);
+}
+
+/*
+ * Formerly the core of Algorithm 1 of FIPS 186-2 Change Notice 1.
+ *
+ * The FIPS 186-2 RNG is no longer supported. The symbol was exported for
+ * power-up self tests and FIPS tests, so it is kept as a stub that always
+ * fails: callers do not crash, and test code learns that the FIPS 186-2
+ * RNG is gone.
+ *
+ * Always sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+ */
+SECStatus
+FIPS186Change_GenerateX(PRUint8 *XKEY, const PRUint8 *XSEEDj,
+                        PRUint8 *x_j)
+{
+    /* The arguments are deliberately ignored by this stub. */
+    (void)XKEY;
+    (void)XSEEDj;
+    (void)x_j;
+
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
+
+/*
+ * Specialized RNG for DSA
+ *
+ * As per Algorithm 1 of FIPS 186-2 Change Notice 1, in step 3.3 the value
+ * Xj should be reduced mod q, a 160-bit prime number. Since this parameter
+ * is only meaningful in the context of DSA, the above RNG functions
+ * were implemented without it. They are re-implemented below for use
+ * with DSA.
+ */
+
+/*
+** Generate random bytes reduced mod q, using the global random number
+** generator object. In DSA mode, so there is a q.
+**
+** qItem      subprime q; one leading zero byte, if present, is skipped
+** dest       receives the result (qLen bytes, after the zero is skipped)
+** destLen    receives the number of bytes written to dest
+** maxDestLen capacity of dest in bytes
+**
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS when q is missing, empty
+** or zero, or when dest is too small; otherwise the status of the RNG and
+** of the modular reduction.
+*/
+static SECStatus
+dsa_GenerateGlobalRandomBytes(const SECItem *qItem, PRUint8 *dest,
+                              unsigned int *destLen, unsigned int maxDestLen)
+{
+    SECStatus rv;
+    SECItem w;
+    const PRUint8 *q;
+    unsigned int qLen;
+
+    /* Reject a missing or empty q before reading its first byte. */
+    if (!qItem || !qItem->data || qItem->len == 0 || !dest || !destLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    q = qItem->data;
+    qLen = qItem->len;
+
+    if (*q == 0) {
+        ++q;
+        --qLen;
+    }
+    if (qLen == 0) {
+        /* q consisted solely of a zero byte; there is nothing to reduce by. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (maxDestLen < qLen) {
+        /* This condition can occur when DSA_SignDigest is passed a group
+           with a subprime that is larger than DSA_MAX_SUBPRIME_LEN. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    w.data = NULL; /* otherwise SECITEM_AllocItem asserts */
+    if (!SECITEM_AllocItem(NULL, &w, 2 * qLen)) {
+        return SECFailure;
+    }
+    *destLen = qLen;
+
+    rv = RNG_GenerateGlobalRandomBytes(w.data, w.len);
+    if (rv == SECSuccess) {
+        rv = fips186Change_ReduceModQForDSA(w.data, q, qLen, dest);
+    }
+
+    /* w fully determines the (secret) output, so scrub it before freeing. */
+    SECITEM_ZfreeItem(&w, PR_FALSE);
+    return rv;
+}
+
+/*
+ * Function wrapper around the MP_TO_SEC_ERROR macro, which translates an
+ * MPI error code for the caller.
+ */
+static void
+translate_mpi_error(mp_err err)
+{
+    MP_TO_SEC_ERROR(err);
+}
+
+/*
+ * Build a DSA key pair from caller-supplied private-value bytes.
+ *
+ * params   PQG domain parameters; copied into the new key
+ * seed     bytes of the private value x (seed->len bytes are used)
+ * privKey  on success receives a new DSAPrivateKey that lives in its own
+ *          arena (key->params.arena)
+ *
+ * The public value is computed as y = g**x mod p. On any failure the
+ * arena is freed, *privKey is left untouched and SECFailure is returned.
+ */
+static SECStatus
+dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed,
+                   DSAPrivateKey **privKey)
+{
+    mp_int p, g;
+    mp_int x, y;
+    mp_err err = MP_OKAY;
+    PLArenaPool *arena;
+    DSAPrivateKey *key;
+    /* Check args. */
+    if (!params || !privKey || !seed || !seed->data) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize an arena for the DSA key. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DSAPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DSAPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->params.arena = arena;
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&y) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&y));
+    /* Copy over the PQG params.
+     * NOTE(review): SECITEM_CopyItem's status is routed through
+     * CHECK_MPI_OK here, i.e. treated as an mp_err, as in the original. */
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.prime,
+                                  &params->prime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.subPrime,
+                                  &params->subPrime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.base, &params->base));
+    /* Convert stored p, g, and received x into MPI integers. */
+    SECITEM_TO_MPINT(params->prime, &p);
+    SECITEM_TO_MPINT(params->base, &g);
+    OCTETS_TO_MPINT(seed->data, &x, seed->len);
+    /* Store x in private key; bail out instead of copying through NULL
+     * when the arena allocation fails. */
+    if (!SECITEM_AllocItem(arena, &key->privateValue, seed->len)) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    PORT_Memcpy(key->privateValue.data, seed->data, seed->len);
+    /* Compute public key y = g**x mod p */
+    CHECK_MPI_OK(mp_exptmod(&g, &x, &p, &y));
+    /* Store y in public key */
+    MPINT_TO_SECITEM(&y, &key->publicValue, arena);
+    *privKey = key;
+    key = NULL; /* ownership handed to the caller; skip the free below */
+cleanup:
+    mp_clear(&p);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&y);
+    if (key) {
+        PORT_FreeArena(key->params.arena, PR_TRUE);
+    }
+    if (err) {
+        translate_mpi_error(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Generate a random value, reduced mod q, that is neither 0 nor 1.
+ *
+ * arena  arena to allocate seed->data from, or NULL for the heap
+ * q      DSA subprime; must be non-empty and not the single byte 0
+ * seed   receives the freshly allocated random value
+ *
+ * Up to 10 candidates are drawn. Returns SECFailure with
+ * SEC_ERROR_INVALID_ARGS for a bad q, SEC_ERROR_NEED_RANDOM when every
+ * candidate was 0 or 1, or with the error of the failing allocation/RNG.
+ * On failure after allocation the buffer is scrubbed; a heap buffer is
+ * also released, an arena buffer is left for the arena's owner to free.
+ */
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    int retries = 10;
+    unsigned int i;
+    PRBool good;
+
+    if (q == NULL || q->data == NULL || q->len == 0 || seed == NULL ||
+        (q->data[0] == 0 && q->len == 1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (!SECITEM_AllocItem(arena, seed, q->len)) {
+        return SECFailure;
+    }
+
+    do {
+        /* Generate seed bytes for x according to FIPS 186-1 appendix 3 */
+        if (dsa_GenerateGlobalRandomBytes(q, seed->data, &seed->len,
+                                          seed->len)) {
+            goto loser;
+        }
+        /* Disallow values of 0 and 1 for x: any non-zero byte before the
+         * last one is enough, otherwise the last byte must exceed 1. */
+        good = PR_FALSE;
+        for (i = 0; i < seed->len - 1; i++) {
+            if (seed->data[i] != 0) {
+                good = PR_TRUE;
+                break;
+            }
+        }
+        if (!good && seed->data[i] > 1) {
+            good = PR_TRUE;
+        }
+    } while (!good && --retries > 0);
+
+    if (good) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_NEED_RANDOM);
+
+loser:
+    if (arena == NULL) {
+        /* Heap allocation: scrub and release it here so callers that bail
+         * out on failure do not leak it. */
+        SECITEM_ZfreeItem(seed, PR_FALSE);
+    } else if (seed->data != NULL) {
+        /* Arena memory must not be handed to the heap allocator; scrub it
+         * and leave the storage to the arena. */
+        PORT_Memset(seed->data, 0, seed->len);
+    }
+    return SECFailure;
+}
+
+/*
+** Generate and return a new DSA public and private key pair, both encoded
+** in a single DSAPrivateKey struct.
+**
+** "params" points to the PQG parameters for the domain; they are checked
+** with PQG_Check() first. The private value comes from DSA_NewRandom().
+** The temporary random value is zeroized and freed before returning.
+*/
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    SECItem x;
+    SECStatus status = PQG_Check(params);
+
+    if (status != SECSuccess) {
+        return status;
+    }
+    x.data = NULL;
+
+    status = DSA_NewRandom(NULL, &params->subPrime, &x);
+    if (status != SECSuccess) {
+        goto done;
+    }
+    if (x.len == PQG_GetLength(&params->subPrime)) {
+        status = dsa_NewKeyExtended(params, &x, privKey);
+    } else {
+        /* The random value must be exactly as long as the subprime. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        status = SECFailure;
+    }
+
+done:
+    SECITEM_ZfreeItem(&x, PR_FALSE);
+    return status;
+}
+
+/* For FIPS compliance testing: build a key pair whose private value is
+ * taken from "seed". Seed must be exactly the size of subPrime, because
+ * that many bytes are read from it. */
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params,
+                   const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    SECItem x;
+
+    x.len = PQG_GetLength(&params->subPrime);
+    x.data = (unsigned char *)seed;
+    return dsa_NewKeyExtended(params, &x, privKey);
+}
+
+/*
+ * Core DSA signing routine.
+ *
+ * key        private key with its PQG parameters
+ * signature  caller-supplied buffer of at least 2 * subprime-length bytes;
+ *            on success receives r || s and its len is set accordingly
+ * digest     hash to sign, between SHA1_LENGTH and HASH_LENGTH_MAX bytes
+ * kbytes     per-signature secret k, subprime-length bytes
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for bad arguments
+ * (including a subprime longer than DSA_MAX_SUBPRIME_LEN, which would not
+ * fit the local digest buffer), and with SEC_ERROR_NEED_RANDOM when the
+ * blinding randomness is unavailable or r/s turned out to be zero; callers
+ * may retry the latter with a fresh k.
+ */
+static SECStatus
+dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
+               const unsigned char *kbytes)
+{
+    mp_int p, q, g;    /* PQG parameters */
+    mp_int x, k;       /* private key & pseudo-random integer */
+    mp_int r, s;       /* tuple (r, s) is signature) */
+    mp_int t;          /* holding tmp values */
+    mp_int ar;         /* holding blinding values */
+    mp_digit fuzz = 0; /* blinding multiplier for q */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECItem t2 = { siBuffer, NULL, 0 };
+
+    /* FIPS-compliance dictates that digest is a SHA hash. */
+    /* Check args. */
+    if (!key || !signature || !digest || !kbytes) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    /* localDigestData holds at most DSA_MAX_SUBPRIME_LEN bytes; reject
+     * anything larger (or empty) before touching the buffer. */
+    if (dsa_subprime_len == 0 || dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    dsa_signature_len = dsa_subprime_len * 2;
+    if ((signature->len < dsa_signature_len) ||
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len, if the
+     * digests are greater, then they are truncated to the size of
+     * dsa_subprime_len, using the left most bits. If they are less
+     * then they are padded on the left.*/
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&ar) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&ar));
+
+    /*
+    ** Convert stored PQG and private key into MPI integers.
+    */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->privateValue, &x);
+    OCTETS_TO_MPINT(kbytes, &k, dsa_subprime_len);
+
+    /* k blinding create a single value that has the high bit set in
+     * the mp_digit*/
+    if (RNG_GenerateGlobalRandomBytes(&fuzz, sizeof(mp_digit)) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    fuzz |= 1ULL << ((sizeof(mp_digit) * PR_BITS_PER_BYTE - 1));
+    /*
+    ** FIPS 186-1, Section 5, Step 1
+    **
+    ** r = (g**k mod p) mod q
+    */
+    CHECK_MPI_OK(mp_mul_d(&q, fuzz, &t)); /* t = q*fuzz */
+    CHECK_MPI_OK(mp_add(&k, &t, &t));     /* t = k+q*fuzz */
+    /* length of t is now fixed, bits in k have been blinded */
+    CHECK_MPI_OK(mp_exptmod(&g, &t, &p, &r)); /* r = g**t mod p */
+    /* r is now g**(k+q*fuzz) == g**k mod p */
+    CHECK_MPI_OK(mp_mod(&r, &q, &r)); /* r = r mod q */
+    /* make sure fuzz is cleared off the stack and not optimized away */
+    *(volatile mp_digit *)&fuzz = 0;
+
+    /*
+    ** FIPS 186-1, Section 5, Step 2
+    **
+    ** s = (k**-1 * (HASH(M) + x*r)) mod q
+    */
+    if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */
+    SECITEM_ZfreeItem(&t2, PR_FALSE);
+    if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    SECITEM_TO_MPINT(t2, &ar); /* ar <-$ Zq */
+    SECITEM_ZfreeItem(&t2, PR_FALSE);
+
+    /* Using mp_invmod on k directly would leak bits from k. */
+    CHECK_MPI_OK(mp_mul(&k, &ar, &k));       /* k = k * ar */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    /* k is now k*t*ar */
+    CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */
+    /* k is now (k*t*ar)**-1 */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    /* k is now (k*ar)**-1 */
+    SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */
+    /* To avoid leaking secret bits here the addition is blinded. */
+    CHECK_MPI_OK(mp_mul(&x, &ar, &x)); /* x = x * ar */
+    /* x is now x*ar */
+    CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */
+    /* x is now x*r*ar */
+    CHECK_MPI_OK(mp_mulmod(&s, &ar, &q, &t)); /* t = s * ar mod q */
+    /* t is now hash(M)*ar */
+    CHECK_MPI_OK(mp_add(&t, &x, &s)); /* s = t + x */
+    /* s is now (HASH(M)+x*r)*ar */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */
+    /* s is now (HASH(M)+x*r)*ar*(k*ar)**-1 = (k**-1)*(HASH(M)+x*r) */
+
+    /*
+    ** verify r != 0 and s != 0
+    ** mentioned as optional in FIPS 186-1.
+    */
+    if (mp_cmp_z(&r) == 0 || mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /*
+    ** Step 4
+    **
+    ** Signature is tuple (r, s)
+    */
+    err = mp_to_fixlen_octets(&r, signature->data, dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = mp_to_fixlen_octets(&s, signature->data + dsa_subprime_len,
+                              dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = MP_OKAY;
+    signature->len = dsa_signature_len;
+cleanup:
+    PORT_Memset(localDigestData, 0, DSA_MAX_SUBPRIME_LEN);
+    /* Error paths can arrive here with blinding material still live:
+     * scrub the multiplier and release any pending random item. */
+    *(volatile mp_digit *)&fuzz = 0;
+    SECITEM_ZfreeItem(&t2, PR_FALSE);
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&t);
+    mp_clear(&ar);
+    if (err) {
+        translate_mpi_error(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* Sign a digest with a freshly generated random k.
+**
+** signature is a caller-supplied buffer of at least twice the subprime
+** length (40 bytes for a 160-bit q).
+** On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+** On output, signature->len == size of signature in buffer.
+**
+** A new k is drawn and signing is retried (up to 10 attempts) while the
+** failure is SEC_ERROR_NEED_RANDOM. The k buffer is cleared before return.
+*/
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv;
+    int retries = 10;
+    unsigned char kSeed[DSA_MAX_SUBPRIME_LEN];
+    unsigned int kSeedLen = 0;
+    unsigned int i;
+    unsigned int dsa_subprime_len;
+    PRBool good;
+
+    /* Validate before key is dereferenced to fetch the subprime length. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+
+    PORT_SetError(0);
+    do {
+        rv = dsa_GenerateGlobalRandomBytes(&key->params.subPrime,
+                                           kSeed, &kSeedLen, sizeof kSeed);
+        if (rv != SECSuccess)
+            break;
+        if (kSeedLen != dsa_subprime_len) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+            break;
+        }
+        /* Disallow a value of 0 for k. */
+        good = PR_FALSE;
+        for (i = 0; i < kSeedLen; i++) {
+            if (kSeed[i] != 0) {
+                good = PR_TRUE;
+                break;
+            }
+        }
+        if (!good) {
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            rv = SECFailure;
+            continue; /* re-evaluates the loop condition, i.e. retries */
+        }
+        rv = dsa_SignDigest(key, signature, digest, kSeed);
+    } while (rv != SECSuccess && PORT_GetError() == SEC_ERROR_NEED_RANDOM &&
+             --retries > 0);
+    PORT_Memset(kSeed, 0, sizeof kSeed);
+    return rv;
+}
+
+/* For FIPS compliance testing: sign with a caller-chosen k instead of a
+ * random one. The signing core reads subprime-length bytes from "seed"
+ * (20 bytes for a 160-bit q). No retry is attempted here. */
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key,
+                       SECItem *signature,
+                       const SECItem *digest,
+                       const unsigned char *seed)
+{
+    return dsa_SignDigest(key, signature, digest, seed);
+}
+
+/* Verify a DSA signature over a digest.
+**
+** signature->len must be exactly twice the subprime length (r || s).
+** digest->len must lie between SHA1_LENGTH and HASH_LENGTH_MAX.
+**
+** Returns SECSuccess when the signature verifies. Otherwise SECFailure:
+** SEC_ERROR_INVALID_ARGS for bad arguments (including a subprime longer
+** than DSA_MAX_SUBPRIME_LEN, which would not fit the local digest buffer),
+** SEC_ERROR_BAD_SIGNATURE for an out-of-range or non-matching signature,
+** or a translated MPI error.
+*/
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    /* FIPS-compliance dictates that digest is a SHA hash. */
+    mp_int p, q, g;      /* PQG parameters */
+    mp_int r_, s_;       /* tuple (r', s') is received signature) */
+    mp_int u1, u2, v, w; /* intermediate values used in verification */
+    mp_int y;            /* public key */
+    mp_err err = MP_OKAY;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECStatus verified = SECFailure;
+
+    /* Check args. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    /* localDigestData holds at most DSA_MAX_SUBPRIME_LEN bytes; reject
+     * anything larger (or empty) before touching the buffer. */
+    if (dsa_subprime_len == 0 || dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    dsa_signature_len = dsa_subprime_len * 2;
+    if ((signature->len != dsa_signature_len) ||
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len, if the
+     * digests are greater, then they are truncated to the size of
+     * dsa_subprime_len, using the left most bits. If they are less
+     * then they are padded on the left.*/
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&w));
+    /*
+    ** Convert stored PQG and public key into MPI integers.
+    */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->publicValue, &y);
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    OCTETS_TO_MPINT(signature->data, &r_, dsa_subprime_len);
+    OCTETS_TO_MPINT(signature->data + dsa_subprime_len, &s_, dsa_subprime_len);
+    /*
+    ** Verify that 0 < r' < q and 0 < s' < q
+    */
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &q) >= 0 || mp_cmp(&s_, &q) >= 0) {
+        /* err is zero here. */
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return verified == SECFailure */
+    }
+    /*
+    ** FIPS 186-1, Section 6, Step 1
+    **
+    ** w = (s')**-1 mod q
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &q, &w)); /* w = (s')**-1 mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 2
+    **
+    ** u1 = ((Hash(M')) * w) mod q
+    */
+    SECITEM_TO_MPINT(localDigest, &u1);        /* u1 = HASH(M') */
+    CHECK_MPI_OK(mp_mulmod(&u1, &w, &q, &u1)); /* u1 = u1 * w mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 3
+    **
+    ** u2 = ((r') * w) mod q
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &w, &q, &u2));
+    /*
+    ** FIPS 186-1, Section 6, Step 4
+    **
+    ** v = ((g**u1 * y**u2) mod p) mod q
+    */
+    CHECK_MPI_OK(mp_exptmod(&g, &u1, &p, &g)); /* g = g**u1 mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &u2, &p, &y)); /* y = y**u2 mod p */
+    CHECK_MPI_OK(mp_mulmod(&g, &y, &p, &v));   /* v = g * y mod p */
+    CHECK_MPI_OK(mp_mod(&v, &q, &v));          /* v = v mod q */
+    /*
+    ** Verification: v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        verified = SECFailure; /* Signature failed to verify. */
+    } else {
+        verified = SECSuccess; /* Signature verified. */
+    }
+cleanup:
+    PORT_Memset(localDigestData, 0, sizeof localDigestData);
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&y);
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&v);
+    mp_clear(&w);
+    if (err) {
+        translate_mpi_error(err);
+    }
+    return verified;
+}
diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c
new file mode 100644
index 0000000000..35a848395c
--- /dev/null
+++ b/security/nss/lib/freebl/ec.c
@@ -0,0 +1,1319 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "blapii.h"
+#include "prerr.h"
+#include "secerr.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "mplogic.h"
+#include "ec.h"
+#include "ecl.h"
+#include "verified/Hacl_P384.h"
+#include "verified/Hacl_P521.h"
+#include "secport.h"
+
+#define EC_DOUBLECHECK PR_FALSE
+
+/*
+ * Check that "scalar" is acceptable as a P-384 private key: it must be
+ * exactly 48 bytes long and pass Hacl_P384_validate_private_key().
+ *
+ * Sets SEC_ERROR_INVALID_ARGS for a missing scalar and SEC_ERROR_BAD_KEY
+ * for a wrong length or a rejected value.
+ */
+SECStatus
+ec_secp384r1_scalar_validate(const SECItem *scalar)
+{
+    if (scalar == NULL || scalar->data == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* The HACL* check only runs once the length is known to be right. */
+    if (scalar->len == 48 && Hacl_P384_validate_private_key(scalar->data)) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_BAD_KEY);
+    return SECFailure;
+}
+
+/*
+ * Check that "scalar" is acceptable as a P-521 private key: it must be
+ * exactly 66 bytes long and pass Hacl_P521_validate_private_key().
+ *
+ * Sets SEC_ERROR_INVALID_ARGS for a missing scalar and SEC_ERROR_BAD_KEY
+ * for a wrong length or a rejected value.
+ */
+SECStatus
+ec_secp521r1_scalar_validate(const SECItem *scalar)
+{
+    if (scalar == NULL || scalar->data == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* The HACL* check only runs once the length is known to be right. */
+    if (scalar->len == 66 && Hacl_P521_validate_private_key(scalar->data)) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_BAD_KEY);
+    return SECFailure;
+}
+
+/*
+ * Table of curve-specific method sets, searched by curve name in
+ * ec_get_method_from_name().
+ *
+ * Each entry is a positional ECMethod initializer. Judging by the P-256
+ * entry the slots are presumably, in order: curve name, pt_mul,
+ * pt_validate, scalar_validate, sign-digest and verify-digest hooks
+ * (TODO confirm against the ECMethod declaration).
+ *
+ * A NULL slot means no curve-specific routine is registered for that
+ * operation; the callers in this file check pt_mul, pt_validate and
+ * scalar_validate for NULL before using them.
+ */
+static const ECMethod kMethods[] = {
+ { ECCurve25519,
+ ec_Curve25519_pt_mul,
+ ec_Curve25519_pt_validate,
+ ec_Curve25519_scalar_validate,
+ NULL,
+ NULL },
+ {
+ ECCurve_NIST_P256,
+ ec_secp256r1_pt_mul,
+ ec_secp256r1_pt_validate,
+ ec_secp256r1_scalar_validate,
+ ec_secp256r1_sign_digest,
+ ec_secp256r1_verify_digest,
+ },
+ /* P-384: only scalar validation is registered here. */
+ {
+ ECCurve_NIST_P384,
+ NULL,
+ NULL,
+ ec_secp384r1_scalar_validate,
+ NULL,
+ NULL,
+ },
+ /* P-521: only scalar validation is registered here. */
+ {
+ ECCurve_NIST_P521,
+ NULL,
+ NULL,
+ ec_secp521r1_scalar_validate,
+ NULL,
+ NULL,
+ },
+};
+
+/*
+ * Look up the kMethods entry registered for curve "name".
+ * Returns a pointer into the table, or NULL when the curve has no entry.
+ */
+static const ECMethod *
+ec_get_method_from_name(ECCurveName name)
+{
+    const ECMethod *entry = kMethods;
+    const ECMethod *const end = kMethods + sizeof(kMethods) / sizeof(kMethods[0]);
+
+    for (; entry != end; ++entry) {
+        if (entry->name == name) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns true if pointP is the point at infinity, false otherwise.
+ *
+ * The first byte of the encoding is skipped; the point counts as the
+ * point at infinity when every remaining byte is zero.
+ */
+PRBool
+ec_point_at_infinity(SECItem *pointP)
+{
+    unsigned int idx = 1;
+
+    while (idx < pointP->len) {
+        if (pointP->data[idx++] != 0x00) {
+            return PR_FALSE;
+        }
+    }
+
+    return PR_TRUE;
+}
+
+/*
+ * Computes scalar point multiplication pointQ = k1 * G + k2 * pointP for
+ * the curve whose parameters are encoded in params with base point G.
+ *
+ * params  curve parameters; the group is built from params->name
+ * k1      scalar applied to the base point G
+ * k2      scalar applied to pointP; ignored unless pointP is also given
+ * pointP  optional uncompressed point (1 + 2*len bytes, len being the
+ *         field size in bytes); may be NULL
+ * pointQ  caller-allocated buffer receiving the uncompressed result,
+ *         1 + 2*len bytes are written
+ *
+ * Returns SECFailure with SEC_ERROR_UNSUPPORTED_EC_POINT_FORM when pointP
+ * is not an uncompressed point of the expected size, SECFailure when no
+ * group can be constructed for the curve name, or SECFailure with the
+ * translated MPI error.
+ */
+SECStatus
+ec_points_mul(const ECParams *params, const mp_int *k1, const mp_int *k2,
+              const SECItem *pointP, SECItem *pointQ)
+{
+    mp_int Px, Py, Qx, Qy;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    unsigned int len;
+
+#if EC_DEBUG
+    int i;
+    char mpstr[256];
+
+    printf("ec_points_mul: params [len=%d]:", params->DEREncoding.len);
+    for (i = 0; i < params->DEREncoding.len; i++)
+        printf("%02x:", params->DEREncoding.data[i]);
+    printf("\n");
+
+    if (k1 != NULL) {
+        mp_tohex((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s\n", mpstr);
+        mp_todecimal((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s (dec)\n", mpstr);
+    }
+
+    if (k2 != NULL) {
+        mp_tohex((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s\n", mpstr);
+        mp_todecimal((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s (dec)\n", mpstr);
+    }
+
+    if (pointP != NULL) {
+        printf("ec_points_mul: pointP [len=%d]:", pointP->len);
+        for (i = 0; i < pointP->len; i++)
+            printf("%02x:", pointP->data[i]);
+        printf("\n");
+    }
+#endif
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (((unsigned int)params->fieldID.size) + 7) >> 3;
+    if (pointP != NULL) {
+        /* Validate the length (and the buffer) before reading the
+         * point-form byte, so an empty item is never dereferenced. */
+        if ((pointP->len != (2 * len + 1)) ||
+            (pointP->data == NULL) ||
+            (pointP->data[0] != EC_POINT_FORM_UNCOMPRESSED)) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+            return SECFailure;
+        }
+    }
+
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    MP_DIGITS(&Qx) = 0;
+    MP_DIGITS(&Qy) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+    CHECK_MPI_OK(mp_init(&Qx));
+    CHECK_MPI_OK(mp_init(&Qy));
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        /* Initialize Px and Py */
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Px, pointP->data + 1, (mp_size)len));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Py, pointP->data + 1 + len, (mp_size)len));
+    }
+
+    /* construct from named params, if possible */
+    if (params->name != ECCurve_noName) {
+        group = ECGroup_fromName(params->name);
+    }
+
+    if (group == NULL)
+        goto cleanup; /* rv is still SECFailure */
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, k2, &Px, &Py, &Qx, &Qy));
+    } else {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, NULL, NULL, NULL, &Qx, &Qy));
+    }
+
+    /* our ECC codes uses large stack variables to store intermediate results,
+     * clear our stack before returning to prevent CSP leakage */
+    BLAPI_CLEAR_STACK(2048)
+
+    /* Construct the SECItem representation of point Q */
+    pointQ->data[0] = EC_POINT_FORM_UNCOMPRESSED;
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qx, pointQ->data + 1,
+                                     (mp_size)len));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qy, pointQ->data + 1 + len,
+                                     (mp_size)len));
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("ec_points_mul: pointQ [len=%d]:", pointQ->len);
+    for (i = 0; i < pointQ->len; i++)
+        printf("%02x:", pointQ->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    mp_clear(&Qx);
+    mp_clear(&Qy);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+    return rv;
+}
+
+/* Generates a new EC key pair. The private key is a supplied
+ * value and the public key is the result of performing a scalar
+ * point multiplication of that value with the curve's base point.
+ *
+ * ecParams      curve parameters (must name a curve); copied into the key
+ * privKey       on success receives the new key, which lives in its own
+ *               arena (key->ecParams.arena)
+ * privKeyBytes  private scalar bytes
+ * privKeyLen    number of bytes in privKeyBytes; shorter inputs are
+ *               left-padded with zeros to the order length, longer ones
+ *               contribute only their first order-length bytes
+ *
+ * On any failure the arena is freed, *privKey is left untouched and
+ * SECFailure is returned.
+ */
+SECStatus
+ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey,
+          const unsigned char *privKeyBytes, int privKeyLen)
+{
+    SECStatus rv = SECFailure;
+    PLArenaPool *arena;
+    ECPrivateKey *key;
+    mp_int k;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    printf("ec_NewKey called\n");
+#endif
+    MP_DIGITS(&k) = 0;
+
+    if (!ecParams || ecParams->name == ECCurve_noName ||
+        !privKey || !privKeyBytes || privKeyLen <= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Initialize an arena for the EC key. */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    key = (ECPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(ECPrivateKey));
+    if (!key) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Set the version number (SEC 1 section C.4 says it should be 1) */
+    if (!SECITEM_AllocItem(arena, &key->version, 1)) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key->version.data[0] = 1;
+
+    /* Copy all of the fields from the ECParams argument to the
+     * ECParams structure within the private key.
+     */
+    key->ecParams.arena = arena;
+    key->ecParams.type = ecParams->type;
+    key->ecParams.fieldID.size = ecParams->fieldID.size;
+    key->ecParams.fieldID.type = ecParams->fieldID.type;
+    if (ecParams->fieldID.type == ec_field_GFp ||
+        ecParams->fieldID.type == ec_field_plain) {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.prime,
+                                      &ecParams->fieldID.u.prime));
+    } else {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.poly,
+                                      &ecParams->fieldID.u.poly));
+    }
+    key->ecParams.fieldID.k1 = ecParams->fieldID.k1;
+    key->ecParams.fieldID.k2 = ecParams->fieldID.k2;
+    key->ecParams.fieldID.k3 = ecParams->fieldID.k3;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.a,
+                                  &ecParams->curve.a));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.b,
+                                  &ecParams->curve.b));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.seed,
+                                  &ecParams->curve.seed));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.base,
+                                  &ecParams->base));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.order,
+                                  &ecParams->order));
+    key->ecParams.cofactor = ecParams->cofactor;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.DEREncoding,
+                                  &ecParams->DEREncoding));
+    key->ecParams.name = ecParams->name;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curveOID,
+                                  &ecParams->curveOID));
+
+    /* Both value buffers are written below, so a failed allocation must
+     * stop here rather than be dereferenced. */
+    if (!SECITEM_AllocItem(arena, &key->publicValue, EC_GetPointSize(ecParams))) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    len = ecParams->order.len;
+    if (!SECITEM_AllocItem(arena, &key->privateValue, len)) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* Copy private key */
+    if (privKeyLen >= len) {
+        memcpy(key->privateValue.data, privKeyBytes, len);
+    } else {
+        memset(key->privateValue.data, 0, (len - privKeyLen));
+        memcpy(key->privateValue.data + (len - privKeyLen), privKeyBytes, privKeyLen);
+    }
+
+    /* Compute corresponding public key */
+
+    /* Use curve specific code for point multiplication */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->pt_mul == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        rv = method->pt_mul(&key->publicValue, &key->privateValue, NULL);
+        NSS_DECLASSIFY(key->publicValue.data, key->publicValue.len); /* Declassifying public key to avoid false positive */
+        if (rv != SECSuccess) {
+            goto cleanup;
+        } else {
+            goto done;
+        }
+    }
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, key->privateValue.data,
+                                         (mp_size)len));
+
+    rv = ec_points_mul(ecParams, &k, NULL, NULL, &(key->publicValue));
+    NSS_DECLASSIFY(key->publicValue.data, key->publicValue.len); /* Declassifying public key to avoid false positive */
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+
+done:
+    *privKey = key;
+
+cleanup:
+    mp_clear(&k);
+    /* An MPI failure jumps here while rv still holds SECSuccess from the
+     * last parameter copy; turn it into a real failure so that the arena
+     * is released and the caller is not told a key was produced. */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+
+#if EC_DEBUG
+    printf("ec_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+
+    return rv;
+}
+
+/* Generates a new EC key pair from a caller-supplied private value.
+ * "seed" (seedlen bytes) is used as the private key, and the public key
+ * is the scalar point multiplication of that value with the curve's
+ * base point. Simply forwards to ec_NewKey().
+ */
+SECStatus
+EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+    return ec_NewKey(ecParams, privKey, seed, seedlen);
+}
+
+/* Generate a random private key by rejection sampling (FIPS 186-5 A.4.2):
+ * draw random bytes, mask the leading byte to the curve's bit length, and
+ * keep the candidate only if the curve's scalar_validate accepts it.
+ *
+ * privKey must already point at a buffer of exactly
+ * EC_GetScalarSize(ecParams) bytes. At most 100 candidates are tried.
+ *
+ * Errors: SEC_ERROR_INVALID_ARGS (bad buffer),
+ * SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE (no validator or unknown curve),
+ * SEC_ERROR_NEED_RANDOM (RNG failure), SEC_ERROR_BAD_KEY (no valid
+ * candidate found).
+ */
+
+SECStatus
+ec_GenerateRandomPrivateKey(ECParams *ecParams, SECItem *privKey)
+{
+    const unsigned int scalarLen = EC_GetScalarSize(ecParams);
+    const ECMethod *method;
+    uint8_t topByteMask;
+    SECStatus status = SECFailure;
+    int attemptsLeft;
+
+    if (privKey->data == NULL || privKey->len != scalarLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    method = ec_get_method_from_name(ecParams->name);
+    if (method == NULL || method->scalar_validate == NULL) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    /* Only P-521 has a partially used leading byte (one significant bit). */
+    if (ecParams->name == ECCurve_NIST_P521) {
+        topByteMask = 0x01;
+    } else if (ecParams->name == ECCurve25519 ||
+               ecParams->name == ECCurve_NIST_P256 ||
+               ecParams->name == ECCurve_NIST_P384) {
+        topByteMask = 0xff;
+    } else {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    for (attemptsLeft = 100; attemptsLeft > 0; --attemptsLeft) {
+        if (RNG_GenerateGlobalRandomBytes(privKey->data, scalarLen) != SECSuccess) {
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            return SECFailure;
+        }
+        privKey->data[0] &= topByteMask;
+        NSS_CLASSIFY(privKey->data, privKey->len);
+        status = method->scalar_validate(privKey);
+        if (status == SECSuccess) {
+            break;
+        }
+    }
+
+    if (status != SECSuccess) {
+        /* Every candidate was rejected. */
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+    }
+
+    return status;
+}
+
+/* Generates a new EC key pair. The private key is a random value drawn
+ * by ec_GenerateRandomPrivateKey() and the public key is the result of
+ * performing a scalar point multiplication of that value with the
+ * curve's base point (done by ec_NewKey()).
+ *
+ * The temporary random scalar is zeroized and freed before returning.
+ */
+SECStatus
+EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey)
+{
+    SECStatus status = SECFailure;
+    SECItem scalar = { siBuffer, NULL, 0 };
+
+    if (!ecParams || ecParams->name == ECCurve_noName || !privKey) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    SECITEM_AllocItem(NULL, &scalar, EC_GetScalarSize(ecParams));
+    if (scalar.data == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        status = SECFailure;
+    } else {
+        status = ec_GenerateRandomPrivateKey(ecParams, &scalar);
+        if (status == SECSuccess && scalar.data != NULL) {
+            /* generate public key */
+            status = ec_NewKey(ecParams, privKey, scalar.data, scalar.len);
+        }
+    }
+
+    if (scalar.data) {
+        SECITEM_ZfreeItem(&scalar, PR_FALSE);
+    }
+#if EC_DEBUG
+    printf("EC_NewKey returning %s\n",
+           (status == SECSuccess) ? "success" : "failure");
+#endif
+
+    return status;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. The ECDH primitive when used without the cofactor does
+ * not address small subgroup attacks, which may occur when the
+ * public key is not valid. These attacks can be prevented by
+ * validating the public key before using ECDH.
+ *
+ * ecParams    - curve parameters; must name a curve.
+ * publicValue - encoded public point; must be non-empty with non-NULL data.
+ *
+ * Returns SECSuccess if the point is valid. On failure returns SECFailure
+ * with SEC_ERROR_INVALID_ARGS (bad arguments or unknown plain-field curve),
+ * SEC_ERROR_UNSUPPORTED_EC_POINT_FORM (not an uncompressed point),
+ * SEC_ERROR_BAD_KEY (wrong length or invalid point), or an error mapped
+ * from the mp_int layer.
+ */
+SECStatus
+EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue)
+{
+    mp_int Px, Py;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    unsigned int len;
+
+    /* data is dereferenced below, so a NULL buffer must be rejected here
+     * together with the other argument checks. */
+    if (!ecParams || ecParams->name == ECCurve_noName ||
+        !publicValue || !publicValue->data || !publicValue->len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Uses curve specific code for point validation. */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->pt_validate == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        rv = method->pt_validate(publicValue);
+        if (rv != SECSuccess) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+        }
+        return rv;
+    }
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (((unsigned int)ecParams->fieldID.size) + 7) >> 3;
+    if (publicValue->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    } else if (publicValue->len != (2 * len + 1)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Zero the digit pointers first so mp_clear in cleanup is always safe. */
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+
+    /* Initialize Px and Py */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Px, publicValue->data + 1, (mp_size)len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Py, publicValue->data + 1 + len, (mp_size)len));
+
+    /* construct from named params */
+    group = ECGroup_fromName(ecParams->name);
+    if (group == NULL) {
+        /*
+         * ECGroup_fromName fails if ecParams->name is not a valid
+         * ECCurveName value, or if we run out of memory, or perhaps
+         * for other reasons. Unfortunately if ecParams->name is a
+         * valid ECCurveName value, we don't know what the right error
+         * code should be because ECGroup_fromName doesn't return an
+         * error code to the caller. Set err to MP_UNDEF because
+         * that's what ECGroup_fromName uses internally.
+         */
+        if ((ecParams->name <= ECCurve_noName) ||
+            (ecParams->name >= ECCurve_pastLastCurve)) {
+            err = MP_BADARG;
+        } else {
+            err = MP_UNDEF;
+        }
+        goto cleanup;
+    }
+
+    /* validate public point */
+    if ((err = ECPoint_validate(group, &Px, &Py)) < MP_YES) {
+        if (err == MP_NO) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            rv = SECFailure;
+            err = MP_OKAY; /* don't change the error code */
+        }
+        goto cleanup;
+    }
+
+    rv = SECSuccess;
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Performs an ECDH key derivation by computing the scalar point
+** multiplication of privateValue and publicValue (with or without the
+** cofactor) and returns the x-coordinate of the resulting elliptic
+** curve point in derived secret. If successful, derivedSecret->data
+** is set to the address of the newly allocated buffer containing the
+** derived secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+**
+** The public value is validated with EC_ValidatePublicKey first. Curves
+** whose field type is ec_field_plain are handled by the curve's pt_mul
+** method; all other curves use the generic mp_int code path. On failure
+** SECFailure is returned and any partially built secret is zeroed and freed.
+*/
+SECStatus
+ECDH_Derive(SECItem *publicValue,
+            ECParams *ecParams,
+            SECItem *privateValue,
+            PRBool withCofactor,
+            SECItem *derivedSecret)
+{
+    SECStatus rv = SECFailure;
+    unsigned int len = 0;
+    mp_err err = MP_OKAY;
+    SECItem pointQ = { siBuffer, NULL, 0 };
+    mp_int k;        /* to hold the private value */
+    mp_int cofactor; /* curve cofactor; only initialised when it is used */
+#if EC_DEBUG
+    int i;
+#endif
+
+    if (!publicValue || !publicValue->len ||
+        !ecParams || ecParams->name == ECCurve_noName ||
+        !privateValue || !privateValue->len || !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /*
+     * Make sure the point is on the requested curve to avoid
+     * certain small subgroup attacks.
+     */
+    if (EC_ValidatePublicKey(ecParams, publicValue) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Perform curve specific multiplication using ECMethod */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method;
+        memset(derivedSecret, 0, sizeof(*derivedSecret));
+        derivedSecret = SECITEM_AllocItem(NULL, derivedSecret, EC_GetScalarSize(ecParams));
+        if (derivedSecret == NULL) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+        method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->pt_validate == NULL ||
+            method->pt_mul == NULL) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+            rv = SECFailure;
+            goto done;
+        }
+        rv = method->pt_mul(derivedSecret, privateValue, publicValue);
+        if (rv != SECSuccess) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+        }
+        goto done;
+    }
+
+    /*
+     * We fail if the public value is the point at infinity, since
+     * this produces predictable results.
+     */
+    if (ec_point_at_infinity(publicValue)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Zero the digit pointers before the first possible jump to cleanup so
+     * that mp_clear is safe on integers that were never initialised. */
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&cofactor) = 0;
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    len = (ecParams->fieldID.size + 7) >> 3;
+    pointQ.len = EC_GetPointSize(ecParams);
+    if ((pointQ.data = PORT_Alloc(pointQ.len)) == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, privateValue->data,
+                                         (mp_size)privateValue->len));
+
+    if (withCofactor && (ecParams->cofactor != 1)) {
+        /* multiply k with the cofactor; cofactor is released in cleanup so
+         * that a failing mp_mul cannot leak it */
+        CHECK_MPI_OK(mp_init(&cofactor));
+        mp_set(&cofactor, ecParams->cofactor);
+        CHECK_MPI_OK(mp_mul(&k, &cofactor, &k));
+    }
+
+    /* Multiply our private key and peer's public point */
+    if (ec_points_mul(ecParams, NULL, &k, publicValue, &pointQ) != SECSuccess) {
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointQ)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY); /* XXX better error code? */
+        goto cleanup;
+    }
+
+    /* Allocate memory for the derived secret and copy
+     * the x co-ordinate of pointQ into it.
+     */
+    if (SECITEM_AllocItem(NULL, derivedSecret, len) == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+    memcpy(derivedSecret->data, pointQ.data + 1, len);
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("derived_secret:\n");
+    for (i = 0; i < derivedSecret->len; i++)
+        printf("%02x:", derivedSecret->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    mp_clear(&k);
+    mp_clear(&cofactor);
+
+    if (pointQ.data) {
+        PORT_ZFree(pointQ.data, pointQ.len);
+    }
+
+done:
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+    }
+    if (rv != SECSuccess) {
+        SECITEM_ZfreeItem(derivedSecret, PR_FALSE);
+    }
+    return rv;
+}
+
+/* Computes the ECDSA signature (a concatenation of two values r and s)
+ * on the digest using the given key and the random value kb (used in
+ * computing s).
+ *
+ * key       - private key; key->ecParams selects the curve.
+ * signature - output item. If signature->data is NULL, only signature->len
+ *             is set (to twice the order length) and SECSuccess is
+ *             returned. Otherwise signature->len must be at least twice the
+ *             order length, else SEC_ERROR_OUTPUT_LEN is set.
+ * digest    - message digest to sign.
+ * kb, kblen - octets of the per-signature secret k; on the generic path k
+ *             must lie in [1, n-1], else SEC_ERROR_NEED_RANDOM is set.
+ *
+ * Curves whose field type is ec_field_plain are handed to the curve's
+ * sign_digest method; every other curve uses the generic mp_int code that
+ * follows. Returns SECSuccess or SECFailure.
+ */
+
+static SECStatus
+ec_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+ const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+ SECStatus rv = SECFailure;
+ ECParams *ecParams = NULL;
+ mp_err err = MP_OKAY;
+ int flen = 0; /* length in bytes of the field size */
+ unsigned olen; /* length in bytes of the base point order */
+
+ /* Check args */
+ if (!key || !signature || !digest || !kb || (kblen <= 0)) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ rv = SECFailure;
+ goto done;
+ }
+
+ ecParams = &(key->ecParams);
+ flen = (ecParams->fieldID.size + 7) >> 3; /* field size in bits rounded up to whole bytes */
+ olen = ecParams->order.len;
+ if (signature->data == NULL) {
+ /* a call to get the signature length only */
+ signature->len = 2 * olen;
+ rv = SECSuccess;
+ goto done;
+ }
+ if (signature->len < 2 * olen) {
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ rv = SECFailure;
+ goto done;
+ }
+
+ /* Perform curve specific signature using ECMethod */
+ if (ecParams->fieldID.type == ec_field_plain) {
+ const ECMethod *method = ec_get_method_from_name(ecParams->name);
+ if (method == NULL || method->sign_digest == NULL) {
+ PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+ rv = SECFailure;
+ goto done;
+ }
+ rv = method->sign_digest(key, signature, digest, kb, kblen);
+ if (rv != SECSuccess) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ }
+ goto done;
+ }
+
+ mp_int x1;
+ mp_int d, k; /* private key, random integer */
+ mp_int r, s; /* tuple (r, s) is the signature */
+ mp_int t; /* holding tmp values */
+ mp_int n;
+ mp_int ar; /* blinding value */
+ SECItem kGpoint = { siBuffer, NULL, 0 };
+ unsigned char *t2 = NULL;
+ unsigned obits; /* length in bits of the base point order */
+
+#if EC_DEBUG
+ char mpstr[256];
+#endif
+
+ /* Initialize MPI integers. */
+ /* must happen before the first potential call to cleanup */
+ MP_DIGITS(&x1) = 0;
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&k) = 0;
+ MP_DIGITS(&r) = 0;
+ MP_DIGITS(&s) = 0;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&t) = 0;
+ MP_DIGITS(&ar) = 0;
+
+ CHECK_MPI_OK(mp_init(&x1));
+ CHECK_MPI_OK(mp_init(&d));
+ CHECK_MPI_OK(mp_init(&k));
+ CHECK_MPI_OK(mp_init(&r));
+ CHECK_MPI_OK(mp_init(&s));
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&t));
+ CHECK_MPI_OK(mp_init(&ar));
+
+ SECITEM_TO_MPINT(ecParams->order, &n);
+ SECITEM_TO_MPINT(key->privateValue, &d);
+
+ CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, kblen));
+ /* Make sure k is in the interval [1, n-1] */
+ if ((mp_cmp_z(&k) <= 0) || (mp_cmp(&k, &n) >= 0)) {
+#if EC_DEBUG
+ printf("k is outside [1, n-1]\n");
+ mp_tohex(&k, mpstr);
+ printf("k : %s \n", mpstr);
+ mp_tohex(&n, mpstr);
+ printf("n : %s \n", mpstr);
+#endif
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ goto cleanup;
+ }
+
+ /*
+ ** ANSI X9.62, Section 5.3.2, Step 2
+ **
+ ** Compute kG
+ */
+ kGpoint.len = EC_GetPointSize(ecParams);
+ kGpoint.data = PORT_Alloc(kGpoint.len);
+ if ((kGpoint.data == NULL) ||
+ (ec_points_mul(ecParams, &k, NULL, NULL, &kGpoint) != SECSuccess))
+ goto cleanup;
+ NSS_DECLASSIFY(kGpoint.data, kGpoint.len); /* Declassifying the r component */
+ /*
+ ** ANSI X9.62, Section 5.3.3, Step 1
+ **
+ ** Extract the x co-ordinate of kG into x1
+ */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&x1, kGpoint.data + 1,
+ (mp_size)flen));
+
+ /*
+ ** ANSI X9.62, Section 5.3.3, Step 2
+ **
+ ** r = x1 mod n NOTE: n is the order of the curve
+ */
+ CHECK_MPI_OK(mp_mod(&x1, &n, &r));
+
+ /*
+ ** ANSI X9.62, Section 5.3.3, Step 3
+ **
+ ** verify r != 0
+ */
+ if (mp_cmp_z(&r) == 0) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ goto cleanup;
+ }
+
+ /*
+ ** ANSI X9.62, Section 5.3.3, Step 4
+ **
+ ** s = (k**-1 * (HASH(M) + d*r)) mod n
+ */
+ SECITEM_TO_MPINT(*digest, &s); /* s = HASH(M) */
+
+ /* In the definition of EC signing, digests are truncated
+ * to the length of n in bits.
+ * (see SEC 1 "Elliptic Curve Digital Signature Algorithm", section 4.1). */
+ CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+ if (digest->len * 8 > obits) {
+ mpl_rsh(&s, &s, digest->len * 8 - obits);
+ }
+
+#if EC_DEBUG
+ mp_todecimal(&n, mpstr);
+ printf("n : %s (dec)\n", mpstr);
+ mp_todecimal(&d, mpstr);
+ printf("d : %s (dec)\n", mpstr);
+ mp_tohex(&x1, mpstr);
+ printf("x1: %s\n", mpstr);
+ mp_todecimal(&s, mpstr);
+ printf("digest: %s (decimal)\n", mpstr);
+ mp_todecimal(&r, mpstr);
+ printf("r : %s (dec)\n", mpstr);
+ mp_tohex(&r, mpstr);
+ printf("r : %s\n", mpstr);
+#endif
+
+ if ((t2 = PORT_Alloc(2 * ecParams->order.len)) == NULL) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+ if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */
+ PORT_Memset(t2, 0, 2 * ecParams->order.len); /* wipe the bytes of t before the buffer is reused for ar */
+ if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ CHECK_MPI_OK(mp_read_unsigned_octets(&ar, t2, 2 * ecParams->order.len)); /* ar <-$ Zn */
+
+ /* Using mp_invmod on k directly would leak bits from k. */
+ CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */
+ NSS_DECLASSIFY(MP_DIGITS(&k), MP_ALLOC(&k) * sizeof(mp_digit)); /* declassifying k here because it is masked by multiplying with ar */
+ CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+ CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */
+ CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+ /* To avoid leaking secret bits here the addition is blinded. */
+ CHECK_MPI_OK(mp_mul(&d, &ar, &t)); /* t = d * ar */
+ NSS_DECLASSIFY(MP_DIGITS(&t), MP_ALLOC(&t) * sizeof(mp_digit)); /* declassifying d here because it is masked by multiplying with ar */
+ CHECK_MPI_OK(mp_mulmod(&t, &r, &n, &d)); /* d = t * r mod n */
+ CHECK_MPI_OK(mp_mulmod(&s, &ar, &n, &t)); /* t = s * ar mod n */
+ CHECK_MPI_OK(mp_add(&t, &d, &s)); /* s = t + d */
+ CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */
+
+#if EC_DEBUG
+ mp_todecimal(&s, mpstr);
+ printf("s : %s (dec)\n", mpstr);
+ mp_tohex(&s, mpstr);
+ printf("s : %s\n", mpstr);
+#endif
+
+ /*
+ ** ANSI X9.62, Section 5.3.3, Step 5
+ **
+ ** verify s != 0
+ */
+ if (mp_cmp_z(&s) == 0) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ goto cleanup;
+ }
+
+ /*
+ **
+ ** Signature is tuple (r, s)
+ */
+ CHECK_MPI_OK(mp_to_fixlen_octets(&r, signature->data, olen));
+ CHECK_MPI_OK(mp_to_fixlen_octets(&s, signature->data + olen, olen));
+
+ signature->len = 2 * olen;
+ rv = SECSuccess;
+ err = MP_OKAY;
+
+cleanup:
+ mp_clear(&x1);
+ mp_clear(&d);
+ mp_clear(&k);
+ mp_clear(&r);
+ mp_clear(&s);
+ mp_clear(&n);
+ mp_clear(&t);
+ mp_clear(&ar);
+
+ if (t2) {
+ PORT_ZFree(t2, 2 * ecParams->order.len);
+ }
+
+ if (kGpoint.data) {
+ PORT_ZFree(kGpoint.data, kGpoint.len);
+ }
+
+done:
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+
+#if EC_DEBUG
+ printf("ECDSA signing with seed %s\n",
+ (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+ return rv;
+}
+
+/* Signs digest with key using the caller-supplied nonce kb/kblen.
+ *
+ * In normal builds this forwards to ec_SignDigestWithSeed. When EC_DEBUG or
+ * EC_DOUBLECHECK is enabled the signature is computed twice and the call
+ * only succeeds if both runs succeed and produce identical bytes.
+ *
+ * A NULL signature, or a length-only query (signature->data == NULL), has
+ * no signature bytes to compare and is forwarded as a single call.
+ */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+#if EC_DEBUG || EC_DOUBLECHECK
+    SECItem *signature2;
+    SECStatus signSuccess;
+    SECStatus signSuccessDouble;
+    SECStatus rv;
+    int signaturesEqual;
+
+    /* Nothing to double-check: let the worker report bad arguments or
+     * answer the length query without touching a NULL buffer. */
+    if (signature == NULL || signature->data == NULL) {
+        return ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+    }
+
+    signature2 = SECITEM_AllocItem(NULL, NULL, signature->len);
+    if (signature2 == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    signSuccess = ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+    signSuccessDouble = ec_SignDigestWithSeed(key, signature2, digest, kb, kblen);
+    signaturesEqual = NSS_SecureMemcmp(signature->data, signature2->data, signature->len);
+
+    if ((signaturesEqual == 0) && (signSuccess == SECSuccess) && (signSuccessDouble == SECSuccess)) {
+        rv = SECSuccess;
+    } else {
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing with seed %s after signing twice\n", (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    SECITEM_FreeItem(signature2, PR_TRUE);
+    return rv;
+#else
+    return ec_SignDigestWithSeed(key, signature, digest, kb, kblen);
+#endif
+}
+
+/*
+** Computes the ECDSA signature on the digest using the given key
+** and a freshly generated random nonce.
+**
+** key, signature and digest must all be non-NULL, otherwise
+** SEC_ERROR_INVALID_ARGS is set. The nonce buffer is zeroed and freed
+** before returning. Returns SECSuccess or SECFailure.
+*/
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+    SECItem nonceRand = { siBuffer, NULL, 0 };
+
+    /* signature is dereferenced after the signing call below, so it has to
+     * be validated here rather than only inside the signing routine. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Generate random value k */
+    SECITEM_AllocItem(NULL, &nonceRand, EC_GetScalarSize(&key->ecParams));
+    if (nonceRand.data == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    rv = ec_GenerateRandomPrivateKey(&key->ecParams, &nonceRand);
+    if (rv != SECSuccess || nonceRand.data == NULL)
+        goto cleanup;
+
+    /* Generate ECDSA signature with the specified k value */
+    rv = ECDSA_SignDigestWithSeed(key, signature, digest, nonceRand.data, nonceRand.len);
+    NSS_DECLASSIFY(signature->data, signature->len);
+
+cleanup:
+    if (nonceRand.data) {
+        SECITEM_ZfreeItem(&nonceRand, PR_FALSE);
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+    return rv;
+}
+
+/*
+** Checks the signature on the given digest using the key provided.
+**
+** The key argument must represent a valid EC public key (a point on
+** the relevant curve). If it is not a valid point, then the behavior
+** of this function is undefined. In cases where a public key might
+** not be valid, use EC_ValidatePublicKey to check.
+**
+** key       - public key; key->ecParams selects the curve.
+** signature - r followed by s, each signature->len / 2 bytes long; the
+**             length must be non-zero, even, and at most twice the order
+**             length, otherwise SEC_ERROR_INPUT_LEN is set.
+** digest    - message digest that was signed.
+**
+** Curves whose field type is ec_field_plain are handed to the curve's
+** verify_digest method; every other curve uses the generic mp_int code
+** that follows. Returns SECSuccess if the signature verifies, SECFailure
+** otherwise (SEC_ERROR_BAD_SIGNATURE for a signature that does not match).
+*/
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+ const SECItem *digest)
+{
+ SECStatus rv = SECFailure;
+ ECParams *ecParams = NULL;
+ mp_err err = MP_OKAY;
+
+ /* Check args */
+ if (!key || !signature || !digest) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ rv = SECFailure;
+ goto done;
+ }
+
+ ecParams = &(key->ecParams);
+
+ /* Perform curve specific signature verification using ECMethod */
+ if (ecParams->fieldID.type == ec_field_plain) {
+ const ECMethod *method = ec_get_method_from_name(ecParams->name);
+ if (method == NULL || method->verify_digest == NULL) {
+ PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+ rv = SECFailure;
+ goto done;
+ }
+ rv = method->verify_digest(key, signature, digest);
+ if (rv != SECSuccess) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ }
+ goto done;
+ }
+
+ mp_int r_, s_; /* tuple (r', s') is the received signature */
+ mp_int c, u1, u2, v; /* intermediate values used in verification */
+ mp_int x1;
+ mp_int n;
+ SECItem pointC = { siBuffer, NULL, 0 };
+ int slen; /* length in bytes of a half signature (r or s) */
+ int flen; /* length in bytes of the field size */
+ unsigned olen; /* length in bytes of the base point order */
+ unsigned obits; /* length in bits of the base point order */
+
+#if EC_DEBUG
+ char mpstr[256];
+ printf("ECDSA verification called\n");
+#endif
+
+ /* Initialize MPI integers. */
+ /* must happen before the first potential call to cleanup */
+ MP_DIGITS(&r_) = 0;
+ MP_DIGITS(&s_) = 0;
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&u1) = 0;
+ MP_DIGITS(&u2) = 0;
+ MP_DIGITS(&x1) = 0;
+ MP_DIGITS(&v) = 0;
+ MP_DIGITS(&n) = 0;
+
+ CHECK_MPI_OK(mp_init(&r_));
+ CHECK_MPI_OK(mp_init(&s_));
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&u1));
+ CHECK_MPI_OK(mp_init(&u2));
+ CHECK_MPI_OK(mp_init(&x1));
+ CHECK_MPI_OK(mp_init(&v));
+ CHECK_MPI_OK(mp_init(&n));
+
+ flen = (ecParams->fieldID.size + 7) >> 3; /* field size in bits rounded up to whole bytes */
+ olen = ecParams->order.len;
+ if (signature->len == 0 || signature->len % 2 != 0 ||
+ signature->len > 2 * olen) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ goto cleanup;
+ }
+ slen = signature->len / 2;
+
+ /*
+ * The incoming point has been verified in sftk_handlePublicKeyObject.
+ */
+
+ SECITEM_AllocItem(NULL, &pointC, EC_GetPointSize(ecParams));
+ if (pointC.data == NULL) {
+ goto cleanup;
+ }
+
+ /*
+ ** Convert received signature (r', s') into MPI integers.
+ */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&r_, signature->data, slen));
+ CHECK_MPI_OK(mp_read_unsigned_octets(&s_, signature->data + slen, slen));
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Steps 1 and 2
+ **
+ ** Verify that 0 < r' < n and 0 < s' < n
+ */
+ SECITEM_TO_MPINT(ecParams->order, &n);
+ if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+ mp_cmp(&r_, &n) >= 0 || mp_cmp(&s_, &n) >= 0) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ goto cleanup; /* will return rv == SECFailure */
+ }
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 3
+ **
+ ** c = (s')**-1 mod n
+ */
+ CHECK_MPI_OK(mp_invmod(&s_, &n, &c)); /* c = (s')**-1 mod n */
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 4
+ **
+ ** u1 = ((HASH(M')) * c) mod n
+ */
+ SECITEM_TO_MPINT(*digest, &u1); /* u1 = HASH(M) */
+
+ /* In the definition of EC signing, digests are truncated
+ * to the length of n in bits.
+ * (see SEC 1 "Elliptic Curve Digital Signature Algorithm", section 4.1). */
+ CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+ if (digest->len * 8 > obits) { /* u1 = HASH(M') */
+ mpl_rsh(&u1, &u1, digest->len * 8 - obits);
+ }
+
+#if EC_DEBUG
+ mp_todecimal(&r_, mpstr);
+ printf("r_: %s (dec)\n", mpstr);
+ mp_todecimal(&s_, mpstr);
+ printf("s_: %s (dec)\n", mpstr);
+ mp_todecimal(&c, mpstr);
+ printf("c : %s (dec)\n", mpstr);
+ mp_todecimal(&u1, mpstr);
+ printf("digest: %s (dec)\n", mpstr);
+#endif
+
+ CHECK_MPI_OK(mp_mulmod(&u1, &c, &n, &u1)); /* u1 = u1 * c mod n */
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 4
+ **
+ ** u2 = ((r') * c) mod n
+ */
+ CHECK_MPI_OK(mp_mulmod(&r_, &c, &n, &u2));
+
+ /*
+ ** ANSI X9.62, Section 5.4.3, Step 1
+ **
+ ** Compute u1*G + u2*Q
+ ** Here, A = u1.G B = u2.Q and C = A + B
+ ** If the result, C, is the point at infinity, reject the signature
+ */
+ if (ec_points_mul(ecParams, &u1, &u2, &key->publicValue, &pointC) != SECSuccess) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+ if (ec_point_at_infinity(&pointC)) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure;
+ goto cleanup;
+ }
+
+ CHECK_MPI_OK(mp_read_unsigned_octets(&x1, pointC.data + 1, flen));
+
+ /*
+ ** ANSI X9.62, Section 5.4.4, Step 2
+ **
+ ** v = x1 mod n
+ */
+ CHECK_MPI_OK(mp_mod(&x1, &n, &v));
+
+#if EC_DEBUG
+ mp_todecimal(&r_, mpstr);
+ printf("r_: %s (dec)\n", mpstr);
+ mp_todecimal(&v, mpstr);
+ printf("v : %s (dec)\n", mpstr);
+#endif
+
+ /*
+ ** ANSI X9.62, Section 5.4.4, Step 3
+ **
+ ** Verification: v == r'
+ */
+ if (mp_cmp(&v, &r_)) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure; /* Signature failed to verify. */
+ } else {
+ rv = SECSuccess; /* Signature verified. */
+ }
+
+#if EC_DEBUG
+ mp_todecimal(&u1, mpstr);
+ printf("u1: %s (dec)\n", mpstr);
+ mp_todecimal(&u2, mpstr);
+ printf("u2: %s (dec)\n", mpstr);
+ mp_tohex(&x1, mpstr);
+ printf("x1: %s\n", mpstr);
+ mp_todecimal(&v, mpstr);
+ printf("v : %s (dec)\n", mpstr);
+#endif
+
+cleanup:
+ mp_clear(&r_);
+ mp_clear(&s_);
+ mp_clear(&c);
+ mp_clear(&u1);
+ mp_clear(&u2);
+ mp_clear(&x1);
+ mp_clear(&v);
+ mp_clear(&n);
+
+ if (pointC.data)
+ SECITEM_ZfreeItem(&pointC, PR_FALSE);
+
+done:
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+
+#if EC_DEBUG
+ printf("ECDSA verification %s\n",
+ (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+
+ return rv;
+}
diff --git a/security/nss/lib/freebl/ec.h b/security/nss/lib/freebl/ec.h
new file mode 100644
index 0000000000..f4b8ee7334
--- /dev/null
+++ b/security/nss/lib/freebl/ec.h
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ec_h_
+#define __ec_h_
+
+#define EC_DEBUG 0
+
+#define ANSI_X962_CURVE_OID_TOTAL_LEN 10
+#define SECG_CURVE_OID_TOTAL_LEN 7
+#define PKIX_NEWCURVES_OID_TOTAL_LEN 11
+
+struct ECMethodStr { /* table of curve-specific operations for one named curve */
+ ECCurveName name; /* curve these function pointers belong to (presumably the key used by ec_get_method_from_name -- confirm) */
+ SECStatus (*pt_mul)(SECItem *result, SECItem *scalar, SECItem *point); /* point multiplication; ECDH_Derive calls it as pt_mul(derivedSecret, privateValue, publicValue) */
+ SECStatus (*pt_validate)(const SECItem *point); /* public point check used by EC_ValidatePublicKey */
+ SECStatus (*scalar_validate)(const SECItem *scalar); /* accepts or rejects a candidate private scalar during key generation */
+ SECStatus (*sign_digest)(ECPrivateKey *key, SECItem *signature, const SECItem *digest, const unsigned char *kb, const unsigned int kblen); /* ECDSA signing with caller-supplied nonce bytes kb */
+ SECStatus (*verify_digest)(ECPublicKey *key, const SECItem *signature, const SECItem *digest); /* ECDSA signature verification */
+};
+typedef struct ECMethodStr ECMethod; /* callers test individual pointers for NULL before using them */
+
+#endif /* __ec_h_ */
diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c
new file mode 100644
index 0000000000..24ba11f3b1
--- /dev/null
+++ b/security/nss/lib/freebl/ecdecode.c
@@ -0,0 +1,269 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secoid.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "ec.h"
+#include "ecl-curve.h"
+
+#define CHECK_OK(func) \
+ if (func == NULL) \
+ goto cleanup
+#define CHECK_SEC_OK(func) \
+ if (SECSuccess != (rv = func)) \
+ goto cleanup
+
+/* Duplicates every field of srcParams into dstParams. Item buffers are
+ * copied with SECITEM_CopyItem using the given arena, and dstParams->arena
+ * is set to that arena. Returns SECFailure as soon as one copy fails,
+ * leaving dstParams partially filled.
+ */
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+              const ECParams *srcParams)
+{
+    const SECItem *srcField;
+    SECItem *dstField;
+
+    dstParams->arena = arena;
+    dstParams->type = srcParams->type;
+    dstParams->fieldID.size = srcParams->fieldID.size;
+    dstParams->fieldID.type = srcParams->fieldID.type;
+
+    /* Prime-field and plain curves store the prime; others the polynomial. */
+    if (srcParams->fieldID.type == ec_field_GFp ||
+        srcParams->fieldID.type == ec_field_plain) {
+        srcField = &srcParams->fieldID.u.prime;
+        dstField = &dstParams->fieldID.u.prime;
+    } else {
+        srcField = &srcParams->fieldID.u.poly;
+        dstField = &dstParams->fieldID.u.poly;
+    }
+    if (SECITEM_CopyItem(arena, dstField, srcField) != SECSuccess) {
+        return SECFailure;
+    }
+
+    dstParams->fieldID.k1 = srcParams->fieldID.k1;
+    dstParams->fieldID.k2 = srcParams->fieldID.k2;
+    dstParams->fieldID.k3 = srcParams->fieldID.k3;
+
+    if (SECITEM_CopyItem(arena, &dstParams->curve.a, &srcParams->curve.a) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->curve.b, &srcParams->curve.b) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->curve.seed, &srcParams->curve.seed) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->base, &srcParams->base) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->order, &srcParams->order) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->DEREncoding, &srcParams->DEREncoding) != SECSuccess) {
+        return SECFailure;
+    }
+
+    dstParams->name = srcParams->name;
+    if (SECITEM_CopyItem(arena, &dstParams->curveOID, &srcParams->curveOID) != SECSuccess) {
+        return SECFailure;
+    }
+    dstParams->cofactor = srcParams->cofactor;
+
+    return SECSuccess;
+}
+
+/* Fills params from the built-in ecCurve_map entry for the named curve.
+ * The item fields of params are pointed at the table's byte arrays, not
+ * copied. Only ec_field_GFp and ec_field_plain are accepted as field_type.
+ * Returns SECFailure for an out-of-range name, a curve with no table entry,
+ * or an unsupported field type.
+ */
+static SECStatus
+gf_populate_params_bytes(ECCurveName name, ECFieldType field_type, ECParams *params)
+{
+    const ECCurveBytes *entry;
+
+    if (name < ECCurve_noName || name > ECCurve_pastLastCurve) {
+        return SECFailure;
+    }
+
+    params->name = name;
+    entry = ecCurve_map[params->name];
+    if (entry == NULL) {
+        return SECFailure;
+    }
+
+    params->fieldID.size = entry->size;
+    params->fieldID.type = field_type;
+    if (!(field_type == ec_field_GFp || field_type == ec_field_plain)) {
+        return SECFailure;
+    }
+
+    /* field prime */
+    params->fieldID.u.prime.len = entry->scalarSize;
+    params->fieldID.u.prime.data = (unsigned char *)entry->irr;
+    /* curve coefficients */
+    params->curve.a.len = entry->scalarSize;
+    params->curve.a.data = (unsigned char *)entry->curvea;
+    params->curve.b.len = entry->scalarSize;
+    params->curve.b.data = (unsigned char *)entry->curveb;
+    /* base point, order and cofactor */
+    params->base.len = entry->pointSize;
+    params->base.data = (unsigned char *)entry->base;
+    params->order.len = entry->scalarSize;
+    params->order.data = (unsigned char *)entry->order;
+    params->cofactor = entry->cofactor;
+
+    return SECSuccess;
+}
+
+/* Fills params from a DER-encoded named-curve OID.
+ *
+ * encodedParams must have one of the three supported total lengths and
+ * start with the OBJECT IDENTIFIER tag; the OID bytes after the two-byte
+ * header are looked up with SECOID_FindOIDTag. The OID is copied into
+ * params->curveOID from the arena, and the curve data is populated for
+ * P-256, P-384, P-521 and Curve25519.
+ *
+ * Returns SECSuccess on success. Returns SECFailure with
+ * SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE for an unrecognised or unsupported
+ * encoding, or with SEC_ERROR_NO_MEMORY if the OID copy cannot be allocated.
+ */
+SECStatus
+EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams,
+              ECParams *params)
+{
+    SECStatus rv = SECFailure;
+    SECOidTag tag;
+    SECItem oid = { siBuffer, NULL, 0 };
+
+#if EC_DEBUG
+    int i;
+
+    printf("Encoded params in EC_DecodeParams: ");
+    for (i = 0; i < encodedParams->len; i++) {
+        printf("%02x:", encodedParams->data[i]);
+    }
+    printf("\n");
+#endif
+
+    if ((encodedParams->len != ANSI_X962_CURVE_OID_TOTAL_LEN) &&
+        (encodedParams->len != SECG_CURVE_OID_TOTAL_LEN) &&
+        (encodedParams->len != PKIX_NEWCURVES_OID_TOTAL_LEN)) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    /* Skip the two-byte DER header (tag and length). */
+    oid.len = encodedParams->len - 2;
+    oid.data = encodedParams->data + 2;
+    if ((encodedParams->data[0] != SEC_ASN1_OBJECT_ID) ||
+        ((tag = SECOID_FindOIDTag(&oid)) == SEC_OID_UNKNOWN)) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    params->arena = arena;
+    params->cofactor = 0;
+    params->type = ec_params_named;
+    params->name = ECCurve_noName;
+
+    /* Fill out curveOID */
+    params->curveOID.data = (unsigned char *)PORT_ArenaAlloc(arena, oid.len);
+    if (params->curveOID.data == NULL) {
+        /* Report the allocation failure itself instead of falling through
+         * to the "unsupported curve" error set at cleanup. */
+        params->curveOID.len = 0;
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    params->curveOID.len = oid.len;
+    memcpy(params->curveOID.data, oid.data, oid.len);
+
+#if EC_DEBUG
+    printf("Curve: %s\n", SECOID_FindOIDTagDescription(tag));
+#endif
+
+    switch (tag) {
+        case SEC_OID_ANSIX962_EC_PRIME256V1:
+            /* Populate params for prime256v1 aka secp256r1
+             * (the NIST P-256 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_X9_62_PRIME_256V1,
+                                                  ec_field_plain, params));
+            break;
+
+        case SEC_OID_SECG_EC_SECP384R1:
+            /* Populate params for secp384r1
+             * (the NIST P-384 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_SECG_PRIME_384R1,
+                                                  ec_field_GFp, params));
+            break;
+
+        case SEC_OID_SECG_EC_SECP521R1:
+            /* Populate params for secp521r1
+             * (the NIST P-521 curve)
+             */
+            CHECK_SEC_OK(gf_populate_params_bytes(ECCurve_SECG_PRIME_521R1,
+                                                  ec_field_GFp, params));
+            break;
+
+        case SEC_OID_CURVE25519:
+            /* Populate params for Curve25519 */
+            params->type = ec_params_montgomery_named;
+            CHECK_SEC_OK(gf_populate_params_bytes(ECCurve25519,
+                                                  ec_field_plain,
+                                                  params));
+            break;
+
+        default:
+            /* Known OID but no curve data: rv stays SECFailure. */
+            break;
+    }
+
+cleanup:
+    /* cofactor is still 0 when no curve data was populated. */
+    if (!params->cofactor) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+#if EC_DEBUG
+        printf("Unrecognized curve, returning NULL params\n");
+#endif
+    }
+
+    return rv;
+}
+
+/* Decodes DER-encoded curve parameters into a newly allocated ECParams.
+ *
+ * A fresh arena is created; the ECParams structure, a copy of the encoding
+ * and everything EC_FillParams allocates live in it. On success *ecparams
+ * receives the structure and the caller owns the arena through
+ * (*ecparams)->arena. On any failure the arena is freed, *ecparams is left
+ * untouched and SECFailure is returned.
+ */
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams)
+{
+    PLArenaPool *arena;
+    ECParams *params;
+    SECStatus rv = SECFailure;
+
+    /* Initialize an arena for the ECParams structure */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    params = (ECParams *)PORT_ArenaZAlloc(arena, sizeof(ECParams));
+    if (!params) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Copy the encoded params; bail out before memcpy if the buffer could
+     * not be allocated. */
+    if (SECITEM_AllocItem(arena, &(params->DEREncoding),
+                          encodedParams->len) == NULL) {
+        PORT_FreeArena(arena, PR_TRUE);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    memcpy(params->DEREncoding.data, encodedParams->data, encodedParams->len);
+
+    /* Fill out the rest of the ECParams structure based on
+     * the encoded params
+     */
+    rv = EC_FillParams(arena, encodedParams, params);
+    if (rv == SECFailure) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    *ecparams = params;
+    return SECSuccess;
+}
+
+/* Returns the size in bytes of an encoded point on the curve in params.
+ *
+ * Curves without an ecCurve_map entry fall back to a size derived from the
+ * field size: two coordinates plus one leading point-form byte.
+ * Curve25519 points consist of the X coordinate only.
+ */
+int
+EC_GetPointSize(const ECParams *params)
+{
+    const ECCurveName curve = params->name;
+    const ECCurveBytes *entry = NULL;
+
+    if (curve >= ECCurve_noName && curve <= ECCurve_pastLastCurve) {
+        entry = ecCurve_map[curve];
+    }
+
+    if (entry == NULL) {
+        /* unknown curve: compute the size from the field size in params */
+        int coordBytes = (params->fieldID.size + 7) / 8;
+        return coordBytes * 2 + 1;
+    }
+
+    if (curve == ECCurve25519) {
+        /* Only X here */
+        return entry->scalarSize;
+    }
+    return entry->pointSize - 1;
+}
+
+/* Returns the size in bytes of a scalar (private value) for the curve in
+ * params: the ecCurve_map entry's scalarSize when the curve is known,
+ * otherwise the field size rounded up to whole bytes.
+ */
+int
+EC_GetScalarSize(const ECParams *params)
+{
+    const ECCurveName curve = params->name;
+    const ECCurveBytes *entry = NULL;
+
+    if (curve >= ECCurve_noName && curve <= ECCurve_pastLastCurve) {
+        entry = ecCurve_map[curve];
+    }
+
+    if (entry != NULL) {
+        return entry->scalarSize;
+    }
+
+    /* unknown curve: derive the scalar size from the field size in params */
+    int fieldBytes = (params->fieldID.size + 7) / 8;
+    return fieldBytes;
+}
diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README
new file mode 100644
index 0000000000..2996822c88
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/README
@@ -0,0 +1,163 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+The ECL exposes routines for constructing and converting curve
+parameters for internal use.
+
+
+HEADER FILES
+============
+
+ecl-exp.h - Exports data structures and curve names. For use by code
+that does not have access to mp_ints.
+
+ecl-curve.h - Provides hex encodings (in the form of ECCurveParams
+structs) of standardized elliptic curve domain parameters and mappings
+from ECCurveName to ECCurveParams. For use by code that does not have
+access to mp_ints.
+
+ecl.h - Interface to constructors for curve parameters and group object,
+and point multiplication operations. Used by higher level algorithms
+(like ECDH and ECDSA) to actually perform elliptic curve cryptography.
+
+ecl-priv.h - Data structures and functions for internal use within the
+library.
+
+ecp.h - Internal header file that contains all functions for point
+arithmetic over prime fields.
+
+DATA STRUCTURES AND TYPES
+=========================
+
+ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic
+curve domain parameters.
+
+ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding
+of elliptic curve domain parameters. Can be generated by a user
+and passed to ECGroup_fromHex or can be generated from a name by
+EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for
+the standardized curves defined by ECCurveName.
+
+ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that
+represents a group of elliptic curve points for a particular set of
+elliptic curve domain parameters. Contains all domain parameters (curve
+a and b, field, base point) as well as pointers to the functions that
+should be used for point arithmetic and the underlying field GFMethod.
+Generated by either ECGroup_fromHex or ECGroup_fromName.
+
+GFMethod (from ecl-priv.h) - Represents a field underlying a set of
+elliptic curve domain parameters. Contains the irreducible that defines
+the field (either the prime or the binary polynomial) as well as
+pointers to the functions that should be used for field arithmetic.
+
+ARITHMETIC FUNCTIONS
+====================
+
+Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul
+or ECPoints_mul (from ecl.h) to do point arithmetic. These functions
+will choose which underlying algorithms to use, based on the ECGroup
+structure.
+
+Point Multiplication
+--------------------
+
+ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers.
+It also provides two implementations for the pts_mul operation -
+ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and
+ec_pts_mul_simul_w2 (which does a simultaneous point multiplication
+using a table with window size 2*2).
+
+ec_naf.c provides an implementation of an algorithm to calculate a
+non-adjacent form of a scalar, minimizing the number of point
+additions that need to be done in a point multiplication.
+
+Point Arithmetic over Prime Fields
+----------------------------------
+
+ecp_aff.c provides point arithmetic using affine coordinates.
+
+ecp_jac.c provides point arithmetic using Jacobian projective
+coordinates and mixed Jacobian-affine coordinates. (Jacobian projective
+coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2,
+y=Y/Z^3).
+
+ecp_jm.c provides point arithmetic using Modified Jacobian
+coordinates and mixed Modified_Jacobian-affine coordinates.
+(Modified Jacobian coordinates represent a point (x, y)
+as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is
+the linear coefficient in the curve defining equation).
+
+ecp_192.c and ecp_224.c provide optimized field arithmetic.
+
+Field Arithmetic
+----------------
+
+ecl_gf.c provides constructors for field objects (GFMethod) with the
+functions GFMethod_cons*. It also provides wrappers around the basic
+field operations.
+
+Prime Field Arithmetic
+----------------------
+
+The mpi library provides the basic prime field arithmetic.
+
+ecp_mont.c provides wrappers around the Montgomery multiplication
+functions from the mpi library and adds encoding and decoding functions.
+It also provides the function to construct a GFMethod object using
+Montgomery multiplication.
+
+ecp_192.c and ecp_224.c provide optimized modular reduction for the
+fields defined by nistp192 and nistp224 primes.
+
+ecl_gf.c provides wrappers around the basic field operations.
+
+Field Encoding
+--------------
+
+By default, field elements are encoded in their basic form. It is
+possible to use an alternative encoding, however. For example, it is
+possible to use Montgomery representation of prime field elements and
+take advantage of the fast modular multiplication that Montgomery
+representation provides. The process of converting from basic form to
+Montgomery representation is called field encoding, and the opposite
+process would be field decoding. All internal point operations assume
+that the operands are field encoded as appropriate. By rewiring the
+underlying field arithmetic to perform operations on these encoded
+values, the same overlying point arithmetic operations can be used
+regardless of field representation.
+
+ALGORITHM WIRING
+================
+
+The EC library allows point and field arithmetic algorithms to be
+substituted ("wired-in") on a fine-grained basis. This allows for
+generic algorithms and algorithms that are optimized for a particular
+curve, field, or architecture, to coexist and to be automatically
+selected at runtime.
+
+Wiring Mechanism
+----------------
+
+The ECGroup and GFMethod structure contain pointers to the point and
+field arithmetic functions, respectively, that are to be used in
+operations.
+
+The selection of algorithms to use is handled in the function
+ecgroup_fromNameAndHex in ecl.c.
+
+Default Wiring
+--------------
+
+Curves over prime fields by default use Montgomery field arithmetic,
+point multiplication using 5-bit window non-adjacent-form with
+Modified Jacobian coordinates, and 2*2-bit simultaneous point
+multiplication using Jacobian coordinates.
+(Wiring in function ECGroup_consGFp_mont in ecl.c.)
+
+Curves over prime fields that have optimized modular reduction (i.e.,
+secp160r1, nistp192, and nistp224) do not use Montgomery field
+arithmetic. Instead, they use basic field arithmetic with their
+optimized reduction (as in ecp_192.c and ecp_224.c). They
+use the same point multiplication and simultaneous point multiplication
+algorithms as other curves over prime fields.
diff --git a/security/nss/lib/freebl/ecl/curve25519_32.c b/security/nss/lib/freebl/ecl/curve25519_32.c
new file mode 100644
index 0000000000..c282456809
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_32.c
@@ -0,0 +1,1213 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/*
+ * Derived from machine-generated code via Fiat-Crypto:
+ * https://github.com/mit-plv/fiat-crypto and https://github.com/briansmith/ring
+ *
+ * The below captures notable changes:
+ *
+ * 1. Convert custom integer types to stdint.h types
+ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "ecl-priv.h"
+
+/* fe means field element. Here the field is \Z/(2^255-19). An element t,
+ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+ * t[3]+2^102 t[4]+...+2^230 t[9], i.e. even-indexed limbs carry 26 bits and
+ * odd-indexed limbs carry 25 bits of the value.
+ * fe is the "tight" form: the bounds checked by assert_fe are 0x4666666 for
+ * even-indexed limbs and 0x2333333 for odd-indexed limbs (about 1.1*2^26 and
+ * 1.1*2^25 respectively).
+ * Multiplication and carrying produce fe from fe_loose.
+ */
+typedef struct fe {
+ uint32_t v[10];
+} fe;
+
+/* fe_loose is the "loose" form of a field element, with the same limb layout
+ * as fe but wider limb bounds: the bounds checked by assert_fe_loose are
+ * 0xd333332 for even-indexed limbs and 0x6999999 for odd-indexed limbs
+ * (about 3.3*2^26 and 3.3*2^25 respectively).
+ * Addition and subtraction produce fe_loose from (fe, fe).
+ */
+typedef struct fe_loose {
+ uint32_t v[10];
+} fe_loose;
+
+/*
+ * assert_fe(f) checks, via PORT_Assert, that each of the 10 limbs of |f| is
+ * within the tight bound: at most 0x4666666 for even indices and at most
+ * 0x2333333 for odd indices.
+ * |f| is parenthesized in the expansion so that any array or pointer
+ * expression (not just a simple name or member access) indexes correctly.
+ */
+#define assert_fe(f) \
+    do { \
+        for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
+            PORT_Assert((f)[_assert_fe_i] <= \
+                        ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
+        } \
+    } while (0)
+
+/*
+ * assert_fe_loose(f) checks, via PORT_Assert, that each of the 10 limbs of
+ * |f| is within the loose bound: at most 0xd333332 for even indices and at
+ * most 0x6999999 for odd indices.
+ * |f| is parenthesized in the expansion so that any array or pointer
+ * expression (not just a simple name or member access) indexes correctly.
+ */
+#define assert_fe_loose(f) \
+    do { \
+        for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
+            PORT_Assert((f)[_assert_fe_i] <= \
+                        ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
+        } \
+    } while (0)
+
+/*
+ * The function fiat_25519_subborrowx_u26 is a subtraction with borrow.
+ * It computes arg2 - arg1 - arg3 for a 26-bit limb, where arg1 is the
+ * incoming borrow.
+ *
+ * Parameters:
+ * out1: receives the low 26 bits of the difference
+ * out2: receives the outgoing borrow (0 or 1 for in-bounds inputs)
+ * arg1: incoming borrow
+ * arg2: minuend limb
+ * arg3: subtrahend limb
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^26
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^26⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x3ffffff]
+ * arg3: [0x0 ~> 0x3ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x3ffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_25519_subborrowx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
+{
+ int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); /* signed difference; negative when a borrow is needed */
+ int8_t x2 = (int8_t)(x1 >> 26); /* NOTE(review): relies on >> of a negative int32_t being an arithmetic shift (yielding -1) */
+ uint32_t x3 = (x1 & UINT32_C(0x3ffffff)); /* low 26 bits of the difference */
+ *out1 = x3;
+ *out2 = (uint8_t)(0x0 - x2); /* negate x2 so the borrow is stored as a non-negative value */
+}
+
+/*
+ * The function fiat_25519_subborrowx_u25 is a subtraction with borrow.
+ * It computes arg2 - arg1 - arg3 for a 25-bit limb, where arg1 is the
+ * incoming borrow.
+ *
+ * Parameters:
+ * out1: receives the low 25 bits of the difference
+ * out2: receives the outgoing borrow (0 or 1 for in-bounds inputs)
+ * arg1: incoming borrow
+ * arg2: minuend limb
+ * arg3: subtrahend limb
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^25
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^25⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x1ffffff]
+ * arg3: [0x0 ~> 0x1ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x1ffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_25519_subborrowx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
+{
+ int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); /* signed difference; negative when a borrow is needed */
+ int8_t x2 = (int8_t)(x1 >> 25); /* NOTE(review): relies on >> of a negative int32_t being an arithmetic shift (yielding -1) */
+ uint32_t x3 = (x1 & UINT32_C(0x1ffffff)); /* low 25 bits of the difference */
+ *out1 = x3;
+ *out2 = (uint8_t)(0x0 - x2); /* negate x2 so the borrow is stored as a non-negative value */
+}
+
+/*
+ * The function fiat_25519_addcarryx_u26 is an addition with carry.
+ * It computes arg1 + arg2 + arg3 for a 26-bit limb, where arg1 is the
+ * incoming carry.
+ *
+ * Parameters:
+ * out1: receives the low 26 bits of the sum
+ * out2: receives the outgoing carry (the bits of the sum above bit 25)
+ * arg1: incoming carry
+ * arg2: first addend limb
+ * arg3: second addend limb
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^26
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^26⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x3ffffff]
+ * arg3: [0x0 ~> 0x3ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x3ffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_25519_addcarryx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
+{
+ uint32_t x1 = ((arg1 + arg2) + arg3); /* full sum including the incoming carry */
+ uint32_t x2 = (x1 & UINT32_C(0x3ffffff)); /* low 26 bits */
+ uint8_t x3 = (uint8_t)(x1 >> 26); /* carry out */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_25519_addcarryx_u25 is an addition with carry.
+ * It computes arg1 + arg2 + arg3 for a 25-bit limb, where arg1 is the
+ * incoming carry.
+ *
+ * Parameters:
+ * out1: receives the low 25 bits of the sum
+ * out2: receives the outgoing carry (the bits of the sum above bit 24)
+ * arg1: incoming carry
+ * arg2: first addend limb
+ * arg3: second addend limb
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^25
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^25⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x1ffffff]
+ * arg3: [0x0 ~> 0x1ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x1ffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_25519_addcarryx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
+{
+ uint32_t x1 = ((arg1 + arg2) + arg3); /* full sum including the incoming carry */
+ uint32_t x2 = (x1 & UINT32_C(0x1ffffff)); /* low 25 bits */
+ uint8_t x3 = (uint8_t)(x1 >> 25); /* carry out */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_25519_cmovznz_u32 is a single-word conditional move.
+ * The selection is done with a bit mask derived from arg1 rather than with
+ * an if/else statement.
+ *
+ * Parameters:
+ * out1: receives the selected word
+ * arg1: condition; zero selects arg2, non-zero selects arg3
+ * arg2: value stored when arg1 is zero
+ * arg3: value stored when arg1 is non-zero
+ *
+ * Postconditions:
+ * out1 = (if arg1 = 0 then arg2 else arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffff]
+ * arg3: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ */
+static void
+fiat_25519_cmovznz_u32(uint32_t *out1, uint8_t arg1, uint32_t arg2, uint32_t arg3)
+{
+ uint8_t x1 = (!(!arg1)); /* normalize the condition to 0 or 1 */
+ uint32_t x2 = ((int8_t)(0x0 - x1) & UINT32_C(0xffffffff)); /* mask: 0 when x1 == 0, 0xffffffff when x1 == 1 */
+ uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); /* keep arg3 under the mask, arg2 under its complement */
+ *out1 = x3;
+}
+
+/*
+ * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order.
+ * Bytes 0..15 feed limbs 0..4 and bytes 16..31 feed limbs 5..9. Each byte is
+ * first shifted to its bit offset inside the limb it starts in; bits that
+ * spill past a limb's width (26 bits for even-indexed limbs, 25 bits for
+ * odd-indexed limbs) are then carried into the next limb.
+ *
+ * Parameters:
+ * out1: receives the 10 limbs
+ * arg1: the 32 input bytes
+ *
+ * Postconditions:
+ * eval out1 mod m = bytes_eval arg1 mod m
+ *
+ * Input Bounds:
+ * arg1: arg1[0..30] each in [0x0 ~> 0xff], arg1[31] in [0x0 ~> 0x7f]
+ * Output Bounds:
+ * out1: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ */
+static void
+fiat_25519_from_bytes(uint32_t out1[10], const uint8_t arg1[32])
+{
+ uint32_t x1 = ((uint32_t)(arg1[31]) << 18);
+ uint32_t x2 = ((uint32_t)(arg1[30]) << 10);
+ uint32_t x3 = ((uint32_t)(arg1[29]) << 2);
+ uint32_t x4 = ((uint32_t)(arg1[28]) << 20);
+ uint32_t x5 = ((uint32_t)(arg1[27]) << 12);
+ uint32_t x6 = ((uint32_t)(arg1[26]) << 4);
+ uint32_t x7 = ((uint32_t)(arg1[25]) << 21);
+ uint32_t x8 = ((uint32_t)(arg1[24]) << 13);
+ uint32_t x9 = ((uint32_t)(arg1[23]) << 5);
+ uint32_t x10 = ((uint32_t)(arg1[22]) << 23);
+ uint32_t x11 = ((uint32_t)(arg1[21]) << 15);
+ uint32_t x12 = ((uint32_t)(arg1[20]) << 7);
+ uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
+ uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
+ uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
+ uint8_t x16 = (arg1[16]);
+ uint32_t x17 = ((uint32_t)(arg1[15]) << 18);
+ uint32_t x18 = ((uint32_t)(arg1[14]) << 10);
+ uint32_t x19 = ((uint32_t)(arg1[13]) << 2);
+ uint32_t x20 = ((uint32_t)(arg1[12]) << 19);
+ uint32_t x21 = ((uint32_t)(arg1[11]) << 11);
+ uint32_t x22 = ((uint32_t)(arg1[10]) << 3);
+ uint32_t x23 = ((uint32_t)(arg1[9]) << 21);
+ uint32_t x24 = ((uint32_t)(arg1[8]) << 13);
+ uint32_t x25 = ((uint32_t)(arg1[7]) << 5);
+ uint32_t x26 = ((uint32_t)(arg1[6]) << 22);
+ uint32_t x27 = ((uint32_t)(arg1[5]) << 14);
+ uint32_t x28 = ((uint32_t)(arg1[4]) << 6);
+ uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
+ uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
+ uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
+ uint8_t x32 = (arg1[0]);
+ uint32_t x33 = (x32 + (x31 + (x30 + x29))); /* bytes 0..3: limb 0 plus bits that spill into limb 1 */
+ uint8_t x34 = (uint8_t)(x33 >> 26); /* spill from limb 0 */
+ uint32_t x35 = (x33 & UINT32_C(0x3ffffff)); /* limb 0 */
+ uint32_t x36 = (x3 + (x2 + x1)); /* bytes 29..31, positioned for limb 9 */
+ uint32_t x37 = (x6 + (x5 + x4)); /* bytes 26..28, positioned for limb 8 */
+ uint32_t x38 = (x9 + (x8 + x7)); /* bytes 23..25, positioned for limb 7 */
+ uint32_t x39 = (x12 + (x11 + x10)); /* bytes 20..22, positioned for limb 6 */
+ uint32_t x40 = (x16 + (x15 + (x14 + x13))); /* bytes 16..19: limb 5 plus bits that spill into limb 6 */
+ uint32_t x41 = (x19 + (x18 + x17)); /* bytes 13..15, positioned for limb 4 */
+ uint32_t x42 = (x22 + (x21 + x20)); /* bytes 10..12, positioned for limb 3 */
+ uint32_t x43 = (x25 + (x24 + x23)); /* bytes 7..9, positioned for limb 2 */
+ uint32_t x44 = (x28 + (x27 + x26)); /* bytes 4..6, positioned for limb 1 */
+ uint32_t x45 = (x34 + x44); /* carry chain for the low half: limbs 1..4 */
+ uint8_t x46 = (uint8_t)(x45 >> 25);
+ uint32_t x47 = (x45 & UINT32_C(0x1ffffff)); /* limb 1 */
+ uint32_t x48 = (x46 + x43);
+ uint8_t x49 = (uint8_t)(x48 >> 26);
+ uint32_t x50 = (x48 & UINT32_C(0x3ffffff)); /* limb 2 */
+ uint32_t x51 = (x49 + x42);
+ uint8_t x52 = (uint8_t)(x51 >> 25);
+ uint32_t x53 = (x51 & UINT32_C(0x1ffffff)); /* limb 3 */
+ uint32_t x54 = (x52 + x41);
+ uint32_t x55 = (x54 & UINT32_C(0x3ffffff)); /* limb 4 */
+ uint8_t x56 = (uint8_t)(x40 >> 25); /* carry chain for the high half: limbs 5..9 */
+ uint32_t x57 = (x40 & UINT32_C(0x1ffffff)); /* limb 5 */
+ uint32_t x58 = (x56 + x39);
+ uint8_t x59 = (uint8_t)(x58 >> 26);
+ uint32_t x60 = (x58 & UINT32_C(0x3ffffff)); /* limb 6 */
+ uint32_t x61 = (x59 + x38);
+ uint8_t x62 = (uint8_t)(x61 >> 25);
+ uint32_t x63 = (x61 & UINT32_C(0x1ffffff)); /* limb 7 */
+ uint32_t x64 = (x62 + x37);
+ uint8_t x65 = (uint8_t)(x64 >> 26);
+ uint32_t x66 = (x64 & UINT32_C(0x3ffffff)); /* limb 8 */
+ uint32_t x67 = (x65 + x36); /* limb 9 (stored without masking) */
+ out1[0] = x35;
+ out1[1] = x47;
+ out1[2] = x50;
+ out1[3] = x53;
+ out1[4] = x55;
+ out1[5] = x57;
+ out1[6] = x60;
+ out1[7] = x63;
+ out1[8] = x66;
+ out1[9] = x67;
+}
+
+static void
+fe_frombytes_strict(fe *h, const uint8_t s[32]) /* h = field element decoded from the 32 little-endian bytes of s */
+{
+ // |fiat_25519_from_bytes| requires the top-most bit be clear; callers that
+ // cannot guarantee this should go through |fe_frombytes|, which masks it.
+ PORT_Assert((s[31] & 0x80) == 0);
+ fiat_25519_from_bytes(h->v, s);
+ assert_fe(h->v); // the decoded limbs are expected to satisfy the tight fe bounds
+}
+
+/*
+ * h = field element decoded from the 32 bytes at |s|, ignoring the most
+ * significant bit of s[31]. The input buffer itself is not modified.
+ */
+static inline void
+fe_frombytes(fe *h, const uint8_t *s)
+{
+    uint8_t masked[32];
+
+    memcpy(masked, s, sizeof(masked));
+    /* fe_frombytes_strict asserts that the top bit is clear, so drop it. */
+    masked[31] = (uint8_t)(masked[31] & 0x7f);
+    fe_frombytes_strict(h, masked);
+}
+
+/*
+ * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order.
+ * It works in three steps:
+ *  1. subtract 2^255-19 from the limbs with a borrow chain (the constants
+ *     0x3ffffed, 0x1ffffff, 0x3ffffff, ... are the limbs of 2^255-19);
+ *  2. if that subtraction borrowed, add 2^255-19 back, selected by a mask
+ *     rather than by a conditional statement;
+ *  3. pack the resulting 26/25-bit limbs into 32 bytes.
+ *
+ * Parameters:
+ * out1: receives the 32 output bytes
+ * arg1: the 10 input limbs
+ *
+ * Postconditions:
+ * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
+ *
+ * Input Bounds:
+ * arg1: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ * Output Bounds:
+ * out1: out1[0..30] each in [0x0 ~> 0xff], out1[31] in [0x0 ~> 0x7f]
+ */
+static void
+fiat_25519_to_bytes(uint8_t out1[32], const uint32_t arg1[10])
+{
+ uint32_t x1;
+ uint8_t x2;
+ fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), UINT32_C(0x3ffffed)); /* step 1: limbs minus 2^255-19, borrow chained through x2, x4, ..., x20 */
+ uint32_t x3;
+ uint8_t x4;
+ fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff));
+ uint32_t x5;
+ uint8_t x6;
+ fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff));
+ uint32_t x7;
+ uint8_t x8;
+ fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff));
+ uint32_t x9;
+ uint8_t x10;
+ fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), UINT32_C(0x3ffffff));
+ uint32_t x11;
+ uint8_t x12;
+ fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), UINT32_C(0x1ffffff));
+ uint32_t x13;
+ uint8_t x14;
+ fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), UINT32_C(0x3ffffff));
+ uint32_t x15;
+ uint8_t x16;
+ fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), UINT32_C(0x1ffffff));
+ uint32_t x17;
+ uint8_t x18;
+ fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), UINT32_C(0x3ffffff));
+ uint32_t x19;
+ uint8_t x20;
+ fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), UINT32_C(0x1ffffff));
+ uint32_t x21;
+ fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff)); /* step 2: x21 = 0xffffffff if the subtraction borrowed (x20 != 0), else 0 */
+ uint32_t x22;
+ uint8_t x23;
+ fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, (x21 & UINT32_C(0x3ffffed))); /* add (2^255-19 & mask) back, carry chained through x23, x25, ..., x41 */
+ uint32_t x24;
+ uint8_t x25;
+ fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, (x21 & UINT32_C(0x1ffffff)));
+ uint32_t x26;
+ uint8_t x27;
+ fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, (x21 & UINT32_C(0x3ffffff)));
+ uint32_t x28;
+ uint8_t x29;
+ fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, (x21 & UINT32_C(0x1ffffff)));
+ uint32_t x30;
+ uint8_t x31;
+ fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, (x21 & UINT32_C(0x3ffffff)));
+ uint32_t x32;
+ uint8_t x33;
+ fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, (x21 & UINT32_C(0x1ffffff)));
+ uint32_t x34;
+ uint8_t x35;
+ fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, (x21 & UINT32_C(0x3ffffff)));
+ uint32_t x36;
+ uint8_t x37;
+ fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, (x21 & UINT32_C(0x1ffffff)));
+ uint32_t x38;
+ uint8_t x39;
+ fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, (x21 & UINT32_C(0x3ffffff)));
+ uint32_t x40;
+ uint8_t x41;
+ fiat_25519_addcarryx_u25(&x40, &x41, x39, x19, (x21 & UINT32_C(0x1ffffff)));
+ uint32_t x42 = (x40 << 6); /* step 3: pre-shift each limb to its bit offset within the byte it starts in (limb 9: bit 230 = byte 28, offset 6) */
+ uint32_t x43 = (x38 << 4); /* limb 8: bit 204 = byte 25, offset 4 */
+ uint32_t x44 = (x36 << 3); /* limb 7: bit 179 = byte 22, offset 3 */
+ uint32_t x45 = (x34 * (uint32_t)0x2); /* limb 6: bit 153 = byte 19, offset 1 */
+ uint32_t x46 = (x30 << 6); /* limb 4: bit 102 = byte 12, offset 6 */
+ uint32_t x47 = (x28 << 5); /* limb 3: bit 77 = byte 9, offset 5 */
+ uint32_t x48 = (x26 << 3); /* limb 2: bit 51 = byte 6, offset 3 */
+ uint32_t x49 = (x24 << 2); /* limb 1: bit 26 = byte 3, offset 2 */
+ uint32_t x50 = (x22 >> 8); /* peel bytes off limb 0; its leftover high bits (x54) join limb 1 in byte 3 */
+ uint8_t x51 = (uint8_t)(x22 & UINT8_C(0xff));
+ uint32_t x52 = (x50 >> 8);
+ uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
+ uint8_t x54 = (uint8_t)(x52 >> 8);
+ uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
+ uint32_t x56 = (x54 + x49);
+ uint32_t x57 = (x56 >> 8);
+ uint8_t x58 = (uint8_t)(x56 & UINT8_C(0xff));
+ uint32_t x59 = (x57 >> 8);
+ uint8_t x60 = (uint8_t)(x57 & UINT8_C(0xff));
+ uint8_t x61 = (uint8_t)(x59 >> 8);
+ uint8_t x62 = (uint8_t)(x59 & UINT8_C(0xff));
+ uint32_t x63 = (x61 + x48);
+ uint32_t x64 = (x63 >> 8);
+ uint8_t x65 = (uint8_t)(x63 & UINT8_C(0xff));
+ uint32_t x66 = (x64 >> 8);
+ uint8_t x67 = (uint8_t)(x64 & UINT8_C(0xff));
+ uint8_t x68 = (uint8_t)(x66 >> 8);
+ uint8_t x69 = (uint8_t)(x66 & UINT8_C(0xff));
+ uint32_t x70 = (x68 + x47);
+ uint32_t x71 = (x70 >> 8);
+ uint8_t x72 = (uint8_t)(x70 & UINT8_C(0xff));
+ uint32_t x73 = (x71 >> 8);
+ uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff));
+ uint8_t x75 = (uint8_t)(x73 >> 8);
+ uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff));
+ uint32_t x77 = (x75 + x46);
+ uint32_t x78 = (x77 >> 8);
+ uint8_t x79 = (uint8_t)(x77 & UINT8_C(0xff));
+ uint32_t x80 = (x78 >> 8);
+ uint8_t x81 = (uint8_t)(x78 & UINT8_C(0xff));
+ uint8_t x82 = (uint8_t)(x80 >> 8);
+ uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff));
+ uint8_t x84 = (uint8_t)(x82 & UINT8_C(0xff)); /* byte 15: last byte of the low half (limbs 0..4) */
+ uint32_t x85 = (x32 >> 8); /* limb 5 starts at bit 128 = byte 16, offset 0, so the high half restarts here */
+ uint8_t x86 = (uint8_t)(x32 & UINT8_C(0xff));
+ uint32_t x87 = (x85 >> 8);
+ uint8_t x88 = (uint8_t)(x85 & UINT8_C(0xff));
+ uint8_t x89 = (uint8_t)(x87 >> 8);
+ uint8_t x90 = (uint8_t)(x87 & UINT8_C(0xff));
+ uint32_t x91 = (x89 + x45);
+ uint32_t x92 = (x91 >> 8);
+ uint8_t x93 = (uint8_t)(x91 & UINT8_C(0xff));
+ uint32_t x94 = (x92 >> 8);
+ uint8_t x95 = (uint8_t)(x92 & UINT8_C(0xff));
+ uint8_t x96 = (uint8_t)(x94 >> 8);
+ uint8_t x97 = (uint8_t)(x94 & UINT8_C(0xff));
+ uint32_t x98 = (x96 + x44);
+ uint32_t x99 = (x98 >> 8);
+ uint8_t x100 = (uint8_t)(x98 & UINT8_C(0xff));
+ uint32_t x101 = (x99 >> 8);
+ uint8_t x102 = (uint8_t)(x99 & UINT8_C(0xff));
+ uint8_t x103 = (uint8_t)(x101 >> 8);
+ uint8_t x104 = (uint8_t)(x101 & UINT8_C(0xff));
+ uint32_t x105 = (x103 + x43);
+ uint32_t x106 = (x105 >> 8);
+ uint8_t x107 = (uint8_t)(x105 & UINT8_C(0xff));
+ uint32_t x108 = (x106 >> 8);
+ uint8_t x109 = (uint8_t)(x106 & UINT8_C(0xff));
+ uint8_t x110 = (uint8_t)(x108 >> 8);
+ uint8_t x111 = (uint8_t)(x108 & UINT8_C(0xff));
+ uint32_t x112 = (x110 + x42);
+ uint32_t x113 = (x112 >> 8);
+ uint8_t x114 = (uint8_t)(x112 & UINT8_C(0xff));
+ uint32_t x115 = (x113 >> 8);
+ uint8_t x116 = (uint8_t)(x113 & UINT8_C(0xff));
+ uint8_t x117 = (uint8_t)(x115 >> 8);
+ uint8_t x118 = (uint8_t)(x115 & UINT8_C(0xff));
+ out1[0] = x51;
+ out1[1] = x53;
+ out1[2] = x55;
+ out1[3] = x58;
+ out1[4] = x60;
+ out1[5] = x62;
+ out1[6] = x65;
+ out1[7] = x67;
+ out1[8] = x69;
+ out1[9] = x72;
+ out1[10] = x74;
+ out1[11] = x76;
+ out1[12] = x79;
+ out1[13] = x81;
+ out1[14] = x83;
+ out1[15] = x84;
+ out1[16] = x86;
+ out1[17] = x88;
+ out1[18] = x90;
+ out1[19] = x93;
+ out1[20] = x95;
+ out1[21] = x97;
+ out1[22] = x100;
+ out1[23] = x102;
+ out1[24] = x104;
+ out1[25] = x107;
+ out1[26] = x109;
+ out1[27] = x111;
+ out1[28] = x114;
+ out1[29] = x116;
+ out1[30] = x118;
+ out1[31] = x117;
+}
+
+static inline void
+fe_tobytes(uint8_t s[32], const fe *f) /* s = 32-byte little-endian serialization of f */
+{
+ assert_fe(f->v); /* the input limbs must satisfy the tight fe bounds that fiat_25519_to_bytes documents */
+ fiat_25519_to_bytes(s, f->v);
+}
+
+/*
+ * h = f
+ * The copy goes through memmove, so |h| and |f| may be the same element.
+ */
+static inline void
+fe_copy(fe *h, const fe *f)
+{
+    memmove(h, f, sizeof(*h));
+}
+
+/*
+ * h = f, storing a tight element into a loose one.
+ * Both types must have the same size, which is checked with PORT_Assert.
+ */
+static inline void
+fe_copy_lt(fe_loose *h, const fe *f)
+{
+    PORT_Assert(sizeof(fe_loose) == sizeof(fe));
+    memmove(h, f, sizeof(*f));
+}
+
+/*
+ * h = 0
+ * Every limb of |h| is cleared.
+ */
+static inline void
+fe_0(fe *h)
+{
+    memset(h, 0, sizeof(*h));
+}
+
+/*
+ * h = 1
+ * Every limb of |h| is cleared, then the lowest limb is set to one.
+ */
+static inline void
+fe_1(fe *h)
+{
+    fe_0(h);
+    h->v[0] = 1;
+}
+/*
+ * The function fiat_25519_add adds two field elements.
+ * The addition is limb-wise with no carry propagation, which is why the
+ * output bounds are wider than the input bounds.
+ *
+ * Parameters:
+ * out1: receives the 10 limbs of the sum
+ * arg1: limbs of the first addend
+ * arg2: limbs of the second addend
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 + eval arg2) mod m
+ *
+ * Input Bounds:
+ * arg1: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ * arg2: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ * Output Bounds:
+ * out1: even-indexed limbs in [0x0 ~> 0xd333332], odd-indexed limbs in [0x0 ~> 0x6999999]
+ */
+static void
+fiat_25519_add(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+ uint32_t x1 = ((arg1[0]) + (arg2[0]));
+ uint32_t x2 = ((arg1[1]) + (arg2[1]));
+ uint32_t x3 = ((arg1[2]) + (arg2[2]));
+ uint32_t x4 = ((arg1[3]) + (arg2[3]));
+ uint32_t x5 = ((arg1[4]) + (arg2[4]));
+ uint32_t x6 = ((arg1[5]) + (arg2[5]));
+ uint32_t x7 = ((arg1[6]) + (arg2[6]));
+ uint32_t x8 = ((arg1[7]) + (arg2[7]));
+ uint32_t x9 = ((arg1[8]) + (arg2[8]));
+ uint32_t x10 = ((arg1[9]) + (arg2[9]));
+ out1[0] = x1; /* all sums are computed before any store, so out1 may alias an input */
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+ out1[6] = x7;
+ out1[7] = x8;
+ out1[8] = x9;
+ out1[9] = x10;
+}
+
+/*
+ * Add two field elements.
+ * h = f + g
+ * Both inputs are checked against the tight bounds with assert_fe and the
+ * result against the loose bounds with assert_fe_loose; the sum itself is
+ * computed by fiat_25519_add.
+ */
+static inline void
+fe_add(fe_loose *h, const fe *f, const fe *g)
+{
+ assert_fe(f->v);
+ assert_fe(g->v);
+ fiat_25519_add(h->v, f->v, g->v);
+ assert_fe_loose(h->v);
+}
+
+/*
+ * The function fiat_25519_sub subtracts two field elements.
+ * The subtraction is limb-wise with no borrow propagation. Before
+ * subtracting, each limb of arg1 is offset by a constant (0x7ffffda for limb
+ * 0, then 0x3fffffe for odd-indexed and 0x7fffffe for even-indexed limbs).
+ * These constants are twice the limbs of 2^255-19, so the offset leaves the
+ * value unchanged mod m, and each constant exceeds the corresponding input
+ * bound of arg2, so no limb wraps below zero for in-bounds inputs.
+ *
+ * Parameters:
+ * out1: receives the 10 limbs of the difference
+ * arg1: limbs of the minuend
+ * arg2: limbs of the subtrahend
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 - eval arg2) mod m
+ *
+ * Input Bounds:
+ * arg1: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ * arg2: even-indexed limbs in [0x0 ~> 0x4666666], odd-indexed limbs in [0x0 ~> 0x2333333]
+ * Output Bounds:
+ * out1: even-indexed limbs in [0x0 ~> 0xd333332], odd-indexed limbs in [0x0 ~> 0x6999999]
+ */
+static void
+fiat_25519_sub(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+ uint32_t x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0])); /* 0x7ffffda = 2 * 0x3ffffed = 2 * (2^26 - 19) */
+ uint32_t x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1])); /* 0x3fffffe = 2 * (2^25 - 1) */
+ uint32_t x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2])); /* 0x7fffffe = 2 * (2^26 - 1) */
+ uint32_t x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3]));
+ uint32_t x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4]));
+ uint32_t x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5]));
+ uint32_t x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6]));
+ uint32_t x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7]));
+ uint32_t x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8]));
+ uint32_t x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9]));
+ out1[0] = x1; /* all differences are computed before any store, so out1 may alias an input */
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+ out1[6] = x7;
+ out1[7] = x8;
+ out1[8] = x9;
+ out1[9] = x10;
+}
+
+/*
+ * Subtract two field elements.
+ * h = f - g
+ * Both inputs are checked against the tight bounds with assert_fe and the
+ * result against the loose bounds with assert_fe_loose; the difference
+ * itself is computed by fiat_25519_sub.
+ */
+static void
+fe_sub(fe_loose *h, const fe *f, const fe *g)
+{
+ assert_fe(f->v);
+ assert_fe(g->v);
+ fiat_25519_sub(h->v, f->v, g->v);
+ assert_fe_loose(h->v);
+}
+
+/*
+ * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * eval arg2) mod m
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+ * arg2: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
+ */
+static void
+fiat_25519_carry_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+ uint64_t x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
+ uint64_t x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
+ uint64_t x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
+ uint64_t x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
+ uint64_t x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
+ uint64_t x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
+ uint64_t x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * (uint32_t)0x2));
+ uint64_t x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
+ uint64_t x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
+ uint64_t x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
+ uint64_t x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
+ uint64_t x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
+ uint64_t x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
+ uint64_t x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * (uint32_t)0x2));
+ uint64_t x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
+ uint64_t x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * (uint32_t)0x2));
+ uint64_t x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
+ uint64_t x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
+ uint64_t x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
+ uint64_t x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
+ uint64_t x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
+ uint64_t x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
+ uint64_t x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
+ uint64_t x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
+ uint64_t x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * (uint32_t)0x2));
+ uint64_t x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
+ uint64_t x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * (uint32_t)0x2));
+ uint64_t x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
+ uint64_t x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * (uint32_t)0x2));
+ uint64_t x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
+ uint64_t x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
+ uint64_t x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
+ uint64_t x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
+ uint64_t x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
+ uint64_t x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
+ uint64_t x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
+ uint64_t x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
+ uint64_t x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
+ uint64_t x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
+ uint64_t x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * (uint32_t)0x2));
+ uint64_t x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
+ uint64_t x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * (uint32_t)0x2));
+ uint64_t x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
+ uint64_t x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * (uint32_t)0x2));
+ uint64_t x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
+ uint64_t x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * (uint32_t)0x2));
+ uint64_t x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
+ uint64_t x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
+ uint64_t x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
+ uint64_t x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
+ uint64_t x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
+ uint64_t x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
+ uint64_t x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
+ uint64_t x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
+ uint64_t x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
+ uint64_t x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
+ uint64_t x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
+ uint64_t x101 = (x100 + (x45 + (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
+ uint64_t x102 = (x101 >> 26);
+ uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
+ uint64_t x104 = (x91 + (x82 + (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
+ uint64_t x105 = (x92 + (x83 + (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
+ uint64_t x106 = (x93 + (x84 + (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
+ uint64_t x107 = (x94 + (x85 + (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
+ uint64_t x108 = (x95 + (x86 + (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
+ uint64_t x109 = (x96 + (x87 + (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
+ uint64_t x110 = (x97 + (x88 + (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
+ uint64_t x111 = (x98 + (x89 + (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
+ uint64_t x112 = (x99 + (x90 + (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
+ uint64_t x113 = (x102 + x112);
+ uint64_t x114 = (x113 >> 25);
+ uint32_t x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
+ uint64_t x116 = (x114 + x111);
+ uint64_t x117 = (x116 >> 26);
+ uint32_t x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
+ uint64_t x119 = (x117 + x110);
+ uint64_t x120 = (x119 >> 25);
+ uint32_t x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
+ uint64_t x122 = (x120 + x109);
+ uint64_t x123 = (x122 >> 26);
+ uint32_t x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
+ uint64_t x125 = (x123 + x108);
+ uint64_t x126 = (x125 >> 25);
+ uint32_t x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
+ uint64_t x128 = (x126 + x107);
+ uint64_t x129 = (x128 >> 26);
+ uint32_t x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
+ uint64_t x131 = (x129 + x106);
+ uint64_t x132 = (x131 >> 25);
+ uint32_t x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
+ uint64_t x134 = (x132 + x105);
+ uint64_t x135 = (x134 >> 26);
+ uint32_t x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
+ uint64_t x137 = (x135 + x104);
+ uint64_t x138 = (x137 >> 25);
+ uint32_t x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
+ uint64_t x140 = (x138 * (uint64_t)UINT8_C(0x13));
+ uint64_t x141 = (x103 + x140);
+ uint32_t x142 = (uint32_t)(x141 >> 26);
+ uint32_t x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
+ uint32_t x144 = (x142 + x115);
+ uint32_t x145 = (x144 >> 25);
+ uint32_t x146 = (x144 & UINT32_C(0x1ffffff));
+ uint32_t x147 = (x145 + x118);
+ out1[0] = x143;
+ out1[1] = x146;
+ out1[2] = x147;
+ out1[3] = x121;
+ out1[4] = x124;
+ out1[5] = x127;
+ out1[6] = x130;
+ out1[7] = x133;
+ out1[8] = x136;
+ out1[9] = x139;
+}
+
+/* Multiplies two 10-limb values: out1 = arg1 * arg2.
+ * Both inputs are checked with assert_fe_loose, the product is computed by
+ * fiat_25519_carry_mul, and the result is checked with assert_fe. */
+static void
+fe_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
+{
+ assert_fe_loose(arg1);
+ assert_fe_loose(arg2);
+ fiat_25519_carry_mul(out1, arg1, arg2);
+ assert_fe(out1);
+}
+
+/* Typed wrapper around fe_mul: h = f * g, where both operands and the
+ * result are of type fe. */
+static void
+fe_mul_ttt(fe *h, const fe *f, const fe *g)
+{
+ fe_mul(h->v, f->v, g->v);
+}
+
+/* Typed wrapper around fe_mul: h = f * g, where f is a fe_loose and both g
+ * and the result are of type fe. */
+static void
+fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
+{
+ fe_mul(h->v, f->v, g->v);
+}
+
+/* Typed wrapper around fe_mul: h = f * g, where both operands are fe_loose
+ * and the result is of type fe. */
+static void
+fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
+{
+ fe_mul(h->v, f->v, g->v);
+}
+
+/* Squares a 10-limb value: out = in1 * in1.
+ *
+ * The limbs alternate between 26 and 25 bits (even indices are masked with
+ * 0x3ffffff, odd indices with 0x1ffffff). All input limbs are read into
+ * locals before anything is written to out.
+ */
+static void
+fe_sq(uint32_t out[10], const uint32_t in1[10])
+{
+ const uint32_t x17 = in1[9];
+ const uint32_t x18 = in1[8];
+ const uint32_t x16 = in1[7];
+ const uint32_t x14 = in1[6];
+ const uint32_t x12 = in1[5];
+ const uint32_t x10 = in1[4];
+ const uint32_t x8 = in1[3];
+ const uint32_t x6 = in1[2];
+ const uint32_t x4 = in1[1];
+ const uint32_t x2 = in1[0];
+ /* x19..x37: the 19 coefficients of the square, lowest first, accumulated
+ * in 64 bits. */
+ uint64_t x19 = ((uint64_t)x2 * x2);
+ uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
+ uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
+ uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
+ uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
+ uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
+ uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
+ uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
+ uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
+ uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
+ uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
+ uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
+ uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
+ uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
+ uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
+ uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
+ uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
+ uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
+ uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
+ /* Fold each of the nine high coefficients (x29..x37) into the coefficient
+ * ten positions lower, multiplied by 19: (v << 4) + (v << 1) + v = 19 * v. */
+ uint64_t x38 = (x27 + (x37 << 0x4));
+ uint64_t x39 = (x38 + (x37 << 0x1));
+ uint64_t x40 = (x39 + x37);
+ uint64_t x41 = (x26 + (x36 << 0x4));
+ uint64_t x42 = (x41 + (x36 << 0x1));
+ uint64_t x43 = (x42 + x36);
+ uint64_t x44 = (x25 + (x35 << 0x4));
+ uint64_t x45 = (x44 + (x35 << 0x1));
+ uint64_t x46 = (x45 + x35);
+ uint64_t x47 = (x24 + (x34 << 0x4));
+ uint64_t x48 = (x47 + (x34 << 0x1));
+ uint64_t x49 = (x48 + x34);
+ uint64_t x50 = (x23 + (x33 << 0x4));
+ uint64_t x51 = (x50 + (x33 << 0x1));
+ uint64_t x52 = (x51 + x33);
+ uint64_t x53 = (x22 + (x32 << 0x4));
+ uint64_t x54 = (x53 + (x32 << 0x1));
+ uint64_t x55 = (x54 + x32);
+ uint64_t x56 = (x21 + (x31 << 0x4));
+ uint64_t x57 = (x56 + (x31 << 0x1));
+ uint64_t x58 = (x57 + x31);
+ uint64_t x59 = (x20 + (x30 << 0x4));
+ uint64_t x60 = (x59 + (x30 << 0x1));
+ uint64_t x61 = (x60 + x30);
+ uint64_t x62 = (x19 + (x29 << 0x4));
+ uint64_t x63 = (x62 + (x29 << 0x1));
+ uint64_t x64 = (x63 + x29);
+ /* Carry chain from limb 0 up to limb 9, alternating 26-bit (0x1a) and
+ * 25-bit (0x19) limbs. */
+ uint64_t x65 = (x64 >> 0x1a);
+ uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
+ uint64_t x67 = (x65 + x61);
+ uint64_t x68 = (x67 >> 0x19);
+ uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
+ uint64_t x70 = (x68 + x58);
+ uint64_t x71 = (x70 >> 0x1a);
+ uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
+ uint64_t x73 = (x71 + x55);
+ uint64_t x74 = (x73 >> 0x19);
+ uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
+ uint64_t x76 = (x74 + x52);
+ uint64_t x77 = (x76 >> 0x1a);
+ uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
+ uint64_t x79 = (x77 + x49);
+ uint64_t x80 = (x79 >> 0x19);
+ uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
+ uint64_t x82 = (x80 + x46);
+ uint64_t x83 = (x82 >> 0x1a);
+ uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
+ uint64_t x85 = (x83 + x43);
+ uint64_t x86 = (x85 >> 0x19);
+ uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
+ uint64_t x88 = (x86 + x40);
+ uint64_t x89 = (x88 >> 0x1a);
+ uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
+ uint64_t x91 = (x89 + x28);
+ uint64_t x92 = (x91 >> 0x19);
+ uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
+ /* The carry out of limb 9 wraps around into limb 0 multiplied by 19 (0x13),
+ * followed by two more carry steps into limbs 1 and 2. */
+ uint64_t x94 = (x66 + (0x13 * x92));
+ uint32_t x95 = (uint32_t)(x94 >> 0x1a);
+ uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
+ uint32_t x97 = (x95 + x69);
+ uint32_t x98 = (x97 >> 0x19);
+ uint32_t x99 = (x97 & 0x1ffffff);
+ out[0] = x96;
+ out[1] = x99;
+ out[2] = (x98 + x72);
+ out[3] = x75;
+ out[4] = x78;
+ out[5] = x81;
+ out[6] = x84;
+ out[7] = x87;
+ out[8] = x90;
+ out[9] = x93;
+}
+
+/* Typed wrapper around fe_sq: h = f * f, where f is a fe_loose and the
+ * result is of type fe. */
+static void
+fe_sq_tl(fe *h, const fe_loose *f)
+{
+ fe_sq(h->v, f->v);
+}
+
+/* Typed wrapper around fe_sq: h = f * f, where both the operand and the
+ * result are of type fe. */
+static void
+fe_sq_tt(fe *h, const fe *f)
+{
+ fe_sq(h->v, f->v);
+}
+
+/* Squares f `count` times in a row (count >= 1): h = f^(2^count).
+ * The first squaring reads f, every further one squares h in place. */
+static inline void
+fe_sq_tt_repeat(fe *h, const fe *f, int count)
+{
+    int done;
+
+    fe_sq_tt(h, f);
+    for (done = 1; done < count; ++done) {
+        fe_sq_tt(h, h);
+    }
+}
+
+/* Raises z to the power 2^255 - 21 with a fixed chain of squarings and
+ * multiplications and stores the result in out. The trailing comments give
+ * the exponent of z held by the destination after each step. */
+static inline void
+fe_loose_invert(fe *out, const fe_loose *z)
+{
+    fe low, run, pow, wide;
+
+    fe_sq_tl(&low, z);                 /* 2 */
+    fe_sq_tt_repeat(&run, &low, 2);    /* 8 */
+    fe_mul_tlt(&run, z, &run);         /* 9 */
+    fe_mul_ttt(&low, &low, &run);      /* 11 */
+    fe_sq_tt(&pow, &low);              /* 22 */
+    fe_mul_ttt(&run, &run, &pow);      /* 31 = 2^5 - 1 */
+    fe_sq_tt_repeat(&pow, &run, 5);    /* 2^10 - 2^5 */
+    fe_mul_ttt(&run, &pow, &run);      /* 2^10 - 1 */
+    fe_sq_tt_repeat(&pow, &run, 10);   /* 2^20 - 2^10 */
+    fe_mul_ttt(&pow, &pow, &run);      /* 2^20 - 1 */
+    fe_sq_tt_repeat(&wide, &pow, 20);  /* 2^40 - 2^20 */
+    fe_mul_ttt(&pow, &wide, &pow);     /* 2^40 - 1 */
+    fe_sq_tt_repeat(&pow, &pow, 10);   /* 2^50 - 2^10 */
+    fe_mul_ttt(&run, &pow, &run);      /* 2^50 - 1 */
+    fe_sq_tt_repeat(&pow, &run, 50);   /* 2^100 - 2^50 */
+    fe_mul_ttt(&pow, &pow, &run);      /* 2^100 - 1 */
+    fe_sq_tt_repeat(&wide, &pow, 100); /* 2^200 - 2^100 */
+    fe_mul_ttt(&pow, &wide, &pow);     /* 2^200 - 1 */
+    fe_sq_tt_repeat(&pow, &pow, 50);   /* 2^250 - 2^50 */
+    fe_mul_ttt(&run, &pow, &run);      /* 2^250 - 1 */
+    fe_sq_tt_repeat(&run, &run, 5);    /* 2^255 - 2^5 */
+    fe_mul_ttt(out, &run, &low);       /* 2^255 - 21 */
+}
+
+/* Inverts a value of type fe: copies z into a fe_loose with fe_copy_lt and
+ * hands that copy to fe_loose_invert, which writes the result to out. */
+static inline void
+fe_invert(fe *out, const fe *z)
+{
+ fe_loose l;
+ fe_copy_lt(&l, z);
+ fe_loose_invert(out, &l);
+}
+
+/* Conditional swap without branching on b:
+ *   b == 1: (f,g) becomes (g,f)
+ *   b == 0: (f,g) is left unchanged
+ *
+ * Preconditions: b in {0,1}
+ */
+static inline void
+fe_cswap(fe *f, fe *g, unsigned int b)
+{
+    PORT_Assert(b < 2);
+    /* 0 - 1 wraps to all ones, 0 - 0 stays zero. */
+    const unsigned int mask = 0 - b;
+    unsigned int limb;
+
+    for (limb = 0; limb != 10; ++limb) {
+        /* diff is f ^ g when swapping and 0 otherwise. */
+        uint32_t diff = (f->v[limb] ^ g->v[limb]) & mask;
+        f->v[limb] ^= diff;
+        g->v[limb] ^= diff;
+    }
+}
+
+/* Multiplies a 10-limb value by the constant 121666: out = 121666 * in1.
+ *
+ * NOTE: based on fiat-crypto fe_mul with in2 = (121666, 0, ..., 0). Every
+ * partial product against one of the zero limbs vanishes, so only
+ * 121666 * in1[i] is left for each limb i. Limbs alternate between 26 bits
+ * (even index) and 25 bits (odd index). All of in1 is consumed before out is
+ * written.
+ */
+static inline void
+fe_mul_121666(uint32_t out[10], const uint32_t in1[10])
+{
+    const uint64_t factor = 121666;
+    uint32_t limb[10];
+    uint64_t carry = 0;
+    unsigned int i;
+
+    /* Scale every limb and propagate the carries upwards. */
+    for (i = 0; i < 10; i++) {
+        const unsigned int width = (i & 1) ? 25 : 26;
+        const uint64_t acc = carry + (factor * in1[i]);
+
+        limb[i] = (uint32_t)acc & (((uint32_t)1 << width) - 1);
+        carry = acc >> width;
+    }
+
+    /* The carry out of limb 9 re-enters limb 0 multiplied by 19, followed by
+     * two more carry steps into limbs 1 and 2. */
+    {
+        const uint64_t wrapped = (limb[0] + (0x13 * carry));
+        const uint32_t into1 = (uint32_t)(wrapped >> 26);
+        const uint32_t sum1 = (into1 + limb[1]);
+
+        out[0] = ((uint32_t)wrapped & 0x3ffffff);
+        out[1] = (sum1 & 0x1ffffff);
+        out[2] = ((sum1 >> 25) + limb[2]);
+    }
+    for (i = 3; i < 10; i++) {
+        out[i] = limb[i];
+    }
+}
+
+/* Typed wrapper around fe_mul_121666: h = 121666 * f, where f is a fe_loose
+ * and the result is of type fe. The input is checked with assert_fe_loose
+ * and the result with assert_fe. */
+static void
+fe_mul_121666_tl(fe *h, const fe_loose *f)
+{
+ assert_fe_loose(f->v);
+ fe_mul_121666(h->v, f->v);
+ assert_fe(h->v);
+}
+
+/* Curve25519 scalar multiplication: out = scalar * point.
+ *
+ * out    - receives the 32-byte encoding of the resulting x-coordinate
+ * scalar - 32-byte scalar; a clamped copy is used, the input is not modified
+ * point  - 32-byte encoding of the input x-coordinate
+ *
+ * Always returns SECSuccess. Every stack temporary that holds
+ * scalar-dependent state is cleared before returning.
+ */
+SECStatus
+ec_Curve25519_mul(PRUint8 *out, const PRUint8 *scalar, const PRUint8 *point)
+{
+    fe x1, x2, z2, x3, z3, tmp0, tmp1;
+    fe_loose x2l, z2l, x3l, tmp0l, tmp1l;
+    unsigned int swap = 0;
+    unsigned int b;
+    int pos;
+    uint8_t e[32];
+
+    /* Clamp a private copy of the scalar: clear the three lowest bits,
+     * clear bit 255 and set bit 254. */
+    memcpy(e, scalar, 32);
+    e[0] &= 0xF8;
+    e[31] &= 0x7F;
+    e[31] |= 0x40;
+
+    /* Ladder state: (x2 : z2) = (1 : 0), (x3 : z3) = (x1 : 1). */
+    fe_frombytes(&x1, point);
+    fe_1(&x2);
+    fe_0(&z2);
+    fe_copy(&x3, &x1);
+    fe_1(&z3);
+
+    /* Montgomery ladder over scalar bits 254..0. The two working points are
+     * exchanged with fe_cswap so that no branch depends on a scalar bit. */
+    for (pos = 254; pos >= 0; --pos) {
+        b = e[pos / 8] >> (pos & 7);
+        b &= 1;
+        swap ^= b;
+        fe_cswap(&x2, &x3, swap);
+        fe_cswap(&z2, &z3, swap);
+        swap = b;
+        fe_sub(&tmp0l, &x3, &z3);
+        fe_sub(&tmp1l, &x2, &z2);
+        fe_add(&x2l, &x2, &z2);
+        fe_add(&z2l, &x3, &z3);
+        fe_mul_tll(&z3, &tmp0l, &x2l);
+        fe_mul_tll(&z2, &z2l, &tmp1l);
+        fe_sq_tl(&tmp0, &tmp1l);
+        fe_sq_tl(&tmp1, &x2l);
+        fe_add(&x3l, &z3, &z2);
+        fe_sub(&z2l, &z3, &z2);
+        fe_mul_ttt(&x2, &tmp1, &tmp0);
+        fe_sub(&tmp1l, &tmp1, &tmp0);
+        fe_sq_tl(&z2, &z2l);
+        fe_mul_121666_tl(&z3, &tmp1l);
+        fe_sq_tl(&x3, &x3l);
+        fe_add(&tmp0l, &tmp0, &z3);
+        fe_mul_ttt(&z3, &x1, &z2);
+        fe_mul_tll(&z2, &tmp1l, &tmp0l);
+    }
+
+    /* Undo the swap left pending by the last iteration. */
+    fe_cswap(&x2, &x3, swap);
+    fe_cswap(&z2, &z3, swap);
+
+    /* Convert to affine form: x2 / z2. */
+    fe_invert(&z2, &z2);
+    fe_mul_ttt(&x2, &x2, &z2);
+    fe_tobytes(out, &x2);
+
+    /* Scrub every temporary derived from the scalar, including the ladder
+     * scratch values tmp0/tmp1/tmp0l/tmp1l. */
+    memset(x1.v, 0, sizeof(x1));
+    memset(x2.v, 0, sizeof(x2));
+    memset(z2.v, 0, sizeof(z2));
+    memset(x3.v, 0, sizeof(x3));
+    memset(z3.v, 0, sizeof(z3));
+    memset(tmp0.v, 0, sizeof(tmp0));
+    memset(tmp1.v, 0, sizeof(tmp1));
+    memset(x2l.v, 0, sizeof(x2l));
+    memset(z2l.v, 0, sizeof(z2l));
+    memset(x3l.v, 0, sizeof(x3l));
+    memset(tmp0l.v, 0, sizeof(tmp0l));
+    memset(tmp1l.v, 0, sizeof(tmp1l));
+    memset(e, 0, sizeof(e));
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c
new file mode 100644
index 0000000000..e346bdb8b8
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_64.c
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+#if HACL_CAN_COMPILE_INLINE_ASM
+#include "../verified/Hacl_Curve25519_64.h"
+#else
+#include "../verified/Hacl_Curve25519_51.h"
+#endif
+
+/* Curve25519 scalar multiplication backed by the HACL* implementation:
+ * mypublic = secret * basepoint. Always returns SECSuccess.
+ *
+ * The guard below is the same "#if HACL_CAN_COMPILE_INLINE_ASM" test that
+ * selects the included header, so the function that gets called is always
+ * the one that was declared, even when the macro is defined as 0.
+ */
+SECStatus
+ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint)
+{
+/* Note: this cast is safe because HaCl* state has a post-condition that only "mypublic" changed. */
+#if HACL_CAN_COMPILE_INLINE_ASM
+    Hacl_Curve25519_64_ecdh(mypublic, (uint8_t *)secret, (uint8_t *)basepoint);
+#else
+    Hacl_Curve25519_51_ecdh(mypublic, (uint8_t *)secret, (uint8_t *)basepoint);
+#endif
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c
new file mode 100644
index 0000000000..cad08cb278
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ec_naf.c
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+/* Returns 2^e as an int by doubling 1 e times; any e <= 0 yields 1.
+ * Intended for small exponents only: the result has to fit in an int. */
+int
+ec_twoTo(int e)
+{
+    int power = 1;
+    int remaining;
+
+    for (remaining = e; remaining > 0; --remaining) {
+        power += power;
+    }
+    return power;
+}
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar.
+ *
+ * out     - array of signed char's to output to; must have room for
+ *           bitsize + 1 digits
+ * bitsize - number of bits of out
+ * in      - the original scalar
+ * w       - the window size
+ *
+ * Returns MP_OKAY on success, MP_RANGE if the scalar needs more than
+ * bitsize + 1 digits (nothing is written past out[bitsize]), or the error
+ * reported by the failing mp_* call.
+ *
+ * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000. */
+mp_err
+ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w)
+{
+    mp_int k;
+    mp_err res = MP_OKAY;
+    int i, twowm1, mask;
+
+    twowm1 = ec_twoTo(w - 1);
+    mask = 2 * twowm1 - 1;
+
+    MP_DIGITS(&k) = 0;
+    MP_CHECKOK(mp_init_copy(&k, in));
+
+    i = 0;
+    /* Compute wNAF form */
+    while (mp_cmp_z(&k) > 0) {
+        /* Computed in an int so that the comparison against 2^(w-1) happens
+         * before the value is narrowed to signed char. */
+        int digit = 0;
+
+        /* out[] holds bitsize + 1 digits; refuse to run past its end. */
+        if (i > bitsize) {
+            res = MP_RANGE;
+            goto CLEANUP;
+        }
+
+        if (mp_isodd(&k)) {
+            digit = (int)(MP_DIGIT(&k, 0) & (mp_digit)mask);
+            if (digit >= twowm1)
+                digit -= 2 * twowm1;
+            out[i] = (signed char)digit;
+
+            /* Subtract off the digit. Note mp_sub_d only works with
+             * unsigned digits */
+            if (digit >= 0) {
+                MP_CHECKOK(mp_sub_d(&k, (mp_digit)digit, &k));
+            } else {
+                MP_CHECKOK(mp_add_d(&k, (mp_digit)(-digit), &k));
+            }
+        } else {
+            out[i] = 0;
+        }
+        MP_CHECKOK(mp_div_2(&k, &k));
+        i++;
+    }
+    /* Zero out the remaining elements of the out array. */
+    for (; i < bitsize + 1; i++) {
+        out[i] = 0;
+    }
+CLEANUP:
+    mp_clear(&k);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h
new file mode 100644
index 0000000000..dec3ce387d
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-curve.h
@@ -0,0 +1,245 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-exp.h"
+#include "eclt.h"
+#include <stdlib.h>
+
+#ifndef __ecl_curve_h_
+#define __ecl_curve_h_
+
+/* Key-usage flag bits, copied from certt.h. They are OR-ed together to
+ * form the last initializer of each curve table below. */
+#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */
+#define KU_KEY_AGREEMENT (0x08) /* bit 4 */
+/* NIST P-256 parameters as byte strings, most significant byte first.
+ * irr256 is the field prime; a256 equals irr256 except for its final byte
+ * (0xFC rather than 0xFF), i.e. a = irr - 3. base256 is the two bytes
+ * 0x04, 0x00 followed by x256 and then y256. */
+static const PRUint8 irr256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+static const PRUint8 a256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC };
+static const PRUint8 b256[32] = { 0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55,
+ 0x76, 0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6,
+ 0x3B, 0xCE, 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B };
+static const PRUint8 x256[32] = { 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5,
+ 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0,
+ 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96 };
+static const PRUint8 y256[32] = { 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A,
+ 0x7C, 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE,
+ 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 };
+static const PRUint8 order256[32] = { 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84,
+ 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51 };
+static const PRUint8 base256[66] = { 0x04, 0x00,
+ 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5,
+ 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0,
+ 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96,
+ 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A,
+ 0x7C, 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE,
+ 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 };
+
+static const ECCurveBytes ecCurve_NIST_P256 = {
+ "NIST-P256", ECField_GFp, 256,
+ irr256, a256, b256, x256, y256, order256, base256,
+ 1, 128, 66, 32, /* 66 == sizeof(base256), 32 == size of each other array.
+ * NOTE(review): 1 and 128 are presumably cofactor and
+ * security level; ECCurveBytes is declared elsewhere. */
+ KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+/* NIST P-384 parameters as byte strings, most significant byte first.
+ * irr384 is the field prime; a384 equals irr384 except for its final byte
+ * (0xFC rather than 0xFF), i.e. a = irr - 3. base384 is the two bytes
+ * 0x04, 0x00 followed by x384 and then y384. */
+static const PRUint8 irr384[48] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF };
+static const PRUint8 a384[48] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC };
+static const PRUint8 b384[48] = { 0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B,
+ 0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12,
+ 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D,
+ 0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF };
+static const PRUint8 x384[48] = { 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E,
+ 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
+ 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,
+ 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7 };
+static const PRUint8 y384[48] = { 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF,
+ 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
+ 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,
+ 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F };
+static const PRUint8 order384[48] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, 0x58, 0x1A, 0x0D, 0xB2,
+ 0x48, 0xB0, 0xA7, 0x7A, 0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73 };
+static const PRUint8 base384[98] = { 0x04, 0x00,
+ 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E,
+ 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
+ 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,
+ 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7,
+ 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF,
+ 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
+ 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,
+ 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F };
+
+static const ECCurveBytes ecCurve_NIST_P384 = {
+ "NIST-P384", ECField_GFp, 384,
+ irr384, a384, b384, x384, y384, order384, base384,
+ 1, 192, 98, 48, /* 98 == sizeof(base384), 48 == size of each other array.
+ * NOTE(review): 1 and 192 are presumably cofactor and
+ * security level; ECCurveBytes is declared elsewhere. */
+ KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+/* NIST P-521 parameters as byte strings, most significant byte first.
+ * irr521 is the field prime (0x01 followed by 65 bytes of 0xFF, i.e.
+ * 2^521 - 1); a521 equals irr521 except for its final byte (0xFC rather
+ * than 0xFF), i.e. a = irr - 3. base521 is the two bytes 0x04, 0x00
+ * followed by x521 and then y521. */
+static const PRUint8 irr521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+static const PRUint8 a521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC };
+static const PRUint8 b521[66] = { 0x00, 0x51, 0x95, 0x3E, 0xB9, 0x61, 0x8E, 0x1C, 0x9A, 0x1F, 0x92, 0x9A,
+ 0x21, 0xA0, 0xB6, 0x85, 0x40, 0xEE, 0xA2, 0xDA, 0x72, 0x5B, 0x99, 0xB3,
+ 0x15, 0xF3, 0xB8, 0xB4, 0x89, 0x91, 0x8E, 0xF1, 0x09, 0xE1, 0x56, 0x19,
+ 0x39, 0x51, 0xEC, 0x7E, 0x93, 0x7B, 0x16, 0x52, 0xC0, 0xBD, 0x3B, 0xB1,
+ 0xBF, 0x07, 0x35, 0x73, 0xDF, 0x88, 0x3D, 0x2C, 0x34, 0xF1, 0xEF, 0x45,
+ 0x1F, 0xD4, 0x6B, 0x50, 0x3F, 0x00 };
+static const PRUint8 x521[66] = { 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E, 0x3E,
+ 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F,
+ 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B,
+ 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF,
+ 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9, 0x7E,
+ 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66 };
+static const PRUint8 y521[66] = { 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A,
+ 0x5F, 0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B,
+ 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE,
+ 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD,
+ 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE,
+ 0x94, 0x76, 0x9F, 0xD1, 0x66, 0x50 };
+static const PRUint8 order521[66] = { 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFA, 0x51, 0x86,
+ 0x87, 0x83, 0xBF, 0x2F, 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
+ 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, 0x47, 0xAE, 0xBB, 0x6F,
+ 0xB7, 0x1E, 0x91, 0x38, 0x64, 0x09 };
+static const PRUint8 base521[134] = {
+ 0x04, 0x00,
+ 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, 0xE9, 0xCD, 0x9E, 0x3E,
+ 0xCB, 0x66, 0x23, 0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F,
+ 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, 0x3D, 0xBA, 0xA1, 0x4B,
+ 0x5E, 0x77, 0xEF, 0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF,
+ 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, 0x42, 0x9B, 0xF9, 0x7E,
+ 0x7E, 0x31, 0xC2, 0xE5, 0xBD, 0x66,
+ 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A,
+ 0x5F, 0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B,
+ 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE,
+ 0x72, 0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD,
+ 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE,
+ 0x94, 0x76, 0x9F, 0xD1, 0x66, 0x50
+};
+
+static const ECCurveBytes ecCurve_NIST_P521 = {
+ "NIST-P521", ECField_GFp, 521,
+ irr521, a521, b521, x521, y521, order521, base521,
+ 1, 256, 134, 66, /* 134 == sizeof(base521), 66 == size of each other array.
+ * NOTE(review): 1 and 256 are presumably cofactor and
+ * security level; ECCurveBytes is declared elsewhere. */
+ KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+/* Curve25519 parameters. Unlike the NIST tables above, irr25519, a25519
+ * and order25519 are stored least significant byte first: irr25519 is
+ * 2^255 - 19 and a25519 is 0x076D06 = 486662. base25519 is x25519
+ * followed by y25519 and then the two bytes 0x00, 0x04.
+ * NOTE(review): x25519 carries its 0x09 in the LAST byte, which is the
+ * opposite byte order from a25519 if it is meant to encode the value 9 -
+ * confirm how (and whether) consumers read x25519 and base25519. */
+static const PRUint8 irr25519[32] = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f };
+static const PRUint8 a25519[32] = { 0x06, 0x6d, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static const PRUint8 b25519[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static const PRUint8 x25519[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09 };
+static const PRUint8 y25519[32] = { 0xd9, 0xd3, 0xce, 0x7e, 0xa2, 0xc5, 0xe9, 0x29, 0xb2, 0x61, 0x7c, 0x6d,
+ 0x7e, 0x4d, 0x3d, 0x92, 0x4c, 0xd1, 0x48, 0x77, 0x2c, 0xdd, 0x1e, 0xe0,
+ 0xb4, 0x86, 0xa0, 0xb8, 0xa1, 0x19, 0xae, 0x20 };
+static const PRUint8 order25519[32] = { 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7, 0xa2,
+ 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 };
+static const PRUint8 base25519[66] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+ 0xd9, 0xd3, 0xce, 0x7e, 0xa2, 0xc5, 0xe9, 0x29, 0xb2, 0x61, 0x7c, 0x6d,
+ 0x7e, 0x4d, 0x3d, 0x92, 0x4c, 0xd1, 0x48, 0x77, 0x2c, 0xdd, 0x1e, 0xe0,
+ 0xb4, 0x86, 0xa0, 0xb8, 0xa1, 0x19, 0xae, 0x20, 0x00, 0x04 };
+
+static const ECCurveBytes ecCurve_25519 = {
+ "Curve25519", ECField_GFp, 255,
+ irr25519, a25519, b25519, x25519, y25519, order25519, base25519,
+ 8, 128, 66, 32, /* 66 == sizeof(base25519), 32 == size of each other array.
+ * NOTE(review): 8 and 128 are presumably cofactor and
+ * security level; ECCurveBytes is declared elsewhere. */
+ KU_KEY_AGREEMENT
+};
+
+/* Lookup table indexed by ECCurveName: each slot holds a pointer to that
+ * curve's ECCurveBytes table, or NULL for a curve that has no table in
+ * this file. The entries are positional, so they must stay in exactly the
+ * same order as the enumerators of ECCurveName. */
+static const ECCurveBytes *ecCurve_map[] = {
+ NULL, /* ECCurve_noName */
+ NULL, /* ECCurve_NIST_P192 */
+ NULL, /* ECCurve_NIST_P224 */
+ &ecCurve_NIST_P256, /* ECCurve_NIST_P256 */
+ &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */
+ &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */
+ NULL, /* ECCurve_NIST_K163 */
+ NULL, /* ECCurve_NIST_B163 */
+ NULL, /* ECCurve_NIST_K233 */
+ NULL, /* ECCurve_NIST_B233 */
+ NULL, /* ECCurve_NIST_K283 */
+ NULL, /* ECCurve_NIST_B283 */
+ NULL, /* ECCurve_NIST_K409 */
+ NULL, /* ECCurve_NIST_B409 */
+ NULL, /* ECCurve_NIST_K571 */
+ NULL, /* ECCurve_NIST_B571 */
+ NULL, /* ECCurve_X9_62_PRIME_192V2 */
+ NULL, /* ECCurve_X9_62_PRIME_192V3 */
+ NULL, /* ECCurve_X9_62_PRIME_239V1 */
+ NULL, /* ECCurve_X9_62_PRIME_239V2 */
+ NULL, /* ECCurve_X9_62_PRIME_239V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB176V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB208W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB272W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB304W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB359V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB368W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB431R1 */
+ NULL, /* ECCurve_SECG_PRIME_112R1 */
+ NULL, /* ECCurve_SECG_PRIME_112R2 */
+ NULL, /* ECCurve_SECG_PRIME_128R1 */
+ NULL, /* ECCurve_SECG_PRIME_128R2 */
+ NULL, /* ECCurve_SECG_PRIME_160K1 */
+ NULL, /* ECCurve_SECG_PRIME_160R1 */
+ NULL, /* ECCurve_SECG_PRIME_160R2 */
+ NULL, /* ECCurve_SECG_PRIME_192K1 */
+ NULL, /* ECCurve_SECG_PRIME_224K1 */
+ NULL, /* ECCurve_SECG_PRIME_256K1 */
+ NULL, /* ECCurve_SECG_CHAR2_113R1 */
+ NULL, /* ECCurve_SECG_CHAR2_113R2 */
+ NULL, /* ECCurve_SECG_CHAR2_131R1 */
+ NULL, /* ECCurve_SECG_CHAR2_131R2 */
+ NULL, /* ECCurve_SECG_CHAR2_163R1 */
+ NULL, /* ECCurve_SECG_CHAR2_193R1 */
+ NULL, /* ECCurve_SECG_CHAR2_193R2 */
+ NULL, /* ECCurve_SECG_CHAR2_239K1 */
+ NULL, /* ECCurve_WTLS_1 */
+ NULL, /* ECCurve_WTLS_8 */
+ NULL, /* ECCurve_WTLS_9 */
+ &ecCurve_25519, /* ECCurve25519 */
+ NULL /* ECCurve_pastLastCurve */
+};
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h
new file mode 100644
index 0000000000..44adb8a1cd
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-exp.h
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_exp_h_
+#define __ecl_exp_h_
+
+/* Curve field type: ECField_GFp for curves over a prime field,
+ * ECField_GF2m for curves over a binary polynomial field. */
+typedef enum {
+ ECField_GFp,
+ ECField_GF2m
+} ECField;
+
+/* Hexadecimal encoding of curve parameters.
+ * NOTE(review): the char * members irr through order presumably hold hex
+ * strings for the values ECGroup keeps as mp_ints (irreducible, curve a
+ * and b, generator x and y, order); the code that parses this struct is
+ * not in this header, so confirm there. */
+struct ECCurveParamsStr {
+ char *text;
+ ECField field;
+ unsigned int size;
+ char *irr;
+ char *curvea;
+ char *curveb;
+ char *genx;
+ char *geny;
+ char *order;
+ int cofactor;
+ int security;
+ int pointSize;
+ unsigned int usage;
+};
+typedef struct ECCurveParamsStr ECCurveParams;
+
+/* Named curve parameters. The enumerators are used positionally:
+ * ecCurve_map in ecl-curve.h lists one entry per enumerator in this same
+ * order, so any insertion, removal or reordering here must be mirrored
+ * there. Curves known under several names get a single enumerator; the
+ * other names are #defined as aliases after the enum. */
+typedef enum {
+
+ ECCurve_noName = 0,
+
+ /* NIST prime curves */
+ ECCurve_NIST_P192, /* not supported */
+ ECCurve_NIST_P224, /* not supported */
+ ECCurve_NIST_P256,
+ ECCurve_NIST_P384,
+ ECCurve_NIST_P521,
+
+ /* NIST binary curves */
+ ECCurve_NIST_K163, /* not supported */
+ ECCurve_NIST_B163, /* not supported */
+ ECCurve_NIST_K233, /* not supported */
+ ECCurve_NIST_B233, /* not supported */
+ ECCurve_NIST_K283, /* not supported */
+ ECCurve_NIST_B283, /* not supported */
+ ECCurve_NIST_K409, /* not supported */
+ ECCurve_NIST_B409, /* not supported */
+ ECCurve_NIST_K571, /* not supported */
+ ECCurve_NIST_B571, /* not supported */
+
+ /* ANSI X9.62 prime curves */
+ /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */
+ ECCurve_X9_62_PRIME_192V2, /* not supported */
+ ECCurve_X9_62_PRIME_192V3, /* not supported */
+ ECCurve_X9_62_PRIME_239V1, /* not supported */
+ ECCurve_X9_62_PRIME_239V2, /* not supported */
+ ECCurve_X9_62_PRIME_239V3, /* not supported */
+ /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */
+
+ /* ANSI X9.62 binary curves */
+ ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */
+
+ /* SEC2 prime curves */
+ ECCurve_SECG_PRIME_112R1, /* not supported */
+ ECCurve_SECG_PRIME_112R2, /* not supported */
+ ECCurve_SECG_PRIME_128R1, /* not supported */
+ ECCurve_SECG_PRIME_128R2, /* not supported */
+ ECCurve_SECG_PRIME_160K1, /* not supported */
+ ECCurve_SECG_PRIME_160R1, /* not supported */
+ ECCurve_SECG_PRIME_160R2, /* not supported */
+ ECCurve_SECG_PRIME_192K1, /* not supported */
+ /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */
+ ECCurve_SECG_PRIME_224K1, /* not supported */
+ /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */
+ ECCurve_SECG_PRIME_256K1, /* not supported */
+ /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */
+ /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */
+ /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */
+
+ /* SEC2 binary curves */
+ ECCurve_SECG_CHAR2_113R1, /* not supported */
+ ECCurve_SECG_CHAR2_113R2, /* not supported */
+ ECCurve_SECG_CHAR2_131R1, /* not supported */
+ ECCurve_SECG_CHAR2_131R2, /* not supported */
+ /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */
+ ECCurve_SECG_CHAR2_163R1, /* not supported */
+ /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */
+ ECCurve_SECG_CHAR2_193R1, /* not supported */
+ ECCurve_SECG_CHAR2_193R2, /* not supported */
+ /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */
+ /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */
+ ECCurve_SECG_CHAR2_239K1, /* not supported */
+ /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */
+ /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */
+ /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */
+ /* ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */
+ /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */
+ /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */
+
+ /* WTLS curves */
+ ECCurve_WTLS_1, /* not supported */
+ /* there is no WTLS 2 curve */
+ /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */
+ /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */
+ /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */
+ /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */
+ /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */
+ ECCurve_WTLS_8, /* not supported */
+ ECCurve_WTLS_9, /* not supported */
+ /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */
+ /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */
+ /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */
+
+ ECCurve25519,
+
+ ECCurve_pastLastCurve
+} ECCurveName;
+
+/* Aliased named curves: each alternative standard name expands to the
+ * single ECCurveName enumerator that stands for the same curve, so an
+ * alias and its target compare equal and share one ecCurve_map slot. */
+
+#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */
+#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384
+#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521
+#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163 /* not supported */
+#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163 /* not supported */
+#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233 /* not supported */
+#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233 /* not supported */
+#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283 /* not supported */
+#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283 /* not supported */
+#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409 /* not supported */
+#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409 /* not supported */
+#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571 /* not supported */
+#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571 /* not supported */
+#define ECCurve_WTLS_3 ECCurve_NIST_K163 /* not supported */
+#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1 /* not supported */
+#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */
+#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1 /* not supported */
+#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1 /* not supported */
+#define ECCurve_WTLS_10 ECCurve_NIST_K233 /* not supported */
+#define ECCurve_WTLS_11 ECCurve_NIST_B233 /* not supported */
+#define ECCurve_WTLS_12 ECCurve_NIST_P224 /* not supported */
+
+#endif /* __ecl_exp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h
new file mode 100644
index 0000000000..c1e0e856b3
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-priv.h
@@ -0,0 +1,252 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_priv_h_
+#define __ecl_priv_h_
+
+#include "ecl.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "../blapii.h"
+
+/* ECL_MAX_FIELD_SIZE_DIGITS (defined below) is the maximum size of field
+ * element supported, counted in mp_digits: 80 bytes' worth of digits. */
+/* the following needs to go away... (ECL_SIXTY_FOUR_BIT is selected when
+ * either MP_USE_LONG_LONG_DIGIT or MP_USE_LONG_DIGIT is defined,
+ * ECL_THIRTY_TWO_BIT otherwise) */
+#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT)
+#define ECL_SIXTY_FOUR_BIT
+#else
+#define ECL_THIRTY_TWO_BIT
+#endif
+
+#define ECL_CURVE_DIGITS(curve_size_in_bits) \
+ (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8)) /* mp_digits needed for that many bits, rounded up */
+#define ECL_BITS (sizeof(mp_digit) * 8) /* bits per mp_digit, taking 8 bits per byte */
+#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit)) /* 80 bytes = 640 bits */
+
+/* Gets the i'th bit in the binary representation of a. If i >= length(a),
+ * then return 0. (The above behaviour differs from mpl_get_bit, which
+ * causes an error if i >= length(a).)
+ * The expansion is wrapped in parentheses so the conditional cannot be
+ * split apart by operators in the surrounding expression. Both a and i
+ * may be evaluated twice, so pass arguments without side effects. */
+#define MP_GET_BIT(a, i) \
+    (((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)))
+/* MP_ADD_CARRY(a1, a2, s, carry) stores a1 + a2 + carry in s and leaves
+ * the outgoing carry in carry. MP_SUB_BORROW(a1, a2, s, borrow) stores
+ * a1 - a2 - borrow in s and leaves the outgoing borrow in borrow.
+ * Two implementations follow: the first accumulates in an mp_word and is
+ * used unless MP_NO_MP_WORD or MP_NO_ADD_WORD is defined; the second uses
+ * only mp_digit arithmetic. Each expands to a bare { ... } block that
+ * assigns to s and to carry/borrow, so those arguments must be lvalues.
+ * ACCUM and CARRYOUT are defined outside this header. */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+#define MP_ADD_CARRY(a1, a2, s, carry) \
+ { \
+ mp_word w; \
+ w = ((mp_word)carry) + (a1) + (a2); \
+ s = ACCUM(w); \
+ carry = CARRYOUT(w); \
+ }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow) \
+ { \
+ mp_word w; \
+ w = ((mp_word)(a1)) - (a2)-borrow; \
+ s = ACCUM(w); \
+ borrow = (w >> MP_DIGIT_BIT) & 1; /* bit just above the digit */ \
+ }
+
+#else
+/* NOTE,
+ * carry and borrow are both read and written.
+ * a1 or a2 and s could be the same variable.
+ * don't trash those outputs until their respective inputs have
+ * been read. */
+#define MP_ADD_CARRY(a1, a2, s, carry) \
+ { \
+ mp_digit tmp, sum; \
+ tmp = (a1); \
+ sum = tmp + (a2); \
+ tmp = (sum < tmp); /* detect overflow */ \
+ s = sum += carry; \
+ carry = tmp + (sum < carry); /* plus overflow from adding carry */ \
+ }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow) \
+ { \
+ mp_digit tmp; \
+ tmp = (a1); \
+ s = tmp - (a2); \
+ tmp = (s > tmp); /* detect borrow */ \
+ if (borrow && !s--) \
+ tmp++; /* s was 0 before the decrement, so it wrapped */ \
+ borrow = tmp; \
+ }
+#endif
+/* Description of one finite field: the irreducible that defines it plus
+ * the table of functions that implement arithmetic on its elements.
+ * Instances come from the GFMethod_cons* constructors declared below and
+ * are released with GFMethod_free. */
+struct GFMethodStr;
+typedef struct GFMethodStr GFMethod;
+struct GFMethodStr {
+ /* Indicates whether the structure was constructed from dynamic memory
+ * or statically created. */
+ int constructed;
+ /* Irreducible that defines the field. For prime fields, this is the
+ * prime p. For binary polynomial fields, this is the bitstring
+ * representation of the irreducible polynomial. */
+ mp_int irr;
+ /* For prime fields, the value irr_arr[0] is the number of bits in the
+ * field. For binary polynomial fields, the irreducible polynomial
+ * f(t) is represented as an array of unsigned int[], where f(t) is
+ * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0]
+ * > p[1] > ... > p[4] = 0. (Here p[] denotes irr_arr[].) */
+ unsigned int irr_arr[5];
+ /* Field arithmetic methods. All methods (except field_enc and
+ * field_dec) are assumed to take field-encoded parameters and return
+ * field-encoded values. All methods (except field_enc and field_dec)
+ * are required to be implemented. Each writes its result to r and
+ * reports success or failure through the returned mp_err. */
+ mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ /* Extra storage for implementation-specific data. Any memory
+ * allocated to these extra fields will be cleared by extra_free. */
+ void *extra1;
+ void *extra2;
+ void (*extra_free)(GFMethod *meth);
+};
+
+/* Construct generic GFMethods for the prime field whose modulus is irr.
+ * A NULL return signals failure (ECGroup_consGFp reports it as MP_MEM).
+ * NOTE(review): the _mont variant presumably installs the Montgomery
+ * routines declared further down in this header - confirm in its
+ * implementation. */
+GFMethod *GFMethod_consGFp(const mp_int *irr);
+GFMethod *GFMethod_consGFp_mont(const mp_int *irr);
+
+/* Free the memory allocated (if any) to a GFMethod object. */
+void GFMethod_free(GFMethod *meth);
+/* An elliptic curve group: the field it is defined over, the curve
+ * coefficients, the base point with its order and cofactor, and the table
+ * of point-arithmetic functions. ECGroup_new allocates one of these. */
+struct ECGroupStr {
+ /* Indicates whether the structure was constructed from dynamic memory
+ * or statically created. */
+ int constructed;
+ /* Field definition and arithmetic. */
+ GFMethod *meth;
+ /* Textual representation of curve name, if any. */
+ char *text;
+ /* Curve parameters, field-encoded. */
+ mp_int curvea, curveb;
+ /* x and y coordinates of the base point, field-encoded. */
+ mp_int genx, geny;
+ /* Order and cofactor of the base point. */
+ mp_int order;
+ int cofactor;
+ /* Point arithmetic methods. All methods are assumed to take
+ * field-encoded parameters and return field-encoded values. All
+ * methods (except base_point_mul and points_mul) are required to be
+ * implemented. ECGroup_new sets base_point_mul, points_mul and
+ * validate_point to NULL. */
+ mp_err (*point_add)(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_sub)(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_mul)(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+ mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+ mp_err (*points_mul)(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group);
+ /* Extra storage for implementation-specific data. Any memory
+ * allocated to these extra fields will be cleared by extra_free. */
+ void *extra1;
+ void *extra2;
+ void (*extra_free)(ECGroup *group);
+};
+
+/* Wrapper functions for generic prime field arithmetic. Their signatures
+ * match the field_* function-pointer slots of GFMethod: operands a (and
+ * b) are read, the result is written to r, and an mp_err is returned. */
+mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+/* fixed length in-line adds. Count is in words, i.e. the numeric suffix
+ * of each name (3..6) is the operand length the routine is written for.
+ * The same applies to the ec_GFp_sub_N variants. */
+mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+/* Wrapper functions for generic binary polynomial field arithmetic. Same
+ * calling convention as the prime-field wrappers: result in r, status in
+ * the returned mp_err. No ec_GF2m_sub is declared in this header. */
+mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+/* Montgomery prime field arithmetic. ec_GFp_enc_mont / ec_GFp_dec_mont
+ * have the shape of GFMethod's field_enc / field_dec slots, and
+ * ec_GFp_extra_free_mont has the shape of its extra_free slot.
+ * NOTE(review): presumably enc/dec convert into and out of Montgomery
+ * form - confirm in the implementation. */
+mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+void ec_GFp_extra_free_mont(GFMethod *meth);
+
+/* point multiplication. Both functions have the signature of ECGroup's
+ * points_mul slot: two scalars k1 and k2, a point (px, py), and an output
+ * point (rx, ry).
+ * NOTE(review): presumably they compute k1 * G + k2 * (px, py) with G the
+ * group's base point - confirm in the implementation. */
+mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar. out is an
+ * array of signed chars that receives the digits, least significant
+ * first, and must have room for bitsize + 1 entries (entries past the
+ * last digit are set to zero); in is the original scalar, and w is the
+ * window size. Returns MP_OKAY on success, otherwise an mp_err error
+ * code.
+ * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000. */
+mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in,
+ int w);
+
+/* Optimized field arithmetic: one hook per field size, each taking the
+ * group to configure and the name of the curve it is configured for. */
+mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name);
+
+/* Optimized point multiplication: per-curve hooks with the same
+ * (group, name) signature as the field-arithmetic hooks above. */
+mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_secp384r1(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_secp521r1(ECGroup *group, ECCurveName name);
+/* Curve25519 multiplication on raw byte buffers; reports a SECStatus
+ * rather than an mp_err. NOTE(review): presumably q = s * p with s the
+ * scalar and p the input point - confirm in the implementation. */
+SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p);
+#endif /* __ecl_priv_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c
new file mode 100644
index 0000000000..e34a73c661
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.c
@@ -0,0 +1,329 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include "ecl-curve.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Create an empty ECGroup on the heap.
+ *
+ * All hook and data pointers are cleared and the digit pointer of every
+ * mp_int member is zeroed before mp_init() is attempted on any of them.
+ * If one of the mp_init() calls fails the partially built object is handed
+ * to ECGroup_free() and NULL is returned; NULL is also returned when
+ * malloc() fails. */
+ECGroup *
+ECGroup_new()
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = malloc(sizeof *grp);
+
+    if (grp == NULL) {
+        return NULL;
+    }
+
+    /* Bookkeeping, method table and optional hooks all start out empty. */
+    grp->constructed = MP_YES;
+    grp->meth = NULL;
+    grp->text = NULL;
+    grp->base_point_mul = NULL;
+    grp->points_mul = NULL;
+    grp->validate_point = NULL;
+    grp->extra1 = NULL;
+    grp->extra2 = NULL;
+    grp->extra_free = NULL;
+
+    /* Zero the digit pointers first so every mp_int is in a known state
+     * should one of the mp_init() calls below fail. */
+    MP_DIGITS(&grp->curvea) = 0;
+    MP_DIGITS(&grp->curveb) = 0;
+    MP_DIGITS(&grp->genx) = 0;
+    MP_DIGITS(&grp->geny) = 0;
+    MP_DIGITS(&grp->order) = 0;
+
+    MP_CHECKOK(mp_init(&grp->curvea));
+    MP_CHECKOK(mp_init(&grp->curveb));
+    MP_CHECKOK(mp_init(&grp->genx));
+    MP_CHECKOK(mp_init(&grp->geny));
+    MP_CHECKOK(mp_init(&grp->order));
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return grp;
+    }
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Build a generic prime-field ECGroup.
+ *
+ * irr is the field modulus handed to GFMethod_consGFp(); curvea, curveb,
+ * genx, geny and order are copied verbatim into the new group and
+ * cofactor is stored as given. Returns NULL if the group or its field
+ * method cannot be allocated or if any copy fails. */
+ECGroup *
+ECGroup_consGFp(const mp_int *irr, const mp_int *curvea,
+                const mp_int *curveb, const mp_int *genx,
+                const mp_int *geny, const mp_int *order, int cofactor)
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = ECGroup_new();
+
+    if (grp == NULL) {
+        return NULL;
+    }
+
+    grp->meth = GFMethod_consGFp(irr);
+    if (grp->meth == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Curve parameters are stored without any field encoding. */
+    MP_CHECKOK(mp_copy(curvea, &grp->curvea));
+    MP_CHECKOK(mp_copy(curveb, &grp->curveb));
+    MP_CHECKOK(mp_copy(genx, &grp->genx));
+    MP_CHECKOK(mp_copy(geny, &grp->geny));
+    MP_CHECKOK(mp_copy(order, &grp->order));
+    grp->cofactor = cofactor;
+
+    /* Generic point arithmetic; no dedicated base-point routine. */
+    grp->base_point_mul = NULL;
+    grp->point_add = &ec_GFp_pt_add_aff;
+    grp->point_sub = &ec_GFp_pt_sub_aff;
+    grp->point_dbl = &ec_GFp_pt_dbl_aff;
+    grp->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    grp->points_mul = &ec_GFp_pts_mul_jac;
+    grp->validate_point = &ec_GFp_validate_point;
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return grp;
+    }
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Build a prime-field ECGroup whose field method comes from
+ * GFMethod_consGFp_mont().
+ *
+ * curvea, curveb, genx and geny are passed through the method's
+ * field_enc hook before being stored; order is copied unchanged and
+ * cofactor is stored as given. Returns NULL if allocation, encoding or
+ * copying fails. */
+ECGroup *
+ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea,
+                     const mp_int *curveb, const mp_int *genx,
+                     const mp_int *geny, const mp_int *order, int cofactor)
+{
+    mp_err res = MP_OKAY;
+    ECGroup *grp = ECGroup_new();
+
+    if (grp == NULL) {
+        return NULL;
+    }
+
+    grp->meth = GFMethod_consGFp_mont(irr);
+    if (grp->meth == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Field elements are stored encoded; the group order is not. */
+    MP_CHECKOK(grp->meth->field_enc(curvea, &grp->curvea, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(curveb, &grp->curveb, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(genx, &grp->genx, grp->meth));
+    MP_CHECKOK(grp->meth->field_enc(geny, &grp->geny, grp->meth));
+    MP_CHECKOK(mp_copy(order, &grp->order));
+    grp->cofactor = cofactor;
+
+    /* Same generic point arithmetic as ECGroup_consGFp(). */
+    grp->base_point_mul = NULL;
+    grp->point_add = &ec_GFp_pt_add_aff;
+    grp->point_sub = &ec_GFp_pt_sub_aff;
+    grp->point_dbl = &ec_GFp_pt_dbl_aff;
+    grp->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    grp->points_mul = &ec_GFp_pts_mul_jac;
+    grp->validate_point = &ec_GFp_validate_point;
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return grp;
+    }
+    ECGroup_free(grp);
+    return NULL;
+}
+
+/* Construct an ECGroup from decoded curve parameters.
+ *
+ * Only ECField_GFp is handled; any other field yields NULL. The SECG
+ * 256-, 384- and 521-bit prime curves are built with ECGroup_consGFp()
+ * and then passed to their curve-specific ec_group_set_*() routines; all
+ * other names use ECGroup_consGFp_mont(). When text is non-NULL a
+ * strdup()'d copy is attached to the group. Returns NULL on any failure. */
+ECGroup *
+construct_ecgroup(const ECCurveName name, mp_int irr, mp_int curvea,
+                  mp_int curveb, mp_int genx, mp_int geny, mp_int order,
+                  int cofactor, ECField field, const char *text)
+{
+    ECGroup *group = NULL;
+    mp_err res = MP_OKAY;
+    /* determine number of bits; only used to detect an error result */
+    int bits = mpl_significant_bits(&irr) - 1;
+
+    if (bits < MP_OKAY) {
+        res = bits;
+        goto CLEANUP;
+    }
+
+    if (field != ECField_GFp) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    /* Step 1: pick the constructor for this curve. */
+    switch (name) {
+        case ECCurve_SECG_PRIME_256R1:
+        case ECCurve_SECG_PRIME_384R1:
+        case ECCurve_SECG_PRIME_521R1:
+            group = ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
+                                    &order, cofactor);
+            break;
+        default:
+            /* use generic arithmetic */
+            group = ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx,
+                                         &geny, &order, cofactor);
+            break;
+    }
+    if (group == NULL) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    /* Step 2: apply the curve-specific optimizations, if any. */
+    switch (name) {
+        case ECCurve_SECG_PRIME_256R1:
+            MP_CHECKOK(ec_group_set_gfp256(group, name));
+            MP_CHECKOK(ec_group_set_gfp256_32(group, name));
+            break;
+        case ECCurve_SECG_PRIME_384R1:
+            MP_CHECKOK(ec_group_set_secp384r1(group, name));
+            break;
+        case ECCurve_SECG_PRIME_521R1:
+            MP_CHECKOK(ec_group_set_gfp521(group, name));
+            MP_CHECKOK(ec_group_set_secp521r1(group, name));
+            break;
+        default:
+            break;
+    }
+
+    /* set name, if any */
+    if (text != NULL) {
+        group->text = strdup(text);
+        if (group->text == NULL) {
+            res = MP_MEM;
+        }
+    }
+
+CLEANUP:
+    if (group != NULL && res != MP_OKAY) {
+        ECGroup_free(group);
+        return NULL;
+    }
+    return group;
+}
+
+/* Decode the byte-encoded curve parameters in params and build the
+ * matching ECGroup.
+ *
+ * Each of irr, curvea, curveb, genx, geny and order is read as an
+ * unsigned big-endian octet string of params->scalarSize bytes and handed
+ * to construct_ecgroup(). The temporaries are cleared before returning.
+ * Returns NULL on failure. */
+ECGroup *
+ecgroup_fromName(const ECCurveName name,
+                 const ECCurveBytes *params)
+{
+    mp_int p, a, b, gx, gy, n;
+    ECGroup *group = NULL;
+    mp_err res = MP_OKAY;
+
+    /* Zero the digit pointers so the mp_clear() calls at CLEANUP see a
+     * defined value even if an mp_init() below never ran. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&b) = 0;
+    MP_DIGITS(&gx) = 0;
+    MP_DIGITS(&gy) = 0;
+    MP_DIGITS(&n) = 0;
+
+    MP_CHECKOK(mp_init(&p));
+    MP_CHECKOK(mp_init(&a));
+    MP_CHECKOK(mp_init(&b));
+    MP_CHECKOK(mp_init(&gx));
+    MP_CHECKOK(mp_init(&gy));
+    MP_CHECKOK(mp_init(&n));
+
+    MP_CHECKOK(mp_read_unsigned_octets(&p, params->irr, params->scalarSize));
+    MP_CHECKOK(mp_read_unsigned_octets(&a, params->curvea, params->scalarSize));
+    MP_CHECKOK(mp_read_unsigned_octets(&b, params->curveb, params->scalarSize));
+    MP_CHECKOK(mp_read_unsigned_octets(&gx, params->genx, params->scalarSize));
+    MP_CHECKOK(mp_read_unsigned_octets(&gy, params->geny, params->scalarSize));
+    MP_CHECKOK(mp_read_unsigned_octets(&n, params->order, params->scalarSize));
+
+    group = construct_ecgroup(name, p, a, b, gx, gy, n,
+                              params->cofactor, params->field, params->text);
+
+CLEANUP:
+    mp_clear(&p);
+    mp_clear(&a);
+    mp_clear(&b);
+    mp_clear(&gx);
+    mp_clear(&gy);
+    mp_clear(&n);
+    if (group != NULL && res != MP_OKAY) {
+        ECGroup_free(group);
+        return NULL;
+    }
+    return group;
+}
+
+/* Look up the ECCurveBytes entry for a curve name.
+ *
+ * Returns NULL when name lies outside the open interval
+ * (ECCurve_noName, ECCurve_pastLastCurve) or when ecCurve_map has no
+ * entry for it. */
+const ECCurveBytes *
+ec_GetNamedCurveParams(const ECCurveName name)
+{
+    /* Range-check before indexing the table. */
+    if (name <= ECCurve_noName || name >= ECCurve_pastLastCurve) {
+        return NULL;
+    }
+    /* A missing entry is itself NULL, so it can be returned directly. */
+    return ecCurve_map[name];
+}
+
+/* Construct the ECGroup for a named curve.
+ *
+ * Returns NULL when no parameters are known for name or when the group
+ * cannot be built. */
+ECGroup *
+ECGroup_fromName(const ECCurveName name)
+{
+    const ECCurveBytes *params;
+
+    /* This doesn't work with Curve25519 but it's not necessary to. */
+    PORT_Assert(name != ECCurve25519);
+
+    params = ec_GetNamedCurveParams(name);
+
+    /* construct actual group, provided the curve is known */
+    return (params == NULL) ? NULL : ecgroup_fromName(name, params);
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ *
+ * The work is delegated to the group's validate_point hook, whose result
+ * is returned unchanged. MP_BADARG is returned when group is NULL or has
+ * no validate_point hook installed (ECGroup_new() leaves the hook NULL
+ * until a constructor sets it). */
+mp_err
+ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py)
+{
+    if (group == NULL || group->validate_point == NULL) {
+        return MP_BADARG;
+    }
+
+    /* 1: Verify that publicValue is not the point at infinity */
+    /* 2: Verify that the coordinates of publicValue are elements
+     * of the field.
+     */
+    /* 3: Verify that publicValue is on the curve. */
+    /* 4: Verify that the order of the curve times the publicValue
+     * is the point at infinity.
+     */
+    return group->validate_point(px, py, group);
+}
+
+/* Release an ECGroup and everything it owns. NULL is accepted and
+ * ignored.
+ *
+ * The field method is always passed to GFMethod_free(). The remaining
+ * members and the group itself are released only when the group is not
+ * marked constructed == MP_NO. */
+void
+ECGroup_free(ECGroup *group)
+{
+    if (group != NULL) {
+        GFMethod_free(group->meth);
+
+        if (group->constructed != MP_NO) {
+            mp_clear(&group->curvea);
+            mp_clear(&group->curveb);
+            mp_clear(&group->genx);
+            mp_clear(&group->geny);
+            mp_clear(&group->order);
+
+            /* free(NULL) is a no-op, so no guard is needed for text. */
+            free(group->text);
+
+            /* Let the curve-specific code release its extra data. */
+            if (group->extra_free != NULL) {
+                group->extra_free(group);
+            }
+            free(group);
+        }
+    }
+}
diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h
new file mode 100644
index 0000000000..3783f75782
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.h
@@ -0,0 +1,64 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Although this is not an exported header file, code which uses elliptic
+ * curve point operations will need to include it. */
+
+#ifndef __ecl_h_
+#define __ecl_h_
+
+#include "blapi.h"
+#include "ecl-exp.h"
+#include "mpi.h"
+#include "eclt.h"
+
+struct ECGroupStr;
+typedef struct ECGroupStr ECGroup;
+
+/* Construct ECGroup from named parameters. */
+ECGroup *ECGroup_fromName(const ECCurveName name);
+
+/* Free an allocated ECGroup. */
+void ECGroup_free(ECGroup *group);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x,
+ * y). If px or py is NULL, then P is assumed to be the generator (base
+ * point) of the group of points on the elliptic curve. Input and output
+ * values are assumed to be NOT field-encoded. k and group must not be
+ * NULL. */
+mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+ const mp_int *py, mp_int *qx, mp_int *qy);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Input and output values are assumed to
+ * be NOT field-encoded. */
+mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1,
+ const mp_int *k2, const mp_int *px, const mp_int *py,
+ mp_int *qx, mp_int *qy);
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ * Returns MP_YES if the public key is valid, MP_NO if the public key
+ * is invalid, or an error code if the validation could not be
+ * performed. The check itself is performed by the validate_point hook
+ * stored in the group. */
+mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py);
+
+SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P);
+SECStatus ec_Curve25519_pt_validate(const SECItem *px);
+SECStatus ec_Curve25519_scalar_validate(const SECItem *scalar);
+
+SECStatus ec_secp256r1_pt_mul(SECItem *X, SECItem *k, SECItem *P);
+SECStatus ec_secp256r1_pt_validate(const SECItem *px);
+SECStatus ec_secp256r1_scalar_validate(const SECItem *scalar);
+
+SECStatus ec_secp256r1_sign_digest(ECPrivateKey *key, SECItem *signature,
+ const SECItem *digest, const unsigned char *kb,
+ const unsigned int kblen);
+SECStatus ec_secp256r1_verify_digest(ECPublicKey *key, const SECItem *signature,
+ const SECItem *digest);
+
+SECStatus ec_secp384r1_scalar_validate(const SECItem *scalar);
+
+SECStatus ec_secp521r1_scalar_validate(const SECItem *scalar);
+
+#endif /* __ecl_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c
new file mode 100644
index 0000000000..81b0077055
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_gf.c
@@ -0,0 +1,958 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mp_gf2m.h"
+#include "ecl-priv.h"
+#include "mpi-priv.h"
+#include <stdlib.h>
+
+/* Create an empty GFMethod on the heap.
+ *
+ * The object is marked constructed, its extra_free hook is cleared and
+ * its irr member is initialised with mp_init(). Returns NULL when
+ * malloc() or mp_init() fails. */
+GFMethod *
+GFMethod_new()
+{
+    mp_err res = MP_OKAY;
+    GFMethod *gf = malloc(sizeof *gf);
+
+    if (gf == NULL) {
+        return NULL;
+    }
+
+    gf->constructed = MP_YES;
+    gf->extra_free = NULL;
+    /* Zero the digit pointer before mp_init() so irr is in a known state
+     * if initialisation fails. */
+    MP_DIGITS(&gf->irr) = 0;
+    MP_CHECKOK(mp_init(&gf->irr));
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return gf;
+    }
+    GFMethod_free(gf);
+    return NULL;
+}
+
+/* Build a GFMethod for arithmetic modulo the prime irr.
+ *
+ * irr is copied into the method and irr_arr[0] receives its bit length
+ * (the remaining irr_arr slots are zeroed). Addition and subtraction use
+ * the fixed-width routines when irr occupies 3 to 6 digits and the
+ * generic ones otherwise. No encode/decode hooks are installed. Returns
+ * NULL on allocation or copy failure. */
+GFMethod *
+GFMethod_consGFp(const mp_int *irr)
+{
+    mp_err res = MP_OKAY;
+    GFMethod *gf = GFMethod_new();
+
+    if (gf == NULL) {
+        return NULL;
+    }
+
+    MP_CHECKOK(mp_copy(irr, &gf->irr));
+    gf->irr_arr[0] = mpl_significant_bits(irr);
+    gf->irr_arr[1] = 0;
+    gf->irr_arr[2] = 0;
+    gf->irr_arr[3] = 0;
+    gf->irr_arr[4] = 0;
+
+    /* Start from the generic add/sub and override for the digit counts
+     * that have an unrolled implementation. */
+    gf->field_add = &ec_GFp_add;
+    gf->field_sub = &ec_GFp_sub;
+    switch (MP_USED(&gf->irr)) {
+        /* maybe we need 1 and 2 words here as well?*/
+        case 3:
+            gf->field_add = &ec_GFp_add_3;
+            gf->field_sub = &ec_GFp_sub_3;
+            break;
+        case 4:
+            gf->field_add = &ec_GFp_add_4;
+            gf->field_sub = &ec_GFp_sub_4;
+            break;
+        case 5:
+            gf->field_add = &ec_GFp_add_5;
+            gf->field_sub = &ec_GFp_sub_5;
+            break;
+        case 6:
+            gf->field_add = &ec_GFp_add_6;
+            gf->field_sub = &ec_GFp_sub_6;
+            break;
+        default:
+            break;
+    }
+
+    gf->field_neg = &ec_GFp_neg;
+    gf->field_mod = &ec_GFp_mod;
+    gf->field_mul = &ec_GFp_mul;
+    gf->field_sqr = &ec_GFp_sqr;
+    gf->field_div = &ec_GFp_div;
+    gf->field_enc = NULL;
+    gf->field_dec = NULL;
+    gf->extra1 = NULL;
+    gf->extra2 = NULL;
+    gf->extra_free = NULL;
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return gf;
+    }
+    GFMethod_free(gf);
+    return NULL;
+}
+
+/* Release a GFMethod. NULL and objects marked constructed == MP_NO are
+ * left untouched. */
+void
+GFMethod_free(GFMethod *meth)
+{
+    if (meth == NULL || meth->constructed == MP_NO) {
+        return;
+    }
+
+    mp_clear(&meth->irr);
+    /* Give the method-specific code a chance to drop its extra data
+     * before the object itself goes away. */
+    if (meth->extra_free != NULL) {
+        meth->extra_free(meth);
+    }
+    free(meth);
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Add two field elements: r = a + b (mod p), p = meth->irr.
+ * PRE: 0 <= a, b < p. POST: 0 <= r < p.
+ * Returns the error from mp_add() or, when a reduction is needed, the
+ * result of mp_sub(). */
+mp_err
+ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = mp_add(a, b, r);
+
+    /* The sum is below 2p, so a single subtraction of p is enough. */
+    if (res == MP_OKAY && mp_cmp(r, &meth->irr) >= 0) {
+        res = mp_sub(r, &meth->irr, r);
+    }
+    return res;
+}
+
+/* Negate a field element: r = -a (mod p), p = meth->irr.
+ * PRE: 0 <= a < p. POST: 0 <= r < p. */
+mp_err
+ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    /* Non-zero a: the negative is simply p - a. */
+    if (mp_cmp_z(a) != 0) {
+        return mp_sub(&meth->irr, a, r);
+    }
+
+    /* Zero is its own negative. */
+    mp_zero(r);
+    return MP_OKAY;
+}
+
+/* Subtracts two field elements: r = a - b (mod p), p = meth->irr.
+ * PRE: 0 <= a, b < p. POST: 0 <= r < p.
+ *
+ * Returns MP_OKAY on success or the first error reported by the
+ * underlying mp_* call. An error from the initial mp_sub() other than
+ * MP_RANGE is returned immediately, so it can no longer be overwritten
+ * by a later successful step. */
+mp_err
+ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    res = mp_sub(a, b, r);
+    if (res == MP_RANGE) {
+        /* a < b for an unsigned subtract: compute b - a and negate it
+         * in the field instead. */
+        MP_CHECKOK(mp_sub(b, a, r));
+        if (mp_cmp_z(r) < 0) {
+            MP_CHECKOK(mp_add(r, &meth->irr, r));
+        }
+        MP_CHECKOK(ec_GFp_neg(r, r, meth));
+    } else if (res < MP_OKAY) {
+        /* r is not meaningful after a failed subtraction. */
+        goto CLEANUP;
+    }
+
+    /* A negative difference is brought back into [0, p) by adding p. */
+    if (mp_cmp_z(r) < 0) {
+        MP_CHECKOK(mp_add(r, &meth->irr, r));
+    }
+CLEANUP:
+    return res;
+}
+/*
+ * Inline adds for small curve lengths.
+ */
+/* 3 words: r = a + b (mod meth->irr), unrolled for a three-digit modulus.
+ *
+ * Digits of a and b above MP_USED() are taken as zero. r is padded to
+ * three digits, written, reduced by at most one subtraction of meth->irr
+ * and clamped. Returns MP_OKAY, or the s_mp_pad() error.
+ *
+ * The inline-asm outputs are marked early-clobber ("=&r"): the carry
+ * register is zeroed, and the low result words are written, before the
+ * remaining inputs have been read, so none of them may share a register
+ * with an input. */
+mp_err
+ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit carry;
+
+    /* Load a into a0..a2 and b into r0..r2. */
+    switch (MP_USED(a)) {
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+            break;
+        default:
+            break;
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+            break;
+        default:
+            break;
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+#else
+    __asm__(
+        "xorq %3,%3 \n\t"
+        "addq %4,%0 \n\t"
+        "adcq %5,%1 \n\t"
+        "adcq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a2 = MP_DIGIT(&meth->irr, 2);
+    if (carry || r2 > a2 ||
+        ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+#else
+        __asm__(
+            "subq %3,%0 \n\t"
+            "sbbq %4,%1 \n\t"
+            "sbbq %5,%2 \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2)
+            : "r"(a0), "r"(a1), "r"(a2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words: r = a + b (mod meth->irr), unrolled for a four-digit modulus.
+ *
+ * Digits of a and b above MP_USED() are taken as zero. r is padded to
+ * four digits, written, reduced by at most one subtraction of meth->irr
+ * and clamped. Returns MP_OKAY, or the s_mp_pad() error.
+ *
+ * The inline-asm outputs are marked early-clobber ("=&r"): the carry
+ * register is zeroed, and the low result words are written, before the
+ * remaining inputs have been read, so none of them may share a register
+ * with an input. */
+mp_err
+ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit carry;
+
+    /* Load a into a0..a3 and b into r0..r3. */
+    switch (MP_USED(a)) {
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        /* fall through */
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+            break;
+        default:
+            break;
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        /* fall through */
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+            break;
+        default:
+            break;
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+#else
+    __asm__(
+        "xorq %4,%4 \n\t"
+        "addq %5,%0 \n\t"
+        "adcq %6,%1 \n\t"
+        "adcq %7,%2 \n\t"
+        "adcq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3), "=&r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a3 = MP_DIGIT(&meth->irr, 3);
+    if (carry || r3 > a3 ||
+        ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+#else
+        __asm__(
+            "subq %4,%0 \n\t"
+            "sbbq %5,%1 \n\t"
+            "sbbq %6,%2 \n\t"
+            "sbbq %7,%3 \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3)
+            : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words: r = a + b (mod meth->irr), unrolled for a five-digit modulus.
+ * Digits of a and b above MP_USED() are taken as zero; when MP_USED() is
+ * outside 1..5 no digit of that operand is loaded. r is padded to five
+ * digits, written, reduced by at most one subtraction of meth->irr and
+ * passed to s_mp_clamp(). res carries the s_mp_pad() status
+ * (presumably MP_OKAY on success - MP_CHECKOK is defined elsewhere). */
+mp_err
+ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+ mp_digit carry;
+
+ switch (MP_USED(a)) { /* every case falls through to the ones below */
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ case 4:
+ a3 = MP_DIGIT(a, 3);
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ switch (MP_USED(b)) { /* b is loaded straight into the result words */
+ case 5:
+ r4 = MP_DIGIT(b, 4);
+ case 4:
+ r3 = MP_DIGIT(b, 3);
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+ MP_ADD_CARRY(a3, r3, r3, carry);
+ MP_ADD_CARRY(a4, r4, r4, carry);
+
+ MP_CHECKOK(s_mp_pad(r, 5));
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 5;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field). The a* locals are
+ * reused below to hold the digits of meth->irr. */
+ a4 = MP_DIGIT(&meth->irr, 4);
+ if (carry || r4 > a4 ||
+ ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a3 = MP_DIGIT(&meth->irr, 3);
+ a2 = MP_DIGIT(&meth->irr, 2);
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+ carry = 0; /* reused as the borrow for the subtraction */
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+ MP_SUB_BORROW(r3, a3, r3, carry);
+ MP_SUB_BORROW(r4, a4, r4, carry);
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 6 words: r = a + b (mod meth->irr), unrolled for a six-digit modulus.
+ * Digits of a and b above MP_USED() are taken as zero; when MP_USED() is
+ * outside 1..6 no digit of that operand is loaded. r is padded to six
+ * digits, written, reduced by at most one subtraction of meth->irr and
+ * passed to s_mp_clamp(). res carries the s_mp_pad() status
+ * (presumably MP_OKAY on success - MP_CHECKOK is defined elsewhere). */
+mp_err
+ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+ mp_digit carry;
+
+ switch (MP_USED(a)) { /* every case falls through to the ones below */
+ case 6:
+ a5 = MP_DIGIT(a, 5);
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ case 4:
+ a3 = MP_DIGIT(a, 3);
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ switch (MP_USED(b)) { /* b is loaded straight into the result words */
+ case 6:
+ r5 = MP_DIGIT(b, 5);
+ case 5:
+ r4 = MP_DIGIT(b, 4);
+ case 4:
+ r3 = MP_DIGIT(b, 3);
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+ MP_ADD_CARRY(a3, r3, r3, carry);
+ MP_ADD_CARRY(a4, r4, r4, carry);
+ MP_ADD_CARRY(a5, r5, r5, carry);
+
+ MP_CHECKOK(s_mp_pad(r, 6));
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 6;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field). The a* locals are
+ * reused below to hold the digits of meth->irr. */
+ a5 = MP_DIGIT(&meth->irr, 5);
+ if (carry || r5 > a5 ||
+ ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a4 = MP_DIGIT(&meth->irr, 4);
+ a3 = MP_DIGIT(&meth->irr, 3);
+ a2 = MP_DIGIT(&meth->irr, 2);
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+ carry = 0; /* reused as the borrow for the subtraction */
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+ MP_SUB_BORROW(r3, a3, r3, carry);
+ MP_SUB_BORROW(r4, a4, r4, carry);
+ MP_SUB_BORROW(r5, a5, r5, carry);
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/*
+ * The following subtraction functions do in-line subtractions based
+ * on our curve size.
+ *
+ * ... 3 words: r = a - b (mod meth->irr) for a three-digit modulus.
+ *
+ * Digits of a and b above MP_USED() are taken as zero. If the raw
+ * subtraction borrows, meth->irr is added back once. r is then padded to
+ * three digits, written and clamped. Returns MP_OKAY, or the s_mp_pad()
+ * error.
+ *
+ * The inline-asm outputs are marked early-clobber ("=&r"): the borrow
+ * register is zeroed, and the low result words are written, before the
+ * remaining inputs have been read, so none of them may share a register
+ * with an input.
+ */
+mp_err
+ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit borrow;
+
+    /* Load a into r0..r2 and b into b0..b2. */
+    switch (MP_USED(a)) {
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+            break;
+        default:
+            break;
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+            break;
+        default:
+            break;
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+#else
+    __asm__(
+        "xorq %3,%3 \n\t"
+        "subq %4,%0 \n\t"
+        "sbbq %5,%1 \n\t"
+        "sbbq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+#else
+        __asm__(
+            "addq %3,%0 \n\t"
+            "adcq %4,%1 \n\t"
+            "adcq %5,%2 \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2)
+            : "r"(b0), "r"(b1), "r"(b2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+    }
+
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? Kept from the original; it only ever pads r a
+     * little further than the three digits required below. */
+    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words: r = a - b (mod meth->irr) for a four-digit modulus.
+ *
+ * Digits of a and b above MP_USED() are taken as zero. If the raw
+ * subtraction borrows, meth->irr is added back once. r is then padded to
+ * four digits, written and clamped. Returns MP_OKAY, or the s_mp_pad()
+ * error.
+ *
+ * The inline-asm outputs are marked early-clobber ("=&r"): the borrow
+ * register is zeroed, and the low result words are written, before the
+ * remaining inputs have been read, so none of them may share a register
+ * with an input. */
+mp_err
+ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit borrow;
+
+    /* Load a into r0..r3 and b into b0..b3. */
+    switch (MP_USED(a)) {
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        /* fall through */
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+            break;
+        default:
+            break;
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        /* fall through */
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+            break;
+        default:
+            break;
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+#else
+    __asm__(
+        "xorq %4,%4 \n\t"
+        "subq %5,%0 \n\t"
+        "sbbq %6,%1 \n\t"
+        "sbbq %7,%2 \n\t"
+        "sbbq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3), "=&r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+#else
+        __asm__(
+            "addq %4,%0 \n\t"
+            "adcq %5,%1 \n\t"
+            "adcq %6,%2 \n\t"
+            "adcq %7,%3 \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3)
+            : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+    }
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? Kept from the original; the extra pad requests
+     * the same four digits as the one below. NOTE(review): unlike the
+     * 3-word variant this test skips r2 - confirm whether intended. */
+    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words: r = a - b (mod meth->irr) for a five-digit modulus.
+ * Digits of a and b above MP_USED() are taken as zero; when MP_USED() is
+ * outside 1..5 no digit of that operand is loaded. If the raw subtraction
+ * borrows, meth->irr is added back once. r is then padded to five digits,
+ * written and passed to s_mp_clamp(). res carries the s_mp_pad() status
+ * (presumably MP_OKAY on success - MP_CHECKOK is defined elsewhere). */
+mp_err
+ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+ mp_digit borrow;
+
+ switch (MP_USED(a)) { /* a is loaded straight into the result words */
+ case 5:
+ r4 = MP_DIGIT(a, 4);
+ case 4:
+ r3 = MP_DIGIT(a, 3);
+ case 3:
+ r2 = MP_DIGIT(a, 2);
+ case 2:
+ r1 = MP_DIGIT(a, 1);
+ case 1:
+ r0 = MP_DIGIT(a, 0);
+ }
+ switch (MP_USED(b)) { /* every case falls through to the ones below */
+ case 5:
+ b4 = MP_DIGIT(b, 4);
+ case 4:
+ b3 = MP_DIGIT(b, 3);
+ case 3:
+ b2 = MP_DIGIT(b, 2);
+ case 2:
+ b1 = MP_DIGIT(b, 1);
+ case 1:
+ b0 = MP_DIGIT(b, 0);
+ }
+
+ borrow = 0;
+ MP_SUB_BORROW(r0, b0, r0, borrow);
+ MP_SUB_BORROW(r1, b1, r1, borrow);
+ MP_SUB_BORROW(r2, b2, r2, borrow);
+ MP_SUB_BORROW(r3, b3, r3, borrow);
+ MP_SUB_BORROW(r4, b4, r4, borrow);
+
+ /* Do quick 'add' if we've gone under 0
+ * (subtract the 2's complement of the curve field). The b* locals are
+ * reused below to hold the digits of meth->irr. */
+ if (borrow) {
+ b4 = MP_DIGIT(&meth->irr, 4);
+ b3 = MP_DIGIT(&meth->irr, 3);
+ b2 = MP_DIGIT(&meth->irr, 2);
+ b1 = MP_DIGIT(&meth->irr, 1);
+ b0 = MP_DIGIT(&meth->irr, 0);
+ borrow = 0; /* reused as the carry for the addition */
+ MP_ADD_CARRY(b0, r0, r0, borrow);
+ MP_ADD_CARRY(b1, r1, r1, borrow);
+ MP_ADD_CARRY(b2, r2, r2, borrow);
+ MP_ADD_CARRY(b3, r3, r3, borrow);
+ MP_ADD_CARRY(b4, r4, r4, borrow);
+ }
+ MP_CHECKOK(s_mp_pad(r, 5));
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 5;
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 6 words: r = a - b (mod meth->irr) for a six-digit modulus.
+ * Digits of a and b above MP_USED() are taken as zero; when MP_USED() is
+ * outside 1..6 no digit of that operand is loaded. If the raw subtraction
+ * borrows, meth->irr is added back once. r is then padded to six digits,
+ * written and passed to s_mp_clamp(). res carries the s_mp_pad() status
+ * (presumably MP_OKAY on success - MP_CHECKOK is defined elsewhere). */
+mp_err
+ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+ mp_digit borrow;
+
+ switch (MP_USED(a)) { /* a is loaded straight into the result words */
+ case 6:
+ r5 = MP_DIGIT(a, 5);
+ case 5:
+ r4 = MP_DIGIT(a, 4);
+ case 4:
+ r3 = MP_DIGIT(a, 3);
+ case 3:
+ r2 = MP_DIGIT(a, 2);
+ case 2:
+ r1 = MP_DIGIT(a, 1);
+ case 1:
+ r0 = MP_DIGIT(a, 0);
+ }
+ switch (MP_USED(b)) { /* every case falls through to the ones below */
+ case 6:
+ b5 = MP_DIGIT(b, 5);
+ case 5:
+ b4 = MP_DIGIT(b, 4);
+ case 4:
+ b3 = MP_DIGIT(b, 3);
+ case 3:
+ b2 = MP_DIGIT(b, 2);
+ case 2:
+ b1 = MP_DIGIT(b, 1);
+ case 1:
+ b0 = MP_DIGIT(b, 0);
+ }
+
+ borrow = 0;
+ MP_SUB_BORROW(r0, b0, r0, borrow);
+ MP_SUB_BORROW(r1, b1, r1, borrow);
+ MP_SUB_BORROW(r2, b2, r2, borrow);
+ MP_SUB_BORROW(r3, b3, r3, borrow);
+ MP_SUB_BORROW(r4, b4, r4, borrow);
+ MP_SUB_BORROW(r5, b5, r5, borrow);
+
+ /* Do quick 'add' if we've gone under 0
+ * (subtract the 2's complement of the curve field). The b* locals are
+ * reused below to hold the digits of meth->irr. */
+ if (borrow) {
+ b5 = MP_DIGIT(&meth->irr, 5);
+ b4 = MP_DIGIT(&meth->irr, 4);
+ b3 = MP_DIGIT(&meth->irr, 3);
+ b2 = MP_DIGIT(&meth->irr, 2);
+ b1 = MP_DIGIT(&meth->irr, 1);
+ b0 = MP_DIGIT(&meth->irr, 0);
+ borrow = 0; /* reused as the carry for the addition */
+ MP_ADD_CARRY(b0, r0, r0, borrow);
+ MP_ADD_CARRY(b1, r1, r1, borrow);
+ MP_ADD_CARRY(b2, r2, r2, borrow);
+ MP_ADD_CARRY(b3, r3, r3, borrow);
+ MP_ADD_CARRY(b4, r4, r4, borrow);
+ MP_ADD_CARRY(b5, r5, r5, borrow);
+ }
+
+ MP_CHECKOK(s_mp_pad(r, 6));
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 6;
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* Reduces an integer to a field element: r = a mod meth->irr.
+ * Thin wrapper that returns the result of mp_mod() unchanged. */
+mp_err
+ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ return mp_mod(a, &meth->irr, r);
+}
+
+/* Multiplies two field elements: r = a * b mod meth->irr.
+ * Thin wrapper that returns the result of mp_mulmod() unchanged. */
+mp_err
+ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ return mp_mulmod(a, b, &meth->irr, r);
+}
+
+/* Squares a field element: r = a * a mod meth->irr.
+ * Thin wrapper that returns the result of mp_sqrmod() unchanged. */
+mp_err
+ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ return mp_sqrmod(a, &meth->irr, r);
+}
+
+/* Divides two field elements: r = a / b (mod meth->irr). If a is NULL,
+ * then returns the inverse of b.
+ *
+ * Returns MP_OKAY on success or the first error reported by mp_init(),
+ * mp_invmod() or mp_mulmod(). */
+mp_err
+ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+
+    /* Give t a defined digit pointer so the mp_clear() at CLEANUP is
+     * well-defined even if mp_init() fails. */
+    MP_DIGITS(&t) = 0;
+
+    /* MPI doesn't support divmod, so we implement it using invmod and
+     * mulmod. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+    MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
+
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
+
+/* Wrapper functions for generic binary polynomial field arithmetic. */
+
+/* Adds two field elements. Thin wrapper that returns the result of
+ * mp_badd(a, b, r) unchanged; meth is not used. */
+mp_err
+ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ return mp_badd(a, b, r);
+}
+
+/* Negates a field element. In a binary polynomial field every element is
+ * its own negative, so this only has to copy a into r (and does nothing
+ * when they are the same object). meth is not used. */
+mp_err
+ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    return (a == r) ? MP_OKAY : mp_copy(a, r);
+}
+
+/* Reduces a binary polynomial to a field element. Thin wrapper that
+ * returns the result of mp_bmod() with meth->irr_arr as the modulus
+ * description. */
+mp_err
+ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ return mp_bmod(a, meth->irr_arr, r);
+}
+
+/* Multiplies two field elements. Thin wrapper that returns the result of
+ * mp_bmulmod() with meth->irr_arr as the modulus description. */
+mp_err
+ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ return mp_bmulmod(a, b, meth->irr_arr, r);
+}
+
+/* Squares a field element. Thin wrapper that returns the result of
+ * mp_bsqrmod() with meth->irr_arr as the modulus description. */
+mp_err
+ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ return mp_bsqrmod(a, meth->irr_arr, r);
+}
+
+/* Divides two field elements: r = a / b in the binary field described by
+ * meth. If a is NULL, then returns the inverse of b.
+ *
+ * Returns MP_OKAY on success or the first error reported by mp_init(),
+ * mp_set_int() or mp_bdivmod(). */
+mp_err
+ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a != NULL) {
+        return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
+    }
+
+    /* Give t a defined digit pointer so the mp_clear() at CLEANUP is
+     * well-defined even if mp_init() fails. */
+    MP_DIGITS(&t) = 0;
+
+    /* The GF(2^m) portion of MPI doesn't support invmod, so we
+     * compute 1/b. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_set_int(&t, 1));
+    MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
+
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c
new file mode 100644
index 0000000000..ffbcbf1d98
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_mult.c
@@ -0,0 +1,305 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include <stdlib.h>
+
+/* Scalar-point multiplication: R(rx, ry) = k * P(px, py). When either px
+ * or py is NULL the group's generator is used as P. Inputs and outputs
+ * are NOT field-encoded; encoding and decoding are done here whenever the
+ * group's field method provides field_enc / field_dec.
+ *
+ * Returns MP_BADARG if k or group is NULL, otherwise the mp_err of the
+ * first failing step (MP_OKAY on success). */
+mp_err
+ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+            const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int scalar;
+
+    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
+    MP_DIGITS(&scalar) = 0;
+
+    if (mp_cmp(k, &group->order) <= 0) {
+        /* k does not exceed the group order: borrow its digits through a
+         * shallow, non-owning view that is forced to be non-negative. */
+        MP_DIGITS(&scalar) = MP_DIGITS(k);
+        MP_ALLOC(&scalar) = MP_ALLOC(k);
+        MP_USED(&scalar) = MP_USED(k);
+        MP_SIGN(&scalar) = MP_ZPOS;
+    } else {
+        /* k exceeds the group order: work on an owned copy of k mod order. */
+        MP_CHECKOK(mp_init(&scalar));
+        MP_CHECKOK(mp_mod(k, &group->order, &scalar));
+    }
+
+    if ((px != NULL) && (py != NULL)) {
+        if (group->meth->field_enc) {
+            /* Encode P into the result slots, then multiply in place. */
+            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
+            MP_CHECKOK(group->point_mul(&scalar, rx, ry, rx, ry, group));
+        } else {
+            MP_CHECKOK(group->point_mul(&scalar, px, py, rx, ry, group));
+        }
+    } else if (group->base_point_mul) {
+        MP_CHECKOK(group->base_point_mul(&scalar, rx, ry, group));
+    } else {
+        MP_CHECKOK(group->point_mul(&scalar, &group->genx, &group->geny,
+                                    rx, ry, group));
+    }
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    /* Only release the digits when they are not the ones borrowed from k. */
+    if (MP_DIGITS(&scalar) != MP_DIGITS(k)) {
+        mp_clear(&scalar);
+    }
+    return res;
+}
+
+/* Double scalar multiplication: R(rx, ry) = k1 * G + k2 * P(px, py), with
+ * G the generator of the group. Either k1 may be NULL, or the triple
+ * { k2, px, py } may be incomplete, in which case the work is handed to
+ * ECPoint_mul for the single remaining product. Inputs and outputs are
+ * NOT field-encoded.
+ *
+ * This basic variant computes the two products separately and adds them.
+ * Returns MP_BADARG when group is NULL or when neither product is fully
+ * specified. */
+mp_err
+ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                 const mp_int *py, mp_int *rx, mp_int *ry,
+                 const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int gx, gy;
+    int have_point;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    have_point = (k2 != NULL) && (px != NULL) && (py != NULL);
+    ARGCHK((k1 != NULL) || have_point, MP_BADARG);
+
+    /* With one product missing, a single ECPoint_mul call is enough. */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    }
+    if (!have_point) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    MP_DIGITS(&gx) = 0;
+    MP_DIGITS(&gy) = 0;
+    MP_CHECKOK(mp_init(&gx));
+    MP_CHECKOK(mp_init(&gy));
+
+    /* (gx, gy) = k1 * G and (rx, ry) = k2 * P */
+    MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &gx, &gy));
+    MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry));
+
+    /* ECPoint_mul hands back decoded values; re-encode before adding. */
+    if (group->meth->field_enc) {
+        MP_CHECKOK(group->meth->field_enc(&gx, &gx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(&gy, &gy, group->meth));
+        MP_CHECKOK(group->meth->field_enc(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(ry, ry, group->meth));
+    }
+
+    MP_CHECKOK(group->point_add(&gx, &gy, rx, ry, rx, ry, group));
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&gx);
+    mp_clear(&gy);
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded. Uses
+ * algorithm 15 (simultaneous multiple point multiplication) from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST
+ * Elliptic Curves over Prime Fields.
+ *
+ * Both scalars are scanned two bits at a time, from the most significant
+ * window down. A 4x4 table of precomputed points i*A + j*B supplies the
+ * addend for every window, where A is the point paired with the longer
+ * scalar and B the point paired with the shorter one.
+ *
+ * Returns MP_BADARG if group is NULL or if neither product is fully
+ * specified; otherwise the mp_err of the first failing step. When only
+ * one product is specified the work is delegated to ECPoint_mul. */
+mp_err
+ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int precomp[4][4][2]; /* precomp[i][j] = (x, y) of i*A + j*B */
+ const mp_int *a, *b; /* a: scalar with more bits, b: the other one */
+ unsigned int i, j;
+ int ai, bi, d;
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+ /* if some arguments are not defined use ECPoint_mul */
+ if (k1 == NULL) {
+ return ECPoint_mul(group, k2, px, py, rx, ry);
+ } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+ return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+ }
+
+ /* initialize precomputation table; every entry is first marked as
+ * unallocated so that CLEANUP can clear the whole table even if one of
+ * the allocations below fails part-way through */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_DIGITS(&precomp[i][j][0]) = 0;
+ MP_DIGITS(&precomp[i][j][1]) = 0;
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_CHECKOK(mp_init_size(&precomp[i][j][0],
+ ECL_MAX_FIELD_SIZE_DIGITS));
+ MP_CHECKOK(mp_init_size(&precomp[i][j][1],
+ ECL_MAX_FIELD_SIZE_DIGITS));
+ }
+ }
+
+ /* fill precomputation table */
+ /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+ if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+ a = k2; /* k2 is longer: A = P (encoded if needed), B = G */
+ b = k1;
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+ }
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); /* copied as-is, without field_enc */
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+ } else {
+ a = k1; /* k1 is at least as long: A = G, B = P (encoded if needed) */
+ b = k2;
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+ }
+ }
+ /* precompute [*][0][*]: 0, A, 2A, 3A */
+ mp_zero(&precomp[0][0][0]);
+ mp_zero(&precomp[0][0][1]);
+ MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1], group));
+ MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1],
+ &precomp[3][0][0], &precomp[3][0][1], group));
+ /* precompute [*][1][*]: i*A + B for i = 1..3 */
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][1][0], &precomp[i][1][1], group));
+ }
+ /* precompute [*][2][*]: 2B, then i*A + 2B for i = 1..3 */
+ MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][2][0], &precomp[i][2][1], group));
+ }
+ /* precompute [*][3][*]: 3B = B + 2B, then i*A + 3B for i = 1..3 */
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1],
+ &precomp[0][3][0], &precomp[0][3][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][3][0], &precomp[i][3][1], group));
+ }
+
+ d = (mpl_significant_bits(a) + 1) / 2; /* number of 2-bit windows in the longer scalar */
+
+ /* R = inf */
+ mp_zero(rx);
+ mp_zero(ry);
+
+ for (i = d; i-- > 0;) { /* windows from most to least significant */
+ ai = MP_GET_BIT(a, 2 * i + 1);
+ ai <<= 1;
+ ai |= MP_GET_BIT(a, 2 * i);
+ bi = MP_GET_BIT(b, 2 * i + 1);
+ bi <<= 1;
+ bi |= MP_GET_BIT(b, 2 * i);
+ /* R = 2^2 * R */
+ MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+ MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+ /* R = R + (ai * A + bi * B) */
+ MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0],
+ &precomp[ai][bi][1], rx, ry, group));
+ }
+
+ if (group->meth->field_dec) {
+ MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+ }
+
+CLEANUP:
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ mp_clear(&precomp[i][j][0]);
+ mp_clear(&precomp[i][j][1]);
+ }
+ }
+ return res;
+}
+
+/* Double scalar multiplication entry point: R(rx, ry) = k1 * G + k2 *
+ * P(px, py), with G the generator of the group. Allows k1 = NULL or
+ * { k2, P } = NULL. Inputs and outputs are NOT field-encoded.
+ *
+ * Any non-NULL scalar that is greater than or equal to the group order is
+ * first replaced by its residue modulo the order. The actual work is done
+ * by group->points_mul when the group provides one, and by
+ * ec_pts_mul_simul_w2 otherwise.
+ *
+ * Returns MP_BADARG if group is NULL, otherwise the mp_err of the first
+ * failing step (MP_OKAY on success). */
+mp_err
+ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2,
+             const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int k1_reduced, k2_reduced;
+    const mp_int *s1 = k1;
+    const mp_int *s2 = k2;
+
+    /* Mark both temporaries as unallocated so CLEANUP can always clear. */
+    MP_DIGITS(&k1_reduced) = 0;
+    MP_DIGITS(&k2_reduced) = 0;
+
+    ARGCHK(group != NULL, MP_BADARG);
+
+    /* Bring each supplied scalar below the group order if necessary. */
+    if ((k1 != NULL) && (mp_cmp(k1, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&k1_reduced));
+        MP_CHECKOK(mp_mod(k1, &group->order, &k1_reduced));
+        s1 = &k1_reduced;
+    }
+    if ((k2 != NULL) && (mp_cmp(k2, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&k2_reduced));
+        MP_CHECKOK(mp_mod(k2, &group->order, &k2_reduced));
+        s2 = &k2_reduced;
+    }
+
+    /* Prefer the group's own implementation when one is installed. */
+    res = group->points_mul
+              ? group->points_mul(s1, s2, px, py, rx, ry, group)
+              : ec_pts_mul_simul_w2(s1, s2, px, py, rx, ry, group);
+
+CLEANUP:
+    mp_clear(&k1_reduced);
+    mp_clear(&k2_reduced);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/eclt.h b/security/nss/lib/freebl/ecl/eclt.h
new file mode 100644
index 0000000000..e763706f26
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/eclt.h
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This header holds ECC types and must not be exported publicly. */
+
+#ifndef __eclt_h_
+#define __eclt_h_
+
+/* Byte encoding of curve parameters: a text field, the field kind, sizes,
+ * and pointers to read-only byte strings for the individual parameters
+ * (irr, curvea, curveb, genx, geny, order, base), plus integer metadata.
+ * NOTE(review): the exact meaning and units of size, pointSize,
+ * scalarSize, security and usage are not visible in this header --
+ * confirm against the curve tables that populate this struct. */
+struct ECCurveBytesStr {
+ char *text;
+ ECField field;
+ size_t size;
+ const PRUint8 *irr;
+ const PRUint8 *curvea;
+ const PRUint8 *curveb;
+ const PRUint8 *genx;
+ const PRUint8 *geny;
+ const PRUint8 *order;
+ const PRUint8 *base;
+ int cofactor;
+ int security;
+ size_t pointSize;
+ size_t scalarSize;
+ unsigned int usage;
+};
+typedef struct ECCurveBytesStr ECCurveBytes;
+
+#endif /* __eclt_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h
new file mode 100644
index 0000000000..7e54e4e072
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp.h
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecp_h_
+#define __ecp_h_
+
+#include "ecl-priv.h"
+
+/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */
+mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py);
+
+/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */
+mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py);
+
+/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx,
+ * qy). Uses affine coordinates. */
+mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes R = P - Q where R is (rx, ry), P is (px, py) and Q is (qx,
+ * qy). Uses affine coordinates. */
+mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes R = 2P where R is (rx, ry) and P is (px, py). Uses affine
+ * coordinates. */
+mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Validates a point P(px, py) on a GFp curve described by group. */
+mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Uses affine
+ * coordinates. The curve coefficients and the field prime are not passed
+ * separately; presumably they are taken from group -- confirm against
+ * the implementation. */
+mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+#endif
+
+/* Converts a point P(px, py) from affine coordinates to Jacobian
+ * projective coordinates R(rx, ry, rz). */
+mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, mp_int *rz, const ECGroup *group);
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry). */
+mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py,
+ const mp_int *pz, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+
+/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py,
+ const mp_int *pz);
+
+/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz);
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy). The prototype takes no qz, so Q is presumably given in affine
+ * form (mixed Jacobian-affine addition, as the _jac_aff suffix suggests)
+ * -- confirm against the implementation. */
+mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py,
+ const mp_int *pz, const mp_int *qx,
+ const mp_int *qy, mp_int *rx, mp_int *ry,
+ mp_int *rz, const ECGroup *group);
+
+/* Computes R = 2P where R is (rx, ry, rz) and P is (px, py, pz). Uses
+ * Jacobian coordinates. */
+mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py,
+ const mp_int *pz, mp_int *rx, mp_int *ry,
+ mp_int *rz, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Uses Jacobian
+ * coordinates. The curve coefficients and the field prime are not passed
+ * separately; presumably they are taken from group -- confirm against
+ * the implementation. */
+mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+#endif
+
+/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator
+ * (base point) of the group of points on the elliptic curve. Allows k1 =
+ * NULL or { k2, P } = NULL. Implemented using mixed Jacobian-affine
+ * coordinates. Input and output values are assumed to be NOT
+ * field-encoded and are in affine form. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Elliptic
+ * curve points P and R can be identical. Uses mixed Modified-Jacobian
+ * co-ordinates for doubling and Chudnovsky Jacobian coordinates for
+ * additions. Assumes input is already field-encoded using field_enc, and
+ * returns output that is still field-encoded. Uses 5-bit window NAF
+ * method (algorithm 11) for scalar-point multiplication from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic
+ * Curves Over Prime Fields. */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group);
+
+#endif /* __ecp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c
new file mode 100644
index 0000000000..a417068e1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_25519.c
@@ -0,0 +1,145 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "ecl-priv.h"
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "secport.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * Screens an encoded Curve25519 point against a fixed list of known bad
+ * values. Point validation is not necessary in general for this curve;
+ * this only rejects the listed encodings.
+ *
+ * px: the encoded point. It must be non-NULL, carry non-NULL data and be
+ *     exactly 32 bytes long.
+ *
+ * Returns SECSuccess when px has the right length and matches none of the
+ * forbidden values, SECFailure otherwise. SEC_ERROR_INVALID_ARGS is set
+ * only for a NULL px or a NULL px->data (mirroring
+ * ec_Curve25519_scalar_validate); the other failure paths leave the error
+ * code untouched, as before.
+ */
+SECStatus
+ec_Curve25519_pt_validate(const SECItem *px)
+{
+    /* The table is read-only, so keep one static copy instead of
+     * rebuilding 12 * 32 bytes on the stack on every call. */
+    static const PRUint8 forbiddenValues[12][32] = {
+        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
+        { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
+        { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
+        { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
+        { 0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+    };
+    PRUint64 i;
+
+    /* Previously a NULL px (or NULL data) was dereferenced outright. */
+    if (!px || !px->data) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (px->len != 32) {
+        return SECFailure;
+    }
+
+    for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) {
+        if (NSS_SecureMemcmp(px->data, forbiddenValues[i], px->len) == 0) {
+            return SECFailure;
+        }
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Sanity-checks a Curve25519 scalar item. No arithmetic validation of the
+ * scalar is performed; only the container is checked.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when scalar or its data
+ * pointer is NULL, SECFailure with SEC_ERROR_BAD_KEY when the length is
+ * not 32 bytes, and SECSuccess otherwise.
+ */
+SECStatus
+ec_Curve25519_scalar_validate(const SECItem *scalar)
+{
+    if (scalar == NULL || scalar->data == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (scalar->len == 32) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_BAD_KEY);
+    return SECFailure;
+}
+
+/*
+ * Curve25519 scalar multiplication: X = k * P.
+ * A NULL P selects the base point, encoded as a 32-byte buffer whose
+ * first byte is 9 and whose remaining bytes are zero.
+ *
+ * Returns SECFailure when P (if given) or k is not 32 bytes long, or when
+ * the bytes of X are all zero after the multiplication; otherwise returns
+ * the status reported by ec_Curve25519_mul.
+ */
+SECStatus
+ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P)
+{
+    PRUint8 basePoint[32] = { 9 };
+    PRUint8 *point = basePoint;
+    SECStatus rv;
+
+    if (P != NULL) {
+        PORT_Assert(P->len == 32);
+        if (P->len != 32) {
+            return SECFailure;
+        }
+        point = P->data;
+    }
+
+    if (k->len != 32) {
+        return SECFailure;
+    }
+
+    rv = ec_Curve25519_mul(X->data, k->data, point);
+
+    /* An all-zero output is reported as a failure regardless of rv. */
+    if (NSS_SecureMemcmpZero(X->data, X->len) == 0) {
+        return SECFailure;
+    }
+    return rv;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c
new file mode 100644
index 0000000000..ad4e630c17
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256.c
@@ -0,0 +1,401 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1. a can be r.
+ * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * Inputs with fewer than 256 significant bits are returned unchanged,
+ * inputs with more than 512 bits go through the generic mp_mod, and
+ * everything in between is reduced with the word-level sums and
+ * differences below (separate code paths for 32-bit and 64-bit digits).
+ * Returns MP_OKAY or the mp_err of the first failing MPI call. */
+static mp_err
+ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_size a_used = MP_USED(a);
+ int a_bits = mpl_significant_bits(a);
+ mp_digit carry;
+
+#ifdef ECL_THIRTY_TWO_BIT
+ mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0;
+ mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
+ int r8; /* must be a signed value ! */
+#else
+ mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0;
+ mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
+ mp_digit r0, r1, r2, r3;
+ int r4; /* must be a signed value ! */
+#endif
+ /* values below 256 bits are smaller than p and need no reduction;
+ * for values larger than twice the field size use regular reduction */
+ if (a_bits < 256) {
+ if (a == r)
+ return MP_OKAY;
+ return mp_copy(a, r);
+ }
+ if (a_bits > 512) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+
+#ifdef ECL_THIRTY_TWO_BIT
+ switch (a_used) { /* load only the high digits a actually has */
+ case 16:
+ a15 = MP_DIGIT(a, 15); /* fall through */
+ case 15:
+ a14 = MP_DIGIT(a, 14); /* fall through */
+ case 14:
+ a13 = MP_DIGIT(a, 13); /* fall through */
+ case 13:
+ a12 = MP_DIGIT(a, 12); /* fall through */
+ case 12:
+ a11 = MP_DIGIT(a, 11); /* fall through */
+ case 11:
+ a10 = MP_DIGIT(a, 10); /* fall through */
+ case 10:
+ a9 = MP_DIGIT(a, 9); /* fall through */
+ case 9:
+ a8 = MP_DIGIT(a, 8);
+ }
+
+ r0 = MP_DIGIT(a, 0);
+ r1 = MP_DIGIT(a, 1);
+ r2 = MP_DIGIT(a, 2);
+ r3 = MP_DIGIT(a, 3);
+ r4 = MP_DIGIT(a, 4);
+ r5 = MP_DIGIT(a, 5);
+ r6 = MP_DIGIT(a, 6);
+ r7 = MP_DIGIT(a, 7);
+
+ /* sum 1 (added twice) */
+ carry = 0;
+ MP_ADD_CARRY(r3, a11, r3, carry);
+ MP_ADD_CARRY(r4, a12, r4, carry);
+ MP_ADD_CARRY(r5, a13, r5, carry);
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry);
+ r8 = carry; /* r8 accumulates the net carries/borrows out of r7 */
+ carry = 0;
+ MP_ADD_CARRY(r3, a11, r3, carry);
+ MP_ADD_CARRY(r4, a12, r4, carry);
+ MP_ADD_CARRY(r5, a13, r5, carry);
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* sum 2 */
+ MP_ADD_CARRY(r3, a12, r3, carry);
+ MP_ADD_CARRY(r4, a13, r4, carry);
+ MP_ADD_CARRY(r5, a14, r5, carry);
+ MP_ADD_CARRY(r6, a15, r6, carry);
+ MP_ADD_CARRY(r7, 0, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* combine last bottom of sum 3 with second sum 2 */
+ MP_ADD_CARRY(r0, a8, r0, carry);
+ MP_ADD_CARRY(r1, a9, r1, carry);
+ MP_ADD_CARRY(r2, a10, r2, carry);
+ MP_ADD_CARRY(r3, a12, r3, carry);
+ MP_ADD_CARRY(r4, a13, r4, carry);
+ MP_ADD_CARRY(r5, a14, r5, carry);
+ MP_ADD_CARRY(r6, a15, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */
+ r8 += carry;
+ carry = 0;
+ /* sum 3 (rest of it)*/
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, 0, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* sum 4 (rest of it)*/
+ MP_ADD_CARRY(r0, a9, r0, carry);
+ MP_ADD_CARRY(r1, a10, r1, carry);
+ MP_ADD_CARRY(r2, a11, r2, carry);
+ MP_ADD_CARRY(r3, a13, r3, carry);
+ MP_ADD_CARRY(r4, a14, r4, carry);
+ MP_ADD_CARRY(r5, a15, r5, carry);
+ MP_ADD_CARRY(r6, a13, r6, carry);
+ MP_ADD_CARRY(r7, a8, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* diff 5 */
+ MP_SUB_BORROW(r0, a11, r0, carry);
+ MP_SUB_BORROW(r1, a12, r1, carry);
+ MP_SUB_BORROW(r2, a13, r2, carry);
+ MP_SUB_BORROW(r3, 0, r3, carry);
+ MP_SUB_BORROW(r4, 0, r4, carry);
+ MP_SUB_BORROW(r5, 0, r5, carry);
+ MP_SUB_BORROW(r6, a8, r6, carry);
+ MP_SUB_BORROW(r7, a10, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 6 */
+ MP_SUB_BORROW(r0, a12, r0, carry);
+ MP_SUB_BORROW(r1, a13, r1, carry);
+ MP_SUB_BORROW(r2, a14, r2, carry);
+ MP_SUB_BORROW(r3, a15, r3, carry);
+ MP_SUB_BORROW(r4, 0, r4, carry);
+ MP_SUB_BORROW(r5, 0, r5, carry);
+ MP_SUB_BORROW(r6, a9, r6, carry);
+ MP_SUB_BORROW(r7, a11, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 7 */
+ MP_SUB_BORROW(r0, a13, r0, carry);
+ MP_SUB_BORROW(r1, a14, r1, carry);
+ MP_SUB_BORROW(r2, a15, r2, carry);
+ MP_SUB_BORROW(r3, a8, r3, carry);
+ MP_SUB_BORROW(r4, a9, r4, carry);
+ MP_SUB_BORROW(r5, a10, r5, carry);
+ MP_SUB_BORROW(r6, 0, r6, carry);
+ MP_SUB_BORROW(r7, a12, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 8 */
+ MP_SUB_BORROW(r0, a14, r0, carry);
+ MP_SUB_BORROW(r1, a15, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, a9, r3, carry);
+ MP_SUB_BORROW(r4, a10, r4, carry);
+ MP_SUB_BORROW(r5, a11, r5, carry);
+ MP_SUB_BORROW(r6, 0, r6, carry);
+ MP_SUB_BORROW(r7, a13, r7, carry);
+ r8 -= carry;
+
+ /* reduce the overflows */
+ while (r8 > 0) {
+ mp_digit r8_d = r8;
+ carry = 0;
+ MP_ADD_CARRY(r0, r8_d, r0, carry);
+ MP_ADD_CARRY(r1, 0, r1, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry);
+ MP_ADD_CARRY(r3, 0 - r8_d, r3, carry);
+ MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry);
+ MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry);
+ MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry);
+ MP_ADD_CARRY(r7, (r8_d - 1), r7, carry);
+ r8 = carry;
+ }
+
+ /* reduce the underflows */
+ while (r8 < 0) {
+ mp_digit r8_d = -r8;
+ carry = 0;
+ MP_SUB_BORROW(r0, r8_d, r0, carry);
+ MP_SUB_BORROW(r1, 0, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, 0 - r8_d, r3, carry);
+ MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry);
+ MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry);
+ MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry);
+ MP_SUB_BORROW(r7, (r8_d - 1), r7, carry);
+ r8 = 0 - carry;
+ }
+ if (a != r) {
+ MP_CHECKOK(s_mp_pad(r, 8));
+ }
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 8;
+
+ MP_DIGIT(r, 7) = r7;
+ MP_DIGIT(r, 6) = r6;
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+
+ /* final reduction if necessary */
+ if ((r7 == MP_DIGIT_MAX) &&
+ ((r6 > 1) || ((r6 == 1) &&
+ (r5 || r4 || r3 ||
+ ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) {
+ MP_CHECKOK(mp_sub(r, &meth->irr, r));
+ }
+
+ s_mp_clamp(r);
+#else
+ switch (a_used) { /* load only the high digits a actually has */
+ case 8:
+ a7 = MP_DIGIT(a, 7); /* fall through */
+ case 7:
+ a6 = MP_DIGIT(a, 6); /* fall through */
+ case 6:
+ a5 = MP_DIGIT(a, 5); /* fall through */
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ }
+ a7l = a7 << 32; /* XXl: low 32 bits moved to the high half */
+ a7h = a7 >> 32; /* XXh: high 32 bits moved to the low half */
+ a6l = a6 << 32;
+ a6h = a6 >> 32;
+ a5l = a5 << 32;
+ a5h = a5 >> 32;
+ a4l = a4 << 32;
+ a4h = a4 >> 32;
+ r3 = MP_DIGIT(a, 3);
+ r2 = MP_DIGIT(a, 2);
+ r1 = MP_DIGIT(a, 1);
+ r0 = MP_DIGIT(a, 0);
+
+ /* sum 1 (added twice) */
+ carry = 0;
+ MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+ MP_ADD_CARRY(r2, a6, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 = carry; /* r4 accumulates the net carries/borrows out of r3 */
+ carry = 0;
+ MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+ MP_ADD_CARRY(r2, a6, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 += carry;
+ /* sum 2 (added twice) */
+ carry = 0;
+ MP_ADD_CARRY(r1, a6l, r1, carry);
+ MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+ MP_ADD_CARRY(r3, a7h, r3, carry);
+ r4 += carry;
+ carry = 0;
+ MP_ADD_CARRY(r1, a6l, r1, carry);
+ MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+ MP_ADD_CARRY(r3, a7h, r3, carry);
+ r4 += carry;
+
+ /* sum 3 */
+ carry = 0;
+ MP_ADD_CARRY(r0, a4, r0, carry);
+ MP_ADD_CARRY(r1, a5l >> 32, r1, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 += carry;
+ /* sum 4 */
+ carry = 0;
+ MP_ADD_CARRY(r0, a4h | a5l, r0, carry);
+ MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry);
+ MP_ADD_CARRY(r2, a7, r2, carry);
+ MP_ADD_CARRY(r3, a6h | a4l, r3, carry);
+ r4 += carry;
+ /* diff 5 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a5h | a6l, r0, carry);
+ MP_SUB_BORROW(r1, a6h, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry);
+ r4 -= carry;
+ /* diff 6 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a6, r0, carry);
+ MP_SUB_BORROW(r1, a7, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry);
+ r4 -= carry;
+ /* diff 7 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a6h | a7l, r0, carry);
+ MP_SUB_BORROW(r1, a7h | a4l, r1, carry);
+ MP_SUB_BORROW(r2, a4h | a5l, r2, carry);
+ MP_SUB_BORROW(r3, a6l, r3, carry);
+ r4 -= carry;
+ /* diff 8 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a7, r0, carry);
+ MP_SUB_BORROW(r1, a4h << 32, r1, carry);
+ MP_SUB_BORROW(r2, a5, r2, carry);
+ MP_SUB_BORROW(r3, a6h << 32, r3, carry);
+ r4 -= carry;
+
+ /* reduce the overflows */
+ while (r4 > 0) {
+ mp_digit r4_long = r4;
+ mp_digit r4l = (r4_long << 32);
+ carry = 0;
+ MP_ADD_CARRY(r0, r4_long, r0, carry);
+ MP_ADD_CARRY(r1, 0 - r4l, r1, carry);
+ MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry);
+ MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry);
+ r4 = carry;
+ }
+
+ /* reduce the underflows */
+ while (r4 < 0) {
+ mp_digit r4_long = -r4;
+ mp_digit r4l = (r4_long << 32);
+ carry = 0;
+ MP_SUB_BORROW(r0, r4_long, r0, carry);
+ MP_SUB_BORROW(r1, 0 - r4l, r1, carry);
+ MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry);
+ MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry);
+ r4 = 0 - carry;
+ }
+
+ if (a != r) {
+ MP_CHECKOK(s_mp_pad(r, 4));
+ }
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 4;
+
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+
+ /* final reduction if necessary */
+ if ((r3 > 0xFFFFFFFF00000001ULL) ||
+ ((r3 == 0xFFFFFFFF00000001ULL) &&
+ (r2 || (r1 >> 32) ||
+ (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
+ /* very rare, just use mp_sub */
+ MP_CHECKOK(mp_sub(r, &meth->irr, r));
+ }
+
+ s_mp_clamp(r);
+#endif
+ }
+
+CLEANUP:
+ return res;
+}
+
+/* Field squaring for p256: r = a * a reduced modulo p256 with the
+ * optimized reduction routine. r may alias a. Returns the mp_err of the
+ * first failing step, or the status of the reduction. */
+static mp_err
+ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r);
+
+    if (res < MP_OKAY) {
+        return res;
+    }
+    res = ec_GFp_nistp256_mod(r, r, meth);
+    return res;
+}
+
+/* Field multiplication for p256: r = a * b reduced modulo p256 with the
+ * optimized reduction routine. r may alias a or b, and a may alias b.
+ * Returns the mp_err of the first failing step, or the status of the
+ * reduction. */
+static mp_err
+ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r);
+
+    if (res < MP_OKAY) {
+        return res;
+    }
+    res = ec_GFp_nistp256_mod(r, r, meth);
+    return res;
+}
+
+/* Installs the p256-specific field routines (reduction, multiplication,
+ * squaring) into the group's field method when name is ECCurve_NIST_P256.
+ * Any other curve name leaves the group untouched. Always returns
+ * MP_OKAY. */
+mp_err
+ec_group_set_gfp256(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_mod = &ec_GFp_nistp256_mod;
+    group->meth->field_mul = &ec_GFp_nistp256_mul;
+    group->meth->field_sqr = &ec_GFp_nistp256_sqr;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c
new file mode 100644
index 0000000000..879396ad42
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256_32.c
@@ -0,0 +1,1535 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* A 32-bit implementation of the NIST P-256 elliptic curve. */
+
+#include <string.h>
+
+#include "prtypes.h"
+#include "mpi.h"
+#include "mpi-priv.h"
+#include "ecp.h"
+
+typedef PRUint8 u8; /* short alias for the NSPR 8-bit unsigned type */
+typedef PRUint32 u32; /* short alias for the NSPR 32-bit unsigned type */
+typedef PRUint64 u64; /* short alias for the NSPR 64-bit unsigned type */
+
+/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's
+ * MPI library calls them digits, but here they are called limbs, which is
+ * GMP's terminology.
+ *
+ * The value of an felem (field element) is:
+ * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
+ *
+ * That is, each limb is alternately 29 or 28-bits wide in little-endian
+ * order. Limbs with an even index (x[0], x[2], ...) are the 29-bit ones and
+ * limbs with an odd index (x[1], x[3], ...) are the 28-bit ones.
+ *
+ * This means that an felem hits 2**257, rather than 2**256 as we would like. A
+ * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
+ * when multiplying as terms end up one bit short of a limb which would require
+ * much bit-shifting to correct.
+ *
+ * Finally, the values stored in an felem are in Montgomery form. So the value
+ * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
+ */
+typedef u32 limb;
+#define NLIMBS 9
+typedef limb felem[NLIMBS];
+
+static const limb kBottom28Bits = 0xfffffff; /* mask keeping the low 28 bits (odd limbs) */
+static const limb kBottom29Bits = 0x1fffffff; /* mask keeping the low 29 bits (even limbs) */
+
+/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
+ * 28-bit words.
+ *
+ * kZero, kP and k2P below are the plain limb encodings of 0, p and 2*p
+ * (p = 2**256 - 2**224 + 2**192 + 2**96 - 1). felem_is_zero_vartime compares
+ * a carry-reduced value against these three to decide whether it is 0 mod p.
+ */
+static const felem kOne = {
+ 2, 0, 0, 0xffff800,
+ 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
+ 0
+};
+static const felem kZero = { 0 }; /* all limbs zero */
+static const felem kP = { /* limbs of p itself */
+ 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
+ 0, 0, 0x200000, 0xf000000,
+ 0xfffffff
+};
+static const felem k2P = { /* limbs of 2*p; the top limb uses all 29 bits */
+ 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
+ 0, 0, 0x400000, 0xe000000,
+ 0x1fffffff
+};
+
+/* kPrecomputed contains precomputed values to aid the calculation of scalar
+ * multiples of the base point, G. It's actually two, equal length, tables
+ * concatenated.
+ *
+ * The first table contains (x,y) felem pairs for 15 multiples of the base
+ * point, G. The index space below has 16 entries, but entry 0 is not stored.
+ *
+ * Index | Index (binary) | Value
+ * 0 | 0000 | 0G (all zeros, omitted)
+ * 1 | 0001 | G
+ * 2 | 0010 | 2**64G
+ * 3 | 0011 | 2**64G + G
+ * 4 | 0100 | 2**128G
+ * 5 | 0101 | 2**128G + G
+ * 6 | 0110 | 2**128G + 2**64G
+ * 7 | 0111 | 2**128G + 2**64G + G
+ * 8 | 1000 | 2**192G
+ * 9 | 1001 | 2**192G + G
+ * 10 | 1010 | 2**192G + 2**64G
+ * 11 | 1011 | 2**192G + 2**64G + G
+ * 12 | 1100 | 2**192G + 2**128G
+ * 13 | 1101 | 2**192G + 2**128G + G
+ * 14 | 1110 | 2**192G + 2**128G + 2**64G
+ * 15 | 1111 | 2**192G + 2**128G + 2**64G + G
+ *
+ * The second table follows the same style, but the terms are 2**32G,
+ * 2**96G, 2**160G, 2**224G.
+ *
+ * This is ~2KB of data.
+ */
+static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
+ 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
+ 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
+ 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
+ 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
+ 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
+ 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
+ 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
+ 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
+ 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
+ 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
+ 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
+ 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
+ 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
+ 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
+ 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
+ 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
+ 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
+ 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
+ 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
+ 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
+ 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
+ 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
+ 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
+ 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
+ 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
+ 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
+ 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
+ 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
+ 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
+ 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
+ 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
+ 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
+ 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
+ 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
+ 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
+ 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
+ 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
+ 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
+ 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
+ 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
+ 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
+ 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
+ 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
+ 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
+ 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
+ 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
+ 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
+ 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
+ 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
+ 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
+ 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
+ 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
+ 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
+ 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
+ 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
+ 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
+ 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
+ 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
+ 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
+ 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78
+};
+
+/* Field element operations:
+ */
+
+/* NON_ZERO_TO_ALL_ONES returns:
+ * 0xffffffff for 0 < x <= 2**31
+ * 0 for x == 0 or x > 2**31.
+ *
+ * x must be a u32 or an equivalent type such as limb.
+ *
+ * How it works: (x - 1) has its top bit set exactly when x == 0 (the
+ * subtraction wraps around to 0xffffffff) or when x > 2**31. Shifting right
+ * by 31 therefore gives 1 in those cases and 0 otherwise, and the final
+ * "- 1" turns that into 0 or 0xffffffff respectively. The mask is produced
+ * with arithmetic only, without branching on x.
+ */
+#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1)
+
+/* felem_reduce_carry adds a multiple of p in order to cancel |carry|,
+ * which is a term at 2**257.
+ *
+ * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
+ * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
+ *
+ * For carry == 1 the net amounts added to inout[0..7] are exactly the limbs
+ * of kOne (2**257 mod p). carry_mask is all-ones when carry is non-zero and
+ * zero otherwise, so the fixed constants below are only added when there is
+ * a carry to cancel, without branching on the value of carry.
+ * inout is modified in place; inout[1], inout[2] and inout[8] are not touched.
+ */
+static void
+felem_reduce_carry(felem inout, limb carry)
+{
+ const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);
+
+ inout[0] += carry << 1;
+ inout[3] += 0x10000000 & carry_mask; /* 2**28, borrowed against the subtraction below */
+ /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the
+ * previous line therefore this doesn't underflow.
+ */
+ inout[3] -= carry << 11;
+ inout[4] += (0x20000000 - 1) & carry_mask; /* 2**29 - 1 */
+ inout[5] += (0x10000000 - 1) & carry_mask; /* 2**28 - 1 */
+ inout[6] += (0x20000000 - 1) & carry_mask; /* 2**29 - 1 */
+ inout[6] -= carry << 22;
+ /* This may underflow if carry is non-zero but, if so, we'll fix it in the
+ * next line.
+ */
+ inout[7] -= 1 & carry_mask;
+ inout[7] += carry << 25;
+}
+
+/* felem_sum computes out = in + in2, limb by limb, with carry propagation.
+ * out may be the same array as in and/or in2.
+ *
+ * On entry, in[i]+in2[i] must not overflow a 32-bit word.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_sum(felem out, const felem in, const felem in2)
+{
+    limb c = 0;
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        /* Even limbs hold 29 bits, odd limbs hold 28 bits. */
+        const unsigned int width = (k & 1) ? 28 : 29;
+        const limb mask = (k & 1) ? kBottom28Bits : kBottom29Bits;
+        const limb t = in[k] + in2[k] + c;
+
+        c = t >> width;
+        out[k] = t & mask;
+    }
+
+    /* Whatever spilled out of the top limb is a term at 2**257. */
+    felem_reduce_carry(out, c);
+}
+
+/* Per-limb constants used to build zero31 below. Every replacement list is
+ * wrapped in parentheses so that each macro expands to a single value even if
+ * it is ever used inside a larger expression.
+ */
+#define two31m3 ((((limb)1) << 31) - (((limb)1) << 3))
+#define two30m2 ((((limb)1) << 30) - (((limb)1) << 2))
+#define two30p13m2 ((((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2))
+#define two31m2 ((((limb)1) << 31) - (((limb)1) << 2))
+#define two31p24m2 ((((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2))
+#define two30m27m2 ((((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2))
+
+/* zero31 is 0 mod p.
+ *
+ * felem_diff adds it limb by limb to (in - in2); since it is a multiple of p
+ * the represented value is unchanged mod p.
+ */
+static const felem zero31 = {
+    two31m3, two30m2, two31m2, two30p13m2,
+    two31m2, two30m2, two31p24m2, two30m27m2,
+    two31m2
+};
+
+/* felem_diff computes out = in - in2. Each limb difference has the matching
+ * limb of zero31 (a multiple of p) added to it before the carry chain runs.
+ * out may be the same array as in and/or in2.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_diff(felem out, const felem in, const felem in2)
+{
+    limb c = 0;
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        /* Even limbs hold 29 bits, odd limbs hold 28 bits. */
+        const unsigned int width = (k & 1) ? 28 : 29;
+        const limb mask = (k & 1) ? kBottom28Bits : kBottom29Bits;
+        const limb t = in[k] - in2[k] + zero31[k] + c;
+
+        c = t >> width;
+        out[k] = t & mask;
+    }
+
+    /* Whatever spilled out of the top limb is a term at 2**257. */
+    felem_reduce_carry(out, c);
+}
+
+/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
+ * with the same 29,28,... bit positions as an felem.
+ *
+ * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
+ * Since we just multiplied two Montgomery values together, the result is
+ * x*y*R*R mod p. We wish to divide by R in order for the result also to be
+ * in Montgomery form.
+ *
+ * The function works in three stages: (1) split the 17 overlapping 64-bit
+ * words of tmp into 18 non-overlapping limbs (tmp2), (2) zero the low nine
+ * limbs of tmp2 by adding multiples of p, (3) copy the upper limbs down into
+ * out while fixing up their widths, then fold the final carry with
+ * felem_reduce_carry. tmp is only read.
+ *
+ * On entry: tmp[i] < 2**64
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_reduce_degree(felem out, u64 tmp[17])
+{
+ /* The following table may be helpful when reading this code:
+ *
+ * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
+ * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
+ * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
+ * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
+ */
+ limb tmp2[18], carry, x, xMask;
+ unsigned int i;
+
+ /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
+ * felem. So the top of an element of tmp might overlap with another
+ * element two positions down. The following loop eliminates this
+ * overlap.
+ */
+ tmp2[0] = tmp[0] & kBottom29Bits;
+
+ /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
+ * and hint to the compiler that it can do a single-word shift by selecting
+ * the right register rather than doing a double-word shift and truncating
+ * afterwards.
+ */
+ tmp2[1] = ((limb)tmp[0]) >> 29;
+ tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits;
+ tmp2[1] += ((limb)tmp[1]) & kBottom28Bits;
+ carry = tmp2[1] >> 28;
+ tmp2[1] &= kBottom28Bits;
+
+ for (i = 2; i < 17; i++) { /* each pass handles an even limb, then the odd limb after it */
+ tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+ tmp2[i] += ((limb)(tmp[i - 1])) >> 28;
+ tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
+ tmp2[i] += ((limb)tmp[i]) & kBottom29Bits;
+ tmp2[i] += carry;
+ carry = tmp2[i] >> 29;
+ tmp2[i] &= kBottom29Bits;
+
+ i++;
+ if (i == 17)
+ break;
+ tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+ tmp2[i] += ((limb)(tmp[i - 1])) >> 29;
+ tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
+ tmp2[i] += ((limb)tmp[i]) & kBottom28Bits;
+ tmp2[i] += carry;
+ carry = tmp2[i] >> 28;
+ tmp2[i] &= kBottom28Bits;
+ }
+
+ tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25; /* top limb: collects everything left over, not masked */
+ tmp2[17] += ((limb)(tmp[16])) >> 29;
+ tmp2[17] += (((limb)(tmp[16] >> 32)) << 3);
+ tmp2[17] += carry;
+
+ /* Montgomery elimination of terms:
+ *
+ * Since R is 2**257, we can divide by R with a bitwise shift if we can
+ * ensure that the right-most 257 bits are all zero. We can make that true
+ * by adding multiples of p without affecting the value.
+ *
+ * So we eliminate limbs from right to left. Since the bottom 29 bits of p
+ * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
+ * We can do that for 8 further limbs and then right shift to eliminate the
+ * extra factor of R.
+ */
+ for (i = 0;; i += 2) { /* exits from the middle once limb 8 has been eliminated */
+ tmp2[i + 1] += tmp2[i] >> 29;
+ x = tmp2[i] & kBottom29Bits;
+ xMask = NON_ZERO_TO_ALL_ONES(x);
+ tmp2[i] = 0;
+
+ /* The bounds calculations for this loop are tricky. Each iteration of
+ * the loop eliminates two words by adding values to words to their
+ * right.
+ *
+ * The following table contains the amounts added to each word (as an
+ * offset from the value of i at the top of the loop). The amounts are
+ * accounted for from the first and second half of the loop separately
+ * and are written as, for example, 28 to mean a value <2**28.
+ *
+ * Word: 3 4 5 6 7 8 9 10
+ * Added in top half: 28 11 29 21 29 28
+ * 28 29
+ * 29
+ * Added in bottom half: 29 10 28 21 28 28
+ * 29
+ *
+ * The value that is currently offset 7 will be offset 5 for the next
+ * iteration and then offset 3 for the iteration after that. Therefore
+ * the total value added will be the values added at 7, 5 and 3.
+ *
+ * The following table accumulates these values. The sums at the bottom
+ * are written as, for example, 29+28, to mean a value < 2**29+2**28.
+ *
+ * Word: 3 4 5 6 7 8 9 10 11 12 13
+ * 28 11 10 29 21 29 28 28 28 28 28
+ * 29 28 11 28 29 28 29 28 29 28
+ * 29 28 21 21 29 21 29 21
+ * 10 29 28 21 28 21 28
+ * 28 29 28 29 28 29 28
+ * 11 10 29 10 29 10
+ * 29 28 11 28 11
+ * 29 29
+ * --------------------------------------------
+ * 30+ 31+ 30+ 31+ 30+
+ * 28+ 29+ 28+ 29+ 21+
+ * 21+ 28+ 21+ 28+ 10
+ * 10 21+ 10 21+
+ * 11 11
+ *
+ * So the greatest amount is added to tmp2[10] and tmp2[12]. If
+ * tmp2[10/12] has an initial value of <2**29, then the maximum value
+ * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
+ * as required.
+ */
+ tmp2[i + 3] += (x << 10) & kBottom28Bits;
+ tmp2[i + 4] += (x >> 18);
+
+ tmp2[i + 6] += (x << 21) & kBottom29Bits;
+ tmp2[i + 7] += x >> 8;
+
+ /* At position 200, which is the starting bit position for word 7, we
+ * have a factor of 0xf000000 = 2**28 - 2**24.
+ */
+ tmp2[i + 7] += 0x10000000 & xMask;
+ /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
+ tmp2[i + 8] += (x - 1) & xMask;
+ tmp2[i + 7] -= (x << 24) & kBottom28Bits;
+ tmp2[i + 8] -= x >> 4;
+
+ tmp2[i + 8] += 0x20000000 & xMask;
+ tmp2[i + 8] -= x;
+ tmp2[i + 8] += (x << 28) & kBottom29Bits;
+ tmp2[i + 9] += ((x >> 1) - 1) & xMask;
+
+ if (i + 1 == NLIMBS)
+ break;
+ tmp2[i + 2] += tmp2[i + 1] >> 28;
+ x = tmp2[i + 1] & kBottom28Bits;
+ xMask = NON_ZERO_TO_ALL_ONES(x);
+ tmp2[i + 1] = 0;
+
+ tmp2[i + 4] += (x << 11) & kBottom29Bits;
+ tmp2[i + 5] += (x >> 18);
+
+ tmp2[i + 7] += (x << 21) & kBottom28Bits;
+ tmp2[i + 8] += x >> 7;
+
+ /* At position 199, which is the starting bit of the 8th word when
+ * dealing with a context starting on an odd word, we have a factor of
+ * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
+ * word from i+1 is i+8.
+ */
+ tmp2[i + 8] += 0x20000000 & xMask;
+ tmp2[i + 9] += (x - 1) & xMask;
+ tmp2[i + 8] -= (x << 25) & kBottom29Bits;
+ tmp2[i + 9] -= x >> 4;
+
+ tmp2[i + 9] += 0x10000000 & xMask;
+ tmp2[i + 9] -= x;
+ tmp2[i + 10] += (x - 1) & xMask;
+ }
+
+ /* We merge the right shift with a carry chain. The words above 2**257 have
+ * widths of 28,29,... which we need to correct when copying them down.
+ */
+ carry = 0;
+ for (i = 0; i < 8; i++) { /* two output limbs per pass: i is also bumped inside the body */
+ /* The maximum value of tmp2[i + 9] occurs on the first iteration and
+ * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
+ * therefore safe.
+ */
+ out[i] = tmp2[i + 9];
+ out[i] += carry;
+ out[i] += (tmp2[i + 10] << 28) & kBottom29Bits; /* lowest bit of the next word becomes bit 28 of this limb */
+ carry = out[i] >> 29;
+ out[i] &= kBottom29Bits;
+
+ i++;
+ out[i] = tmp2[i + 9] >> 1; /* the low bit was already moved into the previous limb */
+ out[i] += carry;
+ carry = out[i] >> 28;
+ out[i] &= kBottom28Bits;
+ }
+
+ out[8] = tmp2[17];
+ out[8] += carry;
+ carry = out[8] >> 29;
+ out[8] &= kBottom29Bits;
+
+ felem_reduce_carry(out, carry);
+}
+
+/* felem_square sets out=in*in.
+ *
+ * The 17 partial-product columns are accumulated as 64-bit words and then
+ * handed to felem_reduce_degree. Column a+b collects in[a]*in[b]. A product
+ * of two odd-indexed limbs lands one bit above the start of its (even)
+ * column, so it is doubled. Off-diagonal products occur twice in a square,
+ * which is a further doubling. All doublings are applied to the 32-bit
+ * second factor before it is widened.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_square(felem out, const felem in)
+{
+    u64 acc[17];
+    unsigned int a, b;
+
+    for (a = 0; a < 17; a++) {
+        acc[a] = 0;
+    }
+
+    for (a = 0; a < NLIMBS; a++) {
+        /* Diagonal term in[a]*in[a]: shifted only when a is odd. */
+        acc[2 * a] += ((u64)in[a]) * (limb)(in[a] << (a & 1));
+
+        /* Cross terms in[a]*in[b], b > a: one shift because the term
+         * appears twice, one more when both indices are odd. The largest
+         * column, acc[8], stays below 2**64 for inputs within the entry
+         * bounds.
+         */
+        for (b = a + 1; b < NLIMBS; b++) {
+            acc[a + b] += ((u64)in[a]) * (limb)(in[b] << (1 + (a & b & 1)));
+        }
+    }
+
+    felem_reduce_degree(out, acc);
+}
+
+/* felem_mul sets out=in*in2.
+ *
+ * The 17 partial-product columns are accumulated as 64-bit words and then
+ * handed to felem_reduce_degree. Column a+b collects in[a]*in2[b]. A product
+ * of two odd-indexed limbs lands one bit above the start of its (even)
+ * column, so in2[b] is doubled (in 32-bit arithmetic) before it is widened.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_mul(felem out, const felem in, const felem in2)
+{
+    u64 acc[17];
+    unsigned int a, b;
+
+    for (a = 0; a < 17; a++) {
+        acc[a] = 0;
+    }
+
+    /* acc[8] receives the most terms but does not overflow for inputs
+     * within the entry bounds.
+     */
+    for (a = 0; a < NLIMBS; a++) {
+        for (b = 0; b < NLIMBS; b++) {
+            /* Shift is 1 only when both a and b are odd, otherwise 0. */
+            acc[a + b] += ((u64)in[a]) * (limb)(in2[b] << (a & b & 1));
+        }
+    }
+
+    felem_reduce_degree(out, acc);
+}
+
+/* felem_assign copies all NLIMBS limbs of |in| into |out|. */
+static void
+felem_assign(felem out, const felem in)
+{
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        out[k] = in[k];
+    }
+}
+
+/* felem_inv calculates |out| = |in|^{-1}
+ *
+ * Based on Fermat's Little Theorem:
+ * a^p = a (mod p)
+ * a^{p-1} = 1 (mod p)
+ * a^{p-2} = a^{-1} (mod p)
+ *
+ * The exponent p-2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 is reached with a fixed
+ * sequence of felem_square and felem_mul calls. The trailing comment on each
+ * step gives the exponent of |in| held at that point. The sequence of
+ * operations does not depend on the value of |in|.
+ */
+static void
+felem_inv(felem out, const felem in)
+{
+ felem ftmp, ftmp2;
+ /* each e_I will hold |in|^{2^I - 1}, except e64 which holds |in|^{2^64 - 2^32} */
+ felem e2, e4, e8, e16, e32, e64;
+ unsigned int i;
+
+ felem_square(ftmp, in); /* 2^1 */
+ felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
+ felem_assign(e2, ftmp);
+ felem_square(ftmp, ftmp); /* 2^3 - 2^1 */
+ felem_square(ftmp, ftmp); /* 2^4 - 2^2 */
+ felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
+ felem_assign(e4, ftmp);
+ felem_square(ftmp, ftmp); /* 2^5 - 2^1 */
+ felem_square(ftmp, ftmp); /* 2^6 - 2^2 */
+ felem_square(ftmp, ftmp); /* 2^7 - 2^3 */
+ felem_square(ftmp, ftmp); /* 2^8 - 2^4 */
+ felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
+ felem_assign(e8, ftmp);
+ for (i = 0; i < 8; i++) {
+ felem_square(ftmp, ftmp);
+ } /* 2^16 - 2^8 */
+ felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
+ felem_assign(e16, ftmp);
+ for (i = 0; i < 16; i++) {
+ felem_square(ftmp, ftmp);
+ } /* 2^32 - 2^16 */
+ felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
+ felem_assign(e32, ftmp);
+ for (i = 0; i < 32; i++) {
+ felem_square(ftmp, ftmp);
+ } /* 2^64 - 2^32 */
+ felem_assign(e64, ftmp);
+ felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
+ for (i = 0; i < 192; i++) {
+ felem_square(ftmp, ftmp);
+ } /* 2^256 - 2^224 + 2^192 */
+
+ felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
+ for (i = 0; i < 16; i++) {
+ felem_square(ftmp2, ftmp2);
+ } /* 2^80 - 2^16 */
+ felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
+ for (i = 0; i < 8; i++) {
+ felem_square(ftmp2, ftmp2);
+ } /* 2^88 - 2^8 */
+ felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
+ for (i = 0; i < 4; i++) {
+ felem_square(ftmp2, ftmp2);
+ } /* 2^92 - 2^4 */
+ felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
+ felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */
+ felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */
+ felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
+ felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */
+ felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */
+ felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
+
+ felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
+}
+
+/* felem_scalar_3 triples |out| in place (out = 3*out).
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_3(felem out)
+{
+    limb c = 0;
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        /* Even limbs hold 29 bits, odd limbs hold 28 bits. */
+        const unsigned int width = (k & 1) ? 28 : 29;
+        const limb mask = (k & 1) ? kBottom28Bits : kBottom29Bits;
+        const limb t = out[k] * 3 + c;
+
+        c = t >> width;
+        out[k] = t & mask;
+    }
+
+    /* Whatever spilled out of the top limb is a term at 2**257. */
+    felem_reduce_carry(out, c);
+}
+
+/* felem_scalar_4 quadruples |out| in place (out = 4*out).
+ *
+ * Each limb is shifted left by two. The bits that leave the limb's nominal
+ * width are captured before the shift and carried into the next limb.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_4(felem out)
+{
+    limb c = 0;
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        /* Even limbs hold 29 bits, odd limbs hold 28 bits. */
+        const unsigned int width = (k & 1) ? 28 : 29;
+        const limb mask = (k & 1) ? kBottom28Bits : kBottom29Bits;
+        /* Bits that the shift pushes past the limb width. */
+        const limb spill = out[k] >> (width - 2);
+        const limb t = ((out[k] << 2) & mask) + c;
+
+        c = spill + (t >> width);
+        out[k] = t & mask;
+    }
+
+    /* Whatever spilled out of the top limb is a term at 2**257. */
+    felem_reduce_carry(out, c);
+}
+
+/* felem_scalar_8 multiplies |out| by eight in place (out = 8*out).
+ *
+ * Each limb is shifted left by three. The bits that leave the limb's nominal
+ * width are captured before the shift and carried into the next limb.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_8(felem out)
+{
+    limb c = 0;
+    unsigned int k;
+
+    for (k = 0; k < NLIMBS; k++) {
+        /* Even limbs hold 29 bits, odd limbs hold 28 bits. */
+        const unsigned int width = (k & 1) ? 28 : 29;
+        const limb mask = (k & 1) ? kBottom28Bits : kBottom29Bits;
+        /* Bits that the shift pushes past the limb width. */
+        const limb spill = out[k] >> (width - 3);
+        const limb t = ((out[k] << 3) & mask) + c;
+
+        c = spill + (t >> width);
+        out[k] = t & mask;
+    }
+
+    /* Whatever spilled out of the top limb is a term at 2**257. */
+    felem_reduce_carry(out, c);
+}
+
+/* felem_is_zero_vartime returns 1 iff |in| == 0 (mod p), and 0 otherwise.
+ * Its running time depends on the value of |in|, so it must not be used on
+ * secret values. |in| itself is not modified; the work is done on a copy.
+ */
+static char
+felem_is_zero_vartime(const felem in)
+{
+    limb reduced[NLIMBS];
+    limb c;
+    int k;
+
+    felem_assign(reduced, in);
+
+    /* Bring the copy to a minimal form: propagate carries through all limbs
+     * and fold the carry out of the top limb back in, repeating for as long
+     * as a pass produces such a carry.
+     */
+    for (;;) {
+        c = 0;
+        for (k = 0; k < NLIMBS; k++) {
+            const int odd = k & 1;
+
+            reduced[k] += c;
+            c = reduced[k] >> (odd ? 28 : 29);
+            reduced[k] &= odd ? kBottom28Bits : kBottom29Bits;
+        }
+
+        felem_reduce_carry(reduced, c);
+        if (!c) {
+            break;
+        }
+    }
+
+    /* reduced < 2**257, so the only encodings of zero are 0, p and 2p. */
+    if (memcmp(reduced, kZero, sizeof(reduced)) == 0) {
+        return 1;
+    }
+    if (memcmp(reduced, kP, sizeof(reduced)) == 0) {
+        return 1;
+    }
+    return memcmp(reduced, k2P, sizeof(reduced)) == 0;
+}
+
+/* Group operations:
+ *
+ * Elements of the elliptic curve group are represented in Jacobian
+ * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
+ * Jacobian form.
+ */
+
+/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
+ *
+ * NOTE(review): the steps and variable names below (delta, gamma, beta,
+ * alpha) look like EFD's dbl-2001-b formulas rather than dbl-2009-l; confirm
+ * the citation.
+ *
+ * Every input is read before the output that could alias it is written
+ * (z_out, then x_out, then y_out), so the statement order matters if the
+ * caller passes the same arrays for inputs and outputs.
+ */
+static void
+point_double(felem x_out, felem y_out, felem z_out,
+ const felem x, const felem y, const felem z)
+{
+ felem delta, gamma, alpha, beta, tmp, tmp2;
+
+ felem_square(delta, z); /* delta = z^2 */
+ felem_square(gamma, y); /* gamma = y^2 */
+ felem_mul(beta, x, gamma); /* beta = x*gamma */
+
+ felem_sum(tmp, x, delta); /* tmp = x + delta */
+ felem_diff(tmp2, x, delta); /* tmp2 = x - delta */
+ felem_mul(alpha, tmp, tmp2);
+ felem_scalar_3(alpha); /* alpha = 3*(x + delta)*(x - delta) */
+
+ felem_sum(tmp, y, z);
+ felem_square(tmp, tmp); /* tmp = (y + z)^2 */
+ felem_diff(tmp, tmp, gamma);
+ felem_diff(z_out, tmp, delta); /* z_out = (y + z)^2 - gamma - delta */
+
+ felem_scalar_4(beta); /* beta now holds 4*x*gamma */
+ felem_square(x_out, alpha);
+ felem_diff(x_out, x_out, beta);
+ felem_diff(x_out, x_out, beta); /* x_out = alpha^2 - 2*beta */
+
+ felem_diff(tmp, beta, x_out);
+ felem_mul(tmp, alpha, tmp); /* tmp = alpha*(beta - x_out) */
+ felem_square(tmp2, gamma);
+ felem_scalar_8(tmp2); /* tmp2 = 8*gamma^2 */
+ felem_diff(y_out, tmp, tmp2); /* y_out = alpha*(beta - x_out) - 8*gamma^2 */
+}
+
+/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
+ * (i.e. the second point is affine.)
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * NOTE(review): with z2 fixed to 1 the steps below look like EFD's mixed
+ * variant (madd-2007-bl), with z_out computed directly as 2*z1*h; confirm the
+ * citation.
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ */
+static void
+point_add_mixed(felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2)
+{
+ felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
+
+ felem_square(z1z1, z1); /* z1z1 = z1^2 */
+ felem_sum(tmp, z1, z1); /* tmp = 2*z1 */
+
+ felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+ felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+ felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+ felem_diff(h, u2, x1); /* h = u2 - x1 */
+ felem_sum(i, h, h);
+ felem_square(i, i); /* i = (2*h)^2 */
+ felem_mul(j, h, i); /* j = h*i */
+ felem_diff(r, s2, y1);
+ felem_sum(r, r, r); /* r = 2*(s2 - y1) */
+ felem_mul(v, x1, i); /* v = x1*i */
+
+ felem_mul(z_out, tmp, h); /* z_out = 2*z1*h */
+ felem_square(rr, r); /* rr = r^2 */
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) ... */
+ felem_mul(tmp, y1, j);
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp); /* ... - 2*y1*j */
+}
+
+/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ *
+ * The trailing comments give the value each step produces in terms of the
+ * inputs and earlier intermediates.
+ */
+static void
+point_add(felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2, const felem z2)
+{
+ felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+
+ felem_square(z1z1, z1); /* z1z1 = z1^2 */
+ felem_square(z2z2, z2); /* z2z2 = z2^2 */
+ felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+ felem_sum(tmp, z1, z2);
+ felem_square(tmp, tmp);
+ felem_diff(tmp, tmp, z1z1);
+ felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+ felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+ felem_mul(s1, y1, z2z2z2); /* s1 = y1*z2^3 */
+
+ felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+ felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+ felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+ felem_diff(h, u2, u1); /* h = u2 - u1 */
+ felem_sum(i, h, h);
+ felem_square(i, i); /* i = (2*h)^2 */
+ felem_mul(j, h, i); /* j = h*i */
+ felem_diff(r, s2, s1);
+ felem_sum(r, r, r); /* r = 2*(s2 - s1) */
+ felem_mul(v, u1, i); /* v = u1*i */
+
+ felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+ felem_square(rr, r); /* rr = r^2 */
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) ... */
+ felem_mul(tmp, s1, j);
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp); /* ... - 2*s1*j */
+}
+
+/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
+ * {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * This function handles the case where {x1,y1,z1}={x2,y2,z2}.
+ *
+ * The body is the same sequence as point_add with two extra tests: when
+ * felem_is_zero_vartime reports both h and (s2 - s1) as zero, the work is
+ * handed to point_double on the first point and the function returns early.
+ * That data-dependent branch is what makes the function variable-time.
+ * When only one of the two tests is true the general formula is still used.
+ *
+ * All inputs are consumed into locals before any output is written.
+ */
+static void
+point_add_or_double_vartime(
+ felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2, const felem z2)
+{
+ felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+ char x_equal, y_equal;
+
+ felem_square(z1z1, z1); /* z1z1 = z1^2 */
+ felem_square(z2z2, z2); /* z2z2 = z2^2 */
+ felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+ felem_sum(tmp, z1, z2);
+ felem_square(tmp, tmp);
+ felem_diff(tmp, tmp, z1z1);
+ felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+ felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+ felem_mul(s1, y1, z2z2z2); /* s1 = y1*z2^3 */
+
+ felem_mul(u2, x2, z1z1); /* u2 = x2*z1^2 */
+ felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+ felem_mul(s2, y2, z1z1z1); /* s2 = y2*z1^3 */
+ felem_diff(h, u2, u1); /* h = u2 - u1 */
+ x_equal = felem_is_zero_vartime(h); /* tested before h is doubled and squared */
+ felem_sum(i, h, h);
+ felem_square(i, i); /* i = (2*h)^2 */
+ felem_mul(j, h, i); /* j = h*i */
+ felem_diff(r, s2, s1); /* r = s2 - s1 (not yet doubled) */
+ y_equal = felem_is_zero_vartime(r);
+ if (x_equal && y_equal) {
+ point_double(x_out, y_out, z_out, x1, y1, z1);
+ return;
+ }
+ felem_sum(r, r, r); /* r = 2*(s2 - s1) */
+ felem_mul(v, u1, i); /* v = u1*i */
+
+ felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+ felem_square(rr, r);
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) */
+ felem_mul(tmp, s1, j); /* tmp = s1*j */
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*s1*j */
+}
+
+/* copy_conditional overwrites out with in when mask is 0xffffffff and leaves
+ * out unchanged when mask is 0. The loop body contains no branch on mask.
+ *
+ * On entry: mask is either 0 or 0xffffffff.
+ */
+static void
+copy_conditional(felem out, const felem in, limb mask)
+{
+    int k = 0;
+
+    while (k < NLIMBS) {
+        /* Per-bit select: take in where mask is set, keep out elsewhere. */
+        out[k] = (in[k] & mask) | (out[k] & ~mask);
+        k++;
+    }
+}
+
+/* select_affine_point sets {out_x,out_y} to the index'th entry of table.
+ * On entry: index < 16, table[0] must be zero.
+ *
+ * As written, the loop starts at i = 1 and reads from the very start of
+ * table, so table supplies the 15 entries for indices 1..15 (each entry is
+ * NLIMBS limbs of x followed by NLIMBS limbs of y). The entry for index 0 is
+ * implicit: the outputs are zeroed up front and no iteration matches, so
+ * {0,0} is returned.
+ *
+ * Every entry is read regardless of index; only the mask differs.
+ */
+static void
+select_affine_point(felem out_x, felem out_y,
+ const limb *table, limb index)
+{
+ limb i, j;
+
+ memset(out_x, 0, sizeof(felem));
+ memset(out_y, 0, sizeof(felem));
+
+ for (i = 1; i < 16; i++) {
+ limb mask = i ^ index; /* zero only when i == index */
+ mask |= mask >> 2; /* fold the four low bits ... */
+ mask |= mask >> 1; /* ... down into bit 0 */
+ mask &= 1; /* 0 if i == index, 1 otherwise */
+ mask--; /* 0 - 1 wraps to all ones (assuming limb is unsigned), 1 - 1 gives 0 */
+ for (j = 0; j < NLIMBS; j++, table++) {
+ out_x[j] |= *table & mask;
+ }
+ for (j = 0; j < NLIMBS; j++, table++) {
+ out_y[j] |= *table & mask;
+ }
+ }
+}
+
+/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
+ * table. On entry: index < 16, table[0] must be zero.
+ *
+ * Each entry is 3*NLIMBS limbs (x, then y, then z). Unlike
+ * select_affine_point, this table does contain a slot for index 0; that slot
+ * is skipped rather than read. All of entries 1..15 are read regardless of
+ * index; only the mask differs.
+ */
+static void
+select_jacobian_point(felem out_x, felem out_y, felem out_z,
+ const limb *table, limb index)
+{
+ limb i, j;
+
+ memset(out_x, 0, sizeof(felem));
+ memset(out_y, 0, sizeof(felem));
+ memset(out_z, 0, sizeof(felem));
+
+ /* The implicit value at index 0 is all zero. We don't need to perform that
+ * iteration of the loop because we already set out_* to zero.
+ */
+ table += 3 * NLIMBS;
+
+ for (i = 1; i < 16; i++) {
+ limb mask = i ^ index; /* zero only when i == index */
+ mask |= mask >> 2; /* fold the four low bits ... */
+ mask |= mask >> 1; /* ... down into bit 0 */
+ mask &= 1; /* 0 if i == index, 1 otherwise */
+ mask--; /* 0 - 1 wraps to all ones (assuming limb is unsigned), 1 - 1 gives 0 */
+ for (j = 0; j < NLIMBS; j++, table++) {
+ out_x[j] |= *table & mask;
+ }
+ for (j = 0; j < NLIMBS; j++, table++) {
+ out_y[j] |= *table & mask;
+ }
+ for (j = 0; j < NLIMBS; j++, table++) {
+ out_z[j] |= *table & mask;
+ }
+ }
+}
+
+/* get_bit returns bit number `bit` of scalar as 0 or 1, where bit 0 is the
+ * least significant bit of scalar[0]. */
+static char
+get_bit(const u8 scalar[32], int bit)
+{
+    const int byte_index = bit >> 3;
+    const int bit_in_byte = bit & 7;
+
+    return (scalar[byte_index] >> bit_in_byte) & 1;
+}
+
+/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
+ * number. Note that the value of scalar must be less than the order of the
+ * group.
+ *
+ * The accumulator {nx,ny,nz} starts as all zeros (the encoding used here for
+ * the point at infinity) and n_is_infinity_mask tracks, as an all-ones or
+ * all-zeros word, whether it is still in that state. Table lookups and the
+ * conditional copies are done with masks rather than branches.
+ */
+static void
+scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
+{
+ int i, j;
+ limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
+ u32 table_offset;
+
+ felem px, py;
+ felem tx, ty, tz;
+
+ memset(nx, 0, sizeof(felem));
+ memset(ny, 0, sizeof(felem));
+ memset(nz, 0, sizeof(felem));
+
+ /* The loop adds bits at positions 0, 64, 128 and 192, followed by
+ * positions 32,96,160 and 224 and does this 32 times.
+ *
+ * More precisely, iteration i first combines bits 31-i, 95-i, 159-i and
+ * 223-i into a 4-bit index (j = 0) and then the bits 32 positions above
+ * each of those (j = 32). The accumulator is doubled once per iteration,
+ * except before the first.
+ */
+ for (i = 0; i < 32; i++) {
+ if (i) {
+ point_double(nx, ny, nz, nx, ny, nz);
+ }
+ table_offset = 0;
+ for (j = 0; j <= 32; j += 32) {
+ char bit0 = get_bit(scalar, 31 - i + j);
+ char bit1 = get_bit(scalar, 95 - i + j);
+ char bit2 = get_bit(scalar, 159 - i + j);
+ char bit3 = get_bit(scalar, 223 - i + j);
+ limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
+
+ select_affine_point(px, py, kPrecomputed + table_offset, index);
+ table_offset += 30 * NLIMBS; /* 15 entries * 2 coordinates * NLIMBS: advance to the sub-table used when j = 32 */
+
+ /* Since scalar is less than the order of the group, we know that
+ * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
+ * below.
+ */
+ point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
+ /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
+ * (a.k.a. the point at infinity). We handle that situation by
+ * copying the point from the table.
+ */
+ copy_conditional(nx, px, n_is_infinity_mask);
+ copy_conditional(ny, py, n_is_infinity_mask);
+ copy_conditional(nz, kOne, n_is_infinity_mask);
+
+ /* Equally, the result is also wrong if the point from the table is
+ * zero, which happens when the index is zero. We handle that by
+ * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
+ */
+ p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+ mask = p_is_noninfinite_mask & ~n_is_infinity_mask; /* take the sum only if both operands were finite */
+ copy_conditional(nx, tx, mask);
+ copy_conditional(ny, ty, mask);
+ copy_conditional(nz, tz, mask);
+ /* If p was not zero, then n is now non-zero. */
+ n_is_infinity_mask &= ~p_is_noninfinite_mask;
+ }
+ }
+}
+
+/* point_to_affine converts a Jacobian point to an affine point. If the input
+ * is the point at infinity then it returns (0, 0) in constant time.
+ *
+ * Computes x_out = nx * nz^-2 and y_out = ny * nz^-3, with the inverse taken
+ * by felem_inv (defined outside this part of the file; the (0, 0) result for
+ * nz = 0 depends on what felem_inv returns for zero).
+ */
+static void
+point_to_affine(felem x_out, felem y_out,
+ const felem nx, const felem ny, const felem nz)
+{
+ felem z_inv, z_inv_sq;
+ felem_inv(z_inv, nz); /* z_inv = nz^-1 */
+ felem_square(z_inv_sq, z_inv); /* z_inv_sq = nz^-2 */
+ felem_mul(x_out, nx, z_inv_sq);
+ felem_mul(z_inv, z_inv, z_inv_sq); /* z_inv now holds nz^-3 */
+ felem_mul(y_out, ny, z_inv);
+}
+
+/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}.
+ *
+ * {x,y} is an affine point and scalar is 32 bytes, least significant byte
+ * first. A 16-entry table of Jacobian multiples 0..15 of {x,y} is built on
+ * the stack, then the scalar is consumed four bits at a time starting from
+ * the high nibble of scalar[31].
+ */
+static void
+scalar_mult(felem nx, felem ny, felem nz,
+ const felem x, const felem y, const u8 scalar[32])
+{
+ int i;
+ felem px, py, pz, tx, ty, tz;
+ felem precomp[16][3];
+ limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
+
+ /* We precompute 0,1,2,... times {x,y}. */
+ memset(precomp, 0, sizeof(felem) * 3); /* zeroes only precomp[0], the entry for 0*{x,y} */
+ memcpy(&precomp[1][0], x, sizeof(felem));
+ memcpy(&precomp[1][1], y, sizeof(felem));
+ memcpy(&precomp[1][2], kOne, sizeof(felem));
+
+ for (i = 2; i < 16; i += 2) {
+ point_double(precomp[i][0], precomp[i][1], precomp[i][2],
+ precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]); /* precomp[i] = 2 * precomp[i/2] */
+
+ point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
+ precomp[i][0], precomp[i][1], precomp[i][2], x, y); /* precomp[i+1] = precomp[i] + {x,y} */
+ }
+
+ memset(nx, 0, sizeof(felem));
+ memset(ny, 0, sizeof(felem));
+ memset(nz, 0, sizeof(felem));
+ n_is_infinity_mask = -1; /* all ones: the accumulator starts as the all-zero point */
+
+ /* We add in a window of four bits each iteration and do this 64 times. */
+ for (i = 0; i < 64; i++) {
+ if (i) {
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ }
+
+ index = scalar[31 - i / 2]; /* bytes are visited from scalar[31] down to scalar[0] */
+ if ((i & 1) == 1) {
+ index &= 15; /* odd i: low nibble */
+ } else {
+ index >>= 4; /* even i: high nibble */
+ }
+
+ /* See the comments in scalar_base_mult about handling infinities. */
+ select_jacobian_point(px, py, pz, precomp[0][0], index);
+ point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
+ copy_conditional(nx, px, n_is_infinity_mask);
+ copy_conditional(ny, py, n_is_infinity_mask);
+ copy_conditional(nz, pz, n_is_infinity_mask);
+
+ p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+ mask = p_is_noninfinite_mask & ~n_is_infinity_mask; /* take the sum only if both operands were finite */
+ copy_conditional(nx, tx, mask);
+ copy_conditional(ny, ty, mask);
+ copy_conditional(nz, tz, mask);
+ n_is_infinity_mask &= ~p_is_noninfinite_mask;
+ }
+}
+
+/* Interface with Freebl: */
+
+/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
+ * little-endian order.
+ *
+ * These macros are only defined when IS_BIG_ENDIAN is defined. On Apple
+ * targets the swap is delegated to OSSwapInt32/OSSwapInt64; elsewhere it is
+ * spelled out with shifts and masks, and those fallback macros expand their
+ * argument several times, so the argument must have no side effects.
+ * BYTESWAP_MP_DIGIT_TO_LE selects the 32-bit swap when MP_USE_UINT_DIGIT is
+ * defined and the 64-bit swap otherwise.
+ */
+#ifdef IS_BIG_ENDIAN
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define BYTESWAP32(x) OSSwapInt32(x)
+#define BYTESWAP64(x) OSSwapInt64(x)
+#else
+#define BYTESWAP32(x) \
+ (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24))
+#define BYTESWAP64(x) \
+ (((x) >> 56) | (((x) >> 40) & 0xff00) | \
+ (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \
+ (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) | \
+ (((x)&0xff00) << 40) | ((x) << 56))
+#endif
+
+#ifdef MP_USE_UINT_DIGIT
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
+#else
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
+#endif
+#endif /* IS_BIG_ENDIAN */
+
+#ifdef MP_USE_UINT_DIGIT /* kRInvDigits: eight 32-bit digits, least significant first */
+static const mp_digit kRInvDigits[8] = {
+ 0x80000000, 1, 0xffffffff, 0,
+ 0x80000001, 0xfffffffe, 1, 0x7fffffff
+};
+#else /* the same 256-bit value as four 64-bit digits */
+static const mp_digit kRInvDigits[4] = {
+ PR_UINT64(0x180000000), 0xffffffff,
+ PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
+};
+#endif
+#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit)) /* number of mp_digit words in 32 bytes */
+static const mp_int kRInv = { /* constant multiplier used by from_montgomery; presumably R^-1 mod p - TODO confirm */
+ MP_ZPOS,
+ MP_DIGITS_IN_256_BITS,
+ MP_DIGITS_IN_256_BITS,
+ (mp_digit *)kRInvDigits /* const is cast away; the digits must never be written through kRInv */
+};
+
+static const limb kTwo28 = 0x10000000; /* 2^28 */
+static const limb kTwo29 = 0x20000000; /* 2^29 */
+
+/* to_montgomery sets out = R*in.
+ *
+ * in is first multiplied by 2^257 and reduced with the group's field_mod;
+ * the reduced value is then split into NLIMBS limbs that alternate between
+ * 29 bits (even positions) and 28 bits (odd positions).
+ *
+ * Returns the first error reported by an MPI call, or the result of the last
+ * successful call otherwise. The temporary is always released.
+ */
+static mp_err
+to_montgomery(felem out, const mp_int *in, const ECGroup *group)
+{
+    /* There are no MPI functions for bitshift operations and we wish to shift
+     * in 257 bits left so we move the digits 256-bits left and then multiply
+     * by two.
+     */
+    mp_int in_shifted;
+    int i;
+    mp_err res;
+
+    /* Mark the temporary as unallocated before mp_init, as the other MPI
+     * users in this library do, so that the mp_clear in CLEANUP is safe no
+     * matter where a failure occurs.
+     */
+    MP_DIGITS(&in_shifted) = 0;
+    MP_CHECKOK(mp_init(&in_shifted));
+    MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS));
+    /* Copy in's digits above the low 256 bits: in_shifted = in * 2^256. */
+    memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS),
+           MP_DIGITS(in),
+           MP_USED(in) * sizeof(mp_digit));
+    MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted));
+    MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth));
+
+    /* Peel limbs off the bottom, two per pass: a 29-bit limb, then a 28-bit
+     * limb. i is advanced both in the body and in the loop header; the break
+     * fires after the final 29-bit limb has been stored.
+     */
+    for (i = 0;; i++) {
+        out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits;
+        MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL));
+
+        i++;
+        if (i == NLIMBS)
+            break;
+        out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits;
+        MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL));
+    }
+
+CLEANUP:
+    mp_clear(&in_shifted);
+    return res;
+}
+
+/* from_montgomery sets out=in/R.
+ *
+ * The limbs of in are recombined into one integer, most significant limb
+ * first, scaling by 2^29 before adding an even-numbered limb and by 2^28
+ * before adding an odd-numbered one. That integer is multiplied by kRInv and
+ * reduced with the group's field_mod.
+ *
+ * Returns the first error reported by an MPI call, or the result of the last
+ * successful call otherwise. Both temporaries are always released.
+ */
+static mp_err
+from_montgomery(mp_int *out, const felem in,
+                const ECGroup *group)
+{
+    mp_int result, tmp;
+    mp_err res;
+    int i;
+
+    /* Mark both temporaries as unallocated before the first fallible call.
+     * CLEANUP clears both unconditionally, so without this a failure in
+     * mp_init(&result) would hand mp_clear a tmp that was never initialised.
+     */
+    MP_DIGITS(&result) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_CHECKOK(mp_init(&result));
+    MP_CHECKOK(mp_init(&tmp));
+
+    /* tmp is still zero here, so this sets result to the top limb. */
+    MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result));
+    for (i = NLIMBS - 2; i >= 0; i--) {
+        if ((i & 1) == 0) {
+            MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));
+        } else {
+            MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));
+        }
+        MP_CHECKOK(mp_add_d(&tmp, in[i], &result));
+    }
+
+    MP_CHECKOK(mp_mul(&result, &kRInv, out));
+    MP_CHECKOK(group->meth->field_mod(out, out, group->meth));
+
+CLEANUP:
+    mp_clear(&result);
+    mp_clear(&tmp);
+    return res;
+}
+
+/* scalar_from_mp_int sets out_scalar=n, where n < the group order.
+ *
+ * out_scalar receives the digits of n as 32 little-endian bytes, zero padded
+ * at the top. If n uses more digits than fit in 32 bytes (which violates the
+ * precondition), only the low 256 bits are copied rather than writing past
+ * the end of out_scalar.
+ */
+static void
+scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
+{
+    /* We require that |n| is less than the order of the group and therefore it
+     * will fit into |out_scalar|. However, there is a timing side-channel here
+     * that we cannot avoid: if |n| is sufficiently small it may be one or more
+     * words too short and we'll copy less data.
+     */
+    mp_size used = MP_USED(n);
+
+    /* Bound the copy by the destination size: both out_scalar and the
+     * big-endian scratch array hold exactly MP_DIGITS_IN_256_BITS digits.
+     */
+    if (used > MP_DIGITS_IN_256_BITS) {
+        used = MP_DIGITS_IN_256_BITS;
+    }
+
+    memset(out_scalar, 0, 32);
+#ifdef IS_LITTLE_ENDIAN
+    memcpy(out_scalar, MP_DIGITS(n), used * sizeof(mp_digit));
+#else
+    {
+        mp_size i;
+        mp_digit swapped[MP_DIGITS_IN_256_BITS];
+        /* Byte-swap each digit so the bytes land in little-endian order. */
+        for (i = 0; i < used; i++) {
+            swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
+        }
+        memcpy(out_scalar, swapped, used * sizeof(mp_digit));
+    }
+#endif
+}
+
+/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
+ * order of the group.
+ *
+ * Pipeline: n -> 32-byte scalar -> Jacobian result from scalar_base_mult ->
+ * affine felems -> mp_ints via from_montgomery. Returns the first error from
+ * from_montgomery, if any. The range of n is not checked here (see FIXME).
+ */
+static mp_err
+ec_GFp_nistp256_base_point_mul(const mp_int *n,
+ mp_int *out_x, mp_int *out_y,
+ const ECGroup *group)
+{
+ u8 scalar[32];
+ felem x, y, z, x_affine, y_affine;
+ mp_err res;
+
+ /* FIXME(agl): test that n < order. */
+
+ scalar_from_mp_int(scalar, n);
+ scalar_base_mult(x, y, z, scalar);
+ point_to_affine(x_affine, y_affine, x, y, z);
+ MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+ MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+ return res;
+}
+
+/* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <
+ * the order of the group.
+ *
+ * The input coordinates are converted with to_montgomery, multiplied with
+ * scalar_mult, converted to affine form and then back to mp_ints with
+ * from_montgomery. Returns the first error from either conversion.
+ */
+static mp_err
+ec_GFp_nistp256_point_mul(const mp_int *n,
+ const mp_int *in_x, const mp_int *in_y,
+ mp_int *out_x, mp_int *out_y,
+ const ECGroup *group)
+{
+ u8 scalar[32];
+ felem x, y, z, x_affine, y_affine, px, py;
+ mp_err res;
+
+ scalar_from_mp_int(scalar, n);
+
+ MP_CHECKOK(to_montgomery(px, in_x, group));
+ MP_CHECKOK(to_montgomery(py, in_y, group));
+
+ scalar_mult(x, y, z, px, py, scalar);
+ point_to_affine(x_affine, y_affine, x, y, z);
+ MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+ MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+ return res;
+}
+
+/* ec_GFp_nistp256_points_mul_vartime sets {out_x,out_y} = n1*G +
+ * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.
+ *
+ * As indicated by the name, this function operates in variable time. This
+ * is safe because it's used for signature validation which doesn't deal
+ * with secrets.
+ *
+ * A NULL n2 or NULL n1 delegates to the single-scalar routines. When both
+ * scalars are zero, (0, 0) is returned without any point arithmetic.
+ * Otherwise both products are always computed, and the zero tests on n1 and
+ * n2 only choose how the two results are combined.
+ */
+static mp_err
+ec_GFp_nistp256_points_mul_vartime(
+ const mp_int *n1, const mp_int *n2,
+ const mp_int *in_x, const mp_int *in_y,
+ mp_int *out_x, mp_int *out_y,
+ const ECGroup *group)
+{
+ u8 scalar1[32], scalar2[32];
+ felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
+ mp_err res = MP_OKAY;
+
+ /* If n2 == NULL, this is just a base-point multiplication. */
+ if (n2 == NULL) {
+ return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
+ }
+
+ /* If n1 == NULL, this is just an arbitrary-point multiplication. */
+ if (n1 == NULL) {
+ return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
+ }
+
+ /* If both scalars are zero, then the result is the point at infinity. */
+ if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {
+ mp_zero(out_x);
+ mp_zero(out_y);
+ return res;
+ }
+
+ scalar_from_mp_int(scalar1, n1);
+ scalar_from_mp_int(scalar2, n2);
+
+ MP_CHECKOK(to_montgomery(px, in_x, group));
+ MP_CHECKOK(to_montgomery(py, in_y, group));
+ scalar_base_mult(x1, y1, z1, scalar1);
+ scalar_mult(x2, y2, z2, px, py, scalar2);
+
+ if (mp_cmp_z(n2) == 0) {
+ /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
+ * {x1,y1,z1}. */
+ } else if (mp_cmp_z(n1) == 0) {
+ /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
+ * {x2,y2,z2}. */
+ memcpy(x1, x2, sizeof(x2));
+ memcpy(y1, y2, sizeof(y2));
+ memcpy(z1, z2, sizeof(z2));
+ } else {
+ /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
+ point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
+ }
+
+ point_to_affine(x_affine, y_affine, x1, y1, z1);
+ MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+ MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+ return res;
+}
+
+/* Install the P-256 specific point multiplication routines on group when
+ * name is ECCurve_NIST_P256. Any other curve leaves group untouched. Always
+ * returns MP_OKAY. */
+mp_err
+ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256) {
+        return MP_OKAY;
+    }
+
+    group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
+    group->point_mul = &ec_GFp_nistp256_point_mul;
+    group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c
new file mode 100644
index 0000000000..702fd976ed
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_384.c
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r.
+ * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * The fast path is taken only when a has more than 736 and at most 768
+ * significant bits; every other size falls back to mp_mod. On the fast path
+ * the digits of a are rearranged into ten 384-bit rows s[0]..s[9], each
+ * wrapped by the matching m[k], and the result is
+ * s[0] + 2*s[1] + s[2] + ... + s[6] - s[7] - s[8] - s[9] mod p384.
+ * All rows are filled from a before r is first written, which is what lets
+ * a and r be the same object on this path.
+ */
+static mp_err
+ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ int a_bits = mpl_significant_bits(a);
+ int i;
+
+ /* m[0..9] are stack-backed mp_int views over the rows of s, each of exactly the size we need */
+ mp_int m[10];
+
+#ifdef ECL_THIRTY_TWO_BIT
+ mp_digit s[10][12];
+ for (i = 0; i < 10; i++) {
+ MP_SIGN(&m[i]) = MP_ZPOS;
+ MP_ALLOC(&m[i]) = 12;
+ MP_USED(&m[i]) = 12;
+ MP_DIGITS(&m[i]) = s[i];
+ }
+#else
+ mp_digit s[10][6];
+ for (i = 0; i < 10; i++) {
+ MP_SIGN(&m[i]) = MP_ZPOS;
+ MP_ALLOC(&m[i]) = 6;
+ MP_USED(&m[i]) = 6;
+ MP_DIGITS(&m[i]) = s[i];
+ }
+#endif
+
+#ifdef ECL_THIRTY_TWO_BIT
+ /* for polynomials larger than twice the field size or polynomials
+ * not using all words, use regular reduction */
+ if ((a_bits > 768) || (a_bits <= 736)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+ for (i = 0; i < 12; i++) {
+ s[0][i] = MP_DIGIT(a, i);
+ }
+ s[1][0] = 0;
+ s[1][1] = 0;
+ s[1][2] = 0;
+ s[1][3] = 0;
+ s[1][4] = MP_DIGIT(a, 21);
+ s[1][5] = MP_DIGIT(a, 22);
+ s[1][6] = MP_DIGIT(a, 23);
+ s[1][7] = 0;
+ s[1][8] = 0;
+ s[1][9] = 0;
+ s[1][10] = 0;
+ s[1][11] = 0;
+ for (i = 0; i < 12; i++) {
+ s[2][i] = MP_DIGIT(a, i + 12);
+ }
+ s[3][0] = MP_DIGIT(a, 21);
+ s[3][1] = MP_DIGIT(a, 22);
+ s[3][2] = MP_DIGIT(a, 23);
+ for (i = 3; i < 12; i++) {
+ s[3][i] = MP_DIGIT(a, i + 9);
+ }
+ s[4][0] = 0;
+ s[4][1] = MP_DIGIT(a, 23);
+ s[4][2] = 0;
+ s[4][3] = MP_DIGIT(a, 20);
+ for (i = 4; i < 12; i++) {
+ s[4][i] = MP_DIGIT(a, i + 8);
+ }
+ s[5][0] = 0;
+ s[5][1] = 0;
+ s[5][2] = 0;
+ s[5][3] = 0;
+ s[5][4] = MP_DIGIT(a, 20);
+ s[5][5] = MP_DIGIT(a, 21);
+ s[5][6] = MP_DIGIT(a, 22);
+ s[5][7] = MP_DIGIT(a, 23);
+ s[5][8] = 0;
+ s[5][9] = 0;
+ s[5][10] = 0;
+ s[5][11] = 0;
+ s[6][0] = MP_DIGIT(a, 20);
+ s[6][1] = 0;
+ s[6][2] = 0;
+ s[6][3] = MP_DIGIT(a, 21);
+ s[6][4] = MP_DIGIT(a, 22);
+ s[6][5] = MP_DIGIT(a, 23);
+ s[6][6] = 0;
+ s[6][7] = 0;
+ s[6][8] = 0;
+ s[6][9] = 0;
+ s[6][10] = 0;
+ s[6][11] = 0;
+ s[7][0] = MP_DIGIT(a, 23);
+ for (i = 1; i < 12; i++) {
+ s[7][i] = MP_DIGIT(a, i + 11);
+ }
+ s[8][0] = 0;
+ s[8][1] = MP_DIGIT(a, 20);
+ s[8][2] = MP_DIGIT(a, 21);
+ s[8][3] = MP_DIGIT(a, 22);
+ s[8][4] = MP_DIGIT(a, 23);
+ s[8][5] = 0;
+ s[8][6] = 0;
+ s[8][7] = 0;
+ s[8][8] = 0;
+ s[8][9] = 0;
+ s[8][10] = 0;
+ s[8][11] = 0;
+ s[9][0] = 0;
+ s[9][1] = 0;
+ s[9][2] = 0;
+ s[9][3] = MP_DIGIT(a, 23);
+ s[9][4] = MP_DIGIT(a, 23);
+ s[9][5] = 0;
+ s[9][6] = 0;
+ s[9][7] = 0;
+ s[9][8] = 0;
+ s[9][9] = 0;
+ s[9][10] = 0;
+ s[9][11] = 0;
+
+ MP_CHECKOK(mp_add(&m[0], &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is deliberately added twice */
+ MP_CHECKOK(mp_add(r, &m[2], r));
+ MP_CHECKOK(mp_add(r, &m[3], r));
+ MP_CHECKOK(mp_add(r, &m[4], r));
+ MP_CHECKOK(mp_add(r, &m[5], r));
+ MP_CHECKOK(mp_add(r, &m[6], r));
+ MP_CHECKOK(mp_sub(r, &m[7], r));
+ MP_CHECKOK(mp_sub(r, &m[8], r));
+ MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); /* last subtraction also reduces modulo meth->irr */
+ s_mp_clamp(r);
+ }
+#else
+ /* for polynomials larger than twice the field size or polynomials
+ * not using all words, use regular reduction */
+ if ((a_bits > 768) || (a_bits <= 736)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+ for (i = 0; i < 6; i++) {
+ s[0][i] = MP_DIGIT(a, i);
+ }
+ s[1][0] = 0;
+ s[1][1] = 0;
+ s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[1][3] = MP_DIGIT(a, 11) >> 32;
+ s[1][4] = 0;
+ s[1][5] = 0;
+ for (i = 0; i < 6; i++) {
+ s[2][i] = MP_DIGIT(a, i + 6);
+ }
+ s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+ for (i = 2; i < 6; i++) {
+ s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32);
+ }
+ s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
+ s[4][1] = MP_DIGIT(a, 10) << 32;
+ for (i = 2; i < 6; i++) {
+ s[4][i] = MP_DIGIT(a, i + 4);
+ }
+ s[5][0] = 0;
+ s[5][1] = 0;
+ s[5][2] = MP_DIGIT(a, 10);
+ s[5][3] = MP_DIGIT(a, 11);
+ s[5][4] = 0;
+ s[5][5] = 0;
+ s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
+ s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
+ s[6][2] = MP_DIGIT(a, 11);
+ s[6][3] = 0;
+ s[6][4] = 0;
+ s[6][5] = 0;
+ s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+ for (i = 1; i < 6; i++) {
+ s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32);
+ }
+ s[8][0] = MP_DIGIT(a, 10) << 32;
+ s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[8][2] = MP_DIGIT(a, 11) >> 32;
+ s[8][3] = 0;
+ s[8][4] = 0;
+ s[8][5] = 0;
+ s[9][0] = 0;
+ s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
+ s[9][2] = MP_DIGIT(a, 11) >> 32;
+ s[9][3] = 0;
+ s[9][4] = 0;
+ s[9][5] = 0;
+
+ MP_CHECKOK(mp_add(&m[0], &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is deliberately added twice */
+ MP_CHECKOK(mp_add(r, &m[2], r));
+ MP_CHECKOK(mp_add(r, &m[3], r));
+ MP_CHECKOK(mp_add(r, &m[4], r));
+ MP_CHECKOK(mp_add(r, &m[5], r));
+ MP_CHECKOK(mp_add(r, &m[6], r));
+ MP_CHECKOK(mp_sub(r, &m[7], r));
+ MP_CHECKOK(mp_sub(r, &m[8], r));
+ MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); /* last subtraction also reduces modulo meth->irr */
+ s_mp_clamp(r);
+ }
+#endif
+
+CLEANUP:
+ return res;
+}
+
+/* Squares a into r and then reduces r modulo p384 with the optimized
+ * reduction. r may be the same object as a. Returns the first error
+ * encountered, otherwise the status of the reduction.
+ */
+static mp_err
+ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r);
+
+    /* Same test as MP_CHECKOK: only values below MP_OKAY are errors. */
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp384_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Multiplies a by b into r and then reduces r modulo p384 with the optimized
+ * reduction. r may be a or b, and a may be b. Returns the first error
+ * encountered, otherwise the status of the reduction. */
+static mp_err
+ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r);
+
+    /* Same test as MP_CHECKOK: only values below MP_OKAY are errors. */
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp384_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Install the P-384 specific field reduction, multiplication and squaring
+ * routines on group->meth when name is ECCurve_NIST_P384. Any other curve
+ * leaves group untouched. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp384(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P384) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_sqr = &ec_GFp_nistp384_sqr;
+    group->meth->field_mul = &ec_GFp_nistp384_mul;
+    group->meth->field_mod = &ec_GFp_nistp384_mod;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c
new file mode 100644
index 0000000000..6ca0dbb11f
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_521.c
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+#define ECP521_DIGITS ECL_CURVE_DIGITS(521)
+
+/* Fast modular reduction for p521 = 2^521 - 1. a can be r. Uses
+ * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * Three cases, chosen by the bit length of a:
+ *  - fewer than 521 bits: a is copied to r unchanged (or left alone when
+ *    a == r);
+ *  - more than 2*521 bits: generic mp_mod;
+ *  - otherwise: the bits of a from bit 521 upward are added to its low 521
+ *    bits, followed by a single conditional correction.
+ */
+static mp_err
+ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ int a_bits = mpl_significant_bits(a);
+ unsigned int i;
+
+ /* m1 is a stack-backed mp_int view over s1, of exactly the size we need */
+ mp_int m1;
+
+ mp_digit s1[ECP521_DIGITS] = { 0 };
+
+ MP_SIGN(&m1) = MP_ZPOS;
+ MP_ALLOC(&m1) = ECP521_DIGITS;
+ MP_USED(&m1) = ECP521_DIGITS;
+ MP_DIGITS(&m1) = s1;
+
+ if (a_bits < 521) {
+ if (a == r)
+ return MP_OKAY;
+ return mp_copy(a, r);
+ }
+ /* for polynomials larger than twice the field size, use regular
+ * reduction */
+ if (a_bits > (521 * 2)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+#define FIRST_DIGIT (ECP521_DIGITS - 1)
+ for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) { /* s1 = a shifted right by 521 bits: start at digit FIRST_DIGIT, drop its low 9 bits */
+ s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
+ }
+ s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9; /* top digit of a has nothing above it to shift in */
+
+ if (a != r) {
+ MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
+ for (i = 0; i < ECP521_DIGITS; i++) {
+ MP_DIGIT(r, i) = MP_DIGIT(a, i);
+ }
+ }
+ MP_USED(r) = ECP521_DIGITS;
+ MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF; /* keep only the low 9 bits of the top digit: r = low 521 bits of a */
+
+ MP_CHECKOK(s_mp_add(r, &m1));
+ if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) { /* the sum carried into bit 521 */
+ MP_CHECKOK(s_mp_add_d(r, 1));
+ MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF; /* adding 1 and dropping bit 521 subtracts 2^521 - 1 */
+ } else if (s_mp_cmp(r, &meth->irr) == 0) {
+ mp_zero(r); /* the sum equals the modulus exactly */
+ }
+ s_mp_clamp(r);
+ }
+
+CLEANUP:
+ return res;
+}
+
+/* Squares a into r and then reduces r modulo p521 with the optimized
+ * reduction. r may be the same object as a. Returns the first error
+ * encountered, otherwise the status of the reduction.
+ */
+static mp_err
+ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r);
+
+    /* Same test as MP_CHECKOK: only values below MP_OKAY are errors. */
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Multiplies a by b into r and then reduces r modulo p521 with the optimized
+ * reduction. r may be a or b, and a may be b. Returns the first error
+ * encountered, otherwise the status of the reduction. */
+static mp_err
+ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r);
+
+    /* Same test as MP_CHECKOK: only values below MP_OKAY are errors. */
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Field division modulo meth->irr: r = a / b, or r = 1 / b when a is NULL.
+ * Returns the first MPI error encountered, otherwise the status of the last
+ * call made. */
+static mp_err
+ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int inv_b;
+
+    /* Pure inversion needs no temporary. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+
+    /* MPI has no modular division, so compute a * b^-1 and reduce it with
+     * the fast p521 reduction. */
+    MP_CHECKOK(mp_init(&inv_b));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &inv_b));
+    MP_CHECKOK(mp_mul(a, &inv_b, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+
+CLEANUP:
+    mp_clear(&inv_b);
+    return res;
+}
+
+/* Install the P-521 specific field reduction, multiplication, squaring and
+ * division routines on group->meth when name is ECCurve_NIST_P521. Any other
+ * curve leaves group untouched. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp521(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P521) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_div = &ec_GFp_nistp521_div;
+    group->meth->field_sqr = &ec_GFp_nistp521_sqr;
+    group->meth->field_mul = &ec_GFp_nistp521_mul;
+    group->meth->field_mod = &ec_GFp_nistp521_mod;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c
new file mode 100644
index 0000000000..2f8802e8d0
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_aff.c
@@ -0,0 +1,308 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+/* Tests whether the affine point P(px, py) is the point at infinity,
+ * which this representation encodes as (0, 0). Returns MP_YES if both
+ * coordinates are zero and MP_NO otherwise. */
+mp_err
+ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py)
+{
+    /* py is only inspected when px is already known to be zero */
+    return ((mp_cmp_z(px) == 0) && (mp_cmp_z(py) == 0)) ? MP_YES : MP_NO;
+}
+
+/* Overwrites the affine point P(px, py) with the point at infinity by
+ * zeroing both coordinates. Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py)
+{
+    /* (0, 0) is the affine encoding of infinity */
+    mp_zero(py);
+    mp_zero(px);
+
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P,
+ * Q, and R can all be identical: the result is built in temporaries and
+ * only copied into (rx, ry) at the end. Uses affine coordinates, where
+ * (0, 0) stands for the point at infinity. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. On failure the status propagated through MP_CHECKOK is
+ * returned. */
+mp_err
+ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int lambda, temp, tempx, tempy;
+
+    /* Mark every temporary as unallocated so that CLEANUP can clear all
+     * of them regardless of where a failure occurs. */
+    MP_DIGITS(&lambda) = 0;
+    MP_DIGITS(&temp) = 0;
+    MP_DIGITS(&tempx) = 0;
+    MP_DIGITS(&tempy) = 0;
+    MP_CHECKOK(mp_init(&lambda));
+    MP_CHECKOK(mp_init(&temp));
+    MP_CHECKOK(mp_init(&tempx));
+    MP_CHECKOK(mp_init(&tempy));
+    /* if P = inf, then R = Q */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(mp_copy(qx, rx));
+        MP_CHECKOK(mp_copy(qy, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if Q = inf, then R = P */
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if px != qx, then lambda = (py-qy) / (px-qx) */
+    if (mp_cmp(px, qx) != 0) {
+        MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth));
+    } else {
+        /* if py != qy or qy = 0, then R = inf */
+        if ((mp_cmp(py, qy) != 0) || (mp_cmp_z(qy) == 0)) {
+            mp_zero(rx);
+            mp_zero(ry);
+            res = MP_OKAY;
+            goto CLEANUP;
+        }
+        /* P == Q: tangent slope, lambda = (3qx^2+a) / (2qy). The
+         * constants 3 and 2 are field-encoded first when the group uses
+         * an encoded representation. */
+        MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 3));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 2));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth));
+    }
+    /* tempx = lambda^2 - px - qx (the new x-coordinate) */
+    MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth));
+    /* tempy = (qx - tempx) * lambda - qy (the new y-coordinate) */
+    MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth));
+    /* publish the result only now, so rx/ry may alias the inputs */
+    MP_CHECKOK(mp_copy(&tempx, rx));
+    MP_CHECKOK(mp_copy(&tempy, ry));
+
+CLEANUP:
+    mp_clear(&lambda);
+    mp_clear(&temp);
+    mp_clear(&tempx);
+    mp_clear(&tempy);
+    return res;
+}
+
+/* Computes R = P - Q as P + (-Q): Q's y-coordinate is negated in the
+ * field and the pair is handed to the group's point_add routine. P, Q
+ * and R may all be the same point. Works on affine coordinates; inputs
+ * are expected to be field-encoded already and the output stays
+ * field-encoded. */
+mp_err
+ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int neg_qy;
+
+    /* unallocated marker keeps the mp_clear below safe on early failure */
+    MP_DIGITS(&neg_qy) = 0;
+    MP_CHECKOK(mp_init(&neg_qy));
+
+    /* -Q = (qx, -qy) */
+    MP_CHECKOK(group->meth->field_neg(qy, &neg_qy, group->meth));
+    /* R = P + (-Q) */
+    MP_CHECKOK(group->point_add(px, py, qx, &neg_qy, rx, ry, group));
+
+CLEANUP:
+    mp_clear(&neg_qy);
+    return res;
+}
+
+/* Computes R = 2P in affine coordinates by adding P to itself with
+ * ec_GFp_pt_add_aff. P and R may be the same point. Inputs are expected
+ * to be field-encoded already and the output stays field-encoded. */
+mp_err
+ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, const ECGroup *group)
+{
+    /* the adder handles the P == Q case itself */
+    mp_err res = ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group);
+
+    return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and
+ * R can be identical. Uses affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded.
+ *
+ * n == 0 yields the point at infinity (0, 0). A negative n is handled by
+ * negating P's y-coordinate and multiplying by -n. Two loop variants
+ * exist: a plain double-and-add when ECL_DEBUG is defined, otherwise the
+ * add/subtract method driven by the bits of k and 3k. The result is
+ * accumulated in local temporaries and copied to (rx, ry) at the end. */
+mp_err
+ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int k, k3, qx, qy, sx, sy;
+ int b1, b3, i, l;
+
+ /* mark all temporaries as unallocated so CLEANUP may clear them all */
+ MP_DIGITS(&k) = 0;
+ MP_DIGITS(&k3) = 0;
+ MP_DIGITS(&qx) = 0;
+ MP_DIGITS(&qy) = 0;
+ MP_DIGITS(&sx) = 0;
+ MP_DIGITS(&sy) = 0;
+ MP_CHECKOK(mp_init(&k));
+ MP_CHECKOK(mp_init(&k3));
+ MP_CHECKOK(mp_init(&qx));
+ MP_CHECKOK(mp_init(&qy));
+ MP_CHECKOK(mp_init(&sx));
+ MP_CHECKOK(mp_init(&sy));
+
+ /* if n = 0 then r = inf */
+ if (mp_cmp_z(n) == 0) {
+ mp_zero(rx);
+ mp_zero(ry);
+ res = MP_OKAY;
+ goto CLEANUP;
+ }
+ /* Q = P, k = n */
+ MP_CHECKOK(mp_copy(px, &qx));
+ MP_CHECKOK(mp_copy(py, &qy));
+ MP_CHECKOK(mp_copy(n, &k));
+ /* if n < 0 then Q = -Q, k = -k */
+ if (mp_cmp_z(n) < 0) {
+ MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth));
+ MP_CHECKOK(mp_neg(&k, &k));
+ }
+#ifdef ECL_DEBUG /* basic double and add method */
+ /* l = index of the high order bit of k; S starts out as Q, which
+ * accounts for that bit, so the loop begins one bit below it */
+ l = mpl_significant_bits(&k) - 1;
+ MP_CHECKOK(mp_copy(&qx, &sx));
+ MP_CHECKOK(mp_copy(&qy, &sy));
+ for (i = l - 1; i >= 0; i--) {
+ /* S = 2S */
+ MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+ /* if k_i = 1, then S = S + Q */
+ if (mpl_get_bit(&k, i) != 0) {
+ MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ }
+ }
+#else /* double and add/subtract method from \
+ * standard */
+ /* k3 = 3 * k */
+ MP_CHECKOK(mp_set_int(&k3, 3));
+ MP_CHECKOK(mp_mul(&k, &k3, &k3));
+ /* S = Q */
+ MP_CHECKOK(mp_copy(&qx, &sx));
+ MP_CHECKOK(mp_copy(&qy, &sy));
+ /* l = index of high order bit in binary representation of 3*k */
+ l = mpl_significant_bits(&k3) - 1;
+ /* for i = l-1 downto 1 (bit 0 is deliberately not visited) */
+ for (i = l - 1; i >= 1; i--) {
+ /* S = 2S */
+ MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+ b3 = MP_GET_BIT(&k3, i);
+ b1 = MP_GET_BIT(&k, i);
+ /* if k3_i = 1 and k_i = 0, then S = S + Q */
+ if ((b3 == 1) && (b1 == 0)) {
+ MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ /* if k3_i = 0 and k_i = 1, then S = S - Q */
+ } else if ((b3 == 0) && (b1 == 1)) {
+ MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ }
+ /* when both bits agree, only the doubling above is applied */
+ }
+#endif
+ /* output S */
+ MP_CHECKOK(mp_copy(&sx, rx));
+ MP_CHECKOK(mp_copy(&sy, ry));
+
+CLEANUP:
+ mp_clear(&k);
+ mp_clear(&k3);
+ mp_clear(&qx);
+ mp_clear(&qy);
+ mp_clear(&sx);
+ mp_clear(&sy);
+ return res;
+}
+#endif
+
+/* Validates a point on a GFp curve.
+ *
+ * px and py are the affine coordinates of the candidate point and are
+ * NOT field-encoded: they are compared against the field modulus as-is
+ * and encoded locally when the group provides field_enc.
+ *
+ * Returns MP_YES if the point passes all four checks below, MP_NO if any
+ * check fails, or the error status of a failing MPI / field / point
+ * operation. */
+mp_err
+ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group)
+{
+    mp_err res = MP_NO;
+    mp_int accl, accr, tmp, pxt, pyt;
+
+    /* mark all temporaries as unallocated so CLEANUP may clear them all */
+    MP_DIGITS(&accl) = 0;
+    MP_DIGITS(&accr) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&pxt) = 0;
+    MP_DIGITS(&pyt) = 0;
+    MP_CHECKOK(mp_init(&accl));
+    MP_CHECKOK(mp_init(&accr));
+    MP_CHECKOK(mp_init(&tmp));
+    MP_CHECKOK(mp_init(&pxt));
+    MP_CHECKOK(mp_init(&pyt));
+
+    /* 1: Verify that publicValue is not the point at infinity */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 2: Verify that the coordinates of publicValue are elements
+     *    of the field, i.e. 0 <= coordinate < irr.
+     */
+    if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) ||
+        (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 3: Verify that publicValue is on the curve. */
+    if (group->meth->field_enc) {
+        /* Encoding can fail; propagate the error rather than evaluating
+         * the curve equation on stale temporaries. */
+        MP_CHECKOK(group->meth->field_enc(px, &pxt, group->meth));
+        MP_CHECKOK(group->meth->field_enc(py, &pyt, group->meth));
+    } else {
+        MP_CHECKOK(mp_copy(px, &pxt));
+        MP_CHECKOK(mp_copy(py, &pyt));
+    }
+    /* left-hand side: y^2 */
+    MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth));
+    /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */
+    MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth));
+    MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth));
+    /* check LHS - RHS == 0 */
+    MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth));
+    if (mp_cmp_z(&accr) != 0) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 4: Verify that the order of the curve times the publicValue
+     *    is the point at infinity. pxt/pyt are reused for the product.
+     */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt));
+    if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    res = MP_YES;
+
+CLEANUP:
+    mp_clear(&accl);
+    mp_clear(&accr);
+    mp_clear(&tmp);
+    mp_clear(&pxt);
+    mp_clear(&pyt);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c
new file mode 100644
index 0000000000..535e75903f
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jac.c
@@ -0,0 +1,513 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#ifdef ECL_DEBUG
+#include <assert.h>
+#endif
+
+/* Lifts the affine point P(px, py) to Jacobian projective coordinates
+ * R(rx, ry, rz). The affine point at infinity maps to the Jacobian point
+ * at infinity; any other point becomes (px, py, 1), with the 1 passed
+ * through field_enc when the group defines it. Inputs are expected to be
+ * field-encoded already and the output stays field-encoded. */
+mp_err
+ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+
+    /* infinity maps straight to infinity */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        return ec_GFp_pt_set_inf_jac(rx, ry, rz);
+    }
+
+    /* finite point: keep x and y, set z to (encoded) one */
+    MP_CHECKOK(mp_copy(px, rx));
+    MP_CHECKOK(mp_copy(py, ry));
+    MP_CHECKOK(mp_set_int(rz, 1));
+    if (group->meth->field_enc) {
+        res = group->meth->field_enc(rz, rz, group->meth);
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Projects the Jacobian point P(px, py, pz) down to affine coordinates
+ * R(rx, ry) = (px / pz^2, py / pz^3). rx/ry may be the same objects as
+ * px/py. The Jacobian point at infinity maps to the affine point at
+ * infinity, and a point whose pz compares equal to 1 is copied through
+ * without any field arithmetic. Inputs are expected to be field-encoded
+ * already and the output stays field-encoded. */
+mp_err
+ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int zinv, zinv2, zinv3;
+
+    /* unallocated markers keep CLEANUP safe on early failure */
+    MP_DIGITS(&zinv) = 0;
+    MP_DIGITS(&zinv2) = 0;
+    MP_DIGITS(&zinv3) = 0;
+    MP_CHECKOK(mp_init(&zinv));
+    MP_CHECKOK(mp_init(&zinv2));
+    MP_CHECKOK(mp_init(&zinv3));
+
+    /* infinity in, infinity out */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry));
+        goto CLEANUP;
+    }
+
+    /* pz == 1: the coordinates can be taken over unchanged */
+    if (mp_cmp_d(pz, 1) == 0) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        goto CLEANUP;
+    }
+
+    /* zinv = 1/pz, zinv2 = 1/pz^2, zinv3 = 1/pz^3 */
+    MP_CHECKOK(group->meth->field_div(NULL, pz, &zinv, group->meth));
+    MP_CHECKOK(group->meth->field_sqr(&zinv, &zinv2, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&zinv, &zinv2, &zinv3, group->meth));
+    /* rx = px / pz^2, ry = py / pz^3 */
+    MP_CHECKOK(group->meth->field_mul(px, &zinv2, rx, group->meth));
+    MP_CHECKOK(group->meth->field_mul(py, &zinv3, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&zinv);
+    mp_clear(&zinv2);
+    mp_clear(&zinv3);
+    return res;
+}
+
+/* Tests whether the Jacobian point P(px, py, pz) is the point at
+ * infinity, which is encoded as pz == 0. Only pz is examined. The result
+ * of mp_cmp_z(pz) is returned unchanged; callers in this library compare
+ * it against MP_YES. */
+mp_err
+ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz)
+{
+    mp_err z_cmp = mp_cmp_z(pz);
+
+    return z_cmp;
+}
+
+/* Turns the Jacobian point P(px, py, pz) into the point at infinity by
+ * zeroing pz. px and py are left as they are. Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz)
+{
+    /* pz == 0 is the Jacobian encoding of infinity */
+    mp_zero(pz);
+
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical.
+ * Uses mixed Jacobian-affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and
+ * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime
+ * Fields.
+ *
+ * Special cases handled before the general formula: P at infinity
+ * (R = Q lifted to Jacobian), Q at infinity (R = P), P == Q (delegates to
+ * ec_GFp_pt_dbl_jac) and P == -Q (R = infinity). The status of the last
+ * checked operation is returned in res. */
+mp_err
+ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int A, B, C, D, C2, C3;
+
+ /* mark all temporaries as unallocated so CLEANUP may clear them all */
+ MP_DIGITS(&A) = 0;
+ MP_DIGITS(&B) = 0;
+ MP_DIGITS(&C) = 0;
+ MP_DIGITS(&D) = 0;
+ MP_DIGITS(&C2) = 0;
+ MP_DIGITS(&C3) = 0;
+ MP_CHECKOK(mp_init(&A));
+ MP_CHECKOK(mp_init(&B));
+ MP_CHECKOK(mp_init(&C));
+ MP_CHECKOK(mp_init(&D));
+ MP_CHECKOK(mp_init(&C2));
+ MP_CHECKOK(mp_init(&C3));
+
+ /* If either P or Q is the point at infinity, then return the other
+ * point */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+ MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+ goto CLEANUP;
+ }
+ if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+ MP_CHECKOK(mp_copy(px, rx));
+ MP_CHECKOK(mp_copy(py, ry));
+ MP_CHECKOK(mp_copy(pz, rz));
+ goto CLEANUP;
+ }
+
+ /* A = qx * pz^2, B = qy * pz^3 */
+ MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth));
+
+ /* C = A - px, D = B - py */
+ MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth));
+
+ /* C == 0 means both points share the same x-coordinate */
+ if (mp_cmp_z(&C) == 0) {
+ /* P == Q or P == -Q */
+ if (mp_cmp_z(&D) == 0) {
+ /* P == Q */
+ /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */
+ /* D was just found to be zero; its low digit is overwritten so
+ * that D can serve as the z-coordinate for the doubling.
+ * NOTE(review): presumably a zero mp_int still has one used
+ * digit, making D == 1 here - confirm against MPI. */
+ MP_DIGIT(&D, 0) = 1; /* Set D to 1. */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group));
+ } else {
+ /* P == -Q */
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+ }
+ goto CLEANUP;
+ }
+
+ /* C2 = C^2, C3 = C^3 */
+ MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth));
+
+ /* rz = pz * C */
+ MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth));
+
+ /* C = px * C^2 (C is reused; its old value is no longer needed) */
+ MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth));
+ /* A = D^2 */
+ MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth));
+
+ /* rx = D^2 - (C^3 + 2 * (px * C^2)) */
+ MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth));
+ MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth));
+
+ /* C3 = py * C^3 */
+ MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth));
+
+ /* ry = D * (px * C^2 - rx) - py * C^3 */
+ MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth));
+ MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth));
+
+CLEANUP:
+ mp_clear(&A);
+ mp_clear(&B);
+ mp_clear(&C);
+ mp_clear(&D);
+ mp_clear(&C2);
+ mp_clear(&C3);
+ return res;
+}
+
+/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses
+ * Jacobian coordinates.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * This routine implements Point Doubling in the Jacobian Projective
+ * space as described in the paper "Efficient elliptic curve exponentiation
+ * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono.
+ *
+ * If P is the point at infinity or py is zero, R is set to the point at
+ * infinity. The slope term M is computed by one of three formulas
+ * depending on whether pz compares equal to 1, whether curvea compares
+ * equal to -3, or neither.
+ */
+mp_err
+ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
+ mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int t0, t1, M, S;
+
+ /* mark all temporaries as unallocated so CLEANUP may clear them all */
+ MP_DIGITS(&t0) = 0;
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&M) = 0;
+ MP_DIGITS(&S) = 0;
+ MP_CHECKOK(mp_init(&t0));
+ MP_CHECKOK(mp_init(&t1));
+ MP_CHECKOK(mp_init(&M));
+ MP_CHECKOK(mp_init(&S));
+
+ /* P == inf or P == -P */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+ goto CLEANUP;
+ }
+
+ if (mp_cmp_d(pz, 1) == 0) {
+ /* M = 3 * px^2 + a */
+ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth));
+ } else if (MP_SIGN(&group->curvea) == MP_NEG &&
+ MP_USED(&group->curvea) == 1 &&
+ MP_DIGIT(&group->curvea, 0) == 3) {
+ /* curvea is the single-digit value -3, which allows the factored
+ * form below */
+ /* M = 3 * (px + pz^2) * (px - pz^2) */
+ MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
+ } else {
+ /* M = 3 * (px^2) + a * (pz^4) */
+ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
+ }
+
+ /* rz = 2 * py * pz */
+ /* t0 = 4 * py^2 */
+ if (mp_cmp_d(pz, 1) == 0) {
+ /* pz is 1, so rz = 2 * py and t0 = rz^2 */
+ MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
+ } else {
+ MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
+ }
+
+ /* S = 4 * px * py^2 = px * (2 * py)^2 */
+ MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));
+
+ /* rx = M^2 - 2 * S */
+ MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));
+
+ /* ry = M * (S - rx) - 8 * py^4 */
+ /* t1 = t0^2 = 16 * py^4; it is halved below to obtain 8 * py^4. An odd
+ * t1 first has the modulus irr added so that the halving is exact. */
+ MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth));
+ if (mp_isodd(&t1)) {
+ MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
+ }
+ MP_CHECKOK(mp_div_2(&t1, &t1));
+ MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));
+
+CLEANUP:
+ mp_clear(&t0);
+ mp_clear(&t1);
+ mp_clear(&M);
+ mp_clear(&S);
+ return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters
+ * a, b and p are the elliptic curve coefficients and the prime that
+ * determines the field GFp. Elliptic curve points P and R can be
+ * identical. Uses mixed Jacobian-affine coordinates. Assumes input is
+ * already field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses 4-bit window method.
+ *
+ * A table precomp[i] = i * P (i = 0..15, affine) is built first; the
+ * scalar is then consumed four bits at a time from the top, with four
+ * doublings and one table addition per window. */
+mp_err
+ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int precomp[16][2], rz;
+ int i, ni, d;
+
+ /* mark everything as unallocated so CLEANUP may clear it all */
+ MP_DIGITS(&rz) = 0;
+ for (i = 0; i < 16; i++) {
+ MP_DIGITS(&precomp[i][0]) = 0;
+ MP_DIGITS(&precomp[i][1]) = 0;
+ }
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+
+ /* initialize precomputation table */
+ for (i = 0; i < 16; i++) {
+ MP_CHECKOK(mp_init(&precomp[i][0]));
+ MP_CHECKOK(mp_init(&precomp[i][1]));
+ }
+
+ /* fill precomputation table */
+ /* precomp[0] = inf (0, 0), precomp[1] = P, precomp[i] = P + precomp[i-1] */
+ mp_zero(&precomp[0][0]);
+ mp_zero(&precomp[0][1]);
+ MP_CHECKOK(mp_copy(px, &precomp[1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][1]));
+ for (i = 2; i < 16; i++) {
+ MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1],
+ &precomp[i - 1][0], &precomp[i - 1][1],
+ &precomp[i][0], &precomp[i][1], group));
+ }
+
+ /* d = number of 4-bit windows needed to cover n */
+ d = (mpl_significant_bits(n) + 3) / 4;
+
+ /* R = inf */
+ MP_CHECKOK(mp_init(&rz));
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+ for (i = d - 1; i >= 0; i--) {
+ /* compute window ni */
+ ni = MP_GET_BIT(n, 4 * i + 3);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i + 2);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i + 1);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i);
+ /* R = 2^4 * R */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ /* R = R + (ni * P) */
+ MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry,
+ &rz, group));
+ }
+
+ /* convert result S to affine coordinates */
+ MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+ mp_clear(&rz);
+ for (i = 0; i < 16; i++) {
+ mp_clear(&precomp[i][0]);
+ mp_clear(&precomp[i][1]);
+ }
+ return res;
+}
+#endif
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Uses mixed Jacobian-affine coordinates. Input and output values are
+ * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous
+ * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes.
+ * Software Implementation of the NIST Elliptic Curves over Prime Fields.
+ *
+ * When only one scalar/point pair is supplied the work is delegated to
+ * ECPoint_mul. Otherwise a 4x4 table precomp[i][j] = i*A + j*B is built
+ * (A being the point paired with the longer scalar a, B the other one)
+ * and both scalars are consumed two bits at a time from the top. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int precomp[4][4][2];
+ mp_int rz;
+ const mp_int *a, *b;
+ unsigned int i, j;
+ int ai, bi, d;
+
+ /* mark everything as unallocated so CLEANUP may clear it all */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_DIGITS(&precomp[i][j][0]) = 0;
+ MP_DIGITS(&precomp[i][j][1]) = 0;
+ }
+ }
+ MP_DIGITS(&rz) = 0;
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+ /* if some arguments are not defined used ECPoint_mul */
+ /* nothing has been allocated yet, so returning directly is safe */
+ if (k1 == NULL) {
+ return ECPoint_mul(group, k2, px, py, rx, ry);
+ } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+ return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+ }
+
+ /* initialize precomputation table */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_CHECKOK(mp_init(&precomp[i][j][0]));
+ MP_CHECKOK(mp_init(&precomp[i][j][1]));
+ }
+ }
+
+ /* fill precomputation table */
+ /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+ /* precomp[1][0] receives the point that goes with a, precomp[0][1] the
+ * point that goes with b. P is field-encoded here when the group
+ * defines field_enc; genx/geny are copied as stored in the group. */
+ if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+ a = k2;
+ b = k1;
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+ }
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+ } else {
+ a = k1;
+ b = k2;
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+ }
+ }
+ /* precompute [*][0][*] */
+ /* [0][0] = inf (0, 0), [2][0] = 2 * [1][0], [3][0] = [1][0] + [2][0] */
+ mp_zero(&precomp[0][0][0]);
+ mp_zero(&precomp[0][0][1]);
+ MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1], group));
+ MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1],
+ &precomp[3][0][0], &precomp[3][0][1], group));
+ /* precompute [*][1][*] */
+ /* [i][1] = [0][1] + [i][0] */
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][1][0], &precomp[i][1][1], group));
+ }
+ /* precompute [*][2][*] */
+ /* [0][2] = 2 * [0][1], [i][2] = [0][2] + [i][0] */
+ MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][2][0], &precomp[i][2][1], group));
+ }
+ /* precompute [*][3][*] */
+ /* [0][3] = [0][1] + [0][2], [i][3] = [0][3] + [i][0] */
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1],
+ &precomp[0][3][0], &precomp[0][3][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][3][0], &precomp[i][3][1], group));
+ }
+
+ /* d = number of 2-bit windows needed to cover the longer scalar a */
+ d = (mpl_significant_bits(a) + 1) / 2;
+
+ /* R = inf */
+ MP_CHECKOK(mp_init(&rz));
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+ /* i is unsigned, so the loop counts down with a post-decrement test:
+ * the body sees i = d-1, ..., 0 */
+ for (i = d; i-- > 0;) {
+ ai = MP_GET_BIT(a, 2 * i + 1);
+ ai <<= 1;
+ ai |= MP_GET_BIT(a, 2 * i);
+ bi = MP_GET_BIT(b, 2 * i + 1);
+ bi <<= 1;
+ bi |= MP_GET_BIT(b, 2 * i);
+ /* R = 2^2 * R */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ /* R = R + (ai * A + bi * B) */
+ MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1],
+ rx, ry, &rz, group));
+ }
+
+ /* back to affine coordinates */
+ MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+ /* undo the field encoding so the caller gets plain coordinates */
+ if (group->meth->field_dec) {
+ MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+ }
+
+CLEANUP:
+ mp_clear(&rz);
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ mp_clear(&precomp[i][j][0]);
+ mp_clear(&precomp[i][j][1]);
+ }
+ }
+ return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c
new file mode 100644
index 0000000000..7998421713
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jm.c
@@ -0,0 +1,297 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "ecl-priv.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+#define MAX_SCRATCH 6
+
+/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses
+ * Modified Jacobian coordinates.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * P is (px, py, pz, paz4) and R is (rx, ry, rz, raz4); the fourth
+ * component is combined with 3*px^2 to form M and is updated alongside
+ * the point. scratch must provide at least 4 initialized mp_ints, which
+ * are used as temporaries and overwritten.
+ */
+static mp_err
+ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz,
+ const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz,
+ mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int *t0, *t1, *M, *S;
+
+ /* name the scratch slots used as temporaries */
+ t0 = &scratch[0];
+ t1 = &scratch[1];
+ M = &scratch[2];
+ S = &scratch[3];
+
+#if MAX_SCRATCH < 4
+#error "Scratch array defined too small "
+#endif
+
+ /* Check for point at infinity */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+ /* Set r = pt at infinity by setting rz = 0 */
+ /* note: raz4 is not written on this path */
+
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+ goto CLEANUP;
+ }
+
+ /* M = 3 (px^2) + a*(pz^4) */
+ MP_CHECKOK(group->meth->field_sqr(px, t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(t0, t0, M, group->meth));
+ MP_CHECKOK(group->meth->field_add(t0, M, t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(t0, paz4, M, group->meth));
+
+ /* rz = 2 * py * pz */
+ MP_CHECKOK(group->meth->field_mul(py, pz, S, group->meth));
+ MP_CHECKOK(group->meth->field_add(S, S, rz, group->meth));
+
+ /* t0 = 2y^2 , t1 = 8y^4 */
+ MP_CHECKOK(group->meth->field_sqr(py, t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(t0, t0, t0, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(t0, t1, group->meth));
+ MP_CHECKOK(group->meth->field_add(t1, t1, t1, group->meth));
+
+ /* S = 4 * px * py^2 = 2 * px * t0 */
+ MP_CHECKOK(group->meth->field_mul(px, t0, S, group->meth));
+ MP_CHECKOK(group->meth->field_add(S, S, S, group->meth));
+
+ /* rx = M^2 - 2S (S is subtracted twice) */
+ MP_CHECKOK(group->meth->field_sqr(M, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth));
+
+ /* ry = M * (S - rx) - t1 */
+ MP_CHECKOK(group->meth->field_sub(S, rx, S, group->meth));
+ MP_CHECKOK(group->meth->field_mul(S, M, ry, group->meth));
+ MP_CHECKOK(group->meth->field_sub(ry, t1, ry, group->meth));
+
+ /* ra*z^4 = 2*t1*(apz4) */
+ MP_CHECKOK(group->meth->field_mul(paz4, t1, raz4, group->meth));
+ MP_CHECKOK(group->meth->field_add(raz4, raz4, raz4, group->meth));
+
+CLEANUP:
+ return res;
+}
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical.
+ * Uses mixed Modified_Jacobian-affine coordinates. Assumes input is
+ * already field-encoded using field_enc, and returns output that is still
+ * field-encoded.
+ *
+ * paz4/raz4 carry the extra a * z^4 component of P and R. scratch must
+ * provide at least 6 initialized mp_ints, which are used as temporaries
+ * and overwritten. */
+static mp_err
+ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+ const mp_int *paz4, const mp_int *qx,
+ const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz,
+ mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int *A, *B, *C, *D, *C2, *C3;
+
+ /* name the scratch slots used as temporaries */
+ A = &scratch[0];
+ B = &scratch[1];
+ C = &scratch[2];
+ D = &scratch[3];
+ C2 = &scratch[4];
+ C3 = &scratch[5];
+
+#if MAX_SCRATCH < 6
+#error "Scratch array defined too small "
+#endif
+
+ /* If either P or Q is the point at infinity, then return the other
+ * point */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+ /* R = Q lifted to Jacobian form, then raz4 = a * rz^4 */
+ MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+ MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth));
+ MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth));
+ goto CLEANUP;
+ }
+ if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+ MP_CHECKOK(mp_copy(px, rx));
+ MP_CHECKOK(mp_copy(py, ry));
+ MP_CHECKOK(mp_copy(pz, rz));
+ MP_CHECKOK(mp_copy(paz4, raz4));
+ goto CLEANUP;
+ }
+
+ /* A = qx * pz^2, B = qy * pz^3 */
+ MP_CHECKOK(group->meth->field_sqr(pz, A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(A, pz, B, group->meth));
+ MP_CHECKOK(group->meth->field_mul(A, qx, A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(B, qy, B, group->meth));
+
+ /* Check P == Q */
+ if (mp_cmp(A, px) == 0) {
+ if (mp_cmp(B, py) == 0) {
+ /* If Px == Qx && Py == Qy, double P. */
+ return ec_GFp_pt_dbl_jm(px, py, pz, paz4, rx, ry, rz, raz4,
+ scratch, group);
+ }
+ /* If Px == Qx && Py != Qy, return point at infinity. */
+ /* note: raz4 is not written on this path */
+ return ec_GFp_pt_set_inf_jac(rx, ry, rz);
+ }
+
+ /* C = A - px, D = B - py */
+ MP_CHECKOK(group->meth->field_sub(A, px, C, group->meth));
+ MP_CHECKOK(group->meth->field_sub(B, py, D, group->meth));
+
+ /* C2 = C^2, C3 = C^3 */
+ MP_CHECKOK(group->meth->field_sqr(C, C2, group->meth));
+ MP_CHECKOK(group->meth->field_mul(C, C2, C3, group->meth));
+
+ /* rz = pz * C */
+ MP_CHECKOK(group->meth->field_mul(pz, C, rz, group->meth));
+
+ /* C = px * C^2 (C is reused; its old value is no longer needed) */
+ MP_CHECKOK(group->meth->field_mul(px, C2, C, group->meth));
+ /* A = D^2 */
+ MP_CHECKOK(group->meth->field_sqr(D, A, group->meth));
+
+ /* rx = D^2 - (C^3 + 2 * (px * C^2)) */
+ MP_CHECKOK(group->meth->field_add(C, C, rx, group->meth));
+ MP_CHECKOK(group->meth->field_add(C3, rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(A, rx, rx, group->meth));
+
+ /* C3 = py * C^3 */
+ MP_CHECKOK(group->meth->field_mul(py, C3, C3, group->meth));
+
+ /* ry = D * (px * C^2 - rx) - py * C^3 */
+ MP_CHECKOK(group->meth->field_sub(C, rx, ry, group->meth));
+ MP_CHECKOK(group->meth->field_mul(D, ry, ry, group->meth));
+ MP_CHECKOK(group->meth->field_sub(ry, C3, ry, group->meth));
+
+ /* raz4 = a * rz^4 */
+ MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth));
+ MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth));
+CLEANUP:
+ return res;
+}
+
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Elliptic curve
+ * points P and R can be identical. Uses mixed Modified-Jacobian
+ * co-ordinates for doubling and Chudnovsky Jacobian coordinates for
+ * additions. Assumes input is already field-encoded using field_enc, and
+ * returns output that is still field-encoded. Uses 5-bit window NAF
+ * method (algorithm 11) for scalar-point multiplication from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic
+ * Curves Over Prime Fields.
+ *
+ * Arguments are checked with ARGCHK(..., MP_BADARG). Returns MP_OKAY on
+ * success, MP_MEM if the NAF buffer cannot be allocated, or the first
+ * error reported by any of the MPI / point-arithmetic helpers. */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+                      mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[16][2], rz, tpx, tpy;
+    mp_int raz4;
+    mp_int scratch[MAX_SCRATCH];
+    signed char *naf = NULL;
+    int i, orderBitSize = 0;
+
+    /* Zero every digits pointer up front so that the mp_clear calls under
+     * CLEANUP can run no matter how far initialization got. */
+    MP_DIGITS(&rz) = 0;
+    MP_DIGITS(&raz4) = 0;
+    MP_DIGITS(&tpx) = 0;
+    MP_DIGITS(&tpy) = 0;
+    for (i = 0; i < 16; i++) {
+        MP_DIGITS(&precomp[i][0]) = 0;
+        MP_DIGITS(&precomp[i][1]) = 0;
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_DIGITS(&scratch[i]) = 0;
+    }
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+    ARGCHK((rx != NULL) && (ry != NULL), MP_BADARG);
+
+    /* initialize temporaries and the precomputation table */
+    MP_CHECKOK(mp_init(&tpx));
+    MP_CHECKOK(mp_init(&tpy));
+    MP_CHECKOK(mp_init(&rz));
+    MP_CHECKOK(mp_init(&raz4));
+
+    for (i = 0; i < 16; i++) {
+        MP_CHECKOK(mp_init(&precomp[i][0]));
+        MP_CHECKOK(mp_init(&precomp[i][1]));
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_CHECKOK(mp_init(&scratch[i]));
+    }
+
+    /* Set precomp[8] = P */
+    MP_CHECKOK(mp_copy(px, &precomp[8][0]));
+    MP_CHECKOK(mp_copy(py, &precomp[8][1]));
+
+    /* Set (tpx, tpy) = 2P */
+    MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy,
+                                group));
+
+    /* Set 3P, 5P, ..., 15P in precomp[9..15]: each entry is the previous
+     * one plus 2P. */
+    for (i = 8; i < 15; i++) {
+        MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy,
+                                    &precomp[i + 1][0], &precomp[i + 1][1],
+                                    group));
+    }
+
+    /* Set -15P, -13P, ..., -P in precomp[0..7] by negating the y
+     * coordinate of the mirrored positive multiple. */
+    for (i = 0; i < 8; i++) {
+        MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0]));
+        MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1],
+                                          group->meth));
+    }
+
+    /* R = inf */
+    MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+    orderBitSize = mpl_significant_bits(&group->order);
+
+    /* Allocate memory for NAF */
+    naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1));
+    if (naf == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Compute 5NAF */
+    ec_compute_wNAF(naf, orderBitSize, n, 5);
+
+    /* wNAF method. Failures of the point operations are propagated so that
+     * a partially computed accumulator is never converted and returned. */
+    for (i = orderBitSize; i >= 0; i--) {
+        /* R = 2R */
+        MP_CHECKOK(ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz,
+                                    &raz4, scratch, group));
+        if (naf[i] != 0) {
+            /* (naf[i] + 15) / 2 is the precomp index of the multiple
+             * naf[i] * P according to the table layout built above. */
+            MP_CHECKOK(ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4,
+                                            &precomp[(naf[i] + 15) / 2][0],
+                                            &precomp[(naf[i] + 15) / 2][1], rx, ry,
+                                            &rz, &raz4, scratch, group));
+        }
+    }
+
+    /* convert result S to affine coordinates */
+    MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        mp_clear(&scratch[i]);
+    }
+    for (i = 0; i < 16; i++) {
+        mp_clear(&precomp[i][0]);
+        mp_clear(&precomp[i][1]);
+    }
+    mp_clear(&tpx);
+    mp_clear(&tpy);
+    mp_clear(&rz);
+    mp_clear(&raz4);
+    /* The NAF digits are derived from the scalar n, so wipe them before
+     * releasing the buffer. */
+    if (naf) {
+        memset(naf, 0, orderBitSize + 1);
+    }
+    free(naf);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c
new file mode 100644
index 0000000000..779685b4dd
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_mont.c
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Uses Montgomery reduction for field arithmetic. See mpi/mpmontg.c for
+ * code implementation. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Build a GFMethod for prime-field arithmetic modulo irr whose
+ * multiplication, squaring, division, encoding and decoding hooks use
+ * Montgomery reduction. Returns NULL if the base method cannot be
+ * constructed or the Montgomery state cannot be allocated. */
+GFMethod *
+GFMethod_consGFp_mont(const mp_int *irr)
+{
+    mp_mont_modulus *mont;
+    GFMethod *method = GFMethod_consGFp(irr);
+
+    if (method == NULL) {
+        return NULL;
+    }
+
+    mont = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus));
+    if (mont == NULL) {
+        /* Nothing Montgomery-specific has been attached yet; release the
+         * generic method and report failure. */
+        GFMethod_free(method);
+        return NULL;
+    }
+
+    /* Plain structure assignment (not mp_copy): N is taken from the
+     * method's own irr. NOTE(review): N presumably shares irr's digit
+     * storage, which would explain why the free hook never clears N. */
+    mont->N = method->irr;
+    mont->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&method->irr, 0));
+
+    /* Swap in the Montgomery implementations and attach their state. */
+    method->extra1 = mont;
+    method->extra2 = NULL;
+    method->extra_free = &ec_GFp_extra_free_mont;
+    method->field_enc = &ec_GFp_enc_mont;
+    method->field_dec = &ec_GFp_dec_mont;
+    method->field_mul = &ec_GFp_mul_mont;
+    method->field_sqr = &ec_GFp_sqr_mont;
+    method->field_div = &ec_GFp_div_mont;
+
+    return method;
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Field multiplication using Montgomery reduction: r = a * b in the
+ * Montgomery domain described by meth->extra1 (an mp_mont_modulus).
+ * r may alias a and/or b. Returns MP_OKAY or the first MPI error. */
+mp_err
+ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+#ifdef MP_MONT_USE_MP_MUL
+    /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont
+     * is not implemented and we have to use mp_mul and s_mp_redc directly
+     */
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1));
+#else
+    mp_int s;
+
+    /* Mark s as unallocated so the mp_clear under CLEANUP is harmless when
+     * the temporary was never initialized. */
+    MP_DIGITS(&s) = 0;
+    /* s_mp_mul_mont doesn't allow source and destination to be the same */
+    if ((a == r) || (b == r)) {
+        MP_CHECKOK(mp_init(&s));
+        MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1));
+        MP_CHECKOK(mp_copy(&s, r));
+    } else {
+        res = s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1);
+    }
+#endif
+CLEANUP:
+#ifndef MP_MONT_USE_MP_MUL
+    /* Release the temporary on every path, including the error paths that
+     * jump here from MP_CHECKOK. */
+    mp_clear(&s);
+#endif
+    return res;
+}
+
+/* Field squaring using Montgomery reduction: r = a * a, delegated to the
+ * Montgomery multiplication with both operands set to a. */
+mp_err
+ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *operand = a;
+
+    return ec_GFp_mul_mont(operand, operand, r, meth);
+}
+
+/* Field division using Montgomery reduction: r = a / b with all values in
+ * Montgomery form. A NULL a is handled specially (see below). */
+mp_err
+ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    /* if A=aZ represents a encoded in montgomery coordinates with Z and #
+     * and \ respectively represent multiplication and division in
+     * montgomery coordinates, then A\B = (a/b)Z = (A/B)Z and Binv =
+     * (1/b)Z = (1/B)(Z^2) where B # Binv = Z.
+     * Hence one re-encoding after a plain division, and two when a is
+     * NULL (NOTE(review): presumably ec_GFp_div then computes 1/b). */
+    int encodings = (a == NULL) ? 2 : 1;
+
+    MP_CHECKOK(ec_GFp_div(a, b, r, meth));
+    while (encodings-- > 0) {
+        MP_CHECKOK(ec_GFp_enc_mont(r, r, meth));
+    }
+CLEANUP:
+    return res;
+}
+
+/* Encode a field element in Montgomery form. See s_mp_to_mont in
+ * mpi/mpmontg.c. The value of a is copied into r, shifted left by as many
+ * digits as the modulus uses, and reduced modulo that modulus. */
+mp_err
+ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_mont_modulus *mont = (mp_mont_modulus *)meth->extra1;
+    const mp_int *modulus = &mont->N;
+
+    MP_CHECKOK(mp_copy(a, r));
+    MP_CHECKOK(s_mp_lshd(r, MP_USED(modulus)));
+    MP_CHECKOK(mp_mod(r, modulus, r));
+CLEANUP:
+    return res;
+}
+
+/* Decode a field element from Montgomery form: r receives a (unless they
+ * are already the same object) and is then passed through s_mp_redc with
+ * the method's Montgomery modulus. */
+mp_err
+ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_mont_modulus *mont;
+
+    /* In-place decoding needs no copy. */
+    if (r != a) {
+        MP_CHECKOK(mp_copy(a, r));
+    }
+    mont = (mp_mont_modulus *)meth->extra1;
+    MP_CHECKOK(s_mp_redc(r, mont));
+CLEANUP:
+    return res;
+}
+
+/* Release the Montgomery state hung off meth->extra1 (if any) and reset
+ * the pointer so a repeated call is a no-op. */
+void
+ec_GFp_extra_free_mont(GFMethod *meth)
+{
+    if (meth->extra1 == NULL) {
+        return;
+    }
+
+    free(meth->extra1);
+    meth->extra1 = NULL;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_secp256r1.c b/security/nss/lib/freebl/ecl/ecp_secp256r1.c
new file mode 100644
index 0000000000..044f6a7a1a
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp256r1.c
@@ -0,0 +1,258 @@
+/* P-256 from HACL* */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "ecl-priv.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "secmpi.h"
+#include "../verified/Hacl_P256.h"
+
+/*
+ * Point Validation for P-256.
+ *
+ * pt must be a 65-byte uncompressed point (format byte followed by 64
+ * bytes of coordinates). Sets the NSS error code and returns SECFailure
+ * on any rejection, SECSuccess otherwise.
+ */
+
+SECStatus
+ec_secp256r1_pt_validate(const SECItem *pt)
+{
+    if (pt == NULL || pt->data == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (pt->len != 65) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    if (pt->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    }
+
+    /* Skip the format byte; HACL* receives the 64 coordinate bytes. */
+    if (!Hacl_P256_validate_public_key(pt->data + 1)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Scalar Validation for P-256.
+ *
+ * scalar must be exactly 32 bytes and accepted by
+ * Hacl_P256_validate_private_key. Sets the NSS error code and returns
+ * SECFailure on any rejection, SECSuccess otherwise.
+ */
+
+SECStatus
+ec_secp256r1_scalar_validate(const SECItem *scalar)
+{
+    if (scalar == NULL || scalar->data == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (scalar->len != 32) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    if (!Hacl_P256_validate_private_key(scalar->data)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Scalar multiplication for P-256.
+ * If P == NULL, the base point is used.
+ * Returns X = k*P
+ *
+ * Base-point case: k must be exactly 32 bytes and X must have room for 65
+ * bytes; X receives an uncompressed point (format byte + 64 bytes).
+ * Given-point case: P must be a 65-byte uncompressed point, k may be up
+ * to 32 bytes (left-padded with zeros) or 33 bytes with a leading zero,
+ * and X receives the first 32 bytes produced by Hacl_P256_dh_responder.
+ */
+
+SECStatus
+ec_secp256r1_pt_mul(SECItem *X, SECItem *k, SECItem *P)
+{
+    uint8_t derived[64] = { 0 };
+
+    if (P == NULL) {
+        if (!X || !k || !X->data || !k->data ||
+            X->len < 65 || k->len != 32) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+
+        if (!Hacl_P256_dh_initiator(derived, k->data)) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            return SECFailure;
+        }
+
+        /* Emit the result as an uncompressed point. */
+        X->len = 65;
+        X->data[0] = EC_POINT_FORM_UNCOMPRESSED;
+        memcpy(X->data + 1, derived, 64);
+        return SECSuccess;
+    }
+
+    uint8_t full_key[32] = { 0 };
+    uint8_t *key;
+
+    /* P is known to be non-NULL on this path. */
+    if (!X || !k || !X->data || !k->data || !P->data ||
+        X->len < 32 || P->len != 65 ||
+        P->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* We consider keys of up to size 32, or of size 33 with a single leading 0 */
+    if (k->len == 32) {
+        key = k->data;
+    } else if (k->len < 32) {
+        /* Right-align the short scalar inside a zeroed 32-byte buffer. */
+        memcpy(full_key + 32 - k->len, k->data, k->len);
+        key = full_key;
+    } else if (k->len == 33 && k->data[0] == 0) {
+        key = k->data + 1;
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (!Hacl_P256_dh_responder(derived, P->data + 1, key)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    X->len = 32;
+    memcpy(X->data, derived, 32);
+
+    return SECSuccess;
+}
+
+/*
+ * ECDSA Signature for P-256
+ *
+ * Signs digest with key using the caller-supplied nonce bytes kb. Digest
+ * and nonce are each normalised to 32 bytes: shorter inputs are
+ * left-padded with zeros, longer inputs contribute only their first 32
+ * bytes. On success signature->data holds 64 bytes and signature->len is
+ * set to 64.
+ */
+
+SECStatus
+ec_secp256r1_sign_digest(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *kb,
+                         const unsigned int kblen)
+{
+    uint8_t hash[32] = { 0 };
+    uint8_t nonce[32] = { 0 };
+    unsigned int hashBytes;
+    unsigned int nonceBytes;
+
+    if (!key || !signature || !digest || !kb ||
+        !key->privateValue.data ||
+        !signature->data || !digest->data ||
+        key->ecParams.name != ECCurve_NIST_P256) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (key->privateValue.len != 32 ||
+        kblen == 0 ||
+        digest->len == 0 ||
+        signature->len < 64) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Right-align at most 32 leading bytes of the digest. */
+    hashBytes = (digest->len < 32) ? digest->len : 32;
+    memcpy(hash + 32 - hashBytes, digest->data, hashBytes);
+
+    /* Same normalisation for the nonce. */
+    nonceBytes = (kblen < 32) ? kblen : 32;
+    memcpy(nonce + 32 - nonceBytes, kb, nonceBytes);
+
+    if (!Hacl_P256_ecdsa_sign_p256_without_hash(
+            signature->data, 32, hash,
+            key->privateValue.data, nonce)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    signature->len = 64;
+    return SECSuccess;
+}
+
+/*
+ * ECDSA Signature Verification for P-256
+ *
+ * key must hold a 65-byte uncompressed public point and signature must be
+ * exactly 64 bytes (two 32-byte halves). The digest is normalised to 32
+ * bytes: shorter inputs are left-padded with zeros, longer inputs
+ * contribute only their first 32 bytes.
+ */
+
+SECStatus
+ec_secp256r1_verify_digest(ECPublicKey *key, const SECItem *signature,
+                           const SECItem *digest)
+{
+    uint8_t hash[32] = { 0 };
+    unsigned int hashBytes;
+
+    if (!key || !signature || !digest ||
+        !key->publicValue.data ||
+        !signature->data || !digest->data ||
+        key->ecParams.name != ECCurve_NIST_P256) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (key->publicValue.len != 65 ||
+        digest->len == 0 ||
+        signature->len != 64) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (key->publicValue.data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    }
+
+    /* Right-align at most 32 leading bytes of the digest. */
+    hashBytes = (digest->len < 32) ? digest->len : 32;
+    memcpy(hash + 32 - hashBytes, digest->data, hashBytes);
+
+    /* Public key without its format byte; signature split into halves. */
+    if (!Hacl_P256_ecdsa_verif_without_hash(
+            32, hash,
+            key->publicValue.data + 1,
+            signature->data, signature->data + 32)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_secp384r1.c b/security/nss/lib/freebl/ecl/ecp_secp384r1.c
new file mode 100644
index 0000000000..1359036115
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp384r1.c
@@ -0,0 +1,20411 @@
+/* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */
+/*-
+ * MIT License
+ * -
+ * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley
+ * -
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * -
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * -
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__SIZEOF_INT128__) && !defined(PEDANTIC)
+
+#include "ecp_secp384r1.h"
+#include <stdint.h>
+#include <string.h>
+/* Limb geometry: LIMB_CNT limbs of LIMB_BITS bits each (6 * 64 = 384). */
+#define LIMB_BITS 64
+#define LIMB_CNT 6
+/* Field elements: an array of LIMB_CNT 64-bit limbs */
+typedef uint64_t fe_t[LIMB_CNT];
+/* A single limb */
+typedef uint64_t limb_t;
+
+/* Whole-element copy and clear; both operate on sizeof(fe_t) bytes. */
+#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t))
+#define fe_set_zero(d) memset(d, 0, sizeof(fe_t))
+
+/* Projective points: three field-element coordinates X, Y, Z */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+ fe_t Z;
+} pt_prj_t;
+
+/* Affine points: two field-element coordinates X, Y */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+} pt_aff_t;
+
+/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */
+/*-
+ * MIT License
+ *
+ * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file).
+ * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Autogenerated: word_by_word_montgomery --static --use-value-barrier secp384r1 64 '2^384 - 2^128 - 2^96 + 2^32 - 1' */
+/* curve description: secp384r1 */
+/* machine_wordsize = 64 (from "64") */
+/* requested operations: (all) */
+/* m = 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff (from "2^384 - 2^128 - 2^96 + 2^32 - 1") */
+/* */
+/* NOTE: In addition to the bounds specified above each function, all */
+/* functions synthesized for this Montgomery arithmetic require the */
+/* input to be strictly less than the prime modulus (m), and also */
+/* require the input to be in the unique saturated representation. */
+/* All functions also ensure that these two properties are true of */
+/* return values. */
+/* */
+/* Computed values: */
+/* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) */
+/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */
+/* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) + (z[4] << 256) + (z[5] << 0x140) in */
+/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */
+
+#include <stdint.h>
+/* Narrow integer types used for carry/borrow flags (documented bounds
+ * [0x0 ~> 0x1] on the helpers below) and for building select masks. */
+typedef unsigned char fiat_secp384r1_uint1;
+typedef signed char fiat_secp384r1_int1;
+/* Compiler-specific decorations; both expand to nothing outside GNU C. */
+#ifdef __GNUC__
+#define FIAT_SECP384R1_FIAT_EXTENSION __extension__
+#define FIAT_SECP384R1_FIAT_INLINE __inline__
+#else
+#define FIAT_SECP384R1_FIAT_EXTENSION
+#define FIAT_SECP384R1_FIAT_INLINE
+#endif
+
+/* 128-bit integers used as double-width intermediates for the 64-bit
+ * add-with-carry, subtract-with-borrow and multiply helpers. */
+FIAT_SECP384R1_FIAT_EXTENSION typedef signed __int128 fiat_secp384r1_int128;
+FIAT_SECP384R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp384r1_uint128;
+
+/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
+typedef uint64_t fiat_secp384r1_montgomery_domain_field_element[6];
+
+/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] */
+typedef uint64_t fiat_secp384r1_non_montgomery_domain_field_element[6];
+
+/* Compile-time guard: with two's complement, -1 has all bits set, so
+ * (-1 & 3) equals 3; any other result aborts the build. */
+#if (-1 & 3) != 3
+#error "This code only works on a two's complement system"
+#endif
+
+/* Value barrier: returns its argument unchanged. On GNU C / clang (unless
+ * FIAT_SECP384R1_NO_ASM is defined) the value is routed through an empty
+ * asm statement that declares it as a read-write register operand, so the
+ * compiler must treat it as possibly modified. NOTE(review): presumably
+ * this keeps the optimizer from turning the mask-based select in cmovznz
+ * into a branch. Elsewhere the barrier degrades to the identity macro. */
+#if !defined(FIAT_SECP384R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint64_t
+fiat_secp384r1_value_barrier_u64(uint64_t a)
+{
+ /* Empty instruction template; "+r" marks a as both input and output. */
+ __asm__(""
+ : "+r"(a)
+ : /* no inputs */);
+ return a;
+}
+#else
+#define fiat_secp384r1_value_barrier_u64(x) (x)
+#endif
+
+/*
+ * The function fiat_secp384r1_addcarryx_u64 is an addition with carry.
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^64
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffffffffffff]
+ * arg3: [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp384r1_addcarryx_u64(uint64_t *out1,
+                             fiat_secp384r1_uint1 *out2,
+                             fiat_secp384r1_uint1 arg1,
+                             uint64_t arg2, uint64_t arg3)
+{
+    /* Add in 128 bits so the carry out of bit 63 is not lost. */
+    const fiat_secp384r1_uint128 wide_sum =
+        ((arg1 + (fiat_secp384r1_uint128)arg2) + arg3);
+    const uint64_t low_word = (uint64_t)(wide_sum & UINT64_C(0xffffffffffffffff));
+    const fiat_secp384r1_uint1 carry_out = (fiat_secp384r1_uint1)(wide_sum >> 64);
+
+    *out1 = low_word;
+    *out2 = carry_out;
+}
+
+/*
+ * The function fiat_secp384r1_subborrowx_u64 is a subtraction with borrow.
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^64
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffffffffffff]
+ * arg3: [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp384r1_subborrowx_u64(uint64_t *out1,
+                              fiat_secp384r1_uint1 *out2,
+                              fiat_secp384r1_uint1 arg1,
+                              uint64_t arg2, uint64_t arg3)
+{
+    /* Subtract in signed 128 bits; a negative result signals a borrow. */
+    const fiat_secp384r1_int128 wide_diff =
+        ((arg2 - (fiat_secp384r1_int128)arg1) - arg3);
+    /* Bits above the low word: 0 when no borrow occurred, -1 otherwise. */
+    const fiat_secp384r1_int1 high_part = (fiat_secp384r1_int1)(wide_diff >> 64);
+    const uint64_t low_word = (uint64_t)(wide_diff & UINT64_C(0xffffffffffffffff));
+
+    *out1 = low_word;
+    /* Negate so the borrow flag is reported as 0 or 1. */
+    *out2 = (fiat_secp384r1_uint1)(0x0 - high_part);
+}
+
+/*
+ * The function fiat_secp384r1_mulx_u64 is a multiplication, returning the full double-width result.
+ *
+ * Postconditions:
+ * out1 = (arg1 * arg2) mod 2^64
+ * out2 = ⌊arg1 * arg2 / 2^64⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0xffffffffffffffff]
+ * arg2: [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ * out2: [0x0 ~> 0xffffffffffffffff]
+ */
+static void
+fiat_secp384r1_mulx_u64(uint64_t *out1, uint64_t *out2,
+                        uint64_t arg1, uint64_t arg2)
+{
+    /* Full 64x64 -> 128-bit product, then split into two words. */
+    const fiat_secp384r1_uint128 product = ((fiat_secp384r1_uint128)arg1 * arg2);
+    const uint64_t low_word = (uint64_t)(product & UINT64_C(0xffffffffffffffff));
+    const uint64_t high_word = (uint64_t)(product >> 64);
+
+    *out1 = low_word;
+    *out2 = high_word;
+}
+
+/*
+ * The function fiat_secp384r1_cmovznz_u64 is a single-word conditional move.
+ *
+ * Postconditions:
+ * out1 = (if arg1 = 0 then arg2 else arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffffffffffff]
+ * arg3: [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ */
+static void
+fiat_secp384r1_cmovznz_u64(uint64_t *out1,
+ fiat_secp384r1_uint1 arg1, uint64_t arg2,
+ uint64_t arg3)
+{
+ fiat_secp384r1_uint1 x1;
+ uint64_t x2;
+ uint64_t x3;
+ /* Normalise the condition to exactly 0 or 1. */
+ x1 = (!(!arg1));
+ /* Expand it to a full-width mask: all ones when x1 is 1, zero otherwise. */
+ x2 = ((fiat_secp384r1_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
+ /* Branch-free select: the mask keeps arg3, its complement keeps arg2.
+ * Both masks pass through the value barrier before being applied. */
+ x3 = ((fiat_secp384r1_value_barrier_u64(x2) & arg3) |
+ (fiat_secp384r1_value_barrier_u64((~x2)) & arg2));
+ *out1 = x3;
+}
+
+/*
+ * The function fiat_secp384r1_mul multiplies two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_mul(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint64_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint64_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint64_t x25;
+ fiat_secp384r1_uint1 x26;
+ uint64_t x27;
+ fiat_secp384r1_uint1 x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint64_t x46;
+ fiat_secp384r1_uint1 x47;
+ uint64_t x48;
+ fiat_secp384r1_uint1 x49;
+ uint64_t x50;
+ fiat_secp384r1_uint1 x51;
+ uint64_t x52;
+ fiat_secp384r1_uint1 x53;
+ uint64_t x54;
+ uint64_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint64_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint64_t x59;
+ fiat_secp384r1_uint1 x60;
+ uint64_t x61;
+ fiat_secp384r1_uint1 x62;
+ uint64_t x63;
+ fiat_secp384r1_uint1 x64;
+ uint64_t x65;
+ fiat_secp384r1_uint1 x66;
+ uint64_t x67;
+ fiat_secp384r1_uint1 x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint64_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint64_t x83;
+ fiat_secp384r1_uint1 x84;
+ uint64_t x85;
+ fiat_secp384r1_uint1 x86;
+ uint64_t x87;
+ fiat_secp384r1_uint1 x88;
+ uint64_t x89;
+ fiat_secp384r1_uint1 x90;
+ uint64_t x91;
+ uint64_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint64_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint64_t x96;
+ fiat_secp384r1_uint1 x97;
+ uint64_t x98;
+ fiat_secp384r1_uint1 x99;
+ uint64_t x100;
+ fiat_secp384r1_uint1 x101;
+ uint64_t x102;
+ fiat_secp384r1_uint1 x103;
+ uint64_t x104;
+ fiat_secp384r1_uint1 x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ uint64_t x117;
+ uint64_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ fiat_secp384r1_uint1 x121;
+ uint64_t x122;
+ fiat_secp384r1_uint1 x123;
+ uint64_t x124;
+ fiat_secp384r1_uint1 x125;
+ uint64_t x126;
+ fiat_secp384r1_uint1 x127;
+ uint64_t x128;
+ fiat_secp384r1_uint1 x129;
+ uint64_t x130;
+ uint64_t x131;
+ fiat_secp384r1_uint1 x132;
+ uint64_t x133;
+ fiat_secp384r1_uint1 x134;
+ uint64_t x135;
+ fiat_secp384r1_uint1 x136;
+ uint64_t x137;
+ fiat_secp384r1_uint1 x138;
+ uint64_t x139;
+ fiat_secp384r1_uint1 x140;
+ uint64_t x141;
+ fiat_secp384r1_uint1 x142;
+ uint64_t x143;
+ fiat_secp384r1_uint1 x144;
+ uint64_t x145;
+ uint64_t x146;
+ uint64_t x147;
+ uint64_t x148;
+ uint64_t x149;
+ uint64_t x150;
+ uint64_t x151;
+ uint64_t x152;
+ uint64_t x153;
+ uint64_t x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ fiat_secp384r1_uint1 x159;
+ uint64_t x160;
+ fiat_secp384r1_uint1 x161;
+ uint64_t x162;
+ fiat_secp384r1_uint1 x163;
+ uint64_t x164;
+ fiat_secp384r1_uint1 x165;
+ uint64_t x166;
+ fiat_secp384r1_uint1 x167;
+ uint64_t x168;
+ uint64_t x169;
+ fiat_secp384r1_uint1 x170;
+ uint64_t x171;
+ fiat_secp384r1_uint1 x172;
+ uint64_t x173;
+ fiat_secp384r1_uint1 x174;
+ uint64_t x175;
+ fiat_secp384r1_uint1 x176;
+ uint64_t x177;
+ fiat_secp384r1_uint1 x178;
+ uint64_t x179;
+ fiat_secp384r1_uint1 x180;
+ uint64_t x181;
+ fiat_secp384r1_uint1 x182;
+ uint64_t x183;
+ uint64_t x184;
+ uint64_t x185;
+ uint64_t x186;
+ uint64_t x187;
+ uint64_t x188;
+ uint64_t x189;
+ uint64_t x190;
+ uint64_t x191;
+ uint64_t x192;
+ uint64_t x193;
+ uint64_t x194;
+ uint64_t x195;
+ uint64_t x196;
+ uint64_t x197;
+ fiat_secp384r1_uint1 x198;
+ uint64_t x199;
+ fiat_secp384r1_uint1 x200;
+ uint64_t x201;
+ fiat_secp384r1_uint1 x202;
+ uint64_t x203;
+ fiat_secp384r1_uint1 x204;
+ uint64_t x205;
+ fiat_secp384r1_uint1 x206;
+ uint64_t x207;
+ uint64_t x208;
+ fiat_secp384r1_uint1 x209;
+ uint64_t x210;
+ fiat_secp384r1_uint1 x211;
+ uint64_t x212;
+ fiat_secp384r1_uint1 x213;
+ uint64_t x214;
+ fiat_secp384r1_uint1 x215;
+ uint64_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint64_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint64_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint64_t x222;
+ uint64_t x223;
+ uint64_t x224;
+ uint64_t x225;
+ uint64_t x226;
+ uint64_t x227;
+ uint64_t x228;
+ uint64_t x229;
+ uint64_t x230;
+ uint64_t x231;
+ uint64_t x232;
+ uint64_t x233;
+ uint64_t x234;
+ uint64_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint64_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint64_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint64_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint64_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint64_t x245;
+ uint64_t x246;
+ fiat_secp384r1_uint1 x247;
+ uint64_t x248;
+ fiat_secp384r1_uint1 x249;
+ uint64_t x250;
+ fiat_secp384r1_uint1 x251;
+ uint64_t x252;
+ fiat_secp384r1_uint1 x253;
+ uint64_t x254;
+ fiat_secp384r1_uint1 x255;
+ uint64_t x256;
+ fiat_secp384r1_uint1 x257;
+ uint64_t x258;
+ fiat_secp384r1_uint1 x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint64_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint64_t x265;
+ uint64_t x266;
+ uint64_t x267;
+ uint64_t x268;
+ uint64_t x269;
+ uint64_t x270;
+ uint64_t x271;
+ uint64_t x272;
+ uint64_t x273;
+ uint64_t x274;
+ fiat_secp384r1_uint1 x275;
+ uint64_t x276;
+ fiat_secp384r1_uint1 x277;
+ uint64_t x278;
+ fiat_secp384r1_uint1 x279;
+ uint64_t x280;
+ fiat_secp384r1_uint1 x281;
+ uint64_t x282;
+ fiat_secp384r1_uint1 x283;
+ uint64_t x284;
+ uint64_t x285;
+ fiat_secp384r1_uint1 x286;
+ uint64_t x287;
+ fiat_secp384r1_uint1 x288;
+ uint64_t x289;
+ fiat_secp384r1_uint1 x290;
+ uint64_t x291;
+ fiat_secp384r1_uint1 x292;
+ uint64_t x293;
+ fiat_secp384r1_uint1 x294;
+ uint64_t x295;
+ fiat_secp384r1_uint1 x296;
+ uint64_t x297;
+ fiat_secp384r1_uint1 x298;
+ uint64_t x299;
+ uint64_t x300;
+ uint64_t x301;
+ uint64_t x302;
+ uint64_t x303;
+ uint64_t x304;
+ uint64_t x305;
+ uint64_t x306;
+ uint64_t x307;
+ uint64_t x308;
+ uint64_t x309;
+ uint64_t x310;
+ uint64_t x311;
+ uint64_t x312;
+ fiat_secp384r1_uint1 x313;
+ uint64_t x314;
+ fiat_secp384r1_uint1 x315;
+ uint64_t x316;
+ fiat_secp384r1_uint1 x317;
+ uint64_t x318;
+ fiat_secp384r1_uint1 x319;
+ uint64_t x320;
+ fiat_secp384r1_uint1 x321;
+ uint64_t x322;
+ uint64_t x323;
+ fiat_secp384r1_uint1 x324;
+ uint64_t x325;
+ fiat_secp384r1_uint1 x326;
+ uint64_t x327;
+ fiat_secp384r1_uint1 x328;
+ uint64_t x329;
+ fiat_secp384r1_uint1 x330;
+ uint64_t x331;
+ fiat_secp384r1_uint1 x332;
+ uint64_t x333;
+ fiat_secp384r1_uint1 x334;
+ uint64_t x335;
+ fiat_secp384r1_uint1 x336;
+ uint64_t x337;
+ uint64_t x338;
+ uint64_t x339;
+ uint64_t x340;
+ uint64_t x341;
+ uint64_t x342;
+ uint64_t x343;
+ uint64_t x344;
+ uint64_t x345;
+ uint64_t x346;
+ uint64_t x347;
+ uint64_t x348;
+ uint64_t x349;
+ uint64_t x350;
+ uint64_t x351;
+ fiat_secp384r1_uint1 x352;
+ uint64_t x353;
+ fiat_secp384r1_uint1 x354;
+ uint64_t x355;
+ fiat_secp384r1_uint1 x356;
+ uint64_t x357;
+ fiat_secp384r1_uint1 x358;
+ uint64_t x359;
+ fiat_secp384r1_uint1 x360;
+ uint64_t x361;
+ uint64_t x362;
+ fiat_secp384r1_uint1 x363;
+ uint64_t x364;
+ fiat_secp384r1_uint1 x365;
+ uint64_t x366;
+ fiat_secp384r1_uint1 x367;
+ uint64_t x368;
+ fiat_secp384r1_uint1 x369;
+ uint64_t x370;
+ fiat_secp384r1_uint1 x371;
+ uint64_t x372;
+ fiat_secp384r1_uint1 x373;
+ uint64_t x374;
+ fiat_secp384r1_uint1 x375;
+ uint64_t x376;
+ uint64_t x377;
+ uint64_t x378;
+ uint64_t x379;
+ uint64_t x380;
+ uint64_t x381;
+ uint64_t x382;
+ uint64_t x383;
+ uint64_t x384;
+ uint64_t x385;
+ uint64_t x386;
+ uint64_t x387;
+ uint64_t x388;
+ uint64_t x389;
+ fiat_secp384r1_uint1 x390;
+ uint64_t x391;
+ fiat_secp384r1_uint1 x392;
+ uint64_t x393;
+ fiat_secp384r1_uint1 x394;
+ uint64_t x395;
+ fiat_secp384r1_uint1 x396;
+ uint64_t x397;
+ fiat_secp384r1_uint1 x398;
+ uint64_t x399;
+ uint64_t x400;
+ fiat_secp384r1_uint1 x401;
+ uint64_t x402;
+ fiat_secp384r1_uint1 x403;
+ uint64_t x404;
+ fiat_secp384r1_uint1 x405;
+ uint64_t x406;
+ fiat_secp384r1_uint1 x407;
+ uint64_t x408;
+ fiat_secp384r1_uint1 x409;
+ uint64_t x410;
+ fiat_secp384r1_uint1 x411;
+ uint64_t x412;
+ fiat_secp384r1_uint1 x413;
+ uint64_t x414;
+ uint64_t x415;
+ uint64_t x416;
+ uint64_t x417;
+ uint64_t x418;
+ uint64_t x419;
+ uint64_t x420;
+ uint64_t x421;
+ uint64_t x422;
+ uint64_t x423;
+ uint64_t x424;
+ uint64_t x425;
+ uint64_t x426;
+ uint64_t x427;
+ uint64_t x428;
+ fiat_secp384r1_uint1 x429;
+ uint64_t x430;
+ fiat_secp384r1_uint1 x431;
+ uint64_t x432;
+ fiat_secp384r1_uint1 x433;
+ uint64_t x434;
+ fiat_secp384r1_uint1 x435;
+ uint64_t x436;
+ fiat_secp384r1_uint1 x437;
+ uint64_t x438;
+ uint64_t x439;
+ fiat_secp384r1_uint1 x440;
+ uint64_t x441;
+ fiat_secp384r1_uint1 x442;
+ uint64_t x443;
+ fiat_secp384r1_uint1 x444;
+ uint64_t x445;
+ fiat_secp384r1_uint1 x446;
+ uint64_t x447;
+ fiat_secp384r1_uint1 x448;
+ uint64_t x449;
+ fiat_secp384r1_uint1 x450;
+ uint64_t x451;
+ fiat_secp384r1_uint1 x452;
+ uint64_t x453;
+ uint64_t x454;
+ fiat_secp384r1_uint1 x455;
+ uint64_t x456;
+ fiat_secp384r1_uint1 x457;
+ uint64_t x458;
+ fiat_secp384r1_uint1 x459;
+ uint64_t x460;
+ fiat_secp384r1_uint1 x461;
+ uint64_t x462;
+ fiat_secp384r1_uint1 x463;
+ uint64_t x464;
+ fiat_secp384r1_uint1 x465;
+ uint64_t x466;
+ fiat_secp384r1_uint1 x467;
+ uint64_t x468;
+ uint64_t x469;
+ uint64_t x470;
+ uint64_t x471;
+ uint64_t x472;
+ uint64_t x473;
+ x1 = (arg1[1]);
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[0]);
+ fiat_secp384r1_mulx_u64(&x7, &x8, x6, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x9, &x10, x6, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x11, &x12, x6, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x13, &x14, x6, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x15, &x16, x6, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x17, &x18, x6, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x19, &x20, 0x0, x18, x15);
+ fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x16, x13);
+ fiat_secp384r1_addcarryx_u64(&x23, &x24, x22, x14, x11);
+ fiat_secp384r1_addcarryx_u64(&x25, &x26, x24, x12, x9);
+ fiat_secp384r1_addcarryx_u64(&x27, &x28, x26, x10, x7);
+ x29 = (x28 + x8);
+ fiat_secp384r1_mulx_u64(&x30, &x31, x17, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x32, &x33, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x34, &x35, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x36, &x37, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x38, &x39, x30, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x40, &x41, x30, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x42, &x43, x30, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
+ fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x41, x38);
+ fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x39, x36);
+ fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x37, x34);
+ fiat_secp384r1_addcarryx_u64(&x52, &x53, x51, x35, x32);
+ x54 = (x53 + x33);
+ fiat_secp384r1_addcarryx_u64(&x55, &x56, 0x0, x17, x42);
+ fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x19, x44);
+ fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, x21, x46);
+ fiat_secp384r1_addcarryx_u64(&x61, &x62, x60, x23, x48);
+ fiat_secp384r1_addcarryx_u64(&x63, &x64, x62, x25, x50);
+ fiat_secp384r1_addcarryx_u64(&x65, &x66, x64, x27, x52);
+ fiat_secp384r1_addcarryx_u64(&x67, &x68, x66, x29, x54);
+ fiat_secp384r1_mulx_u64(&x69, &x70, x1, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x71, &x72, x1, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x73, &x74, x1, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x75, &x76, x1, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x77, &x78, x1, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x79, &x80, x1, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x81, &x82, 0x0, x80, x77);
+ fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x78, x75);
+ fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x76, x73);
+ fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x74, x71);
+ fiat_secp384r1_addcarryx_u64(&x89, &x90, x88, x72, x69);
+ x91 = (x90 + x70);
+ fiat_secp384r1_addcarryx_u64(&x92, &x93, 0x0, x57, x79);
+ fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x59, x81);
+ fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x61, x83);
+ fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x63, x85);
+ fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x65, x87);
+ fiat_secp384r1_addcarryx_u64(&x102, &x103, x101, x67, x89);
+ fiat_secp384r1_addcarryx_u64(&x104, &x105, x103, x68, x91);
+ fiat_secp384r1_mulx_u64(&x106, &x107, x92, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x108, &x109, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x110, &x111, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x112, &x113, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x114, &x115, x106, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x116, &x117, x106, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x118, &x119, x106, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x120, &x121, 0x0, x119, x116);
+ fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x117, x114);
+ fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x115, x112);
+ fiat_secp384r1_addcarryx_u64(&x126, &x127, x125, x113, x110);
+ fiat_secp384r1_addcarryx_u64(&x128, &x129, x127, x111, x108);
+ x130 = (x129 + x109);
+ fiat_secp384r1_addcarryx_u64(&x131, &x132, 0x0, x92, x118);
+ fiat_secp384r1_addcarryx_u64(&x133, &x134, x132, x94, x120);
+ fiat_secp384r1_addcarryx_u64(&x135, &x136, x134, x96, x122);
+ fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x98, x124);
+ fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x100, x126);
+ fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x102, x128);
+ fiat_secp384r1_addcarryx_u64(&x143, &x144, x142, x104, x130);
+ x145 = ((uint64_t)x144 + x105);
+ fiat_secp384r1_mulx_u64(&x146, &x147, x2, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x148, &x149, x2, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x150, &x151, x2, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x152, &x153, x2, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x154, &x155, x2, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x156, &x157, x2, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x157, x154);
+ fiat_secp384r1_addcarryx_u64(&x160, &x161, x159, x155, x152);
+ fiat_secp384r1_addcarryx_u64(&x162, &x163, x161, x153, x150);
+ fiat_secp384r1_addcarryx_u64(&x164, &x165, x163, x151, x148);
+ fiat_secp384r1_addcarryx_u64(&x166, &x167, x165, x149, x146);
+ x168 = (x167 + x147);
+ fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x133, x156);
+ fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x135, x158);
+ fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x137, x160);
+ fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x139, x162);
+ fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x141, x164);
+ fiat_secp384r1_addcarryx_u64(&x179, &x180, x178, x143, x166);
+ fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x168);
+ fiat_secp384r1_mulx_u64(&x183, &x184, x169, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x185, &x186, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x187, &x188, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x189, &x190, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x191, &x192, x183, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x193, &x194, x183, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x195, &x196, x183, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x197, &x198, 0x0, x196, x193);
+ fiat_secp384r1_addcarryx_u64(&x199, &x200, x198, x194, x191);
+ fiat_secp384r1_addcarryx_u64(&x201, &x202, x200, x192, x189);
+ fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x190, x187);
+ fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x188, x185);
+ x207 = (x206 + x186);
+ fiat_secp384r1_addcarryx_u64(&x208, &x209, 0x0, x169, x195);
+ fiat_secp384r1_addcarryx_u64(&x210, &x211, x209, x171, x197);
+ fiat_secp384r1_addcarryx_u64(&x212, &x213, x211, x173, x199);
+ fiat_secp384r1_addcarryx_u64(&x214, &x215, x213, x175, x201);
+ fiat_secp384r1_addcarryx_u64(&x216, &x217, x215, x177, x203);
+ fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x179, x205);
+ fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x181, x207);
+ x222 = ((uint64_t)x221 + x182);
+ fiat_secp384r1_mulx_u64(&x223, &x224, x3, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x225, &x226, x3, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x227, &x228, x3, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x229, &x230, x3, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x231, &x232, x3, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x233, &x234, x3, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231);
+ fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229);
+ fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227);
+ fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225);
+ fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223);
+ x245 = (x244 + x224);
+ fiat_secp384r1_addcarryx_u64(&x246, &x247, 0x0, x210, x233);
+ fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x212, x235);
+ fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x214, x237);
+ fiat_secp384r1_addcarryx_u64(&x252, &x253, x251, x216, x239);
+ fiat_secp384r1_addcarryx_u64(&x254, &x255, x253, x218, x241);
+ fiat_secp384r1_addcarryx_u64(&x256, &x257, x255, x220, x243);
+ fiat_secp384r1_addcarryx_u64(&x258, &x259, x257, x222, x245);
+ fiat_secp384r1_mulx_u64(&x260, &x261, x246, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x262, &x263, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x264, &x265, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x266, &x267, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x268, &x269, x260, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x270, &x271, x260, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x272, &x273, x260, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x274, &x275, 0x0, x273, x270);
+ fiat_secp384r1_addcarryx_u64(&x276, &x277, x275, x271, x268);
+ fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x269, x266);
+ fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x267, x264);
+ fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x265, x262);
+ x284 = (x283 + x263);
+ fiat_secp384r1_addcarryx_u64(&x285, &x286, 0x0, x246, x272);
+ fiat_secp384r1_addcarryx_u64(&x287, &x288, x286, x248, x274);
+ fiat_secp384r1_addcarryx_u64(&x289, &x290, x288, x250, x276);
+ fiat_secp384r1_addcarryx_u64(&x291, &x292, x290, x252, x278);
+ fiat_secp384r1_addcarryx_u64(&x293, &x294, x292, x254, x280);
+ fiat_secp384r1_addcarryx_u64(&x295, &x296, x294, x256, x282);
+ fiat_secp384r1_addcarryx_u64(&x297, &x298, x296, x258, x284);
+ x299 = ((uint64_t)x298 + x259);
+ fiat_secp384r1_mulx_u64(&x300, &x301, x4, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x302, &x303, x4, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x304, &x305, x4, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x306, &x307, x4, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x308, &x309, x4, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x310, &x311, x4, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x312, &x313, 0x0, x311, x308);
+ fiat_secp384r1_addcarryx_u64(&x314, &x315, x313, x309, x306);
+ fiat_secp384r1_addcarryx_u64(&x316, &x317, x315, x307, x304);
+ fiat_secp384r1_addcarryx_u64(&x318, &x319, x317, x305, x302);
+ fiat_secp384r1_addcarryx_u64(&x320, &x321, x319, x303, x300);
+ x322 = (x321 + x301);
+ fiat_secp384r1_addcarryx_u64(&x323, &x324, 0x0, x287, x310);
+ fiat_secp384r1_addcarryx_u64(&x325, &x326, x324, x289, x312);
+ fiat_secp384r1_addcarryx_u64(&x327, &x328, x326, x291, x314);
+ fiat_secp384r1_addcarryx_u64(&x329, &x330, x328, x293, x316);
+ fiat_secp384r1_addcarryx_u64(&x331, &x332, x330, x295, x318);
+ fiat_secp384r1_addcarryx_u64(&x333, &x334, x332, x297, x320);
+ fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x299, x322);
+ fiat_secp384r1_mulx_u64(&x337, &x338, x323, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x339, &x340, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x341, &x342, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x343, &x344, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x345, &x346, x337, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x347, &x348, x337, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x349, &x350, x337, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x351, &x352, 0x0, x350, x347);
+ fiat_secp384r1_addcarryx_u64(&x353, &x354, x352, x348, x345);
+ fiat_secp384r1_addcarryx_u64(&x355, &x356, x354, x346, x343);
+ fiat_secp384r1_addcarryx_u64(&x357, &x358, x356, x344, x341);
+ fiat_secp384r1_addcarryx_u64(&x359, &x360, x358, x342, x339);
+ x361 = (x360 + x340);
+ fiat_secp384r1_addcarryx_u64(&x362, &x363, 0x0, x323, x349);
+ fiat_secp384r1_addcarryx_u64(&x364, &x365, x363, x325, x351);
+ fiat_secp384r1_addcarryx_u64(&x366, &x367, x365, x327, x353);
+ fiat_secp384r1_addcarryx_u64(&x368, &x369, x367, x329, x355);
+ fiat_secp384r1_addcarryx_u64(&x370, &x371, x369, x331, x357);
+ fiat_secp384r1_addcarryx_u64(&x372, &x373, x371, x333, x359);
+ fiat_secp384r1_addcarryx_u64(&x374, &x375, x373, x335, x361);
+ x376 = ((uint64_t)x375 + x336);
+ fiat_secp384r1_mulx_u64(&x377, &x378, x5, (arg2[5]));
+ fiat_secp384r1_mulx_u64(&x379, &x380, x5, (arg2[4]));
+ fiat_secp384r1_mulx_u64(&x381, &x382, x5, (arg2[3]));
+ fiat_secp384r1_mulx_u64(&x383, &x384, x5, (arg2[2]));
+ fiat_secp384r1_mulx_u64(&x385, &x386, x5, (arg2[1]));
+ fiat_secp384r1_mulx_u64(&x387, &x388, x5, (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x389, &x390, 0x0, x388, x385);
+ fiat_secp384r1_addcarryx_u64(&x391, &x392, x390, x386, x383);
+ fiat_secp384r1_addcarryx_u64(&x393, &x394, x392, x384, x381);
+ fiat_secp384r1_addcarryx_u64(&x395, &x396, x394, x382, x379);
+ fiat_secp384r1_addcarryx_u64(&x397, &x398, x396, x380, x377);
+ x399 = (x398 + x378);
+ fiat_secp384r1_addcarryx_u64(&x400, &x401, 0x0, x364, x387);
+ fiat_secp384r1_addcarryx_u64(&x402, &x403, x401, x366, x389);
+ fiat_secp384r1_addcarryx_u64(&x404, &x405, x403, x368, x391);
+ fiat_secp384r1_addcarryx_u64(&x406, &x407, x405, x370, x393);
+ fiat_secp384r1_addcarryx_u64(&x408, &x409, x407, x372, x395);
+ fiat_secp384r1_addcarryx_u64(&x410, &x411, x409, x374, x397);
+ fiat_secp384r1_addcarryx_u64(&x412, &x413, x411, x376, x399);
+ fiat_secp384r1_mulx_u64(&x414, &x415, x400, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x416, &x417, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x418, &x419, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x420, &x421, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x422, &x423, x414, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x424, &x425, x414, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x426, &x427, x414, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x428, &x429, 0x0, x427, x424);
+ fiat_secp384r1_addcarryx_u64(&x430, &x431, x429, x425, x422);
+ fiat_secp384r1_addcarryx_u64(&x432, &x433, x431, x423, x420);
+ fiat_secp384r1_addcarryx_u64(&x434, &x435, x433, x421, x418);
+ fiat_secp384r1_addcarryx_u64(&x436, &x437, x435, x419, x416);
+ x438 = (x437 + x417);
+ fiat_secp384r1_addcarryx_u64(&x439, &x440, 0x0, x400, x426);
+ fiat_secp384r1_addcarryx_u64(&x441, &x442, x440, x402, x428);
+ fiat_secp384r1_addcarryx_u64(&x443, &x444, x442, x404, x430);
+ fiat_secp384r1_addcarryx_u64(&x445, &x446, x444, x406, x432);
+ fiat_secp384r1_addcarryx_u64(&x447, &x448, x446, x408, x434);
+ fiat_secp384r1_addcarryx_u64(&x449, &x450, x448, x410, x436);
+ fiat_secp384r1_addcarryx_u64(&x451, &x452, x450, x412, x438);
+ x453 = ((uint64_t)x452 + x413);
+ fiat_secp384r1_subborrowx_u64(&x454, &x455, 0x0, x441,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u64(&x456, &x457, x455, x443,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x458, &x459, x457, x445,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x460, &x461, x459, x447,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x462, &x463, x461, x449,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x464, &x465, x463, x451,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x466, &x467, x465, x453, 0x0);
+ fiat_secp384r1_cmovznz_u64(&x468, x467, x454, x441);
+ fiat_secp384r1_cmovznz_u64(&x469, x467, x456, x443);
+ fiat_secp384r1_cmovznz_u64(&x470, x467, x458, x445);
+ fiat_secp384r1_cmovznz_u64(&x471, x467, x460, x447);
+ fiat_secp384r1_cmovznz_u64(&x472, x467, x462, x449);
+ fiat_secp384r1_cmovznz_u64(&x473, x467, x464, x451);
+ out1[0] = x468;
+ out1[1] = x469;
+ out1[2] = x470;
+ out1[3] = x471;
+ out1[4] = x472;
+ out1[5] = x473;
+}
+
+/*
+ * The function fiat_secp384r1_square squares a field element in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_square(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint64_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint64_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint64_t x25;
+ fiat_secp384r1_uint1 x26;
+ uint64_t x27;
+ fiat_secp384r1_uint1 x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint64_t x46;
+ fiat_secp384r1_uint1 x47;
+ uint64_t x48;
+ fiat_secp384r1_uint1 x49;
+ uint64_t x50;
+ fiat_secp384r1_uint1 x51;
+ uint64_t x52;
+ fiat_secp384r1_uint1 x53;
+ uint64_t x54;
+ uint64_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint64_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint64_t x59;
+ fiat_secp384r1_uint1 x60;
+ uint64_t x61;
+ fiat_secp384r1_uint1 x62;
+ uint64_t x63;
+ fiat_secp384r1_uint1 x64;
+ uint64_t x65;
+ fiat_secp384r1_uint1 x66;
+ uint64_t x67;
+ fiat_secp384r1_uint1 x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint64_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint64_t x83;
+ fiat_secp384r1_uint1 x84;
+ uint64_t x85;
+ fiat_secp384r1_uint1 x86;
+ uint64_t x87;
+ fiat_secp384r1_uint1 x88;
+ uint64_t x89;
+ fiat_secp384r1_uint1 x90;
+ uint64_t x91;
+ uint64_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint64_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint64_t x96;
+ fiat_secp384r1_uint1 x97;
+ uint64_t x98;
+ fiat_secp384r1_uint1 x99;
+ uint64_t x100;
+ fiat_secp384r1_uint1 x101;
+ uint64_t x102;
+ fiat_secp384r1_uint1 x103;
+ uint64_t x104;
+ fiat_secp384r1_uint1 x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ uint64_t x117;
+ uint64_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ fiat_secp384r1_uint1 x121;
+ uint64_t x122;
+ fiat_secp384r1_uint1 x123;
+ uint64_t x124;
+ fiat_secp384r1_uint1 x125;
+ uint64_t x126;
+ fiat_secp384r1_uint1 x127;
+ uint64_t x128;
+ fiat_secp384r1_uint1 x129;
+ uint64_t x130;
+ uint64_t x131;
+ fiat_secp384r1_uint1 x132;
+ uint64_t x133;
+ fiat_secp384r1_uint1 x134;
+ uint64_t x135;
+ fiat_secp384r1_uint1 x136;
+ uint64_t x137;
+ fiat_secp384r1_uint1 x138;
+ uint64_t x139;
+ fiat_secp384r1_uint1 x140;
+ uint64_t x141;
+ fiat_secp384r1_uint1 x142;
+ uint64_t x143;
+ fiat_secp384r1_uint1 x144;
+ uint64_t x145;
+ uint64_t x146;
+ uint64_t x147;
+ uint64_t x148;
+ uint64_t x149;
+ uint64_t x150;
+ uint64_t x151;
+ uint64_t x152;
+ uint64_t x153;
+ uint64_t x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ fiat_secp384r1_uint1 x159;
+ uint64_t x160;
+ fiat_secp384r1_uint1 x161;
+ uint64_t x162;
+ fiat_secp384r1_uint1 x163;
+ uint64_t x164;
+ fiat_secp384r1_uint1 x165;
+ uint64_t x166;
+ fiat_secp384r1_uint1 x167;
+ uint64_t x168;
+ uint64_t x169;
+ fiat_secp384r1_uint1 x170;
+ uint64_t x171;
+ fiat_secp384r1_uint1 x172;
+ uint64_t x173;
+ fiat_secp384r1_uint1 x174;
+ uint64_t x175;
+ fiat_secp384r1_uint1 x176;
+ uint64_t x177;
+ fiat_secp384r1_uint1 x178;
+ uint64_t x179;
+ fiat_secp384r1_uint1 x180;
+ uint64_t x181;
+ fiat_secp384r1_uint1 x182;
+ uint64_t x183;
+ uint64_t x184;
+ uint64_t x185;
+ uint64_t x186;
+ uint64_t x187;
+ uint64_t x188;
+ uint64_t x189;
+ uint64_t x190;
+ uint64_t x191;
+ uint64_t x192;
+ uint64_t x193;
+ uint64_t x194;
+ uint64_t x195;
+ uint64_t x196;
+ uint64_t x197;
+ fiat_secp384r1_uint1 x198;
+ uint64_t x199;
+ fiat_secp384r1_uint1 x200;
+ uint64_t x201;
+ fiat_secp384r1_uint1 x202;
+ uint64_t x203;
+ fiat_secp384r1_uint1 x204;
+ uint64_t x205;
+ fiat_secp384r1_uint1 x206;
+ uint64_t x207;
+ uint64_t x208;
+ fiat_secp384r1_uint1 x209;
+ uint64_t x210;
+ fiat_secp384r1_uint1 x211;
+ uint64_t x212;
+ fiat_secp384r1_uint1 x213;
+ uint64_t x214;
+ fiat_secp384r1_uint1 x215;
+ uint64_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint64_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint64_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint64_t x222;
+ uint64_t x223;
+ uint64_t x224;
+ uint64_t x225;
+ uint64_t x226;
+ uint64_t x227;
+ uint64_t x228;
+ uint64_t x229;
+ uint64_t x230;
+ uint64_t x231;
+ uint64_t x232;
+ uint64_t x233;
+ uint64_t x234;
+ uint64_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint64_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint64_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint64_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint64_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint64_t x245;
+ uint64_t x246;
+ fiat_secp384r1_uint1 x247;
+ uint64_t x248;
+ fiat_secp384r1_uint1 x249;
+ uint64_t x250;
+ fiat_secp384r1_uint1 x251;
+ uint64_t x252;
+ fiat_secp384r1_uint1 x253;
+ uint64_t x254;
+ fiat_secp384r1_uint1 x255;
+ uint64_t x256;
+ fiat_secp384r1_uint1 x257;
+ uint64_t x258;
+ fiat_secp384r1_uint1 x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint64_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint64_t x265;
+ uint64_t x266;
+ uint64_t x267;
+ uint64_t x268;
+ uint64_t x269;
+ uint64_t x270;
+ uint64_t x271;
+ uint64_t x272;
+ uint64_t x273;
+ uint64_t x274;
+ fiat_secp384r1_uint1 x275;
+ uint64_t x276;
+ fiat_secp384r1_uint1 x277;
+ uint64_t x278;
+ fiat_secp384r1_uint1 x279;
+ uint64_t x280;
+ fiat_secp384r1_uint1 x281;
+ uint64_t x282;
+ fiat_secp384r1_uint1 x283;
+ uint64_t x284;
+ uint64_t x285;
+ fiat_secp384r1_uint1 x286;
+ uint64_t x287;
+ fiat_secp384r1_uint1 x288;
+ uint64_t x289;
+ fiat_secp384r1_uint1 x290;
+ uint64_t x291;
+ fiat_secp384r1_uint1 x292;
+ uint64_t x293;
+ fiat_secp384r1_uint1 x294;
+ uint64_t x295;
+ fiat_secp384r1_uint1 x296;
+ uint64_t x297;
+ fiat_secp384r1_uint1 x298;
+ uint64_t x299;
+ uint64_t x300;
+ uint64_t x301;
+ uint64_t x302;
+ uint64_t x303;
+ uint64_t x304;
+ uint64_t x305;
+ uint64_t x306;
+ uint64_t x307;
+ uint64_t x308;
+ uint64_t x309;
+ uint64_t x310;
+ uint64_t x311;
+ uint64_t x312;
+ fiat_secp384r1_uint1 x313;
+ uint64_t x314;
+ fiat_secp384r1_uint1 x315;
+ uint64_t x316;
+ fiat_secp384r1_uint1 x317;
+ uint64_t x318;
+ fiat_secp384r1_uint1 x319;
+ uint64_t x320;
+ fiat_secp384r1_uint1 x321;
+ uint64_t x322;
+ uint64_t x323;
+ fiat_secp384r1_uint1 x324;
+ uint64_t x325;
+ fiat_secp384r1_uint1 x326;
+ uint64_t x327;
+ fiat_secp384r1_uint1 x328;
+ uint64_t x329;
+ fiat_secp384r1_uint1 x330;
+ uint64_t x331;
+ fiat_secp384r1_uint1 x332;
+ uint64_t x333;
+ fiat_secp384r1_uint1 x334;
+ uint64_t x335;
+ fiat_secp384r1_uint1 x336;
+ uint64_t x337;
+ uint64_t x338;
+ uint64_t x339;
+ uint64_t x340;
+ uint64_t x341;
+ uint64_t x342;
+ uint64_t x343;
+ uint64_t x344;
+ uint64_t x345;
+ uint64_t x346;
+ uint64_t x347;
+ uint64_t x348;
+ uint64_t x349;
+ uint64_t x350;
+ uint64_t x351;
+ fiat_secp384r1_uint1 x352;
+ uint64_t x353;
+ fiat_secp384r1_uint1 x354;
+ uint64_t x355;
+ fiat_secp384r1_uint1 x356;
+ uint64_t x357;
+ fiat_secp384r1_uint1 x358;
+ uint64_t x359;
+ fiat_secp384r1_uint1 x360;
+ uint64_t x361;
+ uint64_t x362;
+ fiat_secp384r1_uint1 x363;
+ uint64_t x364;
+ fiat_secp384r1_uint1 x365;
+ uint64_t x366;
+ fiat_secp384r1_uint1 x367;
+ uint64_t x368;
+ fiat_secp384r1_uint1 x369;
+ uint64_t x370;
+ fiat_secp384r1_uint1 x371;
+ uint64_t x372;
+ fiat_secp384r1_uint1 x373;
+ uint64_t x374;
+ fiat_secp384r1_uint1 x375;
+ uint64_t x376;
+ uint64_t x377;
+ uint64_t x378;
+ uint64_t x379;
+ uint64_t x380;
+ uint64_t x381;
+ uint64_t x382;
+ uint64_t x383;
+ uint64_t x384;
+ uint64_t x385;
+ uint64_t x386;
+ uint64_t x387;
+ uint64_t x388;
+ uint64_t x389;
+ fiat_secp384r1_uint1 x390;
+ uint64_t x391;
+ fiat_secp384r1_uint1 x392;
+ uint64_t x393;
+ fiat_secp384r1_uint1 x394;
+ uint64_t x395;
+ fiat_secp384r1_uint1 x396;
+ uint64_t x397;
+ fiat_secp384r1_uint1 x398;
+ uint64_t x399;
+ uint64_t x400;
+ fiat_secp384r1_uint1 x401;
+ uint64_t x402;
+ fiat_secp384r1_uint1 x403;
+ uint64_t x404;
+ fiat_secp384r1_uint1 x405;
+ uint64_t x406;
+ fiat_secp384r1_uint1 x407;
+ uint64_t x408;
+ fiat_secp384r1_uint1 x409;
+ uint64_t x410;
+ fiat_secp384r1_uint1 x411;
+ uint64_t x412;
+ fiat_secp384r1_uint1 x413;
+ uint64_t x414;
+ uint64_t x415;
+ uint64_t x416;
+ uint64_t x417;
+ uint64_t x418;
+ uint64_t x419;
+ uint64_t x420;
+ uint64_t x421;
+ uint64_t x422;
+ uint64_t x423;
+ uint64_t x424;
+ uint64_t x425;
+ uint64_t x426;
+ uint64_t x427;
+ uint64_t x428;
+ fiat_secp384r1_uint1 x429;
+ uint64_t x430;
+ fiat_secp384r1_uint1 x431;
+ uint64_t x432;
+ fiat_secp384r1_uint1 x433;
+ uint64_t x434;
+ fiat_secp384r1_uint1 x435;
+ uint64_t x436;
+ fiat_secp384r1_uint1 x437;
+ uint64_t x438;
+ uint64_t x439;
+ fiat_secp384r1_uint1 x440;
+ uint64_t x441;
+ fiat_secp384r1_uint1 x442;
+ uint64_t x443;
+ fiat_secp384r1_uint1 x444;
+ uint64_t x445;
+ fiat_secp384r1_uint1 x446;
+ uint64_t x447;
+ fiat_secp384r1_uint1 x448;
+ uint64_t x449;
+ fiat_secp384r1_uint1 x450;
+ uint64_t x451;
+ fiat_secp384r1_uint1 x452;
+ uint64_t x453;
+ uint64_t x454;
+ fiat_secp384r1_uint1 x455;
+ uint64_t x456;
+ fiat_secp384r1_uint1 x457;
+ uint64_t x458;
+ fiat_secp384r1_uint1 x459;
+ uint64_t x460;
+ fiat_secp384r1_uint1 x461;
+ uint64_t x462;
+ fiat_secp384r1_uint1 x463;
+ uint64_t x464;
+ fiat_secp384r1_uint1 x465;
+ uint64_t x466;
+ fiat_secp384r1_uint1 x467;
+ uint64_t x468;
+ uint64_t x469;
+ uint64_t x470;
+ uint64_t x471;
+ uint64_t x472;
+ uint64_t x473;
+ x1 = (arg1[1]);
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[0]);
+ fiat_secp384r1_mulx_u64(&x7, &x8, x6, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x9, &x10, x6, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x11, &x12, x6, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x13, &x14, x6, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x15, &x16, x6, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x17, &x18, x6, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x19, &x20, 0x0, x18, x15);
+ fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x16, x13);
+ fiat_secp384r1_addcarryx_u64(&x23, &x24, x22, x14, x11);
+ fiat_secp384r1_addcarryx_u64(&x25, &x26, x24, x12, x9);
+ fiat_secp384r1_addcarryx_u64(&x27, &x28, x26, x10, x7);
+ x29 = (x28 + x8);
+ fiat_secp384r1_mulx_u64(&x30, &x31, x17, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x32, &x33, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x34, &x35, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x36, &x37, x30, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x38, &x39, x30, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x40, &x41, x30, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x42, &x43, x30, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
+ fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x41, x38);
+ fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x39, x36);
+ fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x37, x34);
+ fiat_secp384r1_addcarryx_u64(&x52, &x53, x51, x35, x32);
+ x54 = (x53 + x33);
+ fiat_secp384r1_addcarryx_u64(&x55, &x56, 0x0, x17, x42);
+ fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x19, x44);
+ fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, x21, x46);
+ fiat_secp384r1_addcarryx_u64(&x61, &x62, x60, x23, x48);
+ fiat_secp384r1_addcarryx_u64(&x63, &x64, x62, x25, x50);
+ fiat_secp384r1_addcarryx_u64(&x65, &x66, x64, x27, x52);
+ fiat_secp384r1_addcarryx_u64(&x67, &x68, x66, x29, x54);
+ fiat_secp384r1_mulx_u64(&x69, &x70, x1, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x71, &x72, x1, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x73, &x74, x1, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x75, &x76, x1, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x77, &x78, x1, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x79, &x80, x1, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x81, &x82, 0x0, x80, x77);
+ fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x78, x75);
+ fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x76, x73);
+ fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x74, x71);
+ fiat_secp384r1_addcarryx_u64(&x89, &x90, x88, x72, x69);
+ x91 = (x90 + x70);
+ fiat_secp384r1_addcarryx_u64(&x92, &x93, 0x0, x57, x79);
+ fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x59, x81);
+ fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x61, x83);
+ fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x63, x85);
+ fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x65, x87);
+ fiat_secp384r1_addcarryx_u64(&x102, &x103, x101, x67, x89);
+ fiat_secp384r1_addcarryx_u64(&x104, &x105, x103, x68, x91);
+ fiat_secp384r1_mulx_u64(&x106, &x107, x92, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x108, &x109, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x110, &x111, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x112, &x113, x106, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x114, &x115, x106, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x116, &x117, x106, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x118, &x119, x106, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x120, &x121, 0x0, x119, x116);
+ fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x117, x114);
+ fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x115, x112);
+ fiat_secp384r1_addcarryx_u64(&x126, &x127, x125, x113, x110);
+ fiat_secp384r1_addcarryx_u64(&x128, &x129, x127, x111, x108);
+ x130 = (x129 + x109);
+ fiat_secp384r1_addcarryx_u64(&x131, &x132, 0x0, x92, x118);
+ fiat_secp384r1_addcarryx_u64(&x133, &x134, x132, x94, x120);
+ fiat_secp384r1_addcarryx_u64(&x135, &x136, x134, x96, x122);
+ fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x98, x124);
+ fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x100, x126);
+ fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x102, x128);
+ fiat_secp384r1_addcarryx_u64(&x143, &x144, x142, x104, x130);
+ x145 = ((uint64_t)x144 + x105);
+ fiat_secp384r1_mulx_u64(&x146, &x147, x2, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x148, &x149, x2, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x150, &x151, x2, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x152, &x153, x2, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x154, &x155, x2, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x156, &x157, x2, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x157, x154);
+ fiat_secp384r1_addcarryx_u64(&x160, &x161, x159, x155, x152);
+ fiat_secp384r1_addcarryx_u64(&x162, &x163, x161, x153, x150);
+ fiat_secp384r1_addcarryx_u64(&x164, &x165, x163, x151, x148);
+ fiat_secp384r1_addcarryx_u64(&x166, &x167, x165, x149, x146);
+ x168 = (x167 + x147);
+ fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x133, x156);
+ fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x135, x158);
+ fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x137, x160);
+ fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x139, x162);
+ fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x141, x164);
+ fiat_secp384r1_addcarryx_u64(&x179, &x180, x178, x143, x166);
+ fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x168);
+ fiat_secp384r1_mulx_u64(&x183, &x184, x169, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x185, &x186, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x187, &x188, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x189, &x190, x183, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x191, &x192, x183, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x193, &x194, x183, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x195, &x196, x183, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x197, &x198, 0x0, x196, x193);
+ fiat_secp384r1_addcarryx_u64(&x199, &x200, x198, x194, x191);
+ fiat_secp384r1_addcarryx_u64(&x201, &x202, x200, x192, x189);
+ fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x190, x187);
+ fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x188, x185);
+ x207 = (x206 + x186);
+ fiat_secp384r1_addcarryx_u64(&x208, &x209, 0x0, x169, x195);
+ fiat_secp384r1_addcarryx_u64(&x210, &x211, x209, x171, x197);
+ fiat_secp384r1_addcarryx_u64(&x212, &x213, x211, x173, x199);
+ fiat_secp384r1_addcarryx_u64(&x214, &x215, x213, x175, x201);
+ fiat_secp384r1_addcarryx_u64(&x216, &x217, x215, x177, x203);
+ fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x179, x205);
+ fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x181, x207);
+ x222 = ((uint64_t)x221 + x182);
+ fiat_secp384r1_mulx_u64(&x223, &x224, x3, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x225, &x226, x3, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x227, &x228, x3, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x229, &x230, x3, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x231, &x232, x3, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x233, &x234, x3, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231);
+ fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229);
+ fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227);
+ fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225);
+ fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223);
+ x245 = (x244 + x224);
+ fiat_secp384r1_addcarryx_u64(&x246, &x247, 0x0, x210, x233);
+ fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x212, x235);
+ fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x214, x237);
+ fiat_secp384r1_addcarryx_u64(&x252, &x253, x251, x216, x239);
+ fiat_secp384r1_addcarryx_u64(&x254, &x255, x253, x218, x241);
+ fiat_secp384r1_addcarryx_u64(&x256, &x257, x255, x220, x243);
+ fiat_secp384r1_addcarryx_u64(&x258, &x259, x257, x222, x245);
+ fiat_secp384r1_mulx_u64(&x260, &x261, x246, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x262, &x263, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x264, &x265, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x266, &x267, x260, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x268, &x269, x260, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x270, &x271, x260, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x272, &x273, x260, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x274, &x275, 0x0, x273, x270);
+ fiat_secp384r1_addcarryx_u64(&x276, &x277, x275, x271, x268);
+ fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x269, x266);
+ fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x267, x264);
+ fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x265, x262);
+ x284 = (x283 + x263);
+ fiat_secp384r1_addcarryx_u64(&x285, &x286, 0x0, x246, x272);
+ fiat_secp384r1_addcarryx_u64(&x287, &x288, x286, x248, x274);
+ fiat_secp384r1_addcarryx_u64(&x289, &x290, x288, x250, x276);
+ fiat_secp384r1_addcarryx_u64(&x291, &x292, x290, x252, x278);
+ fiat_secp384r1_addcarryx_u64(&x293, &x294, x292, x254, x280);
+ fiat_secp384r1_addcarryx_u64(&x295, &x296, x294, x256, x282);
+ fiat_secp384r1_addcarryx_u64(&x297, &x298, x296, x258, x284);
+ x299 = ((uint64_t)x298 + x259);
+ fiat_secp384r1_mulx_u64(&x300, &x301, x4, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x302, &x303, x4, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x304, &x305, x4, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x306, &x307, x4, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x308, &x309, x4, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x310, &x311, x4, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x312, &x313, 0x0, x311, x308);
+ fiat_secp384r1_addcarryx_u64(&x314, &x315, x313, x309, x306);
+ fiat_secp384r1_addcarryx_u64(&x316, &x317, x315, x307, x304);
+ fiat_secp384r1_addcarryx_u64(&x318, &x319, x317, x305, x302);
+ fiat_secp384r1_addcarryx_u64(&x320, &x321, x319, x303, x300);
+ x322 = (x321 + x301);
+ fiat_secp384r1_addcarryx_u64(&x323, &x324, 0x0, x287, x310);
+ fiat_secp384r1_addcarryx_u64(&x325, &x326, x324, x289, x312);
+ fiat_secp384r1_addcarryx_u64(&x327, &x328, x326, x291, x314);
+ fiat_secp384r1_addcarryx_u64(&x329, &x330, x328, x293, x316);
+ fiat_secp384r1_addcarryx_u64(&x331, &x332, x330, x295, x318);
+ fiat_secp384r1_addcarryx_u64(&x333, &x334, x332, x297, x320);
+ fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x299, x322);
+ fiat_secp384r1_mulx_u64(&x337, &x338, x323, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x339, &x340, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x341, &x342, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x343, &x344, x337, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x345, &x346, x337, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x347, &x348, x337, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x349, &x350, x337, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x351, &x352, 0x0, x350, x347);
+ fiat_secp384r1_addcarryx_u64(&x353, &x354, x352, x348, x345);
+ fiat_secp384r1_addcarryx_u64(&x355, &x356, x354, x346, x343);
+ fiat_secp384r1_addcarryx_u64(&x357, &x358, x356, x344, x341);
+ fiat_secp384r1_addcarryx_u64(&x359, &x360, x358, x342, x339);
+ x361 = (x360 + x340);
+ fiat_secp384r1_addcarryx_u64(&x362, &x363, 0x0, x323, x349);
+ fiat_secp384r1_addcarryx_u64(&x364, &x365, x363, x325, x351);
+ fiat_secp384r1_addcarryx_u64(&x366, &x367, x365, x327, x353);
+ fiat_secp384r1_addcarryx_u64(&x368, &x369, x367, x329, x355);
+ fiat_secp384r1_addcarryx_u64(&x370, &x371, x369, x331, x357);
+ fiat_secp384r1_addcarryx_u64(&x372, &x373, x371, x333, x359);
+ fiat_secp384r1_addcarryx_u64(&x374, &x375, x373, x335, x361);
+ x376 = ((uint64_t)x375 + x336);
+ fiat_secp384r1_mulx_u64(&x377, &x378, x5, (arg1[5]));
+ fiat_secp384r1_mulx_u64(&x379, &x380, x5, (arg1[4]));
+ fiat_secp384r1_mulx_u64(&x381, &x382, x5, (arg1[3]));
+ fiat_secp384r1_mulx_u64(&x383, &x384, x5, (arg1[2]));
+ fiat_secp384r1_mulx_u64(&x385, &x386, x5, (arg1[1]));
+ fiat_secp384r1_mulx_u64(&x387, &x388, x5, (arg1[0]));
+ fiat_secp384r1_addcarryx_u64(&x389, &x390, 0x0, x388, x385);
+ fiat_secp384r1_addcarryx_u64(&x391, &x392, x390, x386, x383);
+ fiat_secp384r1_addcarryx_u64(&x393, &x394, x392, x384, x381);
+ fiat_secp384r1_addcarryx_u64(&x395, &x396, x394, x382, x379);
+ fiat_secp384r1_addcarryx_u64(&x397, &x398, x396, x380, x377);
+ x399 = (x398 + x378);
+ fiat_secp384r1_addcarryx_u64(&x400, &x401, 0x0, x364, x387);
+ fiat_secp384r1_addcarryx_u64(&x402, &x403, x401, x366, x389);
+ fiat_secp384r1_addcarryx_u64(&x404, &x405, x403, x368, x391);
+ fiat_secp384r1_addcarryx_u64(&x406, &x407, x405, x370, x393);
+ fiat_secp384r1_addcarryx_u64(&x408, &x409, x407, x372, x395);
+ fiat_secp384r1_addcarryx_u64(&x410, &x411, x409, x374, x397);
+ fiat_secp384r1_addcarryx_u64(&x412, &x413, x411, x376, x399);
+ fiat_secp384r1_mulx_u64(&x414, &x415, x400, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x416, &x417, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x418, &x419, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x420, &x421, x414, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x422, &x423, x414, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x424, &x425, x414, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x426, &x427, x414, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x428, &x429, 0x0, x427, x424);
+ fiat_secp384r1_addcarryx_u64(&x430, &x431, x429, x425, x422);
+ fiat_secp384r1_addcarryx_u64(&x432, &x433, x431, x423, x420);
+ fiat_secp384r1_addcarryx_u64(&x434, &x435, x433, x421, x418);
+ fiat_secp384r1_addcarryx_u64(&x436, &x437, x435, x419, x416);
+ x438 = (x437 + x417);
+ fiat_secp384r1_addcarryx_u64(&x439, &x440, 0x0, x400, x426);
+ fiat_secp384r1_addcarryx_u64(&x441, &x442, x440, x402, x428);
+ fiat_secp384r1_addcarryx_u64(&x443, &x444, x442, x404, x430);
+ fiat_secp384r1_addcarryx_u64(&x445, &x446, x444, x406, x432);
+ fiat_secp384r1_addcarryx_u64(&x447, &x448, x446, x408, x434);
+ fiat_secp384r1_addcarryx_u64(&x449, &x450, x448, x410, x436);
+ fiat_secp384r1_addcarryx_u64(&x451, &x452, x450, x412, x438);
+ x453 = ((uint64_t)x452 + x413);
+ fiat_secp384r1_subborrowx_u64(&x454, &x455, 0x0, x441,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u64(&x456, &x457, x455, x443,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x458, &x459, x457, x445,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x460, &x461, x459, x447,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x462, &x463, x461, x449,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x464, &x465, x463, x451,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x466, &x467, x465, x453, 0x0);
+ fiat_secp384r1_cmovznz_u64(&x468, x467, x454, x441);
+ fiat_secp384r1_cmovznz_u64(&x469, x467, x456, x443);
+ fiat_secp384r1_cmovznz_u64(&x470, x467, x458, x445);
+ fiat_secp384r1_cmovznz_u64(&x471, x467, x460, x447);
+ fiat_secp384r1_cmovznz_u64(&x472, x467, x462, x449);
+ fiat_secp384r1_cmovznz_u64(&x473, x467, x464, x451);
+ out1[0] = x468;
+ out1[1] = x469;
+ out1[2] = x470;
+ out1[3] = x471;
+ out1[4] = x472;
+ out1[5] = x473;
+}
+
+/*
+ * The function fiat_secp384r1_add adds two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the six result limbs, index 0 first.
+ * arg1 - first operand; indices 0..5 are read, never written.
+ * arg2 - second operand; indices 0..5 are read, never written.
+ *
+ * Implementation notes:
+ * The helpers addcarryx_u64, subborrowx_u64 and cmovznz_u64 are defined
+ * outside this excerpt. The description below assumes the usual reading
+ * addcarryx(out, carry_out, carry_in, a, b) = a + b + carry_in,
+ * subborrowx(out, borrow_out, borrow_in, a, b) = a - b - borrow_in, and
+ * cmovznz(out, cond, z, nz) = (cond != 0) ? nz : z.
+ * TODO(review): confirm against the helper definitions.
+ *
+ * 1. Six chained addcarryx calls add arg1[i] and arg2[i] for i = 0..5,
+ * feeding each carry-out into the next call. The sum limbs are
+ * x1, x3, x5, x7, x9, x11 and the final carry-out is x12.
+ * 2. Seven chained subborrowx calls subtract the constants 0xffffffff,
+ * 0xffffffff00000000, 0xfffffffffffffffe and three times
+ * 0xffffffffffffffff from those sum limbs, then 0x0 from the carry x12
+ * used as a seventh limb. The six constants match the limbs, least
+ * significant first, of 2^384 - 2^128 - 2^96 + 2^32 - 1, presumably
+ * the modulus m. The final borrow-out is x26; the seventh difference
+ * x25 is not used afterwards.
+ * 3. Six cmovznz calls keyed on x26 pick, limb by limb, between the
+ * subtracted value (x13, x15, ...) and the plain sum (x1, x3, ...).
+ * 4. The selected limbs x27..x32 are stored to out1[0..5]. Every read of
+ * arg1 and arg2 happens in step 1, before the first store to out1.
+ *
+ * The body is a fixed sequence of calls with no branches or loops of its
+ * own.
+ *
+ */
+static void
+fiat_secp384r1_add(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint64_t x1;
+ fiat_secp384r1_uint1 x2;
+ uint64_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint64_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint64_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint64_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint64_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint64_t x13;
+ fiat_secp384r1_uint1 x14;
+ uint64_t x15;
+ fiat_secp384r1_uint1 x16;
+ uint64_t x17;
+ fiat_secp384r1_uint1 x18;
+ uint64_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint64_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint64_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint64_t x25;
+ fiat_secp384r1_uint1 x26;
+ uint64_t x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ fiat_secp384r1_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+ fiat_secp384r1_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+ fiat_secp384r1_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+ fiat_secp384r1_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+ fiat_secp384r1_addcarryx_u64(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+ fiat_secp384r1_addcarryx_u64(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+ fiat_secp384r1_subborrowx_u64(&x13, &x14, 0x0, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u64(&x15, &x16, x14, x3,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x17, &x18, x16, x5,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x19, &x20, x18, x7,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x21, &x22, x20, x9,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x23, &x24, x22, x11,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x25, &x26, x24, x12, 0x0);
+ fiat_secp384r1_cmovznz_u64(&x27, x26, x13, x1);
+ fiat_secp384r1_cmovznz_u64(&x28, x26, x15, x3);
+ fiat_secp384r1_cmovznz_u64(&x29, x26, x17, x5);
+ fiat_secp384r1_cmovznz_u64(&x30, x26, x19, x7);
+ fiat_secp384r1_cmovznz_u64(&x31, x26, x21, x9);
+ fiat_secp384r1_cmovznz_u64(&x32, x26, x23, x11);
+ out1[0] = x27;
+ out1[1] = x28;
+ out1[2] = x29;
+ out1[3] = x30;
+ out1[4] = x31;
+ out1[5] = x32;
+}
+
+/*
+ * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the six result limbs, index 0 first.
+ * arg1 - minuend; indices 0..5 are read, never written.
+ * arg2 - subtrahend; indices 0..5 are read, never written.
+ *
+ * Implementation notes:
+ * The helpers subborrowx_u64, cmovznz_u64 and addcarryx_u64 are defined
+ * outside this excerpt. The description below assumes the usual reading
+ * subborrowx(out, borrow_out, borrow_in, a, b) = a - b - borrow_in,
+ * cmovznz(out, cond, z, nz) = (cond != 0) ? nz : z, and
+ * addcarryx(out, carry_out, carry_in, a, b) = a + b + carry_in.
+ * TODO(review): confirm against the helper definitions.
+ *
+ * 1. Six chained subborrowx calls compute arg1[i] - arg2[i] for
+ * i = 0..5, feeding each borrow-out into the next call. The difference
+ * limbs are x1, x3, x5, x7, x9, x11 and the final borrow-out is x12.
+ * 2. cmovznz turns x12 into x13, which is either 0x0 or
+ * 0xffffffffffffffff; under the assumed helper semantics it is all
+ * ones exactly when the subtraction borrowed.
+ * 3. Six chained addcarryx calls add (x13 & constant) to the difference
+ * limbs. The constants are 0xffffffff, 0xffffffff00000000 and
+ * 0xfffffffffffffffe for limbs 0..2; limbs 3..5 add x13 itself, which
+ * equals x13 & 0xffffffffffffffff. These values match the limbs, least
+ * significant first, of 2^384 - 2^128 - 2^96 + 2^32 - 1, presumably
+ * the modulus m, so the step adds either m or zero. The last carry-out
+ * x25 is not used afterwards.
+ * 4. The sums x14, x16, x18, x20, x22, x24 are stored to out1[0..5].
+ * Every read of arg1 and arg2 happens in step 1, before the first
+ * store to out1.
+ *
+ * The body is a fixed sequence of calls with no branches or loops of its
+ * own.
+ *
+ */
+static void
+fiat_secp384r1_sub(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint64_t x1;
+ fiat_secp384r1_uint1 x2;
+ uint64_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint64_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint64_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint64_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint64_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint64_t x13;
+ uint64_t x14;
+ fiat_secp384r1_uint1 x15;
+ uint64_t x16;
+ fiat_secp384r1_uint1 x17;
+ uint64_t x18;
+ fiat_secp384r1_uint1 x19;
+ uint64_t x20;
+ fiat_secp384r1_uint1 x21;
+ uint64_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint64_t x24;
+ fiat_secp384r1_uint1 x25;
+ fiat_secp384r1_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
+ fiat_secp384r1_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+ fiat_secp384r1_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+ fiat_secp384r1_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+ fiat_secp384r1_subborrowx_u64(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+ fiat_secp384r1_subborrowx_u64(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+ fiat_secp384r1_cmovznz_u64(&x13, x12, 0x0, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, x1,
+ (x13 & UINT32_C(0xffffffff)));
+ fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, x3,
+ (x13 & UINT64_C(0xffffffff00000000)));
+ fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x5,
+ (x13 & UINT64_C(0xfffffffffffffffe)));
+ fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x7, x13);
+ fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x13);
+ fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x11, x13);
+ out1[0] = x14;
+ out1[1] = x16;
+ out1[2] = x18;
+ out1[3] = x20;
+ out1[4] = x22;
+ out1[5] = x24;
+}
+
+/*
+ * The function fiat_secp384r1_opp negates a field element in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the six result limbs, index 0 first.
+ * arg1 - element to negate; indices 0..5 are read, never written.
+ *
+ * Implementation notes:
+ * The helpers subborrowx_u64, cmovznz_u64 and addcarryx_u64 are defined
+ * outside this excerpt. The description below assumes the usual reading
+ * subborrowx(out, borrow_out, borrow_in, a, b) = a - b - borrow_in,
+ * cmovznz(out, cond, z, nz) = (cond != 0) ? nz : z, and
+ * addcarryx(out, carry_out, carry_in, a, b) = a + b + carry_in.
+ * TODO(review): confirm against the helper definitions.
+ *
+ * The call sequence is the same as in fiat_secp384r1_sub with the
+ * literal 0x0 in place of each minuend limb:
+ * 1. Six chained subborrowx calls compute 0 - arg1[i] for i = 0..5,
+ * feeding each borrow-out into the next call. The difference limbs are
+ * x1, x3, x5, x7, x9, x11 and the final borrow-out is x12.
+ * 2. cmovznz turns x12 into x13, which is either 0x0 or
+ * 0xffffffffffffffff; under the assumed helper semantics it is all
+ * ones exactly when the subtraction borrowed.
+ * 3. Six chained addcarryx calls add (x13 & constant) to the difference
+ * limbs. The constants are 0xffffffff, 0xffffffff00000000 and
+ * 0xfffffffffffffffe for limbs 0..2; limbs 3..5 add x13 itself. These
+ * values match the limbs, least significant first, of
+ * 2^384 - 2^128 - 2^96 + 2^32 - 1, presumably the modulus m. The last
+ * carry-out x25 is not used afterwards.
+ * 4. The sums x14, x16, x18, x20, x22, x24 are stored to out1[0..5].
+ * Every read of arg1 happens in step 1, before the first store to
+ * out1.
+ *
+ * The body is a fixed sequence of calls with no branches or loops of its
+ * own.
+ *
+ */
+static void
+fiat_secp384r1_opp(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint64_t x1;
+ fiat_secp384r1_uint1 x2;
+ uint64_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint64_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint64_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint64_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint64_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint64_t x13;
+ uint64_t x14;
+ fiat_secp384r1_uint1 x15;
+ uint64_t x16;
+ fiat_secp384r1_uint1 x17;
+ uint64_t x18;
+ fiat_secp384r1_uint1 x19;
+ uint64_t x20;
+ fiat_secp384r1_uint1 x21;
+ uint64_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint64_t x24;
+ fiat_secp384r1_uint1 x25;
+ fiat_secp384r1_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
+ fiat_secp384r1_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
+ fiat_secp384r1_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
+ fiat_secp384r1_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
+ fiat_secp384r1_subborrowx_u64(&x9, &x10, x8, 0x0, (arg1[4]));
+ fiat_secp384r1_subborrowx_u64(&x11, &x12, x10, 0x0, (arg1[5]));
+ fiat_secp384r1_cmovznz_u64(&x13, x12, 0x0, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, x1,
+ (x13 & UINT32_C(0xffffffff)));
+ fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, x3,
+ (x13 & UINT64_C(0xffffffff00000000)));
+ fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x5,
+ (x13 & UINT64_C(0xfffffffffffffffe)));
+ fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x7, x13);
+ fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x13);
+ fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x11, x13);
+ out1[0] = x14;
+ out1[1] = x16;
+ out1[2] = x18;
+ out1[3] = x20;
+ out1[4] = x22;
+ out1[5] = x24;
+}
+
+/*
+ * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^6) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_from_montgomery(
+ fiat_secp384r1_non_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ fiat_secp384r1_uint1 x17;
+ uint64_t x18;
+ fiat_secp384r1_uint1 x19;
+ uint64_t x20;
+ fiat_secp384r1_uint1 x21;
+ uint64_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint64_t x24;
+ fiat_secp384r1_uint1 x25;
+ uint64_t x26;
+ fiat_secp384r1_uint1 x27;
+ uint64_t x28;
+ fiat_secp384r1_uint1 x29;
+ uint64_t x30;
+ fiat_secp384r1_uint1 x31;
+ uint64_t x32;
+ fiat_secp384r1_uint1 x33;
+ uint64_t x34;
+ fiat_secp384r1_uint1 x35;
+ uint64_t x36;
+ fiat_secp384r1_uint1 x37;
+ uint64_t x38;
+ fiat_secp384r1_uint1 x39;
+ uint64_t x40;
+ fiat_secp384r1_uint1 x41;
+ uint64_t x42;
+ fiat_secp384r1_uint1 x43;
+ uint64_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint64_t x46;
+ fiat_secp384r1_uint1 x47;
+ uint64_t x48;
+ fiat_secp384r1_uint1 x49;
+ uint64_t x50;
+ fiat_secp384r1_uint1 x51;
+ uint64_t x52;
+ uint64_t x53;
+ uint64_t x54;
+ uint64_t x55;
+ uint64_t x56;
+ uint64_t x57;
+ uint64_t x58;
+ uint64_t x59;
+ uint64_t x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint64_t x66;
+ fiat_secp384r1_uint1 x67;
+ uint64_t x68;
+ fiat_secp384r1_uint1 x69;
+ uint64_t x70;
+ fiat_secp384r1_uint1 x71;
+ uint64_t x72;
+ fiat_secp384r1_uint1 x73;
+ uint64_t x74;
+ fiat_secp384r1_uint1 x75;
+ uint64_t x76;
+ fiat_secp384r1_uint1 x77;
+ uint64_t x78;
+ fiat_secp384r1_uint1 x79;
+ uint64_t x80;
+ fiat_secp384r1_uint1 x81;
+ uint64_t x82;
+ fiat_secp384r1_uint1 x83;
+ uint64_t x84;
+ fiat_secp384r1_uint1 x85;
+ uint64_t x86;
+ fiat_secp384r1_uint1 x87;
+ uint64_t x88;
+ fiat_secp384r1_uint1 x89;
+ uint64_t x90;
+ fiat_secp384r1_uint1 x91;
+ uint64_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint64_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint64_t x96;
+ fiat_secp384r1_uint1 x97;
+ uint64_t x98;
+ fiat_secp384r1_uint1 x99;
+ uint64_t x100;
+ fiat_secp384r1_uint1 x101;
+ uint64_t x102;
+ uint64_t x103;
+ uint64_t x104;
+ uint64_t x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ fiat_secp384r1_uint1 x117;
+ uint64_t x118;
+ fiat_secp384r1_uint1 x119;
+ uint64_t x120;
+ fiat_secp384r1_uint1 x121;
+ uint64_t x122;
+ fiat_secp384r1_uint1 x123;
+ uint64_t x124;
+ fiat_secp384r1_uint1 x125;
+ uint64_t x126;
+ fiat_secp384r1_uint1 x127;
+ uint64_t x128;
+ fiat_secp384r1_uint1 x129;
+ uint64_t x130;
+ fiat_secp384r1_uint1 x131;
+ uint64_t x132;
+ fiat_secp384r1_uint1 x133;
+ uint64_t x134;
+ fiat_secp384r1_uint1 x135;
+ uint64_t x136;
+ fiat_secp384r1_uint1 x137;
+ uint64_t x138;
+ fiat_secp384r1_uint1 x139;
+ uint64_t x140;
+ fiat_secp384r1_uint1 x141;
+ uint64_t x142;
+ fiat_secp384r1_uint1 x143;
+ uint64_t x144;
+ fiat_secp384r1_uint1 x145;
+ uint64_t x146;
+ fiat_secp384r1_uint1 x147;
+ uint64_t x148;
+ fiat_secp384r1_uint1 x149;
+ uint64_t x150;
+ fiat_secp384r1_uint1 x151;
+ uint64_t x152;
+ uint64_t x153;
+ uint64_t x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ uint64_t x159;
+ uint64_t x160;
+ uint64_t x161;
+ uint64_t x162;
+ uint64_t x163;
+ uint64_t x164;
+ uint64_t x165;
+ uint64_t x166;
+ fiat_secp384r1_uint1 x167;
+ uint64_t x168;
+ fiat_secp384r1_uint1 x169;
+ uint64_t x170;
+ fiat_secp384r1_uint1 x171;
+ uint64_t x172;
+ fiat_secp384r1_uint1 x173;
+ uint64_t x174;
+ fiat_secp384r1_uint1 x175;
+ uint64_t x176;
+ fiat_secp384r1_uint1 x177;
+ uint64_t x178;
+ fiat_secp384r1_uint1 x179;
+ uint64_t x180;
+ fiat_secp384r1_uint1 x181;
+ uint64_t x182;
+ fiat_secp384r1_uint1 x183;
+ uint64_t x184;
+ fiat_secp384r1_uint1 x185;
+ uint64_t x186;
+ fiat_secp384r1_uint1 x187;
+ uint64_t x188;
+ fiat_secp384r1_uint1 x189;
+ uint64_t x190;
+ fiat_secp384r1_uint1 x191;
+ uint64_t x192;
+ fiat_secp384r1_uint1 x193;
+ uint64_t x194;
+ fiat_secp384r1_uint1 x195;
+ uint64_t x196;
+ fiat_secp384r1_uint1 x197;
+ uint64_t x198;
+ fiat_secp384r1_uint1 x199;
+ uint64_t x200;
+ fiat_secp384r1_uint1 x201;
+ uint64_t x202;
+ uint64_t x203;
+ uint64_t x204;
+ uint64_t x205;
+ uint64_t x206;
+ uint64_t x207;
+ uint64_t x208;
+ uint64_t x209;
+ uint64_t x210;
+ uint64_t x211;
+ uint64_t x212;
+ uint64_t x213;
+ uint64_t x214;
+ uint64_t x215;
+ uint64_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint64_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint64_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint64_t x222;
+ fiat_secp384r1_uint1 x223;
+ uint64_t x224;
+ fiat_secp384r1_uint1 x225;
+ uint64_t x226;
+ fiat_secp384r1_uint1 x227;
+ uint64_t x228;
+ fiat_secp384r1_uint1 x229;
+ uint64_t x230;
+ fiat_secp384r1_uint1 x231;
+ uint64_t x232;
+ fiat_secp384r1_uint1 x233;
+ uint64_t x234;
+ fiat_secp384r1_uint1 x235;
+ uint64_t x236;
+ fiat_secp384r1_uint1 x237;
+ uint64_t x238;
+ fiat_secp384r1_uint1 x239;
+ uint64_t x240;
+ fiat_secp384r1_uint1 x241;
+ uint64_t x242;
+ fiat_secp384r1_uint1 x243;
+ uint64_t x244;
+ fiat_secp384r1_uint1 x245;
+ uint64_t x246;
+ fiat_secp384r1_uint1 x247;
+ uint64_t x248;
+ fiat_secp384r1_uint1 x249;
+ uint64_t x250;
+ fiat_secp384r1_uint1 x251;
+ uint64_t x252;
+ uint64_t x253;
+ uint64_t x254;
+ uint64_t x255;
+ uint64_t x256;
+ uint64_t x257;
+ uint64_t x258;
+ uint64_t x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint64_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint64_t x265;
+ uint64_t x266;
+ fiat_secp384r1_uint1 x267;
+ uint64_t x268;
+ fiat_secp384r1_uint1 x269;
+ uint64_t x270;
+ fiat_secp384r1_uint1 x271;
+ uint64_t x272;
+ fiat_secp384r1_uint1 x273;
+ uint64_t x274;
+ fiat_secp384r1_uint1 x275;
+ uint64_t x276;
+ fiat_secp384r1_uint1 x277;
+ uint64_t x278;
+ fiat_secp384r1_uint1 x279;
+ uint64_t x280;
+ fiat_secp384r1_uint1 x281;
+ uint64_t x282;
+ fiat_secp384r1_uint1 x283;
+ uint64_t x284;
+ fiat_secp384r1_uint1 x285;
+ uint64_t x286;
+ fiat_secp384r1_uint1 x287;
+ uint64_t x288;
+ fiat_secp384r1_uint1 x289;
+ uint64_t x290;
+ fiat_secp384r1_uint1 x291;
+ uint64_t x292;
+ fiat_secp384r1_uint1 x293;
+ uint64_t x294;
+ fiat_secp384r1_uint1 x295;
+ uint64_t x296;
+ fiat_secp384r1_uint1 x297;
+ uint64_t x298;
+ fiat_secp384r1_uint1 x299;
+ uint64_t x300;
+ fiat_secp384r1_uint1 x301;
+ uint64_t x302;
+ fiat_secp384r1_uint1 x303;
+ uint64_t x304;
+ uint64_t x305;
+ uint64_t x306;
+ uint64_t x307;
+ uint64_t x308;
+ uint64_t x309;
+ x1 = (arg1[0]);
+ fiat_secp384r1_mulx_u64(&x2, &x3, x1, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x4, &x5, x2, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x6, &x7, x2, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x8, &x9, x2, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x10, &x11, x2, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x12, &x13, x2, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x14, &x15, x2, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x16, &x17, 0x0, x15, x12);
+ fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, x13, x10);
+ fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, x11, x8);
+ fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, x9, x6);
+ fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, x7, x4);
+ fiat_secp384r1_addcarryx_u64(&x26, &x27, 0x0, x1, x14);
+ fiat_secp384r1_addcarryx_u64(&x28, &x29, x27, 0x0, x16);
+ fiat_secp384r1_addcarryx_u64(&x30, &x31, x29, 0x0, x18);
+ fiat_secp384r1_addcarryx_u64(&x32, &x33, x31, 0x0, x20);
+ fiat_secp384r1_addcarryx_u64(&x34, &x35, x33, 0x0, x22);
+ fiat_secp384r1_addcarryx_u64(&x36, &x37, x35, 0x0, x24);
+ fiat_secp384r1_addcarryx_u64(&x38, &x39, x37, 0x0, (x25 + x5));
+ fiat_secp384r1_addcarryx_u64(&x40, &x41, 0x0, x28, (arg1[1]));
+ fiat_secp384r1_addcarryx_u64(&x42, &x43, x41, x30, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x44, &x45, x43, x32, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x46, &x47, x45, x34, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x48, &x49, x47, x36, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x50, &x51, x49, x38, 0x0);
+ fiat_secp384r1_mulx_u64(&x52, &x53, x40, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x54, &x55, x52, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x56, &x57, x52, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x58, &x59, x52, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x60, &x61, x52, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x62, &x63, x52, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x64, &x65, x52, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
+ fiat_secp384r1_addcarryx_u64(&x68, &x69, x67, x63, x60);
+ fiat_secp384r1_addcarryx_u64(&x70, &x71, x69, x61, x58);
+ fiat_secp384r1_addcarryx_u64(&x72, &x73, x71, x59, x56);
+ fiat_secp384r1_addcarryx_u64(&x74, &x75, x73, x57, x54);
+ fiat_secp384r1_addcarryx_u64(&x76, &x77, 0x0, x40, x64);
+ fiat_secp384r1_addcarryx_u64(&x78, &x79, x77, x42, x66);
+ fiat_secp384r1_addcarryx_u64(&x80, &x81, x79, x44, x68);
+ fiat_secp384r1_addcarryx_u64(&x82, &x83, x81, x46, x70);
+ fiat_secp384r1_addcarryx_u64(&x84, &x85, x83, x48, x72);
+ fiat_secp384r1_addcarryx_u64(&x86, &x87, x85, x50, x74);
+ fiat_secp384r1_addcarryx_u64(&x88, &x89, x87, ((uint64_t)x51 + x39),
+ (x75 + x55));
+ fiat_secp384r1_addcarryx_u64(&x90, &x91, 0x0, x78, (arg1[2]));
+ fiat_secp384r1_addcarryx_u64(&x92, &x93, x91, x80, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x82, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x84, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x98, &x99, x97, x86, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x100, &x101, x99, x88, 0x0);
+ fiat_secp384r1_mulx_u64(&x102, &x103, x90, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x104, &x105, x102, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x106, &x107, x102, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x108, &x109, x102, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x110, &x111, x102, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x112, &x113, x102, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x114, &x115, x102, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x116, &x117, 0x0, x115, x112);
+ fiat_secp384r1_addcarryx_u64(&x118, &x119, x117, x113, x110);
+ fiat_secp384r1_addcarryx_u64(&x120, &x121, x119, x111, x108);
+ fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x109, x106);
+ fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x107, x104);
+ fiat_secp384r1_addcarryx_u64(&x126, &x127, 0x0, x90, x114);
+ fiat_secp384r1_addcarryx_u64(&x128, &x129, x127, x92, x116);
+ fiat_secp384r1_addcarryx_u64(&x130, &x131, x129, x94, x118);
+ fiat_secp384r1_addcarryx_u64(&x132, &x133, x131, x96, x120);
+ fiat_secp384r1_addcarryx_u64(&x134, &x135, x133, x98, x122);
+ fiat_secp384r1_addcarryx_u64(&x136, &x137, x135, x100, x124);
+ fiat_secp384r1_addcarryx_u64(&x138, &x139, x137, ((uint64_t)x101 + x89),
+ (x125 + x105));
+ fiat_secp384r1_addcarryx_u64(&x140, &x141, 0x0, x128, (arg1[3]));
+ fiat_secp384r1_addcarryx_u64(&x142, &x143, x141, x130, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x144, &x145, x143, x132, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x146, &x147, x145, x134, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x148, &x149, x147, x136, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x150, &x151, x149, x138, 0x0);
+ fiat_secp384r1_mulx_u64(&x152, &x153, x140, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x154, &x155, x152, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x156, &x157, x152, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x158, &x159, x152, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x160, &x161, x152, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x162, &x163, x152, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x164, &x165, x152, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x166, &x167, 0x0, x165, x162);
+ fiat_secp384r1_addcarryx_u64(&x168, &x169, x167, x163, x160);
+ fiat_secp384r1_addcarryx_u64(&x170, &x171, x169, x161, x158);
+ fiat_secp384r1_addcarryx_u64(&x172, &x173, x171, x159, x156);
+ fiat_secp384r1_addcarryx_u64(&x174, &x175, x173, x157, x154);
+ fiat_secp384r1_addcarryx_u64(&x176, &x177, 0x0, x140, x164);
+ fiat_secp384r1_addcarryx_u64(&x178, &x179, x177, x142, x166);
+ fiat_secp384r1_addcarryx_u64(&x180, &x181, x179, x144, x168);
+ fiat_secp384r1_addcarryx_u64(&x182, &x183, x181, x146, x170);
+ fiat_secp384r1_addcarryx_u64(&x184, &x185, x183, x148, x172);
+ fiat_secp384r1_addcarryx_u64(&x186, &x187, x185, x150, x174);
+ fiat_secp384r1_addcarryx_u64(&x188, &x189, x187, ((uint64_t)x151 + x139),
+ (x175 + x155));
+ fiat_secp384r1_addcarryx_u64(&x190, &x191, 0x0, x178, (arg1[4]));
+ fiat_secp384r1_addcarryx_u64(&x192, &x193, x191, x180, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x194, &x195, x193, x182, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x196, &x197, x195, x184, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x198, &x199, x197, x186, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x200, &x201, x199, x188, 0x0);
+ fiat_secp384r1_mulx_u64(&x202, &x203, x190, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x204, &x205, x202, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x206, &x207, x202, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x208, &x209, x202, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x210, &x211, x202, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x212, &x213, x202, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x214, &x215, x202, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x216, &x217, 0x0, x215, x212);
+ fiat_secp384r1_addcarryx_u64(&x218, &x219, x217, x213, x210);
+ fiat_secp384r1_addcarryx_u64(&x220, &x221, x219, x211, x208);
+ fiat_secp384r1_addcarryx_u64(&x222, &x223, x221, x209, x206);
+ fiat_secp384r1_addcarryx_u64(&x224, &x225, x223, x207, x204);
+ fiat_secp384r1_addcarryx_u64(&x226, &x227, 0x0, x190, x214);
+ fiat_secp384r1_addcarryx_u64(&x228, &x229, x227, x192, x216);
+ fiat_secp384r1_addcarryx_u64(&x230, &x231, x229, x194, x218);
+ fiat_secp384r1_addcarryx_u64(&x232, &x233, x231, x196, x220);
+ fiat_secp384r1_addcarryx_u64(&x234, &x235, x233, x198, x222);
+ fiat_secp384r1_addcarryx_u64(&x236, &x237, x235, x200, x224);
+ fiat_secp384r1_addcarryx_u64(&x238, &x239, x237, ((uint64_t)x201 + x189),
+ (x225 + x205));
+ fiat_secp384r1_addcarryx_u64(&x240, &x241, 0x0, x228, (arg1[5]));
+ fiat_secp384r1_addcarryx_u64(&x242, &x243, x241, x230, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x244, &x245, x243, x232, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x246, &x247, x245, x234, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x248, &x249, x247, x236, 0x0);
+ fiat_secp384r1_addcarryx_u64(&x250, &x251, x249, x238, 0x0);
+ fiat_secp384r1_mulx_u64(&x252, &x253, x240, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x254, &x255, x252, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x256, &x257, x252, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x258, &x259, x252, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x260, &x261, x252, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x262, &x263, x252, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x264, &x265, x252, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x266, &x267, 0x0, x265, x262);
+ fiat_secp384r1_addcarryx_u64(&x268, &x269, x267, x263, x260);
+ fiat_secp384r1_addcarryx_u64(&x270, &x271, x269, x261, x258);
+ fiat_secp384r1_addcarryx_u64(&x272, &x273, x271, x259, x256);
+ fiat_secp384r1_addcarryx_u64(&x274, &x275, x273, x257, x254);
+ fiat_secp384r1_addcarryx_u64(&x276, &x277, 0x0, x240, x264);
+ fiat_secp384r1_addcarryx_u64(&x278, &x279, x277, x242, x266);
+ fiat_secp384r1_addcarryx_u64(&x280, &x281, x279, x244, x268);
+ fiat_secp384r1_addcarryx_u64(&x282, &x283, x281, x246, x270);
+ fiat_secp384r1_addcarryx_u64(&x284, &x285, x283, x248, x272);
+ fiat_secp384r1_addcarryx_u64(&x286, &x287, x285, x250, x274);
+ fiat_secp384r1_addcarryx_u64(&x288, &x289, x287, ((uint64_t)x251 + x239),
+ (x275 + x255));
+ fiat_secp384r1_subborrowx_u64(&x290, &x291, 0x0, x278,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u64(&x292, &x293, x291, x280,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x294, &x295, x293, x282,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x296, &x297, x295, x284,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x298, &x299, x297, x286,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x300, &x301, x299, x288,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x302, &x303, x301, x289, 0x0);
+ fiat_secp384r1_cmovznz_u64(&x304, x303, x290, x278);
+ fiat_secp384r1_cmovznz_u64(&x305, x303, x292, x280);
+ fiat_secp384r1_cmovznz_u64(&x306, x303, x294, x282);
+ fiat_secp384r1_cmovznz_u64(&x307, x303, x296, x284);
+ fiat_secp384r1_cmovznz_u64(&x308, x303, x298, x286);
+ fiat_secp384r1_cmovznz_u64(&x309, x303, x300, x288);
+ out1[0] = x304;
+ out1[1] = x305;
+ out1[2] = x306;
+ out1[3] = x307;
+ out1[4] = x308;
+ out1[5] = x309;
+}
+
+/*
+ * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = eval arg1 mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Implementation notes (read off the body below):
+ *
+ * - The body is straight-line code: six rounds followed by one conditional
+ *   subtraction, with no loops and no branches. The helpers
+ *   fiat_secp384r1_mulx_u64, fiat_secp384r1_addcarryx_u64,
+ *   fiat_secp384r1_subborrowx_u64 and fiat_secp384r1_cmovznz_u64 are not
+ *   defined in this function; as used here their argument orders are
+ *   (low_out, high_out, a, b), (sum_out, carry_out, carry_in, a, b),
+ *   (diff_out, borrow_out, borrow_in, a, b) and (out, condition, a, b).
+ *
+ * - Round i (i = 0..5) takes the input limb arg1[i] and multiplies it by a
+ *   fixed constant whose 64-bit limbs, least significant first, are
+ *   0xfffffffe00000001, 0x200000000, 0xfffffffe00000000, 0x200000000 and 1
+ *   (the factor 1 shows up as the limb itself being added into the top of
+ *   the partial product). From round 1 onwards the product is added to the
+ *   running value carried over from the previous round.
+ *   NOTE(review): this constant is presumably R^2 mod m for R = 2^384, which
+ *   is what would make the result the Montgomery form of arg1 -- confirm
+ *   against the generator / curve parameters.
+ *
+ * - Each round then reduces by one limb. The lowest limb of the running
+ *   value is multiplied by 0x100000001 and only the low 64 bits of that
+ *   product are kept (the high half, e.g. x24, is never read). That factor
+ *   is multiplied by the limbs of m, which are, least significant first,
+ *   0xffffffff, 0xffffffff00000000, 0xfffffffffffffffe and three times
+ *   0xffffffffffffffff. Because 0xffffffff * 0x100000001 = 2^64 - 1, the
+ *   lowest limb of the sum (x47, x113, x179, x245, x311, x377) is always
+ *   zero; it is computed only for its carry and never read again.
+ *
+ * - After the last round m is subtracted once through a borrow chain over
+ *   the six result limbs plus the top carry x390. The final borrow x404 is
+ *   the condition handed to fiat_secp384r1_cmovznz_u64, which chooses
+ *   between the subtracted and the unsubtracted limb for every position.
+ *   NOTE(review): going by the "if arg1 = 0 then arg2 else arg3" contract
+ *   documented for fiat_secp384r1_selectznz, a zero borrow selects the
+ *   subtracted value -- confirm against the definition of cmovznz.
+ *
+ * - out1 is written only at the very end, after every read of arg1.
+ */
+static void
+fiat_secp384r1_to_montgomery(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_non_montgomery_domain_field_element arg1)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ fiat_secp384r1_uint1 x16;
+ uint64_t x17;
+ fiat_secp384r1_uint1 x18;
+ uint64_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint64_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint64_t x23;
+ uint64_t x24;
+ uint64_t x25;
+ uint64_t x26;
+ uint64_t x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ fiat_secp384r1_uint1 x38;
+ uint64_t x39;
+ fiat_secp384r1_uint1 x40;
+ uint64_t x41;
+ fiat_secp384r1_uint1 x42;
+ uint64_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint64_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint64_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint64_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint64_t x51;
+ fiat_secp384r1_uint1 x52;
+ uint64_t x53;
+ fiat_secp384r1_uint1 x54;
+ uint64_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint64_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint64_t x59;
+ fiat_secp384r1_uint1 x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint64_t x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ fiat_secp384r1_uint1 x70;
+ uint64_t x71;
+ fiat_secp384r1_uint1 x72;
+ uint64_t x73;
+ fiat_secp384r1_uint1 x74;
+ uint64_t x75;
+ fiat_secp384r1_uint1 x76;
+ uint64_t x77;
+ fiat_secp384r1_uint1 x78;
+ uint64_t x79;
+ fiat_secp384r1_uint1 x80;
+ uint64_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint64_t x83;
+ fiat_secp384r1_uint1 x84;
+ uint64_t x85;
+ fiat_secp384r1_uint1 x86;
+ uint64_t x87;
+ fiat_secp384r1_uint1 x88;
+ uint64_t x89;
+ uint64_t x90;
+ uint64_t x91;
+ uint64_t x92;
+ uint64_t x93;
+ uint64_t x94;
+ uint64_t x95;
+ uint64_t x96;
+ uint64_t x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ uint64_t x101;
+ uint64_t x102;
+ uint64_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint64_t x105;
+ fiat_secp384r1_uint1 x106;
+ uint64_t x107;
+ fiat_secp384r1_uint1 x108;
+ uint64_t x109;
+ fiat_secp384r1_uint1 x110;
+ uint64_t x111;
+ fiat_secp384r1_uint1 x112;
+ uint64_t x113;
+ fiat_secp384r1_uint1 x114;
+ uint64_t x115;
+ fiat_secp384r1_uint1 x116;
+ uint64_t x117;
+ fiat_secp384r1_uint1 x118;
+ uint64_t x119;
+ fiat_secp384r1_uint1 x120;
+ uint64_t x121;
+ fiat_secp384r1_uint1 x122;
+ uint64_t x123;
+ fiat_secp384r1_uint1 x124;
+ uint64_t x125;
+ fiat_secp384r1_uint1 x126;
+ uint64_t x127;
+ uint64_t x128;
+ uint64_t x129;
+ uint64_t x130;
+ uint64_t x131;
+ uint64_t x132;
+ uint64_t x133;
+ uint64_t x134;
+ uint64_t x135;
+ fiat_secp384r1_uint1 x136;
+ uint64_t x137;
+ fiat_secp384r1_uint1 x138;
+ uint64_t x139;
+ fiat_secp384r1_uint1 x140;
+ uint64_t x141;
+ fiat_secp384r1_uint1 x142;
+ uint64_t x143;
+ fiat_secp384r1_uint1 x144;
+ uint64_t x145;
+ fiat_secp384r1_uint1 x146;
+ uint64_t x147;
+ fiat_secp384r1_uint1 x148;
+ uint64_t x149;
+ fiat_secp384r1_uint1 x150;
+ uint64_t x151;
+ fiat_secp384r1_uint1 x152;
+ uint64_t x153;
+ fiat_secp384r1_uint1 x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ uint64_t x159;
+ uint64_t x160;
+ uint64_t x161;
+ uint64_t x162;
+ uint64_t x163;
+ uint64_t x164;
+ uint64_t x165;
+ uint64_t x166;
+ uint64_t x167;
+ uint64_t x168;
+ uint64_t x169;
+ fiat_secp384r1_uint1 x170;
+ uint64_t x171;
+ fiat_secp384r1_uint1 x172;
+ uint64_t x173;
+ fiat_secp384r1_uint1 x174;
+ uint64_t x175;
+ fiat_secp384r1_uint1 x176;
+ uint64_t x177;
+ fiat_secp384r1_uint1 x178;
+ uint64_t x179;
+ fiat_secp384r1_uint1 x180;
+ uint64_t x181;
+ fiat_secp384r1_uint1 x182;
+ uint64_t x183;
+ fiat_secp384r1_uint1 x184;
+ uint64_t x185;
+ fiat_secp384r1_uint1 x186;
+ uint64_t x187;
+ fiat_secp384r1_uint1 x188;
+ uint64_t x189;
+ fiat_secp384r1_uint1 x190;
+ uint64_t x191;
+ fiat_secp384r1_uint1 x192;
+ uint64_t x193;
+ uint64_t x194;
+ uint64_t x195;
+ uint64_t x196;
+ uint64_t x197;
+ uint64_t x198;
+ uint64_t x199;
+ uint64_t x200;
+ uint64_t x201;
+ fiat_secp384r1_uint1 x202;
+ uint64_t x203;
+ fiat_secp384r1_uint1 x204;
+ uint64_t x205;
+ fiat_secp384r1_uint1 x206;
+ uint64_t x207;
+ fiat_secp384r1_uint1 x208;
+ uint64_t x209;
+ fiat_secp384r1_uint1 x210;
+ uint64_t x211;
+ fiat_secp384r1_uint1 x212;
+ uint64_t x213;
+ fiat_secp384r1_uint1 x214;
+ uint64_t x215;
+ fiat_secp384r1_uint1 x216;
+ uint64_t x217;
+ fiat_secp384r1_uint1 x218;
+ uint64_t x219;
+ fiat_secp384r1_uint1 x220;
+ uint64_t x221;
+ uint64_t x222;
+ uint64_t x223;
+ uint64_t x224;
+ uint64_t x225;
+ uint64_t x226;
+ uint64_t x227;
+ uint64_t x228;
+ uint64_t x229;
+ uint64_t x230;
+ uint64_t x231;
+ uint64_t x232;
+ uint64_t x233;
+ uint64_t x234;
+ uint64_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint64_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint64_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint64_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint64_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint64_t x245;
+ fiat_secp384r1_uint1 x246;
+ uint64_t x247;
+ fiat_secp384r1_uint1 x248;
+ uint64_t x249;
+ fiat_secp384r1_uint1 x250;
+ uint64_t x251;
+ fiat_secp384r1_uint1 x252;
+ uint64_t x253;
+ fiat_secp384r1_uint1 x254;
+ uint64_t x255;
+ fiat_secp384r1_uint1 x256;
+ uint64_t x257;
+ fiat_secp384r1_uint1 x258;
+ uint64_t x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint64_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint64_t x265;
+ uint64_t x266;
+ uint64_t x267;
+ fiat_secp384r1_uint1 x268;
+ uint64_t x269;
+ fiat_secp384r1_uint1 x270;
+ uint64_t x271;
+ fiat_secp384r1_uint1 x272;
+ uint64_t x273;
+ fiat_secp384r1_uint1 x274;
+ uint64_t x275;
+ fiat_secp384r1_uint1 x276;
+ uint64_t x277;
+ fiat_secp384r1_uint1 x278;
+ uint64_t x279;
+ fiat_secp384r1_uint1 x280;
+ uint64_t x281;
+ fiat_secp384r1_uint1 x282;
+ uint64_t x283;
+ fiat_secp384r1_uint1 x284;
+ uint64_t x285;
+ fiat_secp384r1_uint1 x286;
+ uint64_t x287;
+ uint64_t x288;
+ uint64_t x289;
+ uint64_t x290;
+ uint64_t x291;
+ uint64_t x292;
+ uint64_t x293;
+ uint64_t x294;
+ uint64_t x295;
+ uint64_t x296;
+ uint64_t x297;
+ uint64_t x298;
+ uint64_t x299;
+ uint64_t x300;
+ uint64_t x301;
+ fiat_secp384r1_uint1 x302;
+ uint64_t x303;
+ fiat_secp384r1_uint1 x304;
+ uint64_t x305;
+ fiat_secp384r1_uint1 x306;
+ uint64_t x307;
+ fiat_secp384r1_uint1 x308;
+ uint64_t x309;
+ fiat_secp384r1_uint1 x310;
+ uint64_t x311;
+ fiat_secp384r1_uint1 x312;
+ uint64_t x313;
+ fiat_secp384r1_uint1 x314;
+ uint64_t x315;
+ fiat_secp384r1_uint1 x316;
+ uint64_t x317;
+ fiat_secp384r1_uint1 x318;
+ uint64_t x319;
+ fiat_secp384r1_uint1 x320;
+ uint64_t x321;
+ fiat_secp384r1_uint1 x322;
+ uint64_t x323;
+ fiat_secp384r1_uint1 x324;
+ uint64_t x325;
+ uint64_t x326;
+ uint64_t x327;
+ uint64_t x328;
+ uint64_t x329;
+ uint64_t x330;
+ uint64_t x331;
+ uint64_t x332;
+ uint64_t x333;
+ fiat_secp384r1_uint1 x334;
+ uint64_t x335;
+ fiat_secp384r1_uint1 x336;
+ uint64_t x337;
+ fiat_secp384r1_uint1 x338;
+ uint64_t x339;
+ fiat_secp384r1_uint1 x340;
+ uint64_t x341;
+ fiat_secp384r1_uint1 x342;
+ uint64_t x343;
+ fiat_secp384r1_uint1 x344;
+ uint64_t x345;
+ fiat_secp384r1_uint1 x346;
+ uint64_t x347;
+ fiat_secp384r1_uint1 x348;
+ uint64_t x349;
+ fiat_secp384r1_uint1 x350;
+ uint64_t x351;
+ fiat_secp384r1_uint1 x352;
+ uint64_t x353;
+ uint64_t x354;
+ uint64_t x355;
+ uint64_t x356;
+ uint64_t x357;
+ uint64_t x358;
+ uint64_t x359;
+ uint64_t x360;
+ uint64_t x361;
+ uint64_t x362;
+ uint64_t x363;
+ uint64_t x364;
+ uint64_t x365;
+ uint64_t x366;
+ uint64_t x367;
+ fiat_secp384r1_uint1 x368;
+ uint64_t x369;
+ fiat_secp384r1_uint1 x370;
+ uint64_t x371;
+ fiat_secp384r1_uint1 x372;
+ uint64_t x373;
+ fiat_secp384r1_uint1 x374;
+ uint64_t x375;
+ fiat_secp384r1_uint1 x376;
+ uint64_t x377;
+ fiat_secp384r1_uint1 x378;
+ uint64_t x379;
+ fiat_secp384r1_uint1 x380;
+ uint64_t x381;
+ fiat_secp384r1_uint1 x382;
+ uint64_t x383;
+ fiat_secp384r1_uint1 x384;
+ uint64_t x385;
+ fiat_secp384r1_uint1 x386;
+ uint64_t x387;
+ fiat_secp384r1_uint1 x388;
+ uint64_t x389;
+ fiat_secp384r1_uint1 x390;
+ uint64_t x391;
+ fiat_secp384r1_uint1 x392;
+ uint64_t x393;
+ fiat_secp384r1_uint1 x394;
+ uint64_t x395;
+ fiat_secp384r1_uint1 x396;
+ uint64_t x397;
+ fiat_secp384r1_uint1 x398;
+ uint64_t x399;
+ fiat_secp384r1_uint1 x400;
+ uint64_t x401;
+ fiat_secp384r1_uint1 x402;
+ uint64_t x403;
+ fiat_secp384r1_uint1 x404;
+ uint64_t x405;
+ uint64_t x406;
+ uint64_t x407;
+ uint64_t x408;
+ uint64_t x409;
+ uint64_t x410;
+ x1 = (arg1[1]); /* limbs 1..5 are loaded first; each is used in its own round below */
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[0]); /* limb 0 drives round 0 */
+ fiat_secp384r1_mulx_u64(&x7, &x8, x6, UINT64_C(0x200000000)); /* round 0: limb times the constant, top limb first */
+ fiat_secp384r1_mulx_u64(&x9, &x10, x6, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x11, &x12, x6, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x13, &x14, x6, UINT64_C(0xfffffffe00000001)); /* x13: lowest limb of the product */
+ fiat_secp384r1_addcarryx_u64(&x15, &x16, 0x0, x14, x11); /* stitch high halves onto the next low halves */
+ fiat_secp384r1_addcarryx_u64(&x17, &x18, x16, x12, x9);
+ fiat_secp384r1_addcarryx_u64(&x19, &x20, x18, x10, x7);
+ fiat_secp384r1_addcarryx_u64(&x21, &x22, x20, x8, x6); /* adding x6 itself is the constant's limb 1 */
+ fiat_secp384r1_mulx_u64(&x23, &x24, x13, UINT64_C(0x100000001)); /* x23: reduction factor; x24 is never read */
+ fiat_secp384r1_mulx_u64(&x25, &x26, x23, UINT64_C(0xffffffffffffffff)); /* x23 times the limbs of m, top limb first */
+ fiat_secp384r1_mulx_u64(&x27, &x28, x23, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x29, &x30, x23, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x31, &x32, x23, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x33, &x34, x23, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x35, &x36, x23, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x37, &x38, 0x0, x36, x33);
+ fiat_secp384r1_addcarryx_u64(&x39, &x40, x38, x34, x31);
+ fiat_secp384r1_addcarryx_u64(&x41, &x42, x40, x32, x29);
+ fiat_secp384r1_addcarryx_u64(&x43, &x44, x42, x30, x27);
+ fiat_secp384r1_addcarryx_u64(&x45, &x46, x44, x28, x25);
+ fiat_secp384r1_addcarryx_u64(&x47, &x48, 0x0, x13, x35); /* x47 is never read; only the carry x48 is used */
+ fiat_secp384r1_addcarryx_u64(&x49, &x50, x48, x15, x37);
+ fiat_secp384r1_addcarryx_u64(&x51, &x52, x50, x17, x39);
+ fiat_secp384r1_addcarryx_u64(&x53, &x54, x52, x19, x41);
+ fiat_secp384r1_addcarryx_u64(&x55, &x56, x54, x21, x43);
+ fiat_secp384r1_addcarryx_u64(&x57, &x58, x56, x22, x45);
+ fiat_secp384r1_addcarryx_u64(&x59, &x60, x58, 0x0, (x46 + x26));
+ fiat_secp384r1_mulx_u64(&x61, &x62, x1, UINT64_C(0x200000000)); /* round 1: limb arg1[1] */
+ fiat_secp384r1_mulx_u64(&x63, &x64, x1, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x65, &x66, x1, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x67, &x68, x1, UINT64_C(0xfffffffe00000001));
+ fiat_secp384r1_addcarryx_u64(&x69, &x70, 0x0, x68, x65);
+ fiat_secp384r1_addcarryx_u64(&x71, &x72, x70, x66, x63);
+ fiat_secp384r1_addcarryx_u64(&x73, &x74, x72, x64, x61);
+ fiat_secp384r1_addcarryx_u64(&x75, &x76, x74, x62, x1);
+ fiat_secp384r1_addcarryx_u64(&x77, &x78, 0x0, x49, x67); /* add the product to the previous round's result */
+ fiat_secp384r1_addcarryx_u64(&x79, &x80, x78, x51, x69);
+ fiat_secp384r1_addcarryx_u64(&x81, &x82, x80, x53, x71);
+ fiat_secp384r1_addcarryx_u64(&x83, &x84, x82, x55, x73);
+ fiat_secp384r1_addcarryx_u64(&x85, &x86, x84, x57, x75);
+ fiat_secp384r1_addcarryx_u64(&x87, &x88, x86, x59, x76);
+ fiat_secp384r1_mulx_u64(&x89, &x90, x77, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x91, &x92, x89, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x93, &x94, x89, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x95, &x96, x89, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x97, &x98, x89, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x99, &x100, x89, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x101, &x102, x89, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x103, &x104, 0x0, x102, x99);
+ fiat_secp384r1_addcarryx_u64(&x105, &x106, x104, x100, x97);
+ fiat_secp384r1_addcarryx_u64(&x107, &x108, x106, x98, x95);
+ fiat_secp384r1_addcarryx_u64(&x109, &x110, x108, x96, x93);
+ fiat_secp384r1_addcarryx_u64(&x111, &x112, x110, x94, x91);
+ fiat_secp384r1_addcarryx_u64(&x113, &x114, 0x0, x77, x101);
+ fiat_secp384r1_addcarryx_u64(&x115, &x116, x114, x79, x103);
+ fiat_secp384r1_addcarryx_u64(&x117, &x118, x116, x81, x105);
+ fiat_secp384r1_addcarryx_u64(&x119, &x120, x118, x83, x107);
+ fiat_secp384r1_addcarryx_u64(&x121, &x122, x120, x85, x109);
+ fiat_secp384r1_addcarryx_u64(&x123, &x124, x122, x87, x111);
+ fiat_secp384r1_addcarryx_u64(&x125, &x126, x124, ((uint64_t)x88 + x60),
+ (x112 + x92));
+ fiat_secp384r1_mulx_u64(&x127, &x128, x2, UINT64_C(0x200000000)); /* round 2: limb arg1[2] */
+ fiat_secp384r1_mulx_u64(&x129, &x130, x2, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x131, &x132, x2, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x133, &x134, x2, UINT64_C(0xfffffffe00000001));
+ fiat_secp384r1_addcarryx_u64(&x135, &x136, 0x0, x134, x131);
+ fiat_secp384r1_addcarryx_u64(&x137, &x138, x136, x132, x129);
+ fiat_secp384r1_addcarryx_u64(&x139, &x140, x138, x130, x127);
+ fiat_secp384r1_addcarryx_u64(&x141, &x142, x140, x128, x2);
+ fiat_secp384r1_addcarryx_u64(&x143, &x144, 0x0, x115, x133);
+ fiat_secp384r1_addcarryx_u64(&x145, &x146, x144, x117, x135);
+ fiat_secp384r1_addcarryx_u64(&x147, &x148, x146, x119, x137);
+ fiat_secp384r1_addcarryx_u64(&x149, &x150, x148, x121, x139);
+ fiat_secp384r1_addcarryx_u64(&x151, &x152, x150, x123, x141);
+ fiat_secp384r1_addcarryx_u64(&x153, &x154, x152, x125, x142);
+ fiat_secp384r1_mulx_u64(&x155, &x156, x143, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x157, &x158, x155, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x159, &x160, x155, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x161, &x162, x155, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x163, &x164, x155, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x165, &x166, x155, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x167, &x168, x155, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x169, &x170, 0x0, x168, x165);
+ fiat_secp384r1_addcarryx_u64(&x171, &x172, x170, x166, x163);
+ fiat_secp384r1_addcarryx_u64(&x173, &x174, x172, x164, x161);
+ fiat_secp384r1_addcarryx_u64(&x175, &x176, x174, x162, x159);
+ fiat_secp384r1_addcarryx_u64(&x177, &x178, x176, x160, x157);
+ fiat_secp384r1_addcarryx_u64(&x179, &x180, 0x0, x143, x167);
+ fiat_secp384r1_addcarryx_u64(&x181, &x182, x180, x145, x169);
+ fiat_secp384r1_addcarryx_u64(&x183, &x184, x182, x147, x171);
+ fiat_secp384r1_addcarryx_u64(&x185, &x186, x184, x149, x173);
+ fiat_secp384r1_addcarryx_u64(&x187, &x188, x186, x151, x175);
+ fiat_secp384r1_addcarryx_u64(&x189, &x190, x188, x153, x177);
+ fiat_secp384r1_addcarryx_u64(&x191, &x192, x190, ((uint64_t)x154 + x126),
+ (x178 + x158));
+ fiat_secp384r1_mulx_u64(&x193, &x194, x3, UINT64_C(0x200000000)); /* round 3: limb arg1[3] */
+ fiat_secp384r1_mulx_u64(&x195, &x196, x3, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x197, &x198, x3, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x199, &x200, x3, UINT64_C(0xfffffffe00000001));
+ fiat_secp384r1_addcarryx_u64(&x201, &x202, 0x0, x200, x197);
+ fiat_secp384r1_addcarryx_u64(&x203, &x204, x202, x198, x195);
+ fiat_secp384r1_addcarryx_u64(&x205, &x206, x204, x196, x193);
+ fiat_secp384r1_addcarryx_u64(&x207, &x208, x206, x194, x3);
+ fiat_secp384r1_addcarryx_u64(&x209, &x210, 0x0, x181, x199);
+ fiat_secp384r1_addcarryx_u64(&x211, &x212, x210, x183, x201);
+ fiat_secp384r1_addcarryx_u64(&x213, &x214, x212, x185, x203);
+ fiat_secp384r1_addcarryx_u64(&x215, &x216, x214, x187, x205);
+ fiat_secp384r1_addcarryx_u64(&x217, &x218, x216, x189, x207);
+ fiat_secp384r1_addcarryx_u64(&x219, &x220, x218, x191, x208);
+ fiat_secp384r1_mulx_u64(&x221, &x222, x209, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x223, &x224, x221, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x225, &x226, x221, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x227, &x228, x221, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x229, &x230, x221, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x231, &x232, x221, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x233, &x234, x221, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x235, &x236, 0x0, x234, x231);
+ fiat_secp384r1_addcarryx_u64(&x237, &x238, x236, x232, x229);
+ fiat_secp384r1_addcarryx_u64(&x239, &x240, x238, x230, x227);
+ fiat_secp384r1_addcarryx_u64(&x241, &x242, x240, x228, x225);
+ fiat_secp384r1_addcarryx_u64(&x243, &x244, x242, x226, x223);
+ fiat_secp384r1_addcarryx_u64(&x245, &x246, 0x0, x209, x233);
+ fiat_secp384r1_addcarryx_u64(&x247, &x248, x246, x211, x235);
+ fiat_secp384r1_addcarryx_u64(&x249, &x250, x248, x213, x237);
+ fiat_secp384r1_addcarryx_u64(&x251, &x252, x250, x215, x239);
+ fiat_secp384r1_addcarryx_u64(&x253, &x254, x252, x217, x241);
+ fiat_secp384r1_addcarryx_u64(&x255, &x256, x254, x219, x243);
+ fiat_secp384r1_addcarryx_u64(&x257, &x258, x256, ((uint64_t)x220 + x192),
+ (x244 + x224));
+ fiat_secp384r1_mulx_u64(&x259, &x260, x4, UINT64_C(0x200000000)); /* round 4: limb arg1[4] */
+ fiat_secp384r1_mulx_u64(&x261, &x262, x4, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x263, &x264, x4, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x265, &x266, x4, UINT64_C(0xfffffffe00000001));
+ fiat_secp384r1_addcarryx_u64(&x267, &x268, 0x0, x266, x263);
+ fiat_secp384r1_addcarryx_u64(&x269, &x270, x268, x264, x261);
+ fiat_secp384r1_addcarryx_u64(&x271, &x272, x270, x262, x259);
+ fiat_secp384r1_addcarryx_u64(&x273, &x274, x272, x260, x4);
+ fiat_secp384r1_addcarryx_u64(&x275, &x276, 0x0, x247, x265);
+ fiat_secp384r1_addcarryx_u64(&x277, &x278, x276, x249, x267);
+ fiat_secp384r1_addcarryx_u64(&x279, &x280, x278, x251, x269);
+ fiat_secp384r1_addcarryx_u64(&x281, &x282, x280, x253, x271);
+ fiat_secp384r1_addcarryx_u64(&x283, &x284, x282, x255, x273);
+ fiat_secp384r1_addcarryx_u64(&x285, &x286, x284, x257, x274);
+ fiat_secp384r1_mulx_u64(&x287, &x288, x275, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x289, &x290, x287, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x291, &x292, x287, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x293, &x294, x287, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x295, &x296, x287, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x297, &x298, x287, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x299, &x300, x287, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x301, &x302, 0x0, x300, x297);
+ fiat_secp384r1_addcarryx_u64(&x303, &x304, x302, x298, x295);
+ fiat_secp384r1_addcarryx_u64(&x305, &x306, x304, x296, x293);
+ fiat_secp384r1_addcarryx_u64(&x307, &x308, x306, x294, x291);
+ fiat_secp384r1_addcarryx_u64(&x309, &x310, x308, x292, x289);
+ fiat_secp384r1_addcarryx_u64(&x311, &x312, 0x0, x275, x299);
+ fiat_secp384r1_addcarryx_u64(&x313, &x314, x312, x277, x301);
+ fiat_secp384r1_addcarryx_u64(&x315, &x316, x314, x279, x303);
+ fiat_secp384r1_addcarryx_u64(&x317, &x318, x316, x281, x305);
+ fiat_secp384r1_addcarryx_u64(&x319, &x320, x318, x283, x307);
+ fiat_secp384r1_addcarryx_u64(&x321, &x322, x320, x285, x309);
+ fiat_secp384r1_addcarryx_u64(&x323, &x324, x322, ((uint64_t)x286 + x258),
+ (x310 + x290));
+ fiat_secp384r1_mulx_u64(&x325, &x326, x5, UINT64_C(0x200000000)); /* round 5: limb arg1[5] */
+ fiat_secp384r1_mulx_u64(&x327, &x328, x5, UINT64_C(0xfffffffe00000000));
+ fiat_secp384r1_mulx_u64(&x329, &x330, x5, UINT64_C(0x200000000));
+ fiat_secp384r1_mulx_u64(&x331, &x332, x5, UINT64_C(0xfffffffe00000001));
+ fiat_secp384r1_addcarryx_u64(&x333, &x334, 0x0, x332, x329);
+ fiat_secp384r1_addcarryx_u64(&x335, &x336, x334, x330, x327);
+ fiat_secp384r1_addcarryx_u64(&x337, &x338, x336, x328, x325);
+ fiat_secp384r1_addcarryx_u64(&x339, &x340, x338, x326, x5);
+ fiat_secp384r1_addcarryx_u64(&x341, &x342, 0x0, x313, x331);
+ fiat_secp384r1_addcarryx_u64(&x343, &x344, x342, x315, x333);
+ fiat_secp384r1_addcarryx_u64(&x345, &x346, x344, x317, x335);
+ fiat_secp384r1_addcarryx_u64(&x347, &x348, x346, x319, x337);
+ fiat_secp384r1_addcarryx_u64(&x349, &x350, x348, x321, x339);
+ fiat_secp384r1_addcarryx_u64(&x351, &x352, x350, x323, x340);
+ fiat_secp384r1_mulx_u64(&x353, &x354, x341, UINT64_C(0x100000001));
+ fiat_secp384r1_mulx_u64(&x355, &x356, x353, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x357, &x358, x353, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x359, &x360, x353, UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_mulx_u64(&x361, &x362, x353, UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_mulx_u64(&x363, &x364, x353, UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_mulx_u64(&x365, &x366, x353, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u64(&x367, &x368, 0x0, x366, x363);
+ fiat_secp384r1_addcarryx_u64(&x369, &x370, x368, x364, x361);
+ fiat_secp384r1_addcarryx_u64(&x371, &x372, x370, x362, x359);
+ fiat_secp384r1_addcarryx_u64(&x373, &x374, x372, x360, x357);
+ fiat_secp384r1_addcarryx_u64(&x375, &x376, x374, x358, x355);
+ fiat_secp384r1_addcarryx_u64(&x377, &x378, 0x0, x341, x365);
+ fiat_secp384r1_addcarryx_u64(&x379, &x380, x378, x343, x367);
+ fiat_secp384r1_addcarryx_u64(&x381, &x382, x380, x345, x369);
+ fiat_secp384r1_addcarryx_u64(&x383, &x384, x382, x347, x371);
+ fiat_secp384r1_addcarryx_u64(&x385, &x386, x384, x349, x373);
+ fiat_secp384r1_addcarryx_u64(&x387, &x388, x386, x351, x375);
+ fiat_secp384r1_addcarryx_u64(&x389, &x390, x388, ((uint64_t)x352 + x324),
+ (x376 + x356));
+ fiat_secp384r1_subborrowx_u64(&x391, &x392, 0x0, x379,
+ UINT32_C(0xffffffff)); /* trial subtraction of m, lowest limb first */
+ fiat_secp384r1_subborrowx_u64(&x393, &x394, x392, x381,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x395, &x396, x394, x383,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x397, &x398, x396, x385,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x399, &x400, x398, x387,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x401, &x402, x400, x389,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x403, &x404, x402, x390, 0x0); /* x404: final borrow; x403 is never read */
+ fiat_secp384r1_cmovznz_u64(&x405, x404, x391, x379); /* per limb: subtracted vs. unsubtracted value, chosen by x404 */
+ fiat_secp384r1_cmovznz_u64(&x406, x404, x393, x381);
+ fiat_secp384r1_cmovznz_u64(&x407, x404, x395, x383);
+ fiat_secp384r1_cmovznz_u64(&x408, x404, x397, x385);
+ fiat_secp384r1_cmovznz_u64(&x409, x404, x399, x387);
+ fiat_secp384r1_cmovznz_u64(&x410, x404, x401, x389);
+ out1[0] = x405;
+ out1[1] = x406;
+ out1[2] = x407;
+ out1[3] = x408;
+ out1[4] = x409;
+ out1[5] = x410;
+}
+
+/*
+ * The function fiat_secp384r1_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ *
+ * Implementation notes (read off the body below):
+ * - *out1 receives the bitwise OR of the six limbs of arg1, so it is zero
+ *   exactly when every limb is zero. A non-zero result carries no meaning
+ *   beyond being non-zero; in particular it is not normalised to 1.
+ * - No conversion out of the Montgomery domain is performed; the raw limbs
+ *   are inspected directly.
+ * - The body contains no branch or loop; arg1 is only read and *out1 is the
+ *   only location written.
+ */
+static void
+fiat_secp384r1_nonzero(uint64_t *out1, const uint64_t arg1[6])
+{
+ uint64_t x1;
+ x1 = ((arg1[0]) | /* fold all six limbs into one word */
+ ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | ((arg1[4]) | (arg1[5]))))));
+ *out1 = x1;
+}
+
+/*
+ * The function fiat_secp384r1_selectznz is a multi-limb conditional select.
+ *
+ * Postconditions:
+ * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *
+ * Implementation notes (read off the body below):
+ * - The selection is done limb by limb: fiat_secp384r1_cmovznz_u64 is called
+ *   once per index 0..5 with the same condition arg1 and the matching limbs
+ *   of arg2 and arg3. This function itself contains no branch or loop.
+ *   NOTE(review): whether the selection is free of data-dependent timing
+ *   depends on fiat_secp384r1_cmovznz_u64, which is defined elsewhere --
+ *   confirm there.
+ * - All six selections are computed into locals x1..x6 before any element of
+ *   out1 is stored, i.e. every read of arg2 and arg3 happens before the
+ *   first write to out1.
+ */
+static void
+fiat_secp384r1_selectznz(uint64_t out1[6],
+ fiat_secp384r1_uint1 arg1,
+ const uint64_t arg2[6],
+ const uint64_t arg3[6])
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ fiat_secp384r1_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0])); /* one select per limb, same condition each time */
+ fiat_secp384r1_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
+ fiat_secp384r1_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
+ fiat_secp384r1_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
+ fiat_secp384r1_cmovznz_u64(&x5, arg1, (arg2[4]), (arg3[4]));
+ fiat_secp384r1_cmovznz_u64(&x6, arg1, (arg2[5]), (arg3[5]));
+ out1[0] = x1; /* stores happen only after all selections are done */
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+}
+
+/*
+ * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..47]
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
+ */
+static void
+fiat_secp384r1_to_bytes(uint8_t out1[48], const uint64_t arg1[6])
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint8_t x7;
+ uint64_t x8;
+ uint8_t x9;
+ uint64_t x10;
+ uint8_t x11;
+ uint64_t x12;
+ uint8_t x13;
+ uint64_t x14;
+ uint8_t x15;
+ uint64_t x16;
+ uint8_t x17;
+ uint64_t x18;
+ uint8_t x19;
+ uint8_t x20;
+ uint8_t x21;
+ uint64_t x22;
+ uint8_t x23;
+ uint64_t x24;
+ uint8_t x25;
+ uint64_t x26;
+ uint8_t x27;
+ uint64_t x28;
+ uint8_t x29;
+ uint64_t x30;
+ uint8_t x31;
+ uint64_t x32;
+ uint8_t x33;
+ uint8_t x34;
+ uint8_t x35;
+ uint64_t x36;
+ uint8_t x37;
+ uint64_t x38;
+ uint8_t x39;
+ uint64_t x40;
+ uint8_t x41;
+ uint64_t x42;
+ uint8_t x43;
+ uint64_t x44;
+ uint8_t x45;
+ uint64_t x46;
+ uint8_t x47;
+ uint8_t x48;
+ uint8_t x49;
+ uint64_t x50;
+ uint8_t x51;
+ uint64_t x52;
+ uint8_t x53;
+ uint64_t x54;
+ uint8_t x55;
+ uint64_t x56;
+ uint8_t x57;
+ uint64_t x58;
+ uint8_t x59;
+ uint64_t x60;
+ uint8_t x61;
+ uint8_t x62;
+ uint8_t x63;
+ uint64_t x64;
+ uint8_t x65;
+ uint64_t x66;
+ uint8_t x67;
+ uint64_t x68;
+ uint8_t x69;
+ uint64_t x70;
+ uint8_t x71;
+ uint64_t x72;
+ uint8_t x73;
+ uint64_t x74;
+ uint8_t x75;
+ uint8_t x76;
+ uint8_t x77;
+ uint64_t x78;
+ uint8_t x79;
+ uint64_t x80;
+ uint8_t x81;
+ uint64_t x82;
+ uint8_t x83;
+ uint64_t x84;
+ uint8_t x85;
+ uint64_t x86;
+ uint8_t x87;
+ uint64_t x88;
+ uint8_t x89;
+ uint8_t x90;
+ x1 = (arg1[5]);
+ x2 = (arg1[4]);
+ x3 = (arg1[3]);
+ x4 = (arg1[2]);
+ x5 = (arg1[1]);
+ x6 = (arg1[0]);
+ x7 = (uint8_t)(x6 & UINT8_C(0xff));
+ x8 = (x6 >> 8);
+ x9 = (uint8_t)(x8 & UINT8_C(0xff));
+ x10 = (x8 >> 8);
+ x11 = (uint8_t)(x10 & UINT8_C(0xff));
+ x12 = (x10 >> 8);
+ x13 = (uint8_t)(x12 & UINT8_C(0xff));
+ x14 = (x12 >> 8);
+ x15 = (uint8_t)(x14 & UINT8_C(0xff));
+ x16 = (x14 >> 8);
+ x17 = (uint8_t)(x16 & UINT8_C(0xff));
+ x18 = (x16 >> 8);
+ x19 = (uint8_t)(x18 & UINT8_C(0xff));
+ x20 = (uint8_t)(x18 >> 8);
+ x21 = (uint8_t)(x5 & UINT8_C(0xff));
+ x22 = (x5 >> 8);
+ x23 = (uint8_t)(x22 & UINT8_C(0xff));
+ x24 = (x22 >> 8);
+ x25 = (uint8_t)(x24 & UINT8_C(0xff));
+ x26 = (x24 >> 8);
+ x27 = (uint8_t)(x26 & UINT8_C(0xff));
+ x28 = (x26 >> 8);
+ x29 = (uint8_t)(x28 & UINT8_C(0xff));
+ x30 = (x28 >> 8);
+ x31 = (uint8_t)(x30 & UINT8_C(0xff));
+ x32 = (x30 >> 8);
+ x33 = (uint8_t)(x32 & UINT8_C(0xff));
+ x34 = (uint8_t)(x32 >> 8);
+ x35 = (uint8_t)(x4 & UINT8_C(0xff));
+ x36 = (x4 >> 8);
+ x37 = (uint8_t)(x36 & UINT8_C(0xff));
+ x38 = (x36 >> 8);
+ x39 = (uint8_t)(x38 & UINT8_C(0xff));
+ x40 = (x38 >> 8);
+ x41 = (uint8_t)(x40 & UINT8_C(0xff));
+ x42 = (x40 >> 8);
+ x43 = (uint8_t)(x42 & UINT8_C(0xff));
+ x44 = (x42 >> 8);
+ x45 = (uint8_t)(x44 & UINT8_C(0xff));
+ x46 = (x44 >> 8);
+ x47 = (uint8_t)(x46 & UINT8_C(0xff));
+ x48 = (uint8_t)(x46 >> 8);
+ x49 = (uint8_t)(x3 & UINT8_C(0xff));
+ x50 = (x3 >> 8);
+ x51 = (uint8_t)(x50 & UINT8_C(0xff));
+ x52 = (x50 >> 8);
+ x53 = (uint8_t)(x52 & UINT8_C(0xff));
+ x54 = (x52 >> 8);
+ x55 = (uint8_t)(x54 & UINT8_C(0xff));
+ x56 = (x54 >> 8);
+ x57 = (uint8_t)(x56 & UINT8_C(0xff));
+ x58 = (x56 >> 8);
+ x59 = (uint8_t)(x58 & UINT8_C(0xff));
+ x60 = (x58 >> 8);
+ x61 = (uint8_t)(x60 & UINT8_C(0xff));
+ x62 = (uint8_t)(x60 >> 8);
+ x63 = (uint8_t)(x2 & UINT8_C(0xff));
+ x64 = (x2 >> 8);
+ x65 = (uint8_t)(x64 & UINT8_C(0xff));
+ x66 = (x64 >> 8);
+ x67 = (uint8_t)(x66 & UINT8_C(0xff));
+ x68 = (x66 >> 8);
+ x69 = (uint8_t)(x68 & UINT8_C(0xff));
+ x70 = (x68 >> 8);
+ x71 = (uint8_t)(x70 & UINT8_C(0xff));
+ x72 = (x70 >> 8);
+ x73 = (uint8_t)(x72 & UINT8_C(0xff));
+ x74 = (x72 >> 8);
+ x75 = (uint8_t)(x74 & UINT8_C(0xff));
+ x76 = (uint8_t)(x74 >> 8);
+ x77 = (uint8_t)(x1 & UINT8_C(0xff));
+ x78 = (x1 >> 8);
+ x79 = (uint8_t)(x78 & UINT8_C(0xff));
+ x80 = (x78 >> 8);
+ x81 = (uint8_t)(x80 & UINT8_C(0xff));
+ x82 = (x80 >> 8);
+ x83 = (uint8_t)(x82 & UINT8_C(0xff));
+ x84 = (x82 >> 8);
+ x85 = (uint8_t)(x84 & UINT8_C(0xff));
+ x86 = (x84 >> 8);
+ x87 = (uint8_t)(x86 & UINT8_C(0xff));
+ x88 = (x86 >> 8);
+ x89 = (uint8_t)(x88 & UINT8_C(0xff));
+ x90 = (uint8_t)(x88 >> 8);
+ out1[0] = x7;
+ out1[1] = x9;
+ out1[2] = x11;
+ out1[3] = x13;
+ out1[4] = x15;
+ out1[5] = x17;
+ out1[6] = x19;
+ out1[7] = x20;
+ out1[8] = x21;
+ out1[9] = x23;
+ out1[10] = x25;
+ out1[11] = x27;
+ out1[12] = x29;
+ out1[13] = x31;
+ out1[14] = x33;
+ out1[15] = x34;
+ out1[16] = x35;
+ out1[17] = x37;
+ out1[18] = x39;
+ out1[19] = x41;
+ out1[20] = x43;
+ out1[21] = x45;
+ out1[22] = x47;
+ out1[23] = x48;
+ out1[24] = x49;
+ out1[25] = x51;
+ out1[26] = x53;
+ out1[27] = x55;
+ out1[28] = x57;
+ out1[29] = x59;
+ out1[30] = x61;
+ out1[31] = x62;
+ out1[32] = x63;
+ out1[33] = x65;
+ out1[34] = x67;
+ out1[35] = x69;
+ out1[36] = x71;
+ out1[37] = x73;
+ out1[38] = x75;
+ out1[39] = x76;
+ out1[40] = x77;
+ out1[41] = x79;
+ out1[42] = x81;
+ out1[43] = x83;
+ out1[44] = x85;
+ out1[45] = x87;
+ out1[46] = x89;
+ out1[47] = x90;
+}
+
+/*
+ * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+ *
+ * Preconditions:
+ * 0 ≤ bytes_eval arg1 < m
+ * Postconditions:
+ * eval out1 mod m = bytes_eval arg1 mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ */
+static void
+fiat_secp384r1_from_bytes(uint64_t out1[6],
+ const uint8_t arg1[48])
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint8_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint8_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ uint64_t x21;
+ uint64_t x22;
+ uint64_t x23;
+ uint8_t x24;
+ uint64_t x25;
+ uint64_t x26;
+ uint64_t x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint8_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint8_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ uint64_t x45;
+ uint64_t x46;
+ uint64_t x47;
+ uint8_t x48;
+ uint64_t x49;
+ uint64_t x50;
+ uint64_t x51;
+ uint64_t x52;
+ uint64_t x53;
+ uint64_t x54;
+ uint64_t x55;
+ uint64_t x56;
+ uint64_t x57;
+ uint64_t x58;
+ uint64_t x59;
+ uint64_t x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint64_t x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint64_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ uint64_t x82;
+ uint64_t x83;
+ uint64_t x84;
+ uint64_t x85;
+ uint64_t x86;
+ uint64_t x87;
+ uint64_t x88;
+ uint64_t x89;
+ uint64_t x90;
+ x1 = ((uint64_t)(arg1[47]) << 56);
+ x2 = ((uint64_t)(arg1[46]) << 48);
+ x3 = ((uint64_t)(arg1[45]) << 40);
+ x4 = ((uint64_t)(arg1[44]) << 32);
+ x5 = ((uint64_t)(arg1[43]) << 24);
+ x6 = ((uint64_t)(arg1[42]) << 16);
+ x7 = ((uint64_t)(arg1[41]) << 8);
+ x8 = (arg1[40]);
+ x9 = ((uint64_t)(arg1[39]) << 56);
+ x10 = ((uint64_t)(arg1[38]) << 48);
+ x11 = ((uint64_t)(arg1[37]) << 40);
+ x12 = ((uint64_t)(arg1[36]) << 32);
+ x13 = ((uint64_t)(arg1[35]) << 24);
+ x14 = ((uint64_t)(arg1[34]) << 16);
+ x15 = ((uint64_t)(arg1[33]) << 8);
+ x16 = (arg1[32]);
+ x17 = ((uint64_t)(arg1[31]) << 56);
+ x18 = ((uint64_t)(arg1[30]) << 48);
+ x19 = ((uint64_t)(arg1[29]) << 40);
+ x20 = ((uint64_t)(arg1[28]) << 32);
+ x21 = ((uint64_t)(arg1[27]) << 24);
+ x22 = ((uint64_t)(arg1[26]) << 16);
+ x23 = ((uint64_t)(arg1[25]) << 8);
+ x24 = (arg1[24]);
+ x25 = ((uint64_t)(arg1[23]) << 56);
+ x26 = ((uint64_t)(arg1[22]) << 48);
+ x27 = ((uint64_t)(arg1[21]) << 40);
+ x28 = ((uint64_t)(arg1[20]) << 32);
+ x29 = ((uint64_t)(arg1[19]) << 24);
+ x30 = ((uint64_t)(arg1[18]) << 16);
+ x31 = ((uint64_t)(arg1[17]) << 8);
+ x32 = (arg1[16]);
+ x33 = ((uint64_t)(arg1[15]) << 56);
+ x34 = ((uint64_t)(arg1[14]) << 48);
+ x35 = ((uint64_t)(arg1[13]) << 40);
+ x36 = ((uint64_t)(arg1[12]) << 32);
+ x37 = ((uint64_t)(arg1[11]) << 24);
+ x38 = ((uint64_t)(arg1[10]) << 16);
+ x39 = ((uint64_t)(arg1[9]) << 8);
+ x40 = (arg1[8]);
+ x41 = ((uint64_t)(arg1[7]) << 56);
+ x42 = ((uint64_t)(arg1[6]) << 48);
+ x43 = ((uint64_t)(arg1[5]) << 40);
+ x44 = ((uint64_t)(arg1[4]) << 32);
+ x45 = ((uint64_t)(arg1[3]) << 24);
+ x46 = ((uint64_t)(arg1[2]) << 16);
+ x47 = ((uint64_t)(arg1[1]) << 8);
+ x48 = (arg1[0]);
+ x49 = (x47 + (uint64_t)x48);
+ x50 = (x46 + x49);
+ x51 = (x45 + x50);
+ x52 = (x44 + x51);
+ x53 = (x43 + x52);
+ x54 = (x42 + x53);
+ x55 = (x41 + x54);
+ x56 = (x39 + (uint64_t)x40);
+ x57 = (x38 + x56);
+ x58 = (x37 + x57);
+ x59 = (x36 + x58);
+ x60 = (x35 + x59);
+ x61 = (x34 + x60);
+ x62 = (x33 + x61);
+ x63 = (x31 + (uint64_t)x32);
+ x64 = (x30 + x63);
+ x65 = (x29 + x64);
+ x66 = (x28 + x65);
+ x67 = (x27 + x66);
+ x68 = (x26 + x67);
+ x69 = (x25 + x68);
+ x70 = (x23 + (uint64_t)x24);
+ x71 = (x22 + x70);
+ x72 = (x21 + x71);
+ x73 = (x20 + x72);
+ x74 = (x19 + x73);
+ x75 = (x18 + x74);
+ x76 = (x17 + x75);
+ x77 = (x15 + (uint64_t)x16);
+ x78 = (x14 + x77);
+ x79 = (x13 + x78);
+ x80 = (x12 + x79);
+ x81 = (x11 + x80);
+ x82 = (x10 + x81);
+ x83 = (x9 + x82);
+ x84 = (x7 + (uint64_t)x8);
+ x85 = (x6 + x84);
+ x86 = (x5 + x85);
+ x87 = (x4 + x86);
+ x88 = (x3 + x87);
+ x89 = (x2 + x88);
+ x90 = (x1 + x89);
+ out1[0] = x55;
+ out1[1] = x62;
+ out1[2] = x69;
+ out1[3] = x76;
+ out1[4] = x83;
+ out1[5] = x90;
+}
+
+/*
+ * The function fiat_secp384r1_divstep computes a divstep.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg4 < m
+ * 0 ≤ eval arg5 < m
+ * Postconditions:
+ * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+ * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+ * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+ * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+ * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg5) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+ * 0 ≤ eval out4 < m
+ * 0 ≤ eval out5 < m
+ * 0 ≤ eval out2 < m
+ * 0 ≤ eval out3 < m
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0xffffffffffffffff]
+ * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * arg4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * arg5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ * out2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * out3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * out4: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * out5: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *
+ * Implementation notes:
+ * The branch condition of the postconditions is computed once as the flag
+ * x3, and every result is picked with fiat_secp384r1_cmovznz_u64; the body
+ * contains no if/else. The helpers fiat_secp384r1_addcarryx_u64,
+ * fiat_secp384r1_subborrowx_u64 and fiat_secp384r1_cmovznz_u64 are defined
+ * outside this excerpt. The inline comments assume add-with-carry,
+ * subtract-with-borrow and "yield the last argument when the flag is
+ * nonzero, otherwise the one before it" semantics, which is what the
+ * postconditions above require of them -- confirm against their definitions.
+ * arg2/arg3/out2/out3 are handled as 7-limb two's-complement values and
+ * arg4/arg5/out4/out5 as 6-limb values reduced modulo m; the limbs of m
+ * are the constants passed to the subborrowx calls.
+ * As coded, in the taken branch out5 is (-arg4 mod m) plus arg5 when the low
+ * bit of arg2 is set; the "arg5 - arg4" postcondition therefore relies on
+ * arg2 being odd. NOTE(review): presumably guaranteed by the caller -- confirm.
+ */
+static void
+fiat_secp384r1_divstep(
+ uint64_t *out1, uint64_t out2[7], uint64_t out3[7], uint64_t out4[6],
+ uint64_t out5[6], uint64_t arg1, const uint64_t arg2[7],
+ const uint64_t arg3[7], const uint64_t arg4[6], const uint64_t arg5[6])
+{
+ uint64_t x1;
+ fiat_secp384r1_uint1 x2;
+ fiat_secp384r1_uint1 x3;
+ uint64_t x4;
+ fiat_secp384r1_uint1 x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ fiat_secp384r1_uint1 x15;
+ uint64_t x16;
+ fiat_secp384r1_uint1 x17;
+ uint64_t x18;
+ fiat_secp384r1_uint1 x19;
+ uint64_t x20;
+ fiat_secp384r1_uint1 x21;
+ uint64_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint64_t x24;
+ fiat_secp384r1_uint1 x25;
+ uint64_t x26;
+ fiat_secp384r1_uint1 x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ fiat_secp384r1_uint1 x42;
+ uint64_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint64_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint64_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint64_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint64_t x51;
+ fiat_secp384r1_uint1 x52;
+ uint64_t x53;
+ fiat_secp384r1_uint1 x54;
+ uint64_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint64_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint64_t x59;
+ fiat_secp384r1_uint1 x60;
+ uint64_t x61;
+ fiat_secp384r1_uint1 x62;
+ uint64_t x63;
+ fiat_secp384r1_uint1 x64;
+ uint64_t x65;
+ fiat_secp384r1_uint1 x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ fiat_secp384r1_uint1 x74;
+ uint64_t x75;
+ fiat_secp384r1_uint1 x76;
+ uint64_t x77;
+ fiat_secp384r1_uint1 x78;
+ uint64_t x79;
+ fiat_secp384r1_uint1 x80;
+ uint64_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint64_t x83;
+ fiat_secp384r1_uint1 x84;
+ uint64_t x85;
+ uint64_t x86;
+ fiat_secp384r1_uint1 x87;
+ uint64_t x88;
+ fiat_secp384r1_uint1 x89;
+ uint64_t x90;
+ fiat_secp384r1_uint1 x91;
+ uint64_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint64_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint64_t x96;
+ fiat_secp384r1_uint1 x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ uint64_t x101;
+ uint64_t x102;
+ uint64_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint64_t x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ fiat_secp384r1_uint1 x113;
+ uint64_t x114;
+ fiat_secp384r1_uint1 x115;
+ uint64_t x116;
+ fiat_secp384r1_uint1 x117;
+ uint64_t x118;
+ fiat_secp384r1_uint1 x119;
+ uint64_t x120;
+ fiat_secp384r1_uint1 x121;
+ uint64_t x122;
+ fiat_secp384r1_uint1 x123;
+ uint64_t x124;
+ fiat_secp384r1_uint1 x125;
+ uint64_t x126;
+ uint64_t x127;
+ uint64_t x128;
+ uint64_t x129;
+ uint64_t x130;
+ uint64_t x131;
+ uint64_t x132;
+ fiat_secp384r1_uint1 x133;
+ uint64_t x134;
+ fiat_secp384r1_uint1 x135;
+ uint64_t x136;
+ fiat_secp384r1_uint1 x137;
+ uint64_t x138;
+ fiat_secp384r1_uint1 x139;
+ uint64_t x140;
+ fiat_secp384r1_uint1 x141;
+ uint64_t x142;
+ fiat_secp384r1_uint1 x143;
+ uint64_t x144;
+ fiat_secp384r1_uint1 x145;
+ uint64_t x146;
+ fiat_secp384r1_uint1 x147;
+ uint64_t x148;
+ fiat_secp384r1_uint1 x149;
+ uint64_t x150;
+ fiat_secp384r1_uint1 x151;
+ uint64_t x152;
+ fiat_secp384r1_uint1 x153;
+ uint64_t x154;
+ fiat_secp384r1_uint1 x155;
+ uint64_t x156;
+ fiat_secp384r1_uint1 x157;
+ uint64_t x158;
+ fiat_secp384r1_uint1 x159;
+ uint64_t x160;
+ uint64_t x161;
+ uint64_t x162;
+ uint64_t x163;
+ uint64_t x164;
+ uint64_t x165;
+ uint64_t x166;
+ uint64_t x167;
+ uint64_t x168;
+ uint64_t x169;
+ uint64_t x170;
+ uint64_t x171;
+ uint64_t x172;
+ uint64_t x173;
+ uint64_t x174;
+ uint64_t x175;
+ uint64_t x176;
+ uint64_t x177;
+ uint64_t x178;
+ fiat_secp384r1_addcarryx_u64(&x1, &x2, 0x0, (~arg1), 0x1); /* x1 = ~arg1 + 1, i.e. -arg1 in two's complement */
+ x3 = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(x1 >> 63) &
+ (fiat_secp384r1_uint1)((arg3[0]) & 0x1)); /* branch flag: top bit of -arg1 AND low bit of arg3 ("0 < arg1 and arg3 odd") */
+ fiat_secp384r1_addcarryx_u64(&x4, &x5, 0x0, (~arg1), 0x1); /* x4 = -arg1 again (same inputs as x1) */
+ fiat_secp384r1_cmovznz_u64(&x6, x3, arg1, x4); /* x6 = x3 ? -arg1 : arg1 */
+ fiat_secp384r1_cmovznz_u64(&x7, x3, (arg2[0]), (arg3[0])); /* x7..x13 = x3 ? arg3 : arg2 (becomes out2) */
+ fiat_secp384r1_cmovznz_u64(&x8, x3, (arg2[1]), (arg3[1]));
+ fiat_secp384r1_cmovznz_u64(&x9, x3, (arg2[2]), (arg3[2]));
+ fiat_secp384r1_cmovznz_u64(&x10, x3, (arg2[3]), (arg3[3]));
+ fiat_secp384r1_cmovznz_u64(&x11, x3, (arg2[4]), (arg3[4]));
+ fiat_secp384r1_cmovznz_u64(&x12, x3, (arg2[5]), (arg3[5]));
+ fiat_secp384r1_cmovznz_u64(&x13, x3, (arg2[6]), (arg3[6]));
+ fiat_secp384r1_addcarryx_u64(&x14, &x15, 0x0, 0x1, (~(arg2[0]))); /* x14..x26 = ~arg2 + 1 = -arg2 over 7 limbs */
+ fiat_secp384r1_addcarryx_u64(&x16, &x17, x15, 0x0, (~(arg2[1])));
+ fiat_secp384r1_addcarryx_u64(&x18, &x19, x17, 0x0, (~(arg2[2])));
+ fiat_secp384r1_addcarryx_u64(&x20, &x21, x19, 0x0, (~(arg2[3])));
+ fiat_secp384r1_addcarryx_u64(&x22, &x23, x21, 0x0, (~(arg2[4])));
+ fiat_secp384r1_addcarryx_u64(&x24, &x25, x23, 0x0, (~(arg2[5])));
+ fiat_secp384r1_addcarryx_u64(&x26, &x27, x25, 0x0, (~(arg2[6])));
+ fiat_secp384r1_cmovznz_u64(&x28, x3, (arg3[0]), x14); /* x28..x34 = x3 ? -arg2 : arg3 */
+ fiat_secp384r1_cmovznz_u64(&x29, x3, (arg3[1]), x16);
+ fiat_secp384r1_cmovznz_u64(&x30, x3, (arg3[2]), x18);
+ fiat_secp384r1_cmovznz_u64(&x31, x3, (arg3[3]), x20);
+ fiat_secp384r1_cmovznz_u64(&x32, x3, (arg3[4]), x22);
+ fiat_secp384r1_cmovznz_u64(&x33, x3, (arg3[5]), x24);
+ fiat_secp384r1_cmovznz_u64(&x34, x3, (arg3[6]), x26);
+ fiat_secp384r1_cmovznz_u64(&x35, x3, (arg4[0]), (arg5[0])); /* x35..x40 = x3 ? arg5 : arg4 */
+ fiat_secp384r1_cmovznz_u64(&x36, x3, (arg4[1]), (arg5[1]));
+ fiat_secp384r1_cmovznz_u64(&x37, x3, (arg4[2]), (arg5[2]));
+ fiat_secp384r1_cmovznz_u64(&x38, x3, (arg4[3]), (arg5[3]));
+ fiat_secp384r1_cmovznz_u64(&x39, x3, (arg4[4]), (arg5[4]));
+ fiat_secp384r1_cmovznz_u64(&x40, x3, (arg4[5]), (arg5[5]));
+ fiat_secp384r1_addcarryx_u64(&x41, &x42, 0x0, x35, x35); /* x41..x51 (carry x52) = 2 * (x35..x40) */
+ fiat_secp384r1_addcarryx_u64(&x43, &x44, x42, x36, x36);
+ fiat_secp384r1_addcarryx_u64(&x45, &x46, x44, x37, x37);
+ fiat_secp384r1_addcarryx_u64(&x47, &x48, x46, x38, x38);
+ fiat_secp384r1_addcarryx_u64(&x49, &x50, x48, x39, x39);
+ fiat_secp384r1_addcarryx_u64(&x51, &x52, x50, x40, x40);
+ fiat_secp384r1_subborrowx_u64(&x53, &x54, 0x0, x41, UINT32_C(0xffffffff)); /* x53..x63 = doubled value - m; x66 ends up as the final borrow */
+ fiat_secp384r1_subborrowx_u64(&x55, &x56, x54, x43,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x57, &x58, x56, x45,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x59, &x60, x58, x47,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x61, &x62, x60, x49,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x63, &x64, x62, x51,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x65, &x66, x64, x52, 0x0); /* folds the doubling carry x52 into the borrow chain */
+ x67 = (arg4[5]); /* x67..x72: local copies of the arg4 limbs */
+ x68 = (arg4[4]);
+ x69 = (arg4[3]);
+ x70 = (arg4[2]);
+ x71 = (arg4[1]);
+ x72 = (arg4[0]);
+ fiat_secp384r1_subborrowx_u64(&x73, &x74, 0x0, 0x0, x72); /* x73..x83 = 0 - arg4; x84 is the final borrow */
+ fiat_secp384r1_subborrowx_u64(&x75, &x76, x74, 0x0, x71);
+ fiat_secp384r1_subborrowx_u64(&x77, &x78, x76, 0x0, x70);
+ fiat_secp384r1_subborrowx_u64(&x79, &x80, x78, 0x0, x69);
+ fiat_secp384r1_subborrowx_u64(&x81, &x82, x80, 0x0, x68);
+ fiat_secp384r1_subborrowx_u64(&x83, &x84, x82, 0x0, x67);
+ fiat_secp384r1_cmovznz_u64(&x85, x84, 0x0, UINT64_C(0xffffffffffffffff)); /* x85 = all-ones mask if the negation borrowed, else 0 */
+ fiat_secp384r1_addcarryx_u64(&x86, &x87, 0x0, x73,
+ (x85 & UINT32_C(0xffffffff))); /* x86..x96 = (0 - arg4) + (m masked by x85), i.e. -arg4 mod m */
+ fiat_secp384r1_addcarryx_u64(&x88, &x89, x87, x75,
+ (x85 & UINT64_C(0xffffffff00000000)));
+ fiat_secp384r1_addcarryx_u64(&x90, &x91, x89, x77,
+ (x85 & UINT64_C(0xfffffffffffffffe)));
+ fiat_secp384r1_addcarryx_u64(&x92, &x93, x91, x79, x85);
+ fiat_secp384r1_addcarryx_u64(&x94, &x95, x93, x81, x85);
+ fiat_secp384r1_addcarryx_u64(&x96, &x97, x95, x83, x85);
+ fiat_secp384r1_cmovznz_u64(&x98, x3, (arg5[0]), x86); /* x98..x103 = x3 ? (-arg4 mod m) : arg5 */
+ fiat_secp384r1_cmovznz_u64(&x99, x3, (arg5[1]), x88);
+ fiat_secp384r1_cmovznz_u64(&x100, x3, (arg5[2]), x90);
+ fiat_secp384r1_cmovznz_u64(&x101, x3, (arg5[3]), x92);
+ fiat_secp384r1_cmovznz_u64(&x102, x3, (arg5[4]), x94);
+ fiat_secp384r1_cmovznz_u64(&x103, x3, (arg5[5]), x96);
+ x104 = (fiat_secp384r1_uint1)(x28 & 0x1); /* x104 = low bit of the selected 7-limb value x28..x34 */
+ fiat_secp384r1_cmovznz_u64(&x105, x104, 0x0, x7); /* x105..x111 = x104 ? (x7..x13) : 0 */
+ fiat_secp384r1_cmovznz_u64(&x106, x104, 0x0, x8);
+ fiat_secp384r1_cmovznz_u64(&x107, x104, 0x0, x9);
+ fiat_secp384r1_cmovznz_u64(&x108, x104, 0x0, x10);
+ fiat_secp384r1_cmovznz_u64(&x109, x104, 0x0, x11);
+ fiat_secp384r1_cmovznz_u64(&x110, x104, 0x0, x12);
+ fiat_secp384r1_cmovznz_u64(&x111, x104, 0x0, x13);
+ fiat_secp384r1_addcarryx_u64(&x112, &x113, 0x0, x28, x105); /* x112..x124 = (x28..x34) + (x105..x111) */
+ fiat_secp384r1_addcarryx_u64(&x114, &x115, x113, x29, x106);
+ fiat_secp384r1_addcarryx_u64(&x116, &x117, x115, x30, x107);
+ fiat_secp384r1_addcarryx_u64(&x118, &x119, x117, x31, x108);
+ fiat_secp384r1_addcarryx_u64(&x120, &x121, x119, x32, x109);
+ fiat_secp384r1_addcarryx_u64(&x122, &x123, x121, x33, x110);
+ fiat_secp384r1_addcarryx_u64(&x124, &x125, x123, x34, x111);
+ fiat_secp384r1_cmovznz_u64(&x126, x104, 0x0, x35); /* x126..x131 = x104 ? (x35..x40) : 0 */
+ fiat_secp384r1_cmovznz_u64(&x127, x104, 0x0, x36);
+ fiat_secp384r1_cmovznz_u64(&x128, x104, 0x0, x37);
+ fiat_secp384r1_cmovznz_u64(&x129, x104, 0x0, x38);
+ fiat_secp384r1_cmovznz_u64(&x130, x104, 0x0, x39);
+ fiat_secp384r1_cmovznz_u64(&x131, x104, 0x0, x40);
+ fiat_secp384r1_addcarryx_u64(&x132, &x133, 0x0, x98, x126); /* x132..x142 (carry x143) = (x98..x103) + (x126..x131) */
+ fiat_secp384r1_addcarryx_u64(&x134, &x135, x133, x99, x127);
+ fiat_secp384r1_addcarryx_u64(&x136, &x137, x135, x100, x128);
+ fiat_secp384r1_addcarryx_u64(&x138, &x139, x137, x101, x129);
+ fiat_secp384r1_addcarryx_u64(&x140, &x141, x139, x102, x130);
+ fiat_secp384r1_addcarryx_u64(&x142, &x143, x141, x103, x131);
+ fiat_secp384r1_subborrowx_u64(&x144, &x145, 0x0, x132,
+ UINT32_C(0xffffffff)); /* x144..x154 = that sum - m; x157 ends up as the final borrow */
+ fiat_secp384r1_subborrowx_u64(&x146, &x147, x145, x134,
+ UINT64_C(0xffffffff00000000));
+ fiat_secp384r1_subborrowx_u64(&x148, &x149, x147, x136,
+ UINT64_C(0xfffffffffffffffe));
+ fiat_secp384r1_subborrowx_u64(&x150, &x151, x149, x138,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x152, &x153, x151, x140,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x154, &x155, x153, x142,
+ UINT64_C(0xffffffffffffffff));
+ fiat_secp384r1_subborrowx_u64(&x156, &x157, x155, x143, 0x0); /* folds the addition carry x143 into the borrow chain */
+ fiat_secp384r1_addcarryx_u64(&x158, &x159, 0x0, x6, 0x1); /* x158 = x6 + 1 (becomes out1) */
+ x160 = ((x112 >> 1) | ((x114 << 63) & UINT64_C(0xffffffffffffffff))); /* x160..x165: shift x112..x124 right by one bit, pulling in the low bit of the next limb */
+ x161 = ((x114 >> 1) | ((x116 << 63) & UINT64_C(0xffffffffffffffff)));
+ x162 = ((x116 >> 1) | ((x118 << 63) & UINT64_C(0xffffffffffffffff)));
+ x163 = ((x118 >> 1) | ((x120 << 63) & UINT64_C(0xffffffffffffffff)));
+ x164 = ((x120 >> 1) | ((x122 << 63) & UINT64_C(0xffffffffffffffff)));
+ x165 = ((x122 >> 1) | ((x124 << 63) & UINT64_C(0xffffffffffffffff)));
+ x166 = ((x124 & UINT64_C(0x8000000000000000)) | (x124 >> 1)); /* top limb: shift right but keep bit 63, so the sign is preserved */
+ fiat_secp384r1_cmovznz_u64(&x167, x66, x53, x41); /* x167..x172: keep the unreduced doubling if subtracting m borrowed, else the difference */
+ fiat_secp384r1_cmovznz_u64(&x168, x66, x55, x43);
+ fiat_secp384r1_cmovznz_u64(&x169, x66, x57, x45);
+ fiat_secp384r1_cmovznz_u64(&x170, x66, x59, x47);
+ fiat_secp384r1_cmovznz_u64(&x171, x66, x61, x49);
+ fiat_secp384r1_cmovznz_u64(&x172, x66, x63, x51);
+ fiat_secp384r1_cmovznz_u64(&x173, x157, x144, x132); /* x173..x178: same conditional reduction for the x132..x142 sum */
+ fiat_secp384r1_cmovznz_u64(&x174, x157, x146, x134);
+ fiat_secp384r1_cmovznz_u64(&x175, x157, x148, x136);
+ fiat_secp384r1_cmovznz_u64(&x176, x157, x150, x138);
+ fiat_secp384r1_cmovznz_u64(&x177, x157, x152, x140);
+ fiat_secp384r1_cmovznz_u64(&x178, x157, x154, x142);
+ *out1 = x158;
+ out2[0] = x7;
+ out2[1] = x8;
+ out2[2] = x9;
+ out2[3] = x10;
+ out2[4] = x11;
+ out2[5] = x12;
+ out2[6] = x13;
+ out3[0] = x160;
+ out3[1] = x161;
+ out3[2] = x162;
+ out3[3] = x163;
+ out3[4] = x164;
+ out3[5] = x165;
+ out3[6] = x166;
+ out4[0] = x167;
+ out4[1] = x168;
+ out4[2] = x169;
+ out4[3] = x170;
+ out4[4] = x171;
+ out4[5] = x172;
+ out5[0] = x173;
+ out5[1] = x174;
+ out5[2] = x175;
+ out5[3] = x176;
+ out5[4] = x177;
+ out5[5] = x178;
+}
+
+/* END verbatim fiat code */
+
+/*
+ * curve-related constants
+ *
+ * Each table below holds six 64-bit words. const_psat lists its words
+ * least-significant first (its entries match, in order, the modulus limbs
+ * that fiat_secp384r1_divstep subtracts starting from limb 0); the other
+ * tables presumably follow the same limb order -- confirm against users.
+ *
+ * const_one: read least-significant first, evaluates to
+ * 2^128 + 2^96 - 2^32 + 1, which is 2^384 - p for the p given under
+ * const_psat. NOTE(review): presumably the value 1 in the Montgomery
+ * domain with R = 2^384 -- confirm.
+ * const_b: NOTE(review): presumably the curve coefficient b in
+ * Montgomery form -- confirm against the curve parameters.
+ * const_divstep: NOTE(review): presumably a precomputed factor applied
+ * after iterating fiat_secp384r1_divstep for inversion -- confirm
+ * against the caller.
+ * const_psat: evaluates to p = 2^384 - 2^128 - 2^96 + 2^32 - 1, the
+ * modulus m of the field routines above, with every limb fully
+ * populated.
+ */
+
+static const limb_t const_one[6] = {
+ UINT64_C(0xFFFFFFFF00000001), UINT64_C(0x00000000FFFFFFFF),
+ UINT64_C(0x0000000000000001), UINT64_C(0x0000000000000000),
+ UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000)
+};
+
+static const limb_t const_b[6] = {
+ UINT64_C(0x081188719D412DCC), UINT64_C(0xF729ADD87A4C32EC),
+ UINT64_C(0x77F2209B1920022E), UINT64_C(0xE3374BEE94938AE2),
+ UINT64_C(0xB62B21F41F022094), UINT64_C(0xCD08114B604FBFF9)
+};
+
+static const limb_t const_divstep[6] = {
+ UINT64_C(0xFFFFC80000005000), UINT64_C(0xFFFFB3FFFFFF83FF),
+ UINT64_C(0xFFFFF7FFFFFFFFFF), UINT64_C(0xFFFFEBFFFFFFEFFF),
+ UINT64_C(0x00000BFFFFFFF3FF), UINT64_C(0x0000500000003000)
+};
+
+static const limb_t const_psat[6] = {
+ UINT64_C(0x00000000FFFFFFFF), UINT64_C(0xFFFFFFFF00000000),
+ UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFF),
+ UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)
+};
+
+/* LUT for scalar multiplication by comb interleaving */
+static const pt_aff_t lut_cmb[21][16] = {
+ {
+ { { UINT64_C(0x3DD0756649C0B528), UINT64_C(0x20E378E2A0D6CE38),
+ UINT64_C(0x879C3AFC541B4D6E), UINT64_C(0x6454868459A30EFF),
+ UINT64_C(0x812FF723614EDE2B), UINT64_C(0x4D3AADC2299E1513) },
+ { UINT64_C(0x23043DAD4B03A4FE), UINT64_C(0xA1BFA8BF7BB4A9AC),
+ UINT64_C(0x8BADE7562E83B050), UINT64_C(0xC6C3521968F4FFD9),
+ UINT64_C(0xDD8002263969A840), UINT64_C(0x2B78ABC25A15C5E9) } },
+ { { UINT64_C(0x05E4DBE6C1DC4073), UINT64_C(0xC54EA9FFF04F779C),
+ UINT64_C(0x6B2034E9A170CCF0), UINT64_C(0x3A48D732D51C6C3E),
+ UINT64_C(0xE36F7E2D263AA470), UINT64_C(0xD283FE68E7C1C3AC) },
+ { UINT64_C(0x7E284821C04EE157), UINT64_C(0x92D789A77AE0E36D),
+ UINT64_C(0x132663C04EF67446), UINT64_C(0x68012D5AD2E1D0B4),
+ UINT64_C(0xF6DB68B15102B339), UINT64_C(0x465465FC983292AF) } },
+ { { UINT64_C(0xBB595EBA68F1F0DF), UINT64_C(0xC185C0CBCC873466),
+ UINT64_C(0x7F1EB1B5293C703B), UINT64_C(0x60DB2CF5AACC05E6),
+ UINT64_C(0xC676B987E2E8E4C6), UINT64_C(0xE1BB26B11D178FFB) },
+ { UINT64_C(0x2B694BA07073FA21), UINT64_C(0x22C16E2E72F34566),
+ UINT64_C(0x80B61B3101C35B99), UINT64_C(0x4B237FAF982C0411),
+ UINT64_C(0xE6C5944024DE236D), UINT64_C(0x4DB1C9D6E209E4A3) } },
+ { { UINT64_C(0xDF13B9D17D69222B), UINT64_C(0x4CE6415F874774B1),
+ UINT64_C(0x731EDCF8211FAA95), UINT64_C(0x5F4215D1659753ED),
+ UINT64_C(0xF893DB589DB2DF55), UINT64_C(0x932C9F811C89025B) },
+ { UINT64_C(0x0996B2207706A61E), UINT64_C(0x135349D5A8641C79),
+ UINT64_C(0x65AAD76F50130844), UINT64_C(0x0FF37C0401FFF780),
+ UINT64_C(0xF57F238E693B0706), UINT64_C(0xD90A16B6AF6C9B3E) } },
+ { { UINT64_C(0x2F5D200E2353B92F), UINT64_C(0xE35D87293FD7E4F9),
+ UINT64_C(0x26094833A96D745D), UINT64_C(0xDC351DC13CBFFF3F),
+ UINT64_C(0x26D464C6DAD54D6A), UINT64_C(0x5CAB1D1D53636C6A) },
+ { UINT64_C(0xF2813072B18EC0B0), UINT64_C(0x3777E270D742AA2F),
+ UINT64_C(0x27F061C7033CA7C2), UINT64_C(0xA6ECACCC68EAD0D8),
+ UINT64_C(0x7D9429F4EE69A754), UINT64_C(0xE770633431E8F5C6) } },
+ { { UINT64_C(0xC7708B19B68B8C7D), UINT64_C(0x4532077C44377ABA),
+ UINT64_C(0x0DCC67706CDAD64F), UINT64_C(0x01B8BF56147B6602),
+ UINT64_C(0xF8D89885F0561D79), UINT64_C(0x9C19E9FC7BA9C437) },
+ { UINT64_C(0x764EB146BDC4BA25), UINT64_C(0x604FE46BAC144B83),
+ UINT64_C(0x3CE813298A77E780), UINT64_C(0x2E070F36FE9E682E),
+ UINT64_C(0x41821D0C3A53287A), UINT64_C(0x9AA62F9F3533F918) } },
+ { { UINT64_C(0x9B7AEB7E75CCBDFB), UINT64_C(0xB25E28C5F6749A95),
+ UINT64_C(0x8A7A8E4633B7D4AE), UINT64_C(0xDB5203A8D9C1BD56),
+ UINT64_C(0xD2657265ED22DF97), UINT64_C(0xB51C56E18CF23C94) },
+ { UINT64_C(0xF4D394596C3D812D), UINT64_C(0xD8E88F1A87CAE0C2),
+ UINT64_C(0x789A2A48CF4D0FE3), UINT64_C(0xB7FEAC2DFEC38D60),
+ UINT64_C(0x81FDBD1C3B490EC3), UINT64_C(0x4617ADB7CC6979E1) } },
+ { { UINT64_C(0x446AD8884709F4A9), UINT64_C(0x2B7210E2EC3DABD8),
+ UINT64_C(0x83CCF19550E07B34), UINT64_C(0x59500917789B3075),
+ UINT64_C(0x0FC01FD4EB085993), UINT64_C(0xFB62D26F4903026B) },
+ { UINT64_C(0x2309CC9D6FE989BB), UINT64_C(0x61609CBD144BD586),
+ UINT64_C(0x4B23D3A0DE06610C), UINT64_C(0xDDDC2866D898F470),
+ UINT64_C(0x8733FC41400C5797), UINT64_C(0x5A68C6FED0BC2716) } },
+ { { UINT64_C(0x8903E1304B4A3CD0), UINT64_C(0x3EA4EA4C8FF1F43E),
+ UINT64_C(0xE6FC3F2AF655A10D), UINT64_C(0x7BE3737D524FFEFC),
+ UINT64_C(0x9F6928555330455E), UINT64_C(0x524F166EE475CE70) },
+ { UINT64_C(0x3FCC69CD6C12F055), UINT64_C(0x4E23B6FFD5B9C0DA),
+ UINT64_C(0x49CE6993336BF183), UINT64_C(0xF87D6D854A54504A),
+ UINT64_C(0x25EB5DF1B3C2677A), UINT64_C(0xAC37986F55B164C9) } },
+ { { UINT64_C(0x82A2ED4ABAA84C08), UINT64_C(0x22C4CC5F41A8C912),
+ UINT64_C(0xCA109C3B154AAD5E), UINT64_C(0x23891298FC38538E),
+ UINT64_C(0xB3B6639C539802AE), UINT64_C(0xFA0F1F450390D706) },
+ { UINT64_C(0x46B78E5DB0DC21D0), UINT64_C(0xA8C72D3CC3DA2EAC),
+ UINT64_C(0x9170B3786FF2F643), UINT64_C(0x3F5A799BB67F30C3),
+ UINT64_C(0x15D1DC778264B672), UINT64_C(0xA1D47B23E9577764) } },
+ { { UINT64_C(0x08265E510422CE2F), UINT64_C(0x88E0D496DD2F9E21),
+ UINT64_C(0x30128AA06177F75D), UINT64_C(0x2E59AB62BD9EBE69),
+ UINT64_C(0x1B1A0F6C5DF0E537), UINT64_C(0xAB16C626DAC012B5) },
+ { UINT64_C(0x8014214B008C5DE7), UINT64_C(0xAA740A9E38F17BEA),
+ UINT64_C(0x262EBB498A149098), UINT64_C(0xB454111E8527CD59),
+ UINT64_C(0x266AD15AACEA5817), UINT64_C(0x21824F411353CCBA) } },
+ { { UINT64_C(0xD1B4E74D12E3683B), UINT64_C(0x990ED20B569B8EF6),
+ UINT64_C(0xB9D3DD25429C0A18), UINT64_C(0x1C75B8AB2A351783),
+ UINT64_C(0x61E4CA2B905432F0), UINT64_C(0x80826A69EEA8F224) },
+ { UINT64_C(0x7FC33A6BEC52ABAD), UINT64_C(0x0BCCA3F0A65E4813),
+ UINT64_C(0x7AD8A132A527CEBE), UINT64_C(0xF0138950EAF22C7E),
+ UINT64_C(0x282D2437566718C1), UINT64_C(0x9DFCCB0DE2212559) } },
+ { { UINT64_C(0x1E93722758CE3B83), UINT64_C(0xBB280DFA3CB3FB36),
+ UINT64_C(0x57D0F3D2E2BE174A), UINT64_C(0x9BD51B99208ABE1E),
+ UINT64_C(0x3809AB50DE248024), UINT64_C(0xC29C6E2CA5BB7331) },
+ { UINT64_C(0x9944FD2E61124F05), UINT64_C(0x83CCBC4E9009E391),
+ UINT64_C(0x01628F059424A3CC), UINT64_C(0xD6A2F51DEA8E4344),
+ UINT64_C(0xDA3E1A3D4CEBC96E), UINT64_C(0x1FE6FB42E97809DC) } },
+ { { UINT64_C(0xA04482D2467D66E4), UINT64_C(0xCF1912934D78291D),
+ UINT64_C(0x8E0D4168482396F9), UINT64_C(0x7228E2D5D18F14D0),
+ UINT64_C(0x2F7E8D509C6A58FE), UINT64_C(0xE8CA780E373E5AEC) },
+ { UINT64_C(0x42AAD1D61B68E9F8), UINT64_C(0x58A6D7F569E2F8F4),
+ UINT64_C(0xD779ADFE31DA1BEA), UINT64_C(0x7D26540638C85A85),
+ UINT64_C(0x67E67195D44D3CDF), UINT64_C(0x17820A0BC5134ED7) } },
+ { { UINT64_C(0x019D6AC5D3021470), UINT64_C(0x25846B66780443D6),
+ UINT64_C(0xCE3C15ED55C97647), UINT64_C(0x3DC22D490E3FEB0F),
+ UINT64_C(0x2065B7CBA7DF26E4), UINT64_C(0xC8B00AE8187CEA1F) },
+ { UINT64_C(0x1A5284A0865DDED3), UINT64_C(0x293C164920C83DE2),
+ UINT64_C(0xAB178D26CCE851B3), UINT64_C(0x8E6DB10B404505FB),
+ UINT64_C(0xF6F57E7190C82033), UINT64_C(0x1D2A1C015977F16C) } },
+ { { UINT64_C(0xA39C89317C8906A4), UINT64_C(0xB6E7ECDD9E821EE6),
+ UINT64_C(0x2ECF8340F0DF4FE6), UINT64_C(0xD42F7DC953C14965),
+ UINT64_C(0x1AFB51A3E3BA8285), UINT64_C(0x6C07C4040A3305D1) },
+ { UINT64_C(0xDAB83288127FC1DA), UINT64_C(0xBC0A699B374C4B08),
+ UINT64_C(0x402A9BAB42EB20DD), UINT64_C(0xD7DD464F045A7A1C),
+ UINT64_C(0x5B3D0D6D36BEECC4), UINT64_C(0x475A3E756398A19D) } },
+ },
+ {
+ { { UINT64_C(0x31BDB48372876AE8), UINT64_C(0xE3325D98961ED1BF),
+ UINT64_C(0x18C042469B6FC64D), UINT64_C(0x0DCC15FA15786B8C),
+ UINT64_C(0x81ACDB068E63DA4A), UINT64_C(0xD3A4B643DADA70FB) },
+ { UINT64_C(0x46361AFEDEA424EB), UINT64_C(0xDC2D2CAE89B92970),
+ UINT64_C(0xF389B61B615694E6), UINT64_C(0x7036DEF1872951D2),
+ UINT64_C(0x40FD3BDAD93BADC7), UINT64_C(0x45AB6321380A68D3) } },
+ { { UINT64_C(0x23C1F74481A2703A), UINT64_C(0x1A5D075CB9859136),
+ UINT64_C(0xA4F82C9D5AFD1BFD), UINT64_C(0xA3D1E9A4F89D76FE),
+ UINT64_C(0x964F705075702F80), UINT64_C(0x182BF349F56C089D) },
+ { UINT64_C(0xE205FA8FBE0DA6E1), UINT64_C(0x32905EB90A40F8F3),
+ UINT64_C(0x331A1004356D4395), UINT64_C(0x58B78901FDBBDFDE),
+ UINT64_C(0xA52A15979BA00E71), UINT64_C(0xE0092E1F55497A30) } },
+ { { UINT64_C(0x5562A85670EE8F39), UINT64_C(0x86B0C11764E52A9C),
+ UINT64_C(0xC19F317409C75B8C), UINT64_C(0x21C7CC3124923F80),
+ UINT64_C(0xE63FE47F8F5B291E), UINT64_C(0x3D6D3C050DC08B05) },
+ { UINT64_C(0x58AE455EEE0C39A1), UINT64_C(0x78BEA4310AD97942),
+ UINT64_C(0x42C7C97F3EE3989C), UINT64_C(0xC1B03AF5F38759AE),
+ UINT64_C(0x1A673C75BCF46899), UINT64_C(0x4831B7D38D508C7D) } },
+ { { UINT64_C(0x76512D1BC552E354), UINT64_C(0x2B7EB6DF273020FD),
+ UINT64_C(0xD1C73AA8025A5F25), UINT64_C(0x2ABA19295CBD2A40),
+ UINT64_C(0xB53CADC3C88D61C6), UINT64_C(0x7E66A95E098290F3) },
+ { UINT64_C(0x72800ECBAF4C5073), UINT64_C(0x81F2725E9DC63FAF),
+ UINT64_C(0x14BF92A7282BA9D1), UINT64_C(0x90629672BD5F1BB2),
+ UINT64_C(0x362F68EBA97C6C96), UINT64_C(0xB1D3BB8B7EA9D601) } },
+ { { UINT64_C(0x73878F7FA9C94429), UINT64_C(0xB35C3BC8456CA6D8),
+ UINT64_C(0xD96F0B3CF721923A), UINT64_C(0x28D8F06CE6D44FA1),
+ UINT64_C(0x94EFDCDCD5CD671A), UINT64_C(0x0299AB933F97D481) },
+ { UINT64_C(0xB7CED6EA2FD1D324), UINT64_C(0xBD6832087E932EC2),
+ UINT64_C(0x24ED31FBCB755A6E), UINT64_C(0xA636098EE48781D2),
+ UINT64_C(0x8687C63CF0A4F297), UINT64_C(0xBB52344007478526) } },
+ { { UINT64_C(0x2E5F741934124B56), UINT64_C(0x1F223AE14B3F02CA),
+ UINT64_C(0x6345B427E8336C7E), UINT64_C(0x92123E16F5D0E3D0),
+ UINT64_C(0xDAF0D14D45E79F3A), UINT64_C(0x6ACA67656F3BD0C6) },
+ { UINT64_C(0xF6169FAB403813F4), UINT64_C(0x31DC39C0334A4C59),
+ UINT64_C(0x74C46753D589866D), UINT64_C(0x5741511D984C6A5D),
+ UINT64_C(0xF263128797FED2D3), UINT64_C(0x5687CA1B11614886) } },
+ { { UINT64_C(0x076D902A33836D4B), UINT64_C(0xEC6C5C4324AFB557),
+ UINT64_C(0xA0FE2D1CA0516A0F), UINT64_C(0x6FB8D73700D22ECC),
+ UINT64_C(0xF1DE9077DAF1D7B3), UINT64_C(0xE4695F77D4C0C1EB) },
+ { UINT64_C(0x5F0FD8A8B4375573), UINT64_C(0x762383595E50944F),
+ UINT64_C(0x65EA2F28635CD76F), UINT64_C(0x0854776925FDE7B0),
+ UINT64_C(0xB2345A2E51944304), UINT64_C(0x86EFA2F7A16C980D) } },
+ { { UINT64_C(0x4CCBE2D0BF4D1D63), UINT64_C(0x32E33401397366D5),
+ UINT64_C(0xC83AFDDE71BDA2CE), UINT64_C(0x8DACE2AC478ED9E6),
+ UINT64_C(0x3AC6A559763FDD9E), UINT64_C(0x0FFDB04CB398558F) },
+ { UINT64_C(0x6C1B99B2AFB9D6B8), UINT64_C(0x572BA39C27F815DD),
+ UINT64_C(0x9DE73EE70DBCF842), UINT64_C(0x2A3ED58929267B88),
+ UINT64_C(0xD46A7FD315EBBBB3), UINT64_C(0xD1D01863E29400C7) } },
+ { { UINT64_C(0x8FB101D1E1F89EC5), UINT64_C(0xB87A1F53F8508042),
+ UINT64_C(0x28C8DB240ED7BEEF), UINT64_C(0x3940F845ACE8660A),
+ UINT64_C(0x4EACB619C6D453FD), UINT64_C(0x2E044C982BAD6160) },
+ { UINT64_C(0x8792854880B16C02), UINT64_C(0xF0D4BEB3C0A9EB64),
+ UINT64_C(0xD785B4AFC183C195), UINT64_C(0x23AAB0E65E6C46EA),
+ UINT64_C(0x30F7E104A930FECA), UINT64_C(0x6A1A7B8BD55C10FB) } },
+ { { UINT64_C(0xDA74EAEBDBFED1AA), UINT64_C(0xC8A59223DF0B025C),
+ UINT64_C(0x7EF7DC85D5B627F7), UINT64_C(0x02A13AE1197D7624),
+ UINT64_C(0x119E9BE12F785A9B), UINT64_C(0xC0B7572F00D6B219) },
+ { UINT64_C(0x9B1E51266D4CAF30), UINT64_C(0xA16A51170A840BD1),
+ UINT64_C(0x5BE17B910E9CCF43), UINT64_C(0x5BDBEDDD69CF2C9C),
+ UINT64_C(0x9FFBFBCF4CF4F289), UINT64_C(0xE1A621836C355CE9) } },
+ { { UINT64_C(0x056199D9A7B2FCCF), UINT64_C(0x51F2E7B6CE1D784E),
+ UINT64_C(0xA1D09C47339E2FF0), UINT64_C(0xC8E64890B836D0A9),
+ UINT64_C(0x2F781DCBC0D07EBE), UINT64_C(0x5CF3C2AD3ACF934C) },
+ { UINT64_C(0xE55DB190A17E26AE), UINT64_C(0xC9C61E1F91245513),
+ UINT64_C(0x83D7E6CF61998C15), UINT64_C(0x4DB33C85E41D38E3),
+ UINT64_C(0x74D5F91DC2FEE43D), UINT64_C(0x7EBBDB4536BBC826) } },
+ { { UINT64_C(0xE20EC7E9CB655A9D), UINT64_C(0x4977EB925C47D421),
+ UINT64_C(0xA237E12C3B9D72FA), UINT64_C(0xCAAEDBC1CBF7B145),
+ UINT64_C(0x5200F5B23B77AAA3), UINT64_C(0x32EDED55BDBE5380) },
+ { UINT64_C(0x74E38A40E7C9B80A), UINT64_C(0x3A3F0CF8AB6DE911),
+ UINT64_C(0x56DCDD7AAD16AAF0), UINT64_C(0x3D2924498E861D5E),
+ UINT64_C(0xD6C61878985733E2), UINT64_C(0x2401FE7D6AA6CD5B) } },
+ { { UINT64_C(0xABB3DC75B42E3686), UINT64_C(0xAE712419B4C57E61),
+ UINT64_C(0x2C565F72B21B009B), UINT64_C(0xA5F1DA2E710C3699),
+ UINT64_C(0x771099A0A5EBA59A), UINT64_C(0x4DA88F4AC10017A0) },
+ { UINT64_C(0x987FFFD31927B56D), UINT64_C(0xB98CB8ECC4E33478),
+ UINT64_C(0xB224A971C2248166), UINT64_C(0x5470F554DE1DC794),
+ UINT64_C(0xD747CC24E31FF983), UINT64_C(0xB91745E9B5B22DAE) } },
+ { { UINT64_C(0x6CCBFED072F34420), UINT64_C(0x95045E4DA53039D2),
+ UINT64_C(0x3B6C11545A793944), UINT64_C(0xAA114145DDB6B799),
+ UINT64_C(0xABC15CA4252B7637), UINT64_C(0x5745A35BA5744634) },
+ { UINT64_C(0x05DC6BDEDA596FC0), UINT64_C(0xCD52C18CA8020881),
+ UINT64_C(0x03FA9F47D296BAD0), UINT64_C(0xD8E2C1297268E139),
+ UINT64_C(0x58C1A98D9EC450B0), UINT64_C(0x909638DADE48B20D) } },
+ { { UINT64_C(0x7AFC30D49B7F8311), UINT64_C(0x82A0042242368EA3),
+ UINT64_C(0xBFF951986F5F9865), UINT64_C(0x9B24F612FC0A070F),
+ UINT64_C(0x22C06CF2620F489D), UINT64_C(0x3C7ED052780F7DBB) },
+ { UINT64_C(0xDB87AB1834DAFE9B), UINT64_C(0x20C03B409C4BBCA1),
+ UINT64_C(0x5D718CF059A42341), UINT64_C(0x9863170669E84538),
+ UINT64_C(0x5557192BD27D64E1), UINT64_C(0x08B4EC52DA822766) } },
+ { { UINT64_C(0xB2D986F6D66C1A59), UINT64_C(0x927DEB1678E0E423),
+ UINT64_C(0x9E673CDE49C3DEDC), UINT64_C(0xFA362D84F7ECB6CF),
+ UINT64_C(0x078E5F401BA17340), UINT64_C(0x934CA5D11F4E489C) },
+ { UINT64_C(0xC03C073164EEF493), UINT64_C(0x631A353BD7931A7E),
+ UINT64_C(0x8E7CC3BB65DD74F1), UINT64_C(0xD55864C5702676A5),
+ UINT64_C(0x6D306AC4439F04BD), UINT64_C(0x58544F672BAFED57) } },
+ },
+ {
+ { { UINT64_C(0xB083BA6AEC074AEA), UINT64_C(0x46FAC5EF7F0B505B),
+ UINT64_C(0x95367A21FC82DC03), UINT64_C(0x227BE26A9D3679D8),
+ UINT64_C(0xC70F6D6C7E9724C0), UINT64_C(0xCD68C757F9EBEC0F) },
+ { UINT64_C(0x29DDE03E8FF321B2), UINT64_C(0xF84AD7BB031939DC),
+ UINT64_C(0xDAF590C90F602F4B), UINT64_C(0x17C5288849722BC4),
+ UINT64_C(0xA8DF99F0089B22B6), UINT64_C(0xC21BC5D4E59B9B90) } },
+ { { UINT64_C(0x4936C6A08A31973F), UINT64_C(0x54D442FA83B8C205),
+ UINT64_C(0x03AEE8B45714F2C6), UINT64_C(0x139BD6923F5AC25A),
+ UINT64_C(0x6A2E42BAB5B33794), UINT64_C(0x50FA11643FF7BBA9) },
+ { UINT64_C(0xB61D8643F7E2C099), UINT64_C(0x2366C993BD5C6637),
+ UINT64_C(0x62110E1472EB77FA), UINT64_C(0x3D5B96F13B99C635),
+ UINT64_C(0x956ECF64F674C9F2), UINT64_C(0xC56F7E51EF2BA250) } },
+ { { UINT64_C(0x246FFCB6FF602C1B), UINT64_C(0x1E1A1D746E1258E0),
+ UINT64_C(0xB4B43AE2250E6676), UINT64_C(0x95C1B5F0924CE5FA),
+ UINT64_C(0x2555795BEBD8C776), UINT64_C(0x4C1E03DCACD9D9D0) },
+ { UINT64_C(0xE1D74AA69CE90C61), UINT64_C(0xA88C0769A9C4B9F9),
+ UINT64_C(0xDF74DF2795AF56DE), UINT64_C(0x24B10C5FB331B6F4),
+ UINT64_C(0xB0A6DF9A6559E137), UINT64_C(0x6ACC1B8FC06637F2) } },
+ { { UINT64_C(0xBD8C086834B4E381), UINT64_C(0x278CACC730DFF271),
+ UINT64_C(0x87ED12DE02459389), UINT64_C(0x3F7D98FFDEF840B6),
+ UINT64_C(0x71EEE0CB5F0B56E1), UINT64_C(0x462B5C9BD8D9BE87) },
+ { UINT64_C(0xE6B50B5A98094C0F), UINT64_C(0x26F3B274508C67CE),
+ UINT64_C(0x418B1BD17CB1F992), UINT64_C(0x607818ED4FF11827),
+ UINT64_C(0xE630D93A9B042C63), UINT64_C(0x38B9EFF38C779AE3) } },
+ { { UINT64_C(0xE8767D36729C5431), UINT64_C(0xA8BD07C0BB94642C),
+ UINT64_C(0x0C11FC8E58F2E5B2), UINT64_C(0xD8912D48547533FE),
+ UINT64_C(0xAAE14F5E230D91FB), UINT64_C(0xC122051A676DFBA0) },
+ { UINT64_C(0x9ED4501F5EA93078), UINT64_C(0x2758515CBD4BEE0A),
+ UINT64_C(0x97733C6C94D21F52), UINT64_C(0x139BCD6D4AD306A2),
+ UINT64_C(0x0AAECBDC298123CC), UINT64_C(0x102B8A311CB7C7C9) } },
+ { { UINT64_C(0x22A28E59FAF46675), UINT64_C(0x1075730810A31E7D),
+ UINT64_C(0xC7EEAC842B4C2F4F), UINT64_C(0xBA370148B5EF5184),
+ UINT64_C(0x4A5A28668732E055), UINT64_C(0x14B8DCDCB887C36F) },
+ { UINT64_C(0xDBA8C85C433F093D), UINT64_C(0x73DF549D1C9A201C),
+ UINT64_C(0x69AA0D7B70F927D8), UINT64_C(0xFA3A8685D7D2493A),
+ UINT64_C(0x6F48A2550A7F4013), UINT64_C(0xD20C8BF9DD393067) } },
+ { { UINT64_C(0x4EC874EA81625E78), UINT64_C(0x8B8D8B5A3FBE9267),
+ UINT64_C(0xA3D9D1649421EC2F), UINT64_C(0x490E92D9880EA295),
+ UINT64_C(0x745D1EDCD8F3B6DA), UINT64_C(0x0116628B8F18BA03) },
+ { UINT64_C(0x0FF6BCE0834EADCE), UINT64_C(0x464697F2000827F7),
+ UINT64_C(0x08DCCF84498D724E), UINT64_C(0x7896D3651E88304C),
+ UINT64_C(0xE63EBCCE135E3622), UINT64_C(0xFB942E8EDC007521) } },
+ { { UINT64_C(0xBB155A66A3688621), UINT64_C(0xED2FD7CDF91B52A3),
+ UINT64_C(0x52798F5DEA20CB88), UINT64_C(0x069CE105373F7DD8),
+ UINT64_C(0xF9392EC78CA78F6B), UINT64_C(0xB3013E256B335169) },
+ { UINT64_C(0x1D92F8006B11715C), UINT64_C(0xADD4050EFF9DC464),
+ UINT64_C(0x2AC226598465B84A), UINT64_C(0x2729D646465B2BD6),
+ UINT64_C(0x6202344AE4EFF9DD), UINT64_C(0x51F3198FCD9B90B9) } },
+ { { UINT64_C(0x17CE54EFE5F0AE1D), UINT64_C(0x984E8204B09852AF),
+ UINT64_C(0x3365B37AC4B27A71), UINT64_C(0x720E3152A00E0A9C),
+ UINT64_C(0x3692F70D925BD606), UINT64_C(0xBE6E699D7BC7E9AB) },
+ { UINT64_C(0xD75C041F4C89A3C0), UINT64_C(0x8B9F592D8DC100C0),
+ UINT64_C(0x30750F3AAD228F71), UINT64_C(0x1B9ECF84E8B17A11),
+ UINT64_C(0xDF2025620FBFA8A2), UINT64_C(0x45C811FCAA1B6D67) } },
+ { { UINT64_C(0xEC5B84B71A5151F8), UINT64_C(0x118E59E8550AB2D2),
+ UINT64_C(0x2CCDEDA4049BD735), UINT64_C(0xC99CBA719CD62F0F),
+ UINT64_C(0x69B8040A62C9E4F8), UINT64_C(0x16F1A31A110B8283) },
+ { UINT64_C(0x53F6380298E908A3), UINT64_C(0x308CB6EFD862F9DE),
+ UINT64_C(0xE185DAD8A521A95A), UINT64_C(0x4D8FE9A4097F75CA),
+ UINT64_C(0xD1ECCEC71CA07D53), UINT64_C(0x13DFA1DC0DB07E83) } },
+ { { UINT64_C(0xDDAF9DC60F591A76), UINT64_C(0xE1A6D7CC1685F412),
+ UINT64_C(0x153DE557002B6E8D), UINT64_C(0x730C38BCC6DA37D9),
+ UINT64_C(0xAE1806220914B597), UINT64_C(0x84F98103DD8C3A0A) },
+ { UINT64_C(0x369C53988DA205B0), UINT64_C(0xA3D95B813888A720),
+ UINT64_C(0x1F3F8BBFE10E2806), UINT64_C(0x48663DF54530D1F3),
+ UINT64_C(0x320523B43E377713), UINT64_C(0xE8B1A575C7894814) } },
+ { { UINT64_C(0x330668712EE8EA07), UINT64_C(0xC6FB4EC560DA199D),
+ UINT64_C(0x33231860F4370A05), UINT64_C(0x7ABECE72C6DE4E26),
+ UINT64_C(0xDE8D4BD8EBDECE7A), UINT64_C(0xC90EE6571CBE93C7) },
+ { UINT64_C(0x0246751B85AC2509), UINT64_C(0xD0EF142C30380245),
+ UINT64_C(0x086DF9C47C76E39C), UINT64_C(0x68F1304FB789FB56),
+ UINT64_C(0x23E4CB98A5E4BD56), UINT64_C(0x69A4C63C64663DCA) } },
+ { { UINT64_C(0x6C72B6AF7CB34E63), UINT64_C(0x073C40CD6DFC23FE),
+ UINT64_C(0xBDEEE7A1C936693A), UINT64_C(0xBC858E806EFAD378),
+ UINT64_C(0xEAD719FFF5BE55D4), UINT64_C(0xC8C3238F04552F5F) },
+ { UINT64_C(0x0952C068928D5784), UINT64_C(0x89DFDF2294C58F2B),
+ UINT64_C(0x332DEDF367502C50), UINT64_C(0x3ED2FA3AAC0BE258),
+ UINT64_C(0xAEDC9B8A7C5C8244), UINT64_C(0x43A761B9DC0EA34F) } },
+ { { UINT64_C(0x8FD683A2CC5E21A5), UINT64_C(0x5F444C6EFBA2BB68),
+ UINT64_C(0x709ACD0EAF05586D), UINT64_C(0x8EFA54D2DE8FB348),
+ UINT64_C(0x35276B7134CFE29E), UINT64_C(0x77A06FCD941EAC8C) },
+ { UINT64_C(0x5815792D928322DD), UINT64_C(0x82FF356B67F7CB59),
+ UINT64_C(0x71E40A78304980F4), UINT64_C(0xC8645C273667D021),
+ UINT64_C(0xE785741CAEBAE28F), UINT64_C(0xB2C1BC7553ECAC37) } },
+ { { UINT64_C(0x633EB24F1D0A74DB), UINT64_C(0xF1F55E56FA752512),
+ UINT64_C(0x75FECA688EFE11DE), UINT64_C(0xC80FD91CE6BF19EC),
+ UINT64_C(0xAD0BAFEC2A14C908), UINT64_C(0x4E1C4ACAADE4031F) },
+ { UINT64_C(0x463A815B1EB1549A), UINT64_C(0x5AD4253C668F1298),
+ UINT64_C(0x5CB3866238A37151), UINT64_C(0x34BB1CCFAFF16B96),
+ UINT64_C(0xDCA93B13EE731AB0), UINT64_C(0x9F3CE5CC9BE01A0B) } },
+ { { UINT64_C(0x75DB5723A110D331), UINT64_C(0x67C66F6A7123D89F),
+ UINT64_C(0x27ABBD4B4009D570), UINT64_C(0xACDA6F84C73451BC),
+ UINT64_C(0xE4B9A23905575ACF), UINT64_C(0x3C2DB7EFAB2D3D6C) },
+ { UINT64_C(0x01CCDD0829115145), UINT64_C(0x9E0602FE57B5814A),
+ UINT64_C(0x679B35C287862838), UINT64_C(0x0277DC4C38AD598D),
+ UINT64_C(0xEF80A2136D896DD4), UINT64_C(0xC8812213E7B9047B) } },
+ },
+ {
+ { { UINT64_C(0xAC6DBDF6EDC9CE62), UINT64_C(0xA58F5B440F9C006E),
+ UINT64_C(0x16694DE3DC28E1B0), UINT64_C(0x2D039CF2A6647711),
+ UINT64_C(0xA13BBE6FC5B08B4B), UINT64_C(0xE44DA93010EBD8CE) },
+ { UINT64_C(0xCD47208719649A16), UINT64_C(0xE18F4E44683E5DF1),
+ UINT64_C(0xB3F66303929BFA28), UINT64_C(0x7C378E43818249BF),
+ UINT64_C(0x76068C80847F7CD9), UINT64_C(0xEE3DB6D1987EBA16) } },
+ { { UINT64_C(0xCBBD8576C42A2F52), UINT64_C(0x9ACC6F709D2B06BB),
+ UINT64_C(0xE5CB56202E6B72A4), UINT64_C(0x5738EA0E7C024443),
+ UINT64_C(0x8ED06170B55368F3), UINT64_C(0xE54C99BB1AEED44F) },
+ { UINT64_C(0x3D90A6B2E2E0D8B2), UINT64_C(0x21718977CF7B2856),
+ UINT64_C(0x089093DCC5612AEC), UINT64_C(0xC272EF6F99C1BACC),
+ UINT64_C(0x47DB3B43DC43EAAD), UINT64_C(0x730F30E40832D891) } },
+ { { UINT64_C(0x9FFE55630C7FECDB), UINT64_C(0x55CC67B6F88101E5),
+ UINT64_C(0x3039F981CBEFA3C7), UINT64_C(0x2AB06883667BFD64),
+ UINT64_C(0x9007A2574340E3DF), UINT64_C(0x1AC3F3FA5A3A49CA) },
+ { UINT64_C(0x9C7BE629C97E20FD), UINT64_C(0xF61823D3A3DAE003),
+ UINT64_C(0xFFE7FF39E7380DBA), UINT64_C(0x620BB9B59FACC3B8),
+ UINT64_C(0x2DDCB8CD31AE422C), UINT64_C(0x1DE3BCFAD12C3C43) } },
+ { { UINT64_C(0x8C074946D6E0F9A9), UINT64_C(0x662FA99551C3B05B),
+ UINT64_C(0x6CDAE96904BB2048), UINT64_C(0x6DEC9594D6DC8B60),
+ UINT64_C(0x8D26586954438BBC), UINT64_C(0x88E983E31B0E95A5) },
+ { UINT64_C(0x8189F11460CBF838), UINT64_C(0x77190697771DC46B),
+ UINT64_C(0x775775A227F8EC1A), UINT64_C(0x7A125240607E3739),
+ UINT64_C(0xAFAE84E74F793E4E), UINT64_C(0x44FA17F35BF5BAF4) } },
+ { { UINT64_C(0xA21E69A5D03AC439), UINT64_C(0x2069C5FC88AA8094),
+ UINT64_C(0xB041EEA78C08F206), UINT64_C(0x55B9D4613D65B8ED),
+ UINT64_C(0x951EA25CD392C7C4), UINT64_C(0x4B9A1CEC9D166232) },
+ { UINT64_C(0xC184FCD8FCF931A4), UINT64_C(0xBA59AD44063AD374),
+ UINT64_C(0x1868AD2A1AA9796F), UINT64_C(0x38A34018DFF29832),
+ UINT64_C(0x01FC880103DF8070), UINT64_C(0x1282CCE048DD334A) } },
+ { { UINT64_C(0x76AA955726D8503C), UINT64_C(0xBE962B636BC3E3D0),
+ UINT64_C(0xF5CA93E597DE8841), UINT64_C(0x1561B05EAF3F2C16),
+ UINT64_C(0x34BE00AAD34BFF98), UINT64_C(0xEA21E6E9D23D2925) },
+ { UINT64_C(0x55713230394C3AFB), UINT64_C(0xEAF0529BD6C8BECA),
+ UINT64_C(0xFF38A743202B9A11), UINT64_C(0xA13E39FC6D3A398B),
+ UINT64_C(0x8CBD644B86E2615A), UINT64_C(0x92063988191057EC) } },
+ { { UINT64_C(0x787835CE13F89146), UINT64_C(0x7FCD42CC69446C3F),
+ UINT64_C(0x0DA2AA98840E679D), UINT64_C(0x44F2052318779A1B),
+ UINT64_C(0xE3A3B34FEFBF5935), UINT64_C(0xA5D2CFD0B9947B70) },
+ { UINT64_C(0xAE2AF4EF27F4E16F), UINT64_C(0xA7FA70D2B9D21322),
+ UINT64_C(0x68084919B3FD566B), UINT64_C(0xF04D71C8D7AAD6AB),
+ UINT64_C(0xDBEA21E410BC4260), UINT64_C(0xAA7DC6658D949B42) } },
+ { { UINT64_C(0xD8E958A06CCB8213), UINT64_C(0x118D9DB991900B54),
+ UINT64_C(0x09BB9D4985E8CED6), UINT64_C(0x410E9FB524019281),
+ UINT64_C(0x3B31B4E16D74C86E), UINT64_C(0x52BC0252020BB77D) },
+ { UINT64_C(0x5616A26F27092CE4), UINT64_C(0x67774DBCA08F65CD),
+ UINT64_C(0x560AD494C08BD569), UINT64_C(0xBE26DA36AD498783),
+ UINT64_C(0x0276C8AB7F019C91), UINT64_C(0x09843ADA5248266E) } },
+ { { UINT64_C(0xA0AE88A77D963CF2), UINT64_C(0x91EF8986D0E84920),
+ UINT64_C(0xC7EFE344F8C58104), UINT64_C(0x0A25D9FDECA20773),
+ UINT64_C(0x9D989FAA00D8F1D5), UINT64_C(0x4204C8CEC8B06264) },
+ { UINT64_C(0x717C12E0BE1A2796), UINT64_C(0x1FA4BA8CC190C728),
+ UINT64_C(0xA245CA8D8C8A59BA), UINT64_C(0xE3C374757672B935),
+ UINT64_C(0x083D5E402E4D6375), UINT64_C(0x0B8D5AB35455E16E) } },
+ { { UINT64_C(0x1DB17DBFEED765D4), UINT64_C(0xBBC9B1BEA5DDB965),
+ UINT64_C(0x1948F76DDFC12ABC), UINT64_C(0x2C2714E5134EF489),
+ UINT64_C(0x60CE2EE8741C600F), UINT64_C(0x32396F22F80E6E63) },
+ { UINT64_C(0x421DAC7522537F59), UINT64_C(0x58FB73C649475DF5),
+ UINT64_C(0x0ABF28856F18F1C7), UINT64_C(0x364744689A398D16),
+ UINT64_C(0x87A661A7BF673B87), UINT64_C(0x3E80698F73819E17) } },
+ { { UINT64_C(0xDFE4979353784CC4), UINT64_C(0x4280EAB0486D508F),
+ UINT64_C(0x119593FFE534F5A4), UINT64_C(0x98AEFADD9F63242F),
+ UINT64_C(0x9AE6A24AC4829CAE), UINT64_C(0xF2373CA558E8BA80) },
+ { UINT64_C(0x4017AF7E51765FB3), UINT64_C(0xD1E40F7CAF4AEC4B),
+ UINT64_C(0x87372C7A0898E3BC), UINT64_C(0x688982B285452CA9),
+ UINT64_C(0x71E0B4BFB1E50BCA), UINT64_C(0x21FD2DBFF70E714A) } },
+ { { UINT64_C(0xEE6E8820FB78DDAC), UINT64_C(0x0BAED29C063892CD),
+ UINT64_C(0x5F33049C28C0588D), UINT64_C(0x90C2515E18DBC432),
+ UINT64_C(0xB8A1B1433B4CB0BD), UINT64_C(0x0AB5C0C968103043) },
+ { UINT64_C(0xF3788FA04005EC40), UINT64_C(0x82571C99039EE115),
+ UINT64_C(0xEE8FCED593260BED), UINT64_C(0x5A9BAF7910836D18),
+ UINT64_C(0x7C258B09C46AA4F6), UINT64_C(0x46ECC5E837F53D31) } },
+ { { UINT64_C(0xFA32C0DCBFE0DD98), UINT64_C(0x66EFAFC4962B1066),
+ UINT64_C(0xBA81D33E64BDF5EB), UINT64_C(0x36C28536FC7FC512),
+ UINT64_C(0x0C95176BE0B4FA97), UINT64_C(0x47DDE29B3B9BC64A) },
+ { UINT64_C(0x08D986FD5C173B36), UINT64_C(0x46D84B526CF3F28C),
+ UINT64_C(0x6F6ED6C3F026BDB9), UINT64_C(0xAC90668B68206DC5),
+ UINT64_C(0xE8ED5D98ECBE4E70), UINT64_C(0xCFFF61DDDC1A6974) } },
+ { { UINT64_C(0xFF5C3A2977B1A5C1), UINT64_C(0x10C27E4A0DDF995D),
+ UINT64_C(0xCB745F77E23363E3), UINT64_C(0xD765DF6F32F399A3),
+ UINT64_C(0xF0CA0C2F8A99E109), UINT64_C(0xC3A6BFB71E025CA0) },
+ { UINT64_C(0x830B2C0A4F9D9FA5), UINT64_C(0xAE914CACBD1A84E5),
+ UINT64_C(0x30B35ED8A4FEBCC1), UINT64_C(0xCB902B4684CFBF2E),
+ UINT64_C(0x0BD4762825FC6375), UINT64_C(0xA858A53C85509D04) } },
+ { { UINT64_C(0x8B995D0C552E0A3F), UINT64_C(0xEDBD4E9417BE9FF7),
+ UINT64_C(0x3432E83995085178), UINT64_C(0x0FE5C18180C256F5),
+ UINT64_C(0x05A64EA8EBF9597C), UINT64_C(0x6ED44BB13F80371F) },
+ { UINT64_C(0x6A29A05EFE4C12EE), UINT64_C(0x3E436A43E0BB83B3),
+ UINT64_C(0x38365D9A74D72921), UINT64_C(0x3F5EE823C38E1ED7),
+ UINT64_C(0x09A53213E8FA063F), UINT64_C(0x1E7FE47AB435E713) } },
+ { { UINT64_C(0xE4D9BC94FDDD17F3), UINT64_C(0xC74B8FEDC1016C20),
+ UINT64_C(0x095DE39BB49C060E), UINT64_C(0xDBCC67958AC0DF00),
+ UINT64_C(0x4CF6BAEB1C34F4DF), UINT64_C(0x72C55C21E8390170) },
+ { UINT64_C(0x4F17BFD2F6C48E79), UINT64_C(0x18BF4DA0017A80BA),
+ UINT64_C(0xCF51D829BCF4B138), UINT64_C(0x598AEE5FF48F8B0D),
+ UINT64_C(0x83FAEE5620F10809), UINT64_C(0x4615D4DC779F0850) } },
+ },
+ {
+ { { UINT64_C(0x22313DEE5852B59B), UINT64_C(0x6F56C8E8B6A0B37F),
+ UINT64_C(0x43D6EEAEA76EC380), UINT64_C(0xA16551360275AD36),
+ UINT64_C(0xE5C1B65ADF095BDA), UINT64_C(0xBD1FFA8D367C44B0) },
+ { UINT64_C(0xE2B419C26B48AF2B), UINT64_C(0x57BBBD973DA194C8),
+ UINT64_C(0xB5FBE51FA2BAFF05), UINT64_C(0xA0594D706269B5D0),
+ UINT64_C(0x0B07B70523E8D667), UINT64_C(0xAE1976B563E016E7) } },
+ { { UINT64_C(0x2FDE4893FBECAAAE), UINT64_C(0x444346DE30332229),
+ UINT64_C(0x157B8A5B09456ED5), UINT64_C(0x73606A7925797C6C),
+ UINT64_C(0xA9D0F47C33C14C06), UINT64_C(0x7BC8962CFAF971CA) },
+ { UINT64_C(0x6E763C5165909DFD), UINT64_C(0x1BBBE41B14A9BF42),
+ UINT64_C(0xD95B7ECBC49E9EFC), UINT64_C(0x0C317927B38F2B59),
+ UINT64_C(0x97912B53B3C397DB), UINT64_C(0xCB3879AA45C7ABC7) } },
+ { { UINT64_C(0xCD81BDCF24359B81), UINT64_C(0x6FD326E2DB4C321C),
+ UINT64_C(0x4CB0228BF8EBE39C), UINT64_C(0x496A9DCEB2CDD852),
+ UINT64_C(0x0F115A1AD0E9B3AF), UINT64_C(0xAA08BF36D8EEEF8A) },
+ { UINT64_C(0x5232A51506E5E739), UINT64_C(0x21FAE9D58407A551),
+ UINT64_C(0x289D18B08994B4E8), UINT64_C(0xB4E346A809097A52),
+ UINT64_C(0xC641510F324621D0), UINT64_C(0xC567FD4A95A41AB8) } },
+ { { UINT64_C(0x261578C7D57C8DE9), UINT64_C(0xB9BC491F3836C5C8),
+ UINT64_C(0x993266B414C8038F), UINT64_C(0xBACAD755FAA7CC39),
+ UINT64_C(0x418C4DEFD69B7E27), UINT64_C(0x53FDC5CDAE751533) },
+ { UINT64_C(0x6F3BD329C3EEA63A), UINT64_C(0xA7A22091E53DD29E),
+ UINT64_C(0xB7164F73DC4C54EC), UINT64_C(0xCA66290D44D3D74E),
+ UINT64_C(0xF77C62424C9EA511), UINT64_C(0x34337F551F714C49) } },
+ { { UINT64_C(0x5ED2B216A64B6C4B), UINT64_C(0x1C38794F3AAE640D),
+ UINT64_C(0x30BBAEE08905794F), UINT64_C(0x0D9EE41EC8699CFB),
+ UINT64_C(0xAF38DAF2CF7B7C29), UINT64_C(0x0D6A05CA43E53513) },
+ { UINT64_C(0xBE96C6442606AB56), UINT64_C(0x13E7A072E9EB9734),
+ UINT64_C(0xF96694455FF50CD7), UINT64_C(0x68EF26B547DA6F1D),
+ UINT64_C(0xF002873823687CB7), UINT64_C(0x5ED9C8766217C1CE) } },
+ { { UINT64_C(0x423BA5130A3A9691), UINT64_C(0xF421B1E7B3179296),
+ UINT64_C(0x6B51BCDB1A871E1B), UINT64_C(0x6E3BB5B5464E4300),
+ UINT64_C(0x24171E2EFC6C54CC), UINT64_C(0xA9DFA947D3E58DC2) },
+ { UINT64_C(0x175B33099DE9CFA7), UINT64_C(0x707B25292D1015DA),
+ UINT64_C(0xCBB95F17993EA65A), UINT64_C(0x935150630447450D),
+ UINT64_C(0x0F47B2051B2753C9), UINT64_C(0x4A0BAB14E7D427CF) } },
+ { { UINT64_C(0xA39DEF39B5AA7CA1), UINT64_C(0x591CB173C47C33DF),
+ UINT64_C(0xA09DAC796BBAB872), UINT64_C(0x3EF9D7CF7208BA2F),
+ UINT64_C(0x3CC189317A0A34FC), UINT64_C(0xAE31C62BBCC3380F) },
+ { UINT64_C(0xD72A67940287C0B4), UINT64_C(0x3373382C68E334F1),
+ UINT64_C(0xD0310CA8BD20C6A6), UINT64_C(0xA2734B8742C033FD),
+ UINT64_C(0xA5D390F18DCE4509), UINT64_C(0xFC84E74B3E1AFCB5) } },
+ { { UINT64_C(0xB028334DF2CD8A9C), UINT64_C(0xB8719291570F76F6),
+ UINT64_C(0x662A386E01065A2D), UINT64_C(0xDF1634CB53D940AE),
+ UINT64_C(0x625A7B838F5B41F9), UINT64_C(0xA033E4FEEE6AA1B4) },
+ { UINT64_C(0x51E9D4631E42BABB), UINT64_C(0x660BC2E40D388468),
+ UINT64_C(0x3F702189FCBB114A), UINT64_C(0x6B46FE35B414CA78),
+ UINT64_C(0x328F6CF24A57316B), UINT64_C(0x917423B5381AD156) } },
+ { { UINT64_C(0xAC19306E5373A607), UINT64_C(0x471DF8E3191D0969),
+ UINT64_C(0x380ADE35B9720D83), UINT64_C(0x7423FDF548F1FD5C),
+ UINT64_C(0x8B090C9F49CABC95), UINT64_C(0xB768E8CDC9842F2F) },
+ { UINT64_C(0x399F456DE56162D6), UINT64_C(0xBB6BA2404F326791),
+ UINT64_C(0x8F4FBA3B342590BE), UINT64_C(0x053986B93DFB6B3E),
+ UINT64_C(0xBB6739F1190C7425), UINT64_C(0x32D4A55332F7E95F) } },
+ { { UINT64_C(0x0205A0EC0DDBFB21), UINT64_C(0x3010327D33AC3407),
+ UINT64_C(0xCF2F4DB33348999B), UINT64_C(0x660DB9F41551604A),
+ UINT64_C(0xC346C69A5D38D335), UINT64_C(0x64AAB3D338882479) },
+ { UINT64_C(0xA096B5E76AE44403), UINT64_C(0x6B4C9571645F76CD),
+ UINT64_C(0x72E1CD5F4711120F), UINT64_C(0x93EC42ACF27CC3E1),
+ UINT64_C(0x2D18D004A72ABB12), UINT64_C(0x232E9568C9841A04) } },
+ { { UINT64_C(0xFF01DB223CC7F908), UINT64_C(0x9F214F8FD13CDD3B),
+ UINT64_C(0x38DADBB7E0B014B5), UINT64_C(0x2C548CCC94245C95),
+ UINT64_C(0x714BE331809AFCE3), UINT64_C(0xBCC644109BFE957E) },
+ { UINT64_C(0xC21C2D215B957F80), UINT64_C(0xBA2D4FDCBB8A4C42),
+ UINT64_C(0xFA6CD4AF74817CEC), UINT64_C(0x9E7FB523C528EAD6),
+ UINT64_C(0xAED781FF7714B10E), UINT64_C(0xB52BB59294F04455) } },
+ { { UINT64_C(0xA578BD69868CC68B), UINT64_C(0xA40FDC8D603F2C08),
+ UINT64_C(0x53D79BD12D81B042), UINT64_C(0x1B136AF3A7587EAB),
+ UINT64_C(0x1ED4F939868A16DB), UINT64_C(0x775A61FBD0B98273) },
+ { UINT64_C(0xBA5C12A6E56BEF8C), UINT64_C(0xF926CE52DDDC8595),
+ UINT64_C(0xA13F5C8F586FE1F8), UINT64_C(0xEAC9F7F2060DBB54),
+ UINT64_C(0x70C0AC3A51AF4342), UINT64_C(0xC16E303C79CDA450) } },
+ { { UINT64_C(0xD0DADD6C8113F4EA), UINT64_C(0xF14E392207BDF09F),
+ UINT64_C(0x3FE5E9C2AA7D877C), UINT64_C(0x9EA95C1948779264),
+ UINT64_C(0xE93F65A74FCB8344), UINT64_C(0x9F40837E76D925A4) },
+ { UINT64_C(0x0EA6DA3F8271FFC7), UINT64_C(0x557FA529CC8F9B19),
+ UINT64_C(0x2613DBF178E6DDFD), UINT64_C(0x7A7523B836B1E954),
+ UINT64_C(0x20EB3168406A87FB), UINT64_C(0x64C21C1403ABA56A) } },
+ { { UINT64_C(0xE86C9C2DC032DD5F), UINT64_C(0x158CEB8E86F16A21),
+ UINT64_C(0x0279FF5368326AF1), UINT64_C(0x1FFE2E2B59F12BA5),
+ UINT64_C(0xD75A46DB86826D45), UINT64_C(0xE19B48411E33E6AC) },
+ { UINT64_C(0x5F0CC5240E52991C), UINT64_C(0x645871F98B116286),
+ UINT64_C(0xAB3B4B1EFCAEC5D3), UINT64_C(0x994C8DF051D0F698),
+ UINT64_C(0x06F890AFE5D13040), UINT64_C(0x72D9DC235F96C7C2) } },
+ { { UINT64_C(0x7C018DEEE7886A80), UINT64_C(0xFA2093308786E4A3),
+ UINT64_C(0xCEC8E2A3A4415CA1), UINT64_C(0x5C736FC1CC83CC60),
+ UINT64_C(0xFEF9788CF00C259F), UINT64_C(0xED5C01CBDD29A6AD) },
+ { UINT64_C(0x87834A033E20825B), UINT64_C(0x13B1239D123F9358),
+ UINT64_C(0x7E8869D0FBC286C1), UINT64_C(0xC4AB5AA324CE8609),
+ UINT64_C(0x38716BEEB6349208), UINT64_C(0x0BDF4F99B322AE21) } },
+ { { UINT64_C(0x6B97A2BF53E3494B), UINT64_C(0xA8AA05C570F7A13E),
+ UINT64_C(0x209709C2F1305B51), UINT64_C(0x57B31888DAB76F2C),
+ UINT64_C(0x75B2ECD7AA2A406A), UINT64_C(0x88801A00A35374A4) },
+ { UINT64_C(0xE1458D1C45C0471B), UINT64_C(0x5760E306322C1AB0),
+ UINT64_C(0x789A0AF1AD6AB0A6), UINT64_C(0x74398DE1F458B9CE),
+ UINT64_C(0x1652FF9F32E0C65F), UINT64_C(0xFAF1F9D5FFFB3A52) } },
+ },
+ {
+ { { UINT64_C(0xA05C751CD1D1B007), UINT64_C(0x016C213B0213E478),
+ UINT64_C(0x9C56E26CF4C98FEE), UINT64_C(0x6084F8B9E7B3A7C7),
+ UINT64_C(0xA0B042F6DECC1646), UINT64_C(0x4A6F3C1AFBF3A0BC) },
+ { UINT64_C(0x94524C2C51C9F909), UINT64_C(0xF3B3AD403A6D3748),
+ UINT64_C(0x18792D6E7CE1F9F5), UINT64_C(0x8EBC2FD7FC0C34FA),
+ UINT64_C(0x032A9F41780A1693), UINT64_C(0x34F9801E56A60019) } },
+ { { UINT64_C(0xB398290CF0DB3751), UINT64_C(0x01170580BA42C976),
+ UINT64_C(0x3E71AA2956560B89), UINT64_C(0x80817AAC50E6647B),
+ UINT64_C(0x35C833ADA0BE42DA), UINT64_C(0xFA3C6148F1BABA4E) },
+ { UINT64_C(0xC57BE645CD8F6253), UINT64_C(0x77CEE46BC657AD0D),
+ UINT64_C(0x830077310DEFD908), UINT64_C(0x92FE9BCE899CBA56),
+ UINT64_C(0x48450EC4BCEFFB5A), UINT64_C(0xE615148DF2F5F4BF) } },
+ { { UINT64_C(0xF55EDABB90B86166), UINT64_C(0x27F7D784075430A2),
+ UINT64_C(0xF53E822B9BF17161), UINT64_C(0x4A5B3B93AFE808DC),
+ UINT64_C(0x590BBBDED7272F55), UINT64_C(0x233D63FAEAEA79A1) },
+ { UINT64_C(0xD7042BEAFE1EBA07), UINT64_C(0xD2B9AEA010750D7E),
+ UINT64_C(0xD8D1E69031078AA5), UINT64_C(0x9E837F187E37BC8B),
+ UINT64_C(0x9558FF4F85008975), UINT64_C(0x93EDB837421FE867) } },
+ { { UINT64_C(0xAA6489DF83D55B5A), UINT64_C(0xEA092E4986BF27F7),
+ UINT64_C(0x4D8943A95FA2EFEC), UINT64_C(0xC9BAAE53720E1A8C),
+ UINT64_C(0xC055444B95A4F8A3), UINT64_C(0x93BD01E8A7C1206B) },
+ { UINT64_C(0xD97765B6714A27DF), UINT64_C(0xD622D954193F1B16),
+ UINT64_C(0x115CC35AF1503B15), UINT64_C(0x1DD5359FA9FA21F8),
+ UINT64_C(0x197C32996DFED1F1), UINT64_C(0xDEE8B7C9F77F2679) } },
+ { { UINT64_C(0x5405179F394FD855), UINT64_C(0xC9D6E24449FDFB33),
+ UINT64_C(0x70EBCAB4BD903393), UINT64_C(0x0D3A3899A2C56780),
+ UINT64_C(0x012C7256683D1A0A), UINT64_C(0xC688FC8880A48F3B) },
+ { UINT64_C(0x180957546F7DF527), UINT64_C(0x9E339B4B71315D16),
+ UINT64_C(0x90560C28A956BB12), UINT64_C(0x2BECEA60D42EEE8D),
+ UINT64_C(0x82AEB9A750632653), UINT64_C(0xED34353EDFA5CD6A) } },
+ { { UINT64_C(0x82154D2C91AECCE4), UINT64_C(0x312C60705041887F),
+ UINT64_C(0xECF589F3FB9FBD71), UINT64_C(0x67660A7DB524BDE4),
+ UINT64_C(0xE99B029D724ACF23), UINT64_C(0xDF06E4AF6D1CD891) },
+ { UINT64_C(0x07806CB580EE304D), UINT64_C(0x0C70BB9F7443A8F8),
+ UINT64_C(0x01EC341408B0830A), UINT64_C(0xFD7B63C35A81510B),
+ UINT64_C(0xE90A0A39453B5F93), UINT64_C(0xAB700F8F9BC71725) } },
+ { { UINT64_C(0x9401AEC2B9F00793), UINT64_C(0x064EC4F4B997F0BF),
+ UINT64_C(0xDC0CC1FD849240C8), UINT64_C(0x39A75F37B6E92D72),
+ UINT64_C(0xAA43CA5D0224A4AB), UINT64_C(0x9C4D632554614C47) },
+ { UINT64_C(0x1767366FC6709DA3), UINT64_C(0xA6B482D123479232),
+ UINT64_C(0x54DC6DDC84D63E85), UINT64_C(0x0ACCB5ADC99D3B9E),
+ UINT64_C(0x211716BBE8AA3ABF), UINT64_C(0xD0FE25AD69EC6406) } },
+ { { UINT64_C(0x0D5C1769DF85C705), UINT64_C(0x7086C93DA409DCD1),
+ UINT64_C(0x9710839D0E8D75D8), UINT64_C(0x17B7DB75EBDD4177),
+ UINT64_C(0xAF69EB58F649A809), UINT64_C(0x6EF19EA28A84E220) },
+ { UINT64_C(0x36EB5C6665C278B2), UINT64_C(0xD2A1512881EA9D65),
+ UINT64_C(0x4FCBA840769300AD), UINT64_C(0xC2052CCDC8E536E5),
+ UINT64_C(0x9CAEE014AC263B8F), UINT64_C(0x56F7ED7AF9239663) } },
+ { { UINT64_C(0xF6FA251FAC9E09E1), UINT64_C(0xA3775605955A2853),
+ UINT64_C(0x977B8D21F2A4BD78), UINT64_C(0xF68AA7FF3E096410),
+ UINT64_C(0x01AB055265F88419), UINT64_C(0xC4C8D77EBB93F64E) },
+ { UINT64_C(0x718251113451FE64), UINT64_C(0xFA0F905B46F9BAF0),
+ UINT64_C(0x79BE3BF3CA49EF1A), UINT64_C(0x831109B26CB02071),
+ UINT64_C(0x765F935FC4DDBFE5), UINT64_C(0x6F99CD1480E5A3BA) } },
+ { { UINT64_C(0xD2E8DA04234F91FF), UINT64_C(0x4DED4D6D813867AA),
+ UINT64_C(0x3B50175DE0A0D945), UINT64_C(0x55AC74064EB78137),
+ UINT64_C(0xE9FA7F6EE1D47730), UINT64_C(0x2C1715315CBF2176) },
+ { UINT64_C(0xA521788F2BE7A47D), UINT64_C(0x95B15A273FCF1AB3),
+ UINT64_C(0xAADA6401F28A946A), UINT64_C(0x628B2EF48B4E898B),
+ UINT64_C(0x0E6F46296D6592CC), UINT64_C(0x997C7094A723CADD) } },
+ { { UINT64_C(0x878BCE116AFE80C6), UINT64_C(0xA89ABC9D007BBA38),
+ UINT64_C(0xB0C1F87BA7CC267F), UINT64_C(0x86D33B9D5104FF04),
+ UINT64_C(0xB0504B1B2EF1BA42), UINT64_C(0x21693048B2827E88) },
+ { UINT64_C(0x11F1CCD579CFCD14), UINT64_C(0x59C09FFA94AD227E),
+ UINT64_C(0x95A4ADCB3EA91ACF), UINT64_C(0x1346238BB4370BAA),
+ UINT64_C(0xB099D2023E1367B0), UINT64_C(0xCF5BBDE690F23CEA) } },
+ { { UINT64_C(0x453299BBBCB3BE5E), UINT64_C(0x123C588E38E9FF97),
+ UINT64_C(0x8C115DD9F6A2E521), UINT64_C(0x6E333C11FF7D4B98),
+ UINT64_C(0x9DD061E5DA73E736), UINT64_C(0xC6AB7B3A5CA53056) },
+ { UINT64_C(0xF1EF3EE35B30A76B), UINT64_C(0xADD6B44A961BA11F),
+ UINT64_C(0x7BB00B752CA6E030), UINT64_C(0x270272E82FE270AD),
+ UINT64_C(0x23BC6F4F241A9239), UINT64_C(0x88581E130BB94A94) } },
+ { { UINT64_C(0xBD225A6924EEF67F), UINT64_C(0x7CFD96140412CEB7),
+ UINT64_C(0xF6DE167999AC298E), UINT64_C(0xB20FD895ED6C3571),
+ UINT64_C(0x03C73B7861836C56), UINT64_C(0xEE3C3A16ABA6CB34) },
+ { UINT64_C(0x9E8C56674138408A), UINT64_C(0xEC25FCB12DD6EBDF),
+ UINT64_C(0xC54C33FDDBBDF6E3), UINT64_C(0x93E0913B4A3C9DD4),
+ UINT64_C(0x66D7D13535EDEED4), UINT64_C(0xD29A36C4453FB66E) } },
+ { { UINT64_C(0x7F192F039F1943AF), UINT64_C(0x6488163F4E0B5FB0),
+ UINT64_C(0x66A45C6953599226), UINT64_C(0x924E2E439AD15A73),
+ UINT64_C(0x8B553DB742A99D76), UINT64_C(0x4BC6B53B0451F521) },
+ { UINT64_C(0xC029B5EF101F8AD6), UINT64_C(0x6A4DA71CC507EED9),
+ UINT64_C(0x3ADFAEC030BB22F3), UINT64_C(0x81BCAF7AB514F85B),
+ UINT64_C(0x2E1E6EFF5A7E60D3), UINT64_C(0x5270ABC0AE39D42F) } },
+ { { UINT64_C(0x86D56DEB3901F0F8), UINT64_C(0x1D0BC792EED5F650),
+ UINT64_C(0x1A2DDFD8CA1114A3), UINT64_C(0x94ABF4B1F1DD316D),
+ UINT64_C(0xF72179E43D9F18EF), UINT64_C(0x52A0921E9AA2CABF) },
+ { UINT64_C(0xECDA9E27A7452883), UINT64_C(0x7E90850AAFD771B4),
+ UINT64_C(0xD40F87EA9CC0465C), UINT64_C(0x8CFCB60A865CDA36),
+ UINT64_C(0x3DBEC2CC7C650942), UINT64_C(0x071A4EE7E718CA9D) } },
+ { { UINT64_C(0x73C0E4FF276AC5F3), UINT64_C(0xE7BA5A6ABDB97EA1),
+ UINT64_C(0x638CA54EC5808398), UINT64_C(0x8258DC82413855E5),
+ UINT64_C(0x35DDD2E957F07614), UINT64_C(0xF98DD6921DC13BF9) },
+ { UINT64_C(0x3A4C0088F16DCD84), UINT64_C(0xF192EADD833D83F9),
+ UINT64_C(0x3C26C931A6D61D29), UINT64_C(0x589FDD52DE0AD7A1),
+ UINT64_C(0x7CD83DD20442D37F), UINT64_C(0x1E47E777403ECBFC) } },
+ },
+ {
+ { { UINT64_C(0x2AF8ED8170D4D7BC), UINT64_C(0xABC3E15FB632435C),
+ UINT64_C(0x4C0E726F78219356), UINT64_C(0x8C1962A1B87254C4),
+ UINT64_C(0x30796A71C9E7691A), UINT64_C(0xD453EF19A75A12EE) },
+ { UINT64_C(0x535F42C213AE4964), UINT64_C(0x86831C3C0DA9586A),
+ UINT64_C(0xB7F1EF35E39A7A58), UINT64_C(0xA2789AE2D459B91A),
+ UINT64_C(0xEADBCA7F02FD429D), UINT64_C(0x94F215D465290F57) } },
+ { { UINT64_C(0x37ED2BE51CFB79AC), UINT64_C(0x801946F3E7AF84C3),
+ UINT64_C(0xB061AD8AE77C2F00), UINT64_C(0xE87E1A9A44DE16A8),
+ UINT64_C(0xDF4F57C87EE490FF), UINT64_C(0x4E793B49005993ED) },
+ { UINT64_C(0xE1036387BCCB593F), UINT64_C(0xF174941195E09B80),
+ UINT64_C(0x59CB20D15AB42F91), UINT64_C(0xA738A18DAC0FF033),
+ UINT64_C(0xDA501A2E2AC1E7F4), UINT64_C(0x1B67EDA084D8A6E0) } },
+ { { UINT64_C(0x1D27EFCE1080E90B), UINT64_C(0xA28152463FD01DC6),
+ UINT64_C(0x99A3FB83CAA26D18), UINT64_C(0xD27E6133B82BABBE),
+ UINT64_C(0x61030DFDD783DD60), UINT64_C(0x295A291373C78CB8) },
+ { UINT64_C(0x8707A2CF68BE6A92), UINT64_C(0xC9C2FB98EEB3474A),
+ UINT64_C(0x7C3FD412A2B176B8), UINT64_C(0xD5B52E2FC7202101),
+ UINT64_C(0x24A63030F0A6D536), UINT64_C(0x05842DE304648EC0) } },
+ { { UINT64_C(0x67477CDC30577AC9), UINT64_C(0x51DD9775244F92A8),
+ UINT64_C(0x31FD60B9917EEC66), UINT64_C(0xACD95BD4D66C5C1D),
+ UINT64_C(0x2E0551F3BF9508BA), UINT64_C(0x121168E1688CB243) },
+ { UINT64_C(0x8C0397404540D230), UINT64_C(0xC4ED3CF6009ECDF9),
+ UINT64_C(0x191825E144DB62AF), UINT64_C(0x3EE8ACABC4A030DA),
+ UINT64_C(0x8AB154A894081504), UINT64_C(0x1FE09E4B486C9CD0) } },
+ { { UINT64_C(0x512F82F9D113450B), UINT64_C(0x5878C9012DBC9197),
+ UINT64_C(0xDB87412BE13F355B), UINT64_C(0x0A0A4A9B935B8A5E),
+ UINT64_C(0x818587BDF25A5351), UINT64_C(0xE807931031E3D9C7) },
+ { UINT64_C(0x8B1D47C7611BC1B1), UINT64_C(0x51722B5872A823F2),
+ UINT64_C(0x6F97EE8A53B36B3E), UINT64_C(0x6E085AAC946DD453),
+ UINT64_C(0x2EC5057DE65E6533), UINT64_C(0xF82D9D714BB18801) } },
+ { { UINT64_C(0xAD81FA938BA5AA8E), UINT64_C(0x723E628E8F7AA69E),
+ UINT64_C(0x0BA7C2DEEF35937C), UINT64_C(0x83A43EC56DECFB40),
+ UINT64_C(0xF520F849E60C4F2D), UINT64_C(0x8260E8AE457E3B5E) },
+ { UINT64_C(0x7CE874F0BF1D9ED7), UINT64_C(0x5FDE35537F1A5466),
+ UINT64_C(0x5A63777C0C162DBB), UINT64_C(0x0FD04F8CDAD87289),
+ UINT64_C(0xCA2D9E0E640761D5), UINT64_C(0x4615CFF838501ADB) } },
+ { { UINT64_C(0x9422789B110B4A25), UINT64_C(0x5C26779F70AD8CC1),
+ UINT64_C(0x4EE6A748EC4F1E14), UINT64_C(0xFB584A0D5C7AB5E0),
+ UINT64_C(0xED1DCB0BFB21EE66), UINT64_C(0xDBED1F0011C6863C) },
+ { UINT64_C(0xD2969269B1B1D187), UINT64_C(0xF7D0C3F2AFE964E6),
+ UINT64_C(0xE05EE93F12BB865E), UINT64_C(0x1AFB7BEEED79118E),
+ UINT64_C(0x220AF1380F0FE453), UINT64_C(0x1463AA1A52782AB9) } },
+ { { UINT64_C(0x7C139D56D7DBE5F9), UINT64_C(0xFC16E6110B83685B),
+ UINT64_C(0xFA723C029018463C), UINT64_C(0xC472458C840BF5D7),
+ UINT64_C(0x4D8093590AF07591), UINT64_C(0x418D88303308DFD9) },
+ { UINT64_C(0x9B381E040C365AE3), UINT64_C(0x3780BF33F8190FD1),
+ UINT64_C(0x45397418DD03E854), UINT64_C(0xA95D030F4E51E491),
+ UINT64_C(0x87C8C686E3286CEA), UINT64_C(0x01C773BF900B5F83) } },
+ { { UINT64_C(0xDABE347578673B02), UINT64_C(0x4F0F25CEF6E7395E),
+ UINT64_C(0x3117ABB9D181AD45), UINT64_C(0x4B559F88AA13DE0B),
+ UINT64_C(0xFD8EFE78EA7C9745), UINT64_C(0x080600475DD21682) },
+ { UINT64_C(0xC0F5DE4BD4C86FFC), UINT64_C(0x4BB14B1EF21AB6A2),
+ UINT64_C(0xACB53A6CF50C1D12), UINT64_C(0x46AAC4505CC9162E),
+ UINT64_C(0x049C51E02DE240B6), UINT64_C(0xBB2DC016E383C3B0) } },
+ { { UINT64_C(0xA3C56AD28E438C92), UINT64_C(0x7C43F98FB2CEAF1A),
+ UINT64_C(0x397C44F7E2150778), UINT64_C(0x48D17AB771A24131),
+ UINT64_C(0xCC5138631E2ACDA9), UINT64_C(0x2C76A55EF0C9BAC9) },
+ { UINT64_C(0x4D74CDCE7EA4BB7B), UINT64_C(0x834BD5BFB1B3C2BA),
+ UINT64_C(0x46E2911ECCC310A4), UINT64_C(0xD3DE84AA0FC1BF13),
+ UINT64_C(0x27F2892F80A03AD3), UINT64_C(0x85B476203BD2F08B) } },
+ { { UINT64_C(0xAB1CB818567AF533), UINT64_C(0x273B4537BAC2705A),
+ UINT64_C(0x133066C422C84AB6), UINT64_C(0xC3590DE64830BFC1),
+ UINT64_C(0xEA2978695E4742D0), UINT64_C(0xF6D8C6944F3164C0) },
+ { UINT64_C(0x09E85F3DC1249588), UINT64_C(0x6C2BB05D4EC64DF7),
+ UINT64_C(0xD267115E8B78000F), UINT64_C(0x07C5D7AEC7E4A316),
+ UINT64_C(0xCB1187BA4619E5BD), UINT64_C(0x57B1D4EFA43F7EEE) } },
+ { { UINT64_C(0x3618891FC8176A96), UINT64_C(0x62C4B084E5808B97),
+ UINT64_C(0xDE5585464DD95D6E), UINT64_C(0x27A8133E730B2EA4),
+ UINT64_C(0xE07CEEC36AF318A0), UINT64_C(0x0ACC1286CE24FD2C) },
+ { UINT64_C(0x8A48FE4ADD4D307C), UINT64_C(0x71A9BA9C18CDE0DA),
+ UINT64_C(0x655E2B66D5D79747), UINT64_C(0x409FE856A79AEDC7),
+ UINT64_C(0xC5A9F244D287E5CF), UINT64_C(0xCCE103844E82EC39) } },
+ { { UINT64_C(0x00675BA7F25D364C), UINT64_C(0x7A7F162968D36BDF),
+ UINT64_C(0x35EC468AA9E23F29), UINT64_C(0xF797AC502D926E6C),
+ UINT64_C(0x639BA4534B4F4376), UINT64_C(0xD71B430F51FF9519) },
+ { UINT64_C(0xB8C439EC2CF5635C), UINT64_C(0x0CE4C8D181980393),
+ UINT64_C(0x4C5362A964123B15), UINT64_C(0x6E0421E0FFDCF096),
+ UINT64_C(0x624A855F10D1F914), UINT64_C(0x7D8F3AB7614DCD29) } },
+ { { UINT64_C(0xD9219ADAB3493CE0), UINT64_C(0x971B243A52F09AE5),
+ UINT64_C(0xC16C9BF8E24E3674), UINT64_C(0x026D408DCE68C7CD),
+ UINT64_C(0xF9B33DD9358209E3), UINT64_C(0x02D0595DF3B2A206) },
+ { UINT64_C(0xBF99427160D15640), UINT64_C(0x6DA7A04E15B5466A),
+ UINT64_C(0x03AA4ED81CADB50D), UINT64_C(0x1548F029129A4253),
+ UINT64_C(0x41741F7EB842865A), UINT64_C(0x859FE0A4A3F88C98) } },
+ { { UINT64_C(0x80DE085A05FD7553), UINT64_C(0x4A4AB91EB897566B),
+ UINT64_C(0x33BCD4752F1C173F), UINT64_C(0x4E238896C100C013),
+ UINT64_C(0x1C88500DD614B34B), UINT64_C(0x0401C5F6C3BA9E23) },
+ { UINT64_C(0x8E8003C4D0AF0DE5), UINT64_C(0x19B1DFB59D0DCBB9),
+ UINT64_C(0x4A3640A9EBEF7AB6), UINT64_C(0xEDAFD65B959B15F6),
+ UINT64_C(0x8092EF7F7FB95821), UINT64_C(0xAB8DD52ECE2E45D1) } },
+ { { UINT64_C(0xD1F2D6B8B9CFE6BF), UINT64_C(0x6358810B00073F6F),
+ UINT64_C(0x5FCE5993D712106E), UINT64_C(0x5EE6B2711C024C91),
+ UINT64_C(0xD0248FF5453DB663), UINT64_C(0xD6D81CB2ADB835E8) },
+ { UINT64_C(0x8696CFECFDFCB4C7), UINT64_C(0x696B7FCB53BC9045),
+ UINT64_C(0xAB4D3807DDA56981), UINT64_C(0x2F9980521E4B943B),
+ UINT64_C(0x8AA76ADB166B7F18), UINT64_C(0x6393430152A2D7ED) } },
+ },
+ {
+ { { UINT64_C(0xBBCCCE39A368EFF6), UINT64_C(0xD8CAABDF8CEB5C43),
+ UINT64_C(0x9EAE35A5D2252FDA), UINT64_C(0xA8F4F20954E7DD49),
+ UINT64_C(0xA56D72A6295100FD), UINT64_C(0x20FC1FE856767727) },
+ { UINT64_C(0xBF60B2480BBAA5AB), UINT64_C(0xA4F3CE5A313911F2),
+ UINT64_C(0xC2A67AD4B93DAB9C), UINT64_C(0x18CD0ED022D71F39),
+ UINT64_C(0x04380C425F304DB2), UINT64_C(0x26420CBB6729C821) } },
+ { { UINT64_C(0x26BD07D6BDFBCAE8), UINT64_C(0x10B5173FDF01A80A),
+ UINT64_C(0xD831C5466798B96C), UINT64_C(0x1D6B41081D3F3859),
+ UINT64_C(0x501D38EC991B9EC7), UINT64_C(0x26319283D78431A9) },
+ { UINT64_C(0x8B85BAF7118B343C), UINT64_C(0x4696CDDD58DEF7D0),
+ UINT64_C(0xEFC7C1107ACDCF58), UINT64_C(0xD9AF415C848D5842),
+ UINT64_C(0x6B5A06BC0AC7FDAC), UINT64_C(0x7D623E0DA344319B) } },
+ { { UINT64_C(0x4C0D78060C9D3547), UINT64_C(0x993F048DCF2AED47),
+ UINT64_C(0x5217C453E4B57E22), UINT64_C(0xB4669E35F4172B28),
+ UINT64_C(0x509A3CD049F999F8), UINT64_C(0xD19F863287C69D41) },
+ { UINT64_C(0xE14D01E84C8FDED0), UINT64_C(0x342880FDEAFD9E1C),
+ UINT64_C(0x0E17BFF270DC2BF0), UINT64_C(0x46560B7BC0186400),
+ UINT64_C(0xE28C7B9C49A4DD34), UINT64_C(0x182119160F325D06) } },
+ { { UINT64_C(0x46D70888D7E02E18), UINT64_C(0x7C806954D9F11FD9),
+ UINT64_C(0xE4948FCA4FBEA271), UINT64_C(0x7D6C7765BD80A9DF),
+ UINT64_C(0x1B470EA6F3871C71), UINT64_C(0xD62DE2448330A570) },
+ { UINT64_C(0xDAECDDC1C659C3A7), UINT64_C(0x8621E513077F7AFC),
+ UINT64_C(0x56C7CD84CAEEEF13), UINT64_C(0xC60C910FC685A356),
+ UINT64_C(0xE68BC5C59DD93DDC), UINT64_C(0xD904E89FFEB64895) } },
+ { { UINT64_C(0x75D874FB8BA7917A), UINT64_C(0x18FA7F53FD043BD4),
+ UINT64_C(0x212A0AD71FC3979E), UINT64_C(0x5703A7D95D6EAC0E),
+ UINT64_C(0x222F7188017DEAD5), UINT64_C(0x1EC687B70F6C1817) },
+ { UINT64_C(0x23412FC3238BACB6), UINT64_C(0xB85D70E954CED154),
+ UINT64_C(0xD4E06722BDA674D0), UINT64_C(0x3EA5F17836F5A0C2),
+ UINT64_C(0x7E7D79CFF5C6D2CA), UINT64_C(0x1FFF94643DBB3C73) } },
+ { { UINT64_C(0x916E19D0F163E4A8), UINT64_C(0x1E6740E71489DF17),
+ UINT64_C(0x1EAF9723339F3A47), UINT64_C(0x22F0ED1A124B8DAD),
+ UINT64_C(0x39C9166C49C3DD04), UINT64_C(0x628E7FD4CE1E9ACC) },
+ { UINT64_C(0x124DDF2740031676), UINT64_C(0x002569391EDDB9BE),
+ UINT64_C(0xD39E25E7D360B0DA), UINT64_C(0x6E3015A84AA6C4C9),
+ UINT64_C(0xC6A2F643623EDA09), UINT64_C(0xBEFF2D1250AA99FB) } },
+ { { UINT64_C(0x1FEEF7CE93EE8089), UINT64_C(0xC6B180BC252DD7BD),
+ UINT64_C(0xA16FB20B1788F051), UINT64_C(0xD86FD392E046ED39),
+ UINT64_C(0xDA0A36119378CE1D), UINT64_C(0x121EF3E7A5F7A61D) },
+ { UINT64_C(0x94D2206192D13CAE), UINT64_C(0x5076046A77C72E08),
+ UINT64_C(0xF18BC2337D2308B9), UINT64_C(0x004DB3C517F977B1),
+ UINT64_C(0xD05AE3990471C11D), UINT64_C(0x86A2A55785CD1726) } },
+ { { UINT64_C(0xB8D9B28672107804), UINT64_C(0xB5A7C4133303B79B),
+ UINT64_C(0x927EEF785FA37DED), UINT64_C(0xA1C5CF1EAD67DABA),
+ UINT64_C(0xAA5E3FB27360E7C7), UINT64_C(0x8354E61A0A0C0993) },
+ { UINT64_C(0x2EC73AF97F5458CC), UINT64_C(0xDE4CB48848474325),
+ UINT64_C(0x2DD134C77209BC69), UINT64_C(0xB70C5567451A2ABE),
+ UINT64_C(0x2CD1B2008E293018), UINT64_C(0x15F8DA7AD33C0D72) } },
+ { { UINT64_C(0x5DC386D0A8790657), UINT64_C(0xA4FDF676BC4D88BB),
+ UINT64_C(0x1B21F38F48BC6C49), UINT64_C(0xCDCC7FAA543A7003),
+ UINT64_C(0xEA97E7AA8C9CF72C), UINT64_C(0xA6B883F450D938A8) },
+ { UINT64_C(0x51936F3AA3A10F27), UINT64_C(0x0170785FDECC76BF),
+ UINT64_C(0x7539ECE1908C578A), UINT64_C(0x5D9C8A8E0F3E8C25),
+ UINT64_C(0x8681B43B9E4717A7), UINT64_C(0x94F42507A9D83E39) } },
+ { { UINT64_C(0xBBE11CA8A55ADDE7), UINT64_C(0x39E6F5CF3BC0896B),
+ UINT64_C(0x1447314E1D2D8D94), UINT64_C(0x45B481255B012F8A),
+ UINT64_C(0x41AD23FA08AD5283), UINT64_C(0x837243E241D13774) },
+ { UINT64_C(0x1FC0BD9DBADCAA46), UINT64_C(0x8DF164ED26E84CAE),
+ UINT64_C(0x8FF70EC041017176), UINT64_C(0x23AD4BCE5C848BA7),
+ UINT64_C(0x89246FDE97A19CBB), UINT64_C(0xA5EF987B78397991) } },
+ { { UINT64_C(0x111AF1B74757964D), UINT64_C(0x1D25D351DDBBF258),
+ UINT64_C(0x4161E7767D2B06D6), UINT64_C(0x6EFD26911CAC0C5B),
+ UINT64_C(0x633B95DB211BFAEB), UINT64_C(0x9BEDFA5AE2BDF701) },
+ { UINT64_C(0xADAC2B0B73E099C8), UINT64_C(0x436F0023BFB16BFF),
+ UINT64_C(0xB91B100230F55854), UINT64_C(0xAF6A2097F4C6C8B7),
+ UINT64_C(0x3FF65CED3AD7B3D9), UINT64_C(0x6FA2626F330E56DF) } },
+ { { UINT64_C(0x3D28BF2DFFCCFD07), UINT64_C(0x0514F6FFD989603B),
+ UINT64_C(0xB95196295514787A), UINT64_C(0xA1848121C3DB4E9C),
+ UINT64_C(0x47FE2E392A3D4595), UINT64_C(0x506F5D8211B73ED4) },
+ { UINT64_C(0xA2257AE7A600D8BB), UINT64_C(0xD659DBD10F9F122C),
+ UINT64_C(0xDB0FDC6764DF160F), UINT64_C(0xFF3793397CB19690),
+ UINT64_C(0xDF4366B898E72EC1), UINT64_C(0x97E72BECDF437EB8) } },
+ { { UINT64_C(0x81DCEA271C81E5D9), UINT64_C(0x7E1B6CDA6717FC49),
+ UINT64_C(0xAA36B3B511EAE80D), UINT64_C(0x1306687C3CD7CBB3),
+ UINT64_C(0xED670235C4E89064), UINT64_C(0x9D3B000958A94760) },
+ { UINT64_C(0x5A64E158E6A6333C), UINT64_C(0x1A8B4A3649453203),
+ UINT64_C(0xF1CAD7241F77CC21), UINT64_C(0x693EBB4B70518EF7),
+ UINT64_C(0xFB47BD810F39C91A), UINT64_C(0xCFE63DA2FA4BC64B) } },
+ { { UINT64_C(0x82C1C684EAA66108), UINT64_C(0xE32262184CFE79FC),
+ UINT64_C(0x3F28B72B849C720E), UINT64_C(0x137FB3558FEE1CA8),
+ UINT64_C(0x4D18A9CDE4F90C4E), UINT64_C(0xC0344227CC3E46FA) },
+ { UINT64_C(0x4FD5C08E79CDA392), UINT64_C(0x65DB20DB8ADC87B5),
+ UINT64_C(0x86F95D5B916C1B84), UINT64_C(0x7EDA387117BB2B7C),
+ UINT64_C(0x18CCF7E7669A533B), UINT64_C(0x5E92421CECAD0E06) } },
+ { { UINT64_C(0x26063E124174B08B), UINT64_C(0xE621D9BE70DE8E4D),
+ UINT64_C(0xAEA0FD0F5ECDF350), UINT64_C(0x0D9F69E49C20E5C9),
+ UINT64_C(0xD3DADEB90BBE2918), UINT64_C(0xD7B9B5DB58AA2F71) },
+ { UINT64_C(0x7A971DD73364CAF8), UINT64_C(0x702616A3C25D4BE4),
+ UINT64_C(0xA30F0FA1A9E30071), UINT64_C(0x98AB24385573BC69),
+ UINT64_C(0xCBC63CDF6FEC2E22), UINT64_C(0x965F90EDCC901B9B) } },
+ { { UINT64_C(0xD53B592D71E15BB3), UINT64_C(0x1F03C0E98820E0D0),
+ UINT64_C(0xCE93947D3CCCB726), UINT64_C(0x2790FEE01D547590),
+ UINT64_C(0x4401D847C59CDD7A), UINT64_C(0x72D69120A926DD9D) },
+ { UINT64_C(0x38B8F21D4229F289), UINT64_C(0x9F412E407FE978AF),
+ UINT64_C(0xAE07901BCDB59AF1), UINT64_C(0x1E6BE5EBD1D4715E),
+ UINT64_C(0x3715BD8B18C96BEF), UINT64_C(0x4B71F6E6E11B3798) } },
+ },
+ {
+ { { UINT64_C(0x11A8FDE5F0CE2DF4), UINT64_C(0xBC70CA3EFA8D26DF),
+ UINT64_C(0x6818C275C74DFE82), UINT64_C(0x2B0294AC38373A50),
+ UINT64_C(0x584C4061E8E5F88F), UINT64_C(0x1C05C1CA7342383A) },
+ { UINT64_C(0x263895B3911430EC), UINT64_C(0xEF9B0032A5171453),
+ UINT64_C(0x144359DA84DA7F0C), UINT64_C(0x76E3095A924A09F2),
+ UINT64_C(0x612986E3D69AD835), UINT64_C(0x70E03ADA392122AF) } },
+ { { UINT64_C(0xFEB707EE67AAD17B), UINT64_C(0xBB21B28783042995),
+ UINT64_C(0x26DE16459A0D32BA), UINT64_C(0x9A2FF38A1FFB9266),
+ UINT64_C(0x4E5AD96D8F578B4A), UINT64_C(0x26CC0655883E7443) },
+ { UINT64_C(0x1D8EECAB2EE9367A), UINT64_C(0x42B84337881DE2F8),
+ UINT64_C(0xE49B2FAED758AE41), UINT64_C(0x6A9A22904A85D867),
+ UINT64_C(0x2FB89DCEE68CBA86), UINT64_C(0xBC2526357F09A982) } },
+ { { UINT64_C(0xADC794368C61AAAC), UINT64_C(0x24C7FD135E926563),
+ UINT64_C(0xEF9FAAA40406C129), UINT64_C(0xF4E6388C8B658D3C),
+ UINT64_C(0x7262BEB41E435BAF), UINT64_C(0x3BF622CCFDAEAC99) },
+ { UINT64_C(0xD359F7D84E1AEDDC), UINT64_C(0x05DC4F8CD78C17B7),
+ UINT64_C(0xB18CF03229498BA5), UINT64_C(0xC67388CA85BF35AD),
+ UINT64_C(0x8A7A6AA262AA4BC8), UINT64_C(0x0B8F458E72F4627A) } },
+ { { UINT64_C(0x3FB812EEC68E4488), UINT64_C(0x53C5EAA460EF7281),
+ UINT64_C(0xE57241838FBEFBE4), UINT64_C(0x2B7D49F4A4B24A05),
+ UINT64_C(0x23B138D0710C0A43), UINT64_C(0x16A5B4C1A85EC1DB) },
+ { UINT64_C(0x7CC1F3D7305FEB02), UINT64_C(0x52F7947D5B6C1B54),
+ UINT64_C(0x1BDA23128F56981C), UINT64_C(0x68663EAEB4080A01),
+ UINT64_C(0x8DD7BA7E9F999B7F), UINT64_C(0xD8768D19B686580C) } },
+ { { UINT64_C(0xBCD0E0AD7AFDDA94), UINT64_C(0x95A0DBBE34A30687),
+ UINT64_C(0xBBE3C3DF8C5E2665), UINT64_C(0x742BECD8EBF2BC16),
+ UINT64_C(0x300CEB483FA163A6), UINT64_C(0x0C5D02EE4663354B) },
+ { UINT64_C(0xE4FB9AD6B5E606A4), UINT64_C(0x93F507B8CF49FF95),
+ UINT64_C(0x9406A90C585C193B), UINT64_C(0xAD1440C14ECF9517),
+ UINT64_C(0x184CB4759CEA53F1), UINT64_C(0x6855C4748EF11302) } },
+ { { UINT64_C(0x00ECB523EDCAFA52), UINT64_C(0x0DA0AE0E086F69D3),
+ UINT64_C(0xC384DE15C242F347), UINT64_C(0xFB050E6E848C12B7),
+ UINT64_C(0x22F6765464E015CE), UINT64_C(0xCBDC2A487CA122F2) },
+ { UINT64_C(0xA940D973445FB02C), UINT64_C(0x00F31E783767D89D),
+ UINT64_C(0x2B65A237613DABDD), UINT64_C(0x2BE0AB05C875AE09),
+ UINT64_C(0xB22E54FDBA204F8E), UINT64_C(0x65E2029D0F7687B9) } },
+ { { UINT64_C(0xFFD825381855A71C), UINT64_C(0x26A330B3438BD8D8),
+ UINT64_C(0x89628311F9D8C5F9), UINT64_C(0x8D5FB9CF953738A0),
+ UINT64_C(0xCB7159C9EDFCD4E5), UINT64_C(0xD64E52302064C7C2) },
+ { UINT64_C(0xF858ED80689F3CFE), UINT64_C(0x4830E30956128B67),
+ UINT64_C(0x2E1692DAE0E90688), UINT64_C(0xAB818913CA9CC232),
+ UINT64_C(0xE2E30C23A5D229A6), UINT64_C(0xA544E8B10E740E23) } },
+ { { UINT64_C(0x1C15E569DC61E6CC), UINT64_C(0x8FD7296758FC7800),
+ UINT64_C(0xE61E7DB737A9DFC5), UINT64_C(0x3F34A9C65AFD7822),
+ UINT64_C(0x0A11274219E80773), UINT64_C(0xA353460C4760FC58) },
+ { UINT64_C(0x2FB7DEEBB3124C71), UINT64_C(0x484636272D4009CC),
+ UINT64_C(0x399D1933C3A10370), UINT64_C(0x7EB1945054388DBD),
+ UINT64_C(0x8ECCE6397C2A006A), UINT64_C(0x3D565DAF55C932A0) } },
+ { { UINT64_C(0xCEF57A9FD9ADAE53), UINT64_C(0xE2EB27D7F83FD8CD),
+ UINT64_C(0x4AC8F7199BBD2DDE), UINT64_C(0x604283AAE91ABFB7),
+ UINT64_C(0xB6A4E11534799F87), UINT64_C(0x2B253224E4C2A8F3) },
+ { UINT64_C(0xC34F8B92C8782294), UINT64_C(0xC74D697DFCC2CB6B),
+ UINT64_C(0xD990411BC2C84C46), UINT64_C(0x2807B5C631EA4955),
+ UINT64_C(0x14AE2B93B9EB27F5), UINT64_C(0xF0AE96A76163EDFA) } },
+ { { UINT64_C(0xA7BDCBB442DB7180), UINT64_C(0xC9FAA41FEDCA752F),
+ UINT64_C(0x147F91B4E820F401), UINT64_C(0x1E6CEF86F5F2645F),
+ UINT64_C(0xB4AB4D7F31FE711D), UINT64_C(0xCE68FB3C743EF882) },
+ { UINT64_C(0xB9D7D6823EF2FCFF), UINT64_C(0xF6893811020DCAFD),
+ UINT64_C(0x30D9A50CBF81E760), UINT64_C(0x7F247D06B9B87228),
+ UINT64_C(0x143D4FEC5F40CFC0), UINT64_C(0x21D78D73329B2A88) } },
+ { { UINT64_C(0x06B3FF8AED3F2055), UINT64_C(0x50482C77522BE214),
+ UINT64_C(0x8DF69CD8DDF54620), UINT64_C(0x6D1DB204F78A1165),
+ UINT64_C(0x459AE4A29AFE6BF2), UINT64_C(0xC23A9FFD24AC871E) },
+ { UINT64_C(0xB7FD22E389E85D81), UINT64_C(0x297F1F6B122E9978),
+ UINT64_C(0xAB283D66144BE1CE), UINT64_C(0xC1F90AC2C00C614E),
+ UINT64_C(0x5465576E3224CD09), UINT64_C(0x8E8D910D441B6059) } },
+ { { UINT64_C(0xF73A060AAAA228BC), UINT64_C(0xCF1B078356EFF87D),
+ UINT64_C(0x11EF17C0A54C9133), UINT64_C(0x9E476B1576A4DAA5),
+ UINT64_C(0x5624FEAC8018FB92), UINT64_C(0x9826A0FCCFEEC1B9) },
+ { UINT64_C(0xB732F7FE2DFE2046), UINT64_C(0x9260BD9F3B40DA6A),
+ UINT64_C(0xCC9F908F4F231773), UINT64_C(0x4827FEB9DAFC0D55),
+ UINT64_C(0x07D32E85538ACE95), UINT64_C(0xAD9F897CB8EDAF37) } },
+ { { UINT64_C(0x2F75B82FE3415498), UINT64_C(0xF99CAC5FF1015F30),
+ UINT64_C(0x766408247D7F25DE), UINT64_C(0x714BC9CDEE74C047),
+ UINT64_C(0x70F847BF07448879), UINT64_C(0xA14481DE072165C0) },
+ { UINT64_C(0x9BFA59E3DB1140A8), UINT64_C(0x7B9C7FF0FCD13502),
+ UINT64_C(0xF4D7538E68459ABF), UINT64_C(0xED93A791C8FC6AD2),
+ UINT64_C(0xA8BBE2A8B51BD9B2), UINT64_C(0x084B5A279FB34008) } },
+ { { UINT64_C(0xB3BB9545EB138C84), UINT64_C(0x59C3489C3FC88BFD),
+ UINT64_C(0x3A97FF6385F53EC7), UINT64_C(0x40FDF5A60AA69C3D),
+ UINT64_C(0x0E8CCEC753D19668), UINT64_C(0x0AA72EF933FAA661) },
+ { UINT64_C(0xF5C5A6CF9B1E684B), UINT64_C(0x630F937131A22EA1),
+ UINT64_C(0x06B2AAC2AC60F7EA), UINT64_C(0xB181CAE25BC37D80),
+ UINT64_C(0x4601A929247B13EA), UINT64_C(0x8A71C3865F739797) } },
+ { { UINT64_C(0x545387B3AB134786), UINT64_C(0x3179BB061599B64A),
+ UINT64_C(0xB0A6198607593574), UINT64_C(0xC7E39B2163FA7C3B),
+ UINT64_C(0xA1173F8691585D13), UINT64_C(0x09D5CC8ECB9525CD) },
+ { UINT64_C(0xAAD44FFD8F3A3451), UINT64_C(0x702B04F225820CC5),
+ UINT64_C(0xE90CAC491CB66C17), UINT64_C(0x40F6B547EE161DC4),
+ UINT64_C(0xC08BB8B41BA4AC4E), UINT64_C(0x7DC064FBAE5A6BC1) } },
+ { { UINT64_C(0x90A5E8719D76DDC7), UINT64_C(0x39DC8FAEEDFC8E2E),
+ UINT64_C(0x98467A235B079C62), UINT64_C(0xE25E378505450C98),
+ UINT64_C(0x2FE23A4D96140083), UINT64_C(0x65CE3B9AE9900312) },
+ { UINT64_C(0x1D87D0886B72B5D9), UINT64_C(0x72F53220FD9AFC82),
+ UINT64_C(0xC63C7C159E1F71FA), UINT64_C(0x90DF26EA8D449637),
+ UINT64_C(0x97089F40C1C2B215), UINT64_C(0x83AF266442317FAA) } },
+ },
+ {
+ { { UINT64_C(0xFA2DB51A8D688E31), UINT64_C(0x225B696CA09C88D4),
+ UINT64_C(0x9F88AF1D6059171F), UINT64_C(0x1C5FEA5E782A0993),
+ UINT64_C(0xE0FB15884EC710D3), UINT64_C(0xFAF372E5D32CE365) },
+ { UINT64_C(0xD9F896AB26506F45), UINT64_C(0x8D3503388373C724),
+ UINT64_C(0x1B76992DCA6E7342), UINT64_C(0x76338FCA6FD0C08B),
+ UINT64_C(0xC3EA4C65A00F5C23), UINT64_C(0xDFAB29B3B316B35B) } },
+ { { UINT64_C(0x84E5541F483AEBF9), UINT64_C(0x8ADFF7DC49165772),
+ UINT64_C(0xE0A43AD69BEAAD3C), UINT64_C(0x97DD1820F51C2714),
+ UINT64_C(0xAC2B4CB457EA5B0C), UINT64_C(0x87DBD011D11767CA) },
+ { UINT64_C(0x18CCF36CBFC7957A), UINT64_C(0xD4A088411BC79227),
+ UINT64_C(0x9811CE43D8D292A8), UINT64_C(0x72C5FC68D58C4EE7),
+ UINT64_C(0x5BC0F0BED35C65A7), UINT64_C(0x0B446DBCCBBF9669) } },
+ { { UINT64_C(0x7EBA3DA69CEE9BCE), UINT64_C(0x3E2C1248D5377750),
+ UINT64_C(0x8C917D982B93D8B2), UINT64_C(0xCA8FC6AC7CAD1F75),
+ UINT64_C(0x5F581F19A0FF150A), UINT64_C(0x872CC14AE08327FA) },
+ { UINT64_C(0xC774F187E9333188), UINT64_C(0x528ED4AC497AF7E8),
+ UINT64_C(0xCE036E9B8AD72B10), UINT64_C(0x463F9EBB917986CF),
+ UINT64_C(0xBE5163281325CF9B), UINT64_C(0xD28D5C50DD7E5FEA) } },
+ { { UINT64_C(0x714C1D1BDD58BBE3), UINT64_C(0x85BA01AE039AFD0F),
+ UINT64_C(0x7F23EA3A6951AC80), UINT64_C(0x5C599290AC00C837),
+ UINT64_C(0xF6EFA2B3BF24CC1B), UINT64_C(0x393D8E421E84462B) },
+ { UINT64_C(0x9BDA627DF8B89453), UINT64_C(0xE66FFF2EB23E0D1B),
+ UINT64_C(0xD1EE7089C3B94EC2), UINT64_C(0xF75DBA6E3031699A),
+ UINT64_C(0x8FF75F79242B2453), UINT64_C(0xE721EDEB289BFED4) } },
+ { { UINT64_C(0x083215A1C1390FA8), UINT64_C(0x901D686A6DCE8CE0),
+ UINT64_C(0x4AB1BA62837073FF), UINT64_C(0x10C287AA34BEABA5),
+ UINT64_C(0xB4931AF446985239), UINT64_C(0x07639899B053C4DC) },
+ { UINT64_C(0x29E7F44DE721EECD), UINT64_C(0x6581718257B3FF48),
+ UINT64_C(0x198542E25054E2E0), UINT64_C(0x923C9E1584616DE8),
+ UINT64_C(0x2A9C15E1AD465BB9), UINT64_C(0xD8D4EFC716319245) } },
+ { { UINT64_C(0x72DC79439961A674), UINT64_C(0x839A0A52A0E13668),
+ UINT64_C(0xD7A53FA9334945EA), UINT64_C(0xDB21DB77E7AA25DB),
+ UINT64_C(0xB6675A7D66E96DA3), UINT64_C(0x2C31C406E66F33C0) },
+ { UINT64_C(0x45020B626EC7B9CB), UINT64_C(0xFF46E9CD0391F267),
+ UINT64_C(0x7DABD7440FA2F221), UINT64_C(0x9A32364B9D4A2A3E),
+ UINT64_C(0xF0F84AE852D2E47A), UINT64_C(0xD0B872BB888F488A) } },
+ { { UINT64_C(0x531E4CEFC9790EEF), UINT64_C(0xF7B5735E2B8D1A58),
+ UINT64_C(0xB8882F1EEF568511), UINT64_C(0xAFB08D1C86A86DB3),
+ UINT64_C(0x88CB9DF2F54DE8C7), UINT64_C(0xA44234F19A683282) },
+ { UINT64_C(0xBC1B3D3AA6E9AB2E), UINT64_C(0xEFA071FB87FC99EE),
+ UINT64_C(0xFA3C737DA102DC0F), UINT64_C(0xDF3248A6D6A0CBD2),
+ UINT64_C(0x6E62A4FF1ECC1BF4), UINT64_C(0xF718F940C8F1BC17) } },
+ { { UINT64_C(0x2C8B0AAD4F63F026), UINT64_C(0x2AFF623850B253CC),
+ UINT64_C(0xCAB3E94210C4D122), UINT64_C(0x52B59F0407CD2816),
+ UINT64_C(0x22322803982C41FC), UINT64_C(0x38844E668CF50B19) },
+ { UINT64_C(0x42A959F7BE3264CD), UINT64_C(0xBDDC24BD6C983524),
+ UINT64_C(0xA489EB0C462B8640), UINT64_C(0xB7C0509298029BE7),
+ UINT64_C(0xD5546B5FA1ADDC64), UINT64_C(0xE7CAC1FCA0C655AF) } },
+ { { UINT64_C(0x1454719847636F97), UINT64_C(0x6FA67481EBCDCCFF),
+ UINT64_C(0xC164872F395D3258), UINT64_C(0xB8CECAFEEE6ACDBC),
+ UINT64_C(0x3FBFE5F3A933F180), UINT64_C(0xEC20CAC2898C3B1E) },
+ { UINT64_C(0x6A031BEE87DA73F9), UINT64_C(0xD1E667D15C5AF46E),
+ UINT64_C(0xCB3DC1681DC6EEF9), UINT64_C(0x2DD1BD9433D310C0),
+ UINT64_C(0x0F78D4939207E438), UINT64_C(0xC233D544A99C0E75) } },
+ { { UINT64_C(0x228F19F19E2A0113), UINT64_C(0x58495BE50E1A5D37),
+ UINT64_C(0x97E08F6938D7F364), UINT64_C(0x1EC3BA3E510759B0),
+ UINT64_C(0x3682F19AE03CD40D), UINT64_C(0xC87745D8F9E16D68) },
+ { UINT64_C(0xFD527AB509A642EA), UINT64_C(0x6308EEBDF9C81F27),
+ UINT64_C(0xFA9F666C550C5D68), UINT64_C(0xDEBA436F584AB153),
+ UINT64_C(0x1D4861D35B63E939), UINT64_C(0x073BED9BC9850221) } },
+ { { UINT64_C(0x802BCCF08B171246), UINT64_C(0xFFF7D15A733B072F),
+ UINT64_C(0xEA3862664CBFA4EF), UINT64_C(0x9E5B5073D635946B),
+ UINT64_C(0x16E9A979FA81BE95), UINT64_C(0x41E8716EB14F701F) },
+ { UINT64_C(0x25782E0F101A6719), UINT64_C(0x442C4875C9D66959),
+ UINT64_C(0x52D845D92B85D153), UINT64_C(0xFF9251382E831117),
+ UINT64_C(0x01B700CC8E02434B), UINT64_C(0xD2DB7F8EEC0BAE3E) } },
+ { { UINT64_C(0x1B225300966A4872), UINT64_C(0x40C149BE566F537B),
+ UINT64_C(0x3335F4D2CB680021), UINT64_C(0x773D0263778E5F5F),
+ UINT64_C(0x1D9B7602666FA9ED), UINT64_C(0x52490A102E6200CF) },
+ { UINT64_C(0x8434C7DD961F290B), UINT64_C(0x773AC15664456446),
+ UINT64_C(0x5E2BB78947B712BB), UINT64_C(0xFD3BCBFDBE0974AD),
+ UINT64_C(0x71AE9351791AD5D8), UINT64_C(0x1EE738BA6F4E1400) } },
+ { { UINT64_C(0x2FA428AB0BE8E26E), UINT64_C(0xFEFF0600BB4CF9FC),
+ UINT64_C(0x76F25CA9B2EA5FB0), UINT64_C(0xAB7FECF06835C5F4),
+ UINT64_C(0x649D077219D5F328), UINT64_C(0xABE7B895ACBCB12E) },
+ { UINT64_C(0xF2D1031AD69B1EA8), UINT64_C(0x46065D5DC60B0BBB),
+ UINT64_C(0xB0908DC185D798FF), UINT64_C(0x4E2420F0D2C9B18A),
+ UINT64_C(0x6B3A9BDDD30432A2), UINT64_C(0x501C3383C9B134AD) } },
+ { { UINT64_C(0x608F096798A21284), UINT64_C(0x5361BE86059CCEDE),
+ UINT64_C(0x3A40655CAFD87EF7), UINT64_C(0x03CF311759083AA2),
+ UINT64_C(0x57DB5F61B6C366D9), UINT64_C(0x29DC275B6DD0D232) },
+ { UINT64_C(0xBDAB24DD8FA67501), UINT64_C(0x5928F77565D08C37),
+ UINT64_C(0x9448A856645D466A), UINT64_C(0x6E6B5E2EC0E927A5),
+ UINT64_C(0xE884D546E80C6871), UINT64_C(0x10C881C953A9A851) } },
+ { { UINT64_C(0x355053749B627AA5), UINT64_C(0xE7CA1B577976677B),
+ UINT64_C(0x812397124976CE17), UINT64_C(0x96E9080B96DA31B9),
+ UINT64_C(0x458254ABCC64AA1F), UINT64_C(0xFEFF682148E674C9) },
+ { UINT64_C(0x8772F37A021F1488), UINT64_C(0x2E274E18AB56345C),
+ UINT64_C(0x7C7BE61C29823B76), UINT64_C(0x275DB7B29EEFB39E),
+ UINT64_C(0x83B10ED4BF5CBCEF), UINT64_C(0x40D7F5B4518E5183) } },
+ { { UINT64_C(0x315CCC01F960B41B), UINT64_C(0x90B417C91D99E722),
+ UINT64_C(0x84AFAA0D013463E0), UINT64_C(0xF133C5D813E6D9E1),
+ UINT64_C(0xD95C6ADC525B7430), UINT64_C(0x082C61AD7A25106A) },
+ { UINT64_C(0xABC1966DBA1CE179), UINT64_C(0xE0578B77A5DB529A),
+ UINT64_C(0x10988C05EC84107D), UINT64_C(0xFCADE5D71B207F83),
+ UINT64_C(0x0BEB6FDBC5BA83DB), UINT64_C(0x1C39B86D57537E34) } },
+ },
+ {
+ { { UINT64_C(0x5B0B5D692A7AECED), UINT64_C(0x4C03450C01DC545F),
+ UINT64_C(0x72AD0A4A404A3458), UINT64_C(0x1DE8E2559F467B60),
+ UINT64_C(0xA4B3570590634809), UINT64_C(0x76F30205706F0178) },
+ { UINT64_C(0x588D21AB4454F0E5), UINT64_C(0xD22DF54964134928),
+ UINT64_C(0xF4E7E73D241BCD90), UINT64_C(0xB8D8A1D22FACC7CC),
+ UINT64_C(0x483C35A71D25D2A0), UINT64_C(0x7F8D25451EF9F608) } },
+ { { UINT64_C(0xCB51F03954EBC926), UINT64_C(0xE235D356B8D4A7BB),
+ UINT64_C(0x93C8FAFAB41FE1A6), UINT64_C(0x6297701DA719F254),
+ UINT64_C(0x6E9165BC644F5CDE), UINT64_C(0x6506329D0C11C542) },
+ { UINT64_C(0xA2564809A92B4250), UINT64_C(0x0E9AC173889C2E3E),
+ UINT64_C(0x286A592622B1D1BE), UINT64_C(0x86A3D7526ECDD041),
+ UINT64_C(0x4B867E0A649F9524), UINT64_C(0x1FE7D95A0629CB0F) } },
+ { { UINT64_C(0xF4F66843CA5BAF54), UINT64_C(0x298DB357EFE7DB78),
+ UINT64_C(0xF607E86E7365712F), UINT64_C(0xD58822988A822BC0),
+ UINT64_C(0x2CFBD63AC61299B3), UINT64_C(0x6F713D9B67167B1A) },
+ { UINT64_C(0x750F673FDE0B077A), UINT64_C(0x07482708EE2178DA),
+ UINT64_C(0x5E6D5BD169123C75), UINT64_C(0x6A93D1B6EAB99B37),
+ UINT64_C(0x6EF4F7E68CAEC6A3), UINT64_C(0x7BE411D6CF3ED818) } },
+ { { UINT64_C(0xF92B307363A0A7D2), UINT64_C(0x32DA431C881DC8CF),
+ UINT64_C(0xE51BD5EDC578E3A3), UINT64_C(0xEFDA70D29587FA22),
+ UINT64_C(0xCFEC17089B2EBA85), UINT64_C(0x6AB51A4BAF7BA530) },
+ { UINT64_C(0x5AC155AE98174812), UINT64_C(0xCAF07A71CCB076E3),
+ UINT64_C(0x280E86C2C38718A7), UINT64_C(0x9D12DE73D63745B7),
+ UINT64_C(0x0E8EA855BF8A79AA), UINT64_C(0x5EB2BED8BD705BF7) } },
+ { { UINT64_C(0x33FE9578AE16DE53), UINT64_C(0x3AE85EB510BEC902),
+ UINT64_C(0xC4F4965844AF850E), UINT64_C(0x6EA222B3087DD658),
+ UINT64_C(0xB255E6FDA51F1447), UINT64_C(0xB35E4997117E3F48) },
+ { UINT64_C(0x562E813B05616CA1), UINT64_C(0xDF5925D68A61E156),
+ UINT64_C(0xB2FA8125571C728B), UINT64_C(0x00864805A2F2D1CF),
+ UINT64_C(0x2DC26F411BCCB6FF), UINT64_C(0xEBD5E09363AE37DD) } },
+ { { UINT64_C(0xD2D68BB30A285611), UINT64_C(0x3EAE7596DC8378F2),
+ UINT64_C(0x2DC6CCC66CC688A3), UINT64_C(0xC45E5713011F5DFB),
+ UINT64_C(0x6B9C4F6C62D34487), UINT64_C(0xFAD6F0771FC65551) },
+ { UINT64_C(0x5E3266E062B23B52), UINT64_C(0xF1DAF319E98F4715),
+ UINT64_C(0x064D12EA3ED0AE83), UINT64_C(0x5CCF9326564125CB),
+ UINT64_C(0x09057022C63C1E9F), UINT64_C(0x7171972CDC9B5D2E) } },
+ { { UINT64_C(0x2364FD9AEABD21B2), UINT64_C(0x3CE5F4BB9174AD6D),
+ UINT64_C(0xA4D6D5D0B38688C0), UINT64_C(0x2292A2D26D87FD7D),
+ UINT64_C(0x2A7D1B534CA02E54), UINT64_C(0x7BEE6E7EB4185715) },
+ { UINT64_C(0x73E546098FC63ACD), UINT64_C(0xF4D93A124064E09D),
+ UINT64_C(0xD20E157A2B92DAA5), UINT64_C(0x90D125DBC4B81A00),
+ UINT64_C(0xCB951C9E7682DE13), UINT64_C(0x1ABE58F427987545) } },
+ { { UINT64_C(0x6D35164030C70C8D), UINT64_C(0x8047D811CE2361B8),
+ UINT64_C(0x3F8B3D4FDF8E2C81), UINT64_C(0x5D59547733FA1F6C),
+ UINT64_C(0xF769FE5AE29B8A91), UINT64_C(0x26F0E606D737B2A2) },
+ { UINT64_C(0x70CBFA5DB8B31C6A), UINT64_C(0x0F883B4A863D3AEA),
+ UINT64_C(0x156A4479E386AE2F), UINT64_C(0xA17A2FCDADE8A684),
+ UINT64_C(0x78BDF958E2A7E335), UINT64_C(0xD1B4E6733B9E3041) } },
+ { { UINT64_C(0x1EAF48EC449A6D11), UINT64_C(0x6B94B8E46D2FA7B9),
+ UINT64_C(0x1D75D269728E4C1B), UINT64_C(0x91123819DD304E2C),
+ UINT64_C(0x0B34CAE388804F4B), UINT64_C(0x2BA192FBC5495E9A) },
+ { UINT64_C(0xC93FF6EFFF4D24BF), UINT64_C(0xF8C2C0B00342BA78),
+ UINT64_C(0x8041F769831EB94C), UINT64_C(0x353100747782985E),
+ UINT64_C(0xC755320B3AF84E83), UINT64_C(0x384B6D266F497E7F) } },
+ { { UINT64_C(0xEF92CD5917E6BD17), UINT64_C(0xA087305BA426965C),
+ UINT64_C(0x13895CE7AC47F773), UINT64_C(0xB85F2A9FE0BB2867),
+ UINT64_C(0x2926E6AA7CD7C58E), UINT64_C(0xE544EDA6450459C5) },
+ { UINT64_C(0x73DBC351B90A9849), UINT64_C(0x961183F6848EBE86),
+ UINT64_C(0xC45BB21080534712), UINT64_C(0x379D08D7A654D9A3),
+ UINT64_C(0x5B97CEF2BD3FFA9C), UINT64_C(0x0F469F34DDC2FCE5) } },
+ { { UINT64_C(0x6D1461080642F38D), UINT64_C(0x055171A0D21EB887),
+ UINT64_C(0x28DFFAB4D0DCEB28), UINT64_C(0x0D0E631298DE9CCD),
+ UINT64_C(0x750A9156118C3C3F), UINT64_C(0x8C1F1390B049D799) },
+ { UINT64_C(0xE4823858439607C5), UINT64_C(0x947E9BA05C111EAB),
+ UINT64_C(0x39C95616A355DF2E), UINT64_C(0xF5F6B98E10E54BDA),
+ UINT64_C(0xB0E0B33D142B876A), UINT64_C(0x71197D73EA18C90C) } },
+ { { UINT64_C(0x36A5139DF52BE819), UINT64_C(0xF60DDF3429A45D2B),
+ UINT64_C(0x0727EFECE9220E34), UINT64_C(0x431D33864EF7F446),
+ UINT64_C(0xC3165A64FCC4962C), UINT64_C(0xB7D926E1D64362BB) },
+ { UINT64_C(0x216BC61FD45F9350), UINT64_C(0xA974CB2FBBAED815),
+ UINT64_C(0x31DF342D86FB2F76), UINT64_C(0x3AB67E0501D78314),
+ UINT64_C(0x7AA951E0DEE33ED2), UINT64_C(0x318FBBBDCEC78D94) } },
+ { { UINT64_C(0xAD7EFB65B8FE0204), UINT64_C(0x0432E1C5230AB7F7),
+ UINT64_C(0x7563A62D9C967400), UINT64_C(0xD88B9C743524D4FF),
+ UINT64_C(0x16A1991CF1A823E3), UINT64_C(0xCF2F9BFEFA6F0FFB) },
+ { UINT64_C(0x55AAA946A50CA61F), UINT64_C(0x8CBBD3C8FED4CAB3),
+ UINT64_C(0x03A0FAB87651365A), UINT64_C(0x46B5234B62DC3913),
+ UINT64_C(0xFD875B28B558CBBD), UINT64_C(0xA48EC3AE11CEB361) } },
+ { { UINT64_C(0x5DD131A1B3ADBD8B), UINT64_C(0xF9FBCA3A29B45EF8),
+ UINT64_C(0x022048669341EE18), UINT64_C(0x8D13B89583BF9618),
+ UINT64_C(0x0E395BAEE807459C), UINT64_C(0xB9C110CCB190E7DB) },
+ { UINT64_C(0xA0DC345225D25063), UINT64_C(0x2FB78EC802371462),
+ UINT64_C(0xC3A9E7BB8975C2D5), UINT64_C(0x9466687285A78264),
+ UINT64_C(0x480D2CC28029AA92), UINT64_C(0x237086C75655726D) } },
+ { { UINT64_C(0x197F14BB65EB9EEE), UINT64_C(0xFC93125C9F12E5FD),
+ UINT64_C(0x9C20BC538BFBAE5E), UINT64_C(0xB35E21544BC053BA),
+ UINT64_C(0xE5FA9CC721C3898E), UINT64_C(0x502D72FFD42F950F) },
+ { UINT64_C(0x6812D38AD1EB8C31), UINT64_C(0x1F77F3F1080D30BB),
+ UINT64_C(0x18D128335A8B1E98), UINT64_C(0x7FD39FA9299196CE),
+ UINT64_C(0xFB8C9F11CF4ED6D6), UINT64_C(0x4C00F604D6363194) } },
+ { { UINT64_C(0x5C8AFCF9FA2A21C2), UINT64_C(0x71CBF2821928D133),
+ UINT64_C(0x56BEF28E42B29506), UINT64_C(0xAFBA250C70323DE2),
+ UINT64_C(0x3FE208D17DED2C30), UINT64_C(0xBD2CD213CE9AA598) },
+ { UINT64_C(0x52C5EC52CFEED070), UINT64_C(0x0A7223E7D3DA336B),
+ UINT64_C(0x7156A4EDCE156B46), UINT64_C(0x9AF6C499ED7E6159),
+ UINT64_C(0x9D7A679713C029AD), UINT64_C(0xE5B5C9249018DC77) } },
+ },
+ {
+ { { UINT64_C(0x3F2EFF53DE1E4E55), UINT64_C(0x6B749943E4D3ECC4),
+ UINT64_C(0xAF10B18A0DDE190D), UINT64_C(0xF491B98DA26B0409),
+ UINT64_C(0x66080782A2B1D944), UINT64_C(0x59277DC697E8C541) },
+ { UINT64_C(0xFDBFC5F6006F18AA), UINT64_C(0x435D165BFADD8BE1),
+ UINT64_C(0x8E5D263857645EF4), UINT64_C(0x31BCFDA6A0258363),
+ UINT64_C(0xF5330AB8D35D2503), UINT64_C(0xB71369F0C7CAB285) } },
+ { { UINT64_C(0xE6A19DCC40ACC5A8), UINT64_C(0x1C3A1FF1DBC6DBF8),
+ UINT64_C(0xB4D89B9FC6455613), UINT64_C(0x6CB0FE44A7390D0E),
+ UINT64_C(0xADE197A459EA135A), UINT64_C(0xDA6AA86520680982) },
+ { UINT64_C(0x03DB9BE95A442C1B), UINT64_C(0x221A2D732BFB93F2),
+ UINT64_C(0x44DEE8D4753C196C), UINT64_C(0x59ADCC700B7C6FF5),
+ UINT64_C(0xC6260EC24CA1B142), UINT64_C(0x4C3CB5C646CBD4F2) } },
+ { { UINT64_C(0x8A15D6FEA417111F), UINT64_C(0xFE4A16BD71D93FCC),
+ UINT64_C(0x7A7EE38C55BBE732), UINT64_C(0xEFF146A51FF94A9D),
+ UINT64_C(0xE572D13EDD585AB5), UINT64_C(0xD879790E06491A5D) },
+ { UINT64_C(0x9C84E1C52A58CB2E), UINT64_C(0xD79D13746C938630),
+ UINT64_C(0xDB12CD9B385F06C7), UINT64_C(0x0C93EB977A7759C3),
+ UINT64_C(0xF1F5B0FE683BD706), UINT64_C(0x541E4F7285EC3D50) } },
+ { { UINT64_C(0x9A0E153581833608), UINT64_C(0x5CCE871E6E2833AC),
+ UINT64_C(0xC17059EAFB29777C), UINT64_C(0x7E40E5FAE354CAFD),
+ UINT64_C(0x9CF594054D07C371), UINT64_C(0x64CE36B2A71C3945) },
+ { UINT64_C(0x69309E9656CAF487), UINT64_C(0x3D719E9F1AE3454B),
+ UINT64_C(0xF2164070E25823B6), UINT64_C(0xEAD851BD0BC27359),
+ UINT64_C(0x3D21BFE8B0925094), UINT64_C(0xA783B1E934A97F4E) } },
+ { { UINT64_C(0x406B0C269546491A), UINT64_C(0x9E5E15E2F293C4E5),
+ UINT64_C(0xC60D641315B164DB), UINT64_C(0x0DA46F530C75A78E),
+ UINT64_C(0x7C599BB7EA0C656B), UINT64_C(0x0F07A5121B1A8122) },
+ { UINT64_C(0x14C7204A15172686), UINT64_C(0x8FAEDFF85165625D),
+ UINT64_C(0x20F260CE37AEDE40), UINT64_C(0xC81F771E8F357FFE),
+ UINT64_C(0x25499197B0912557), UINT64_C(0x736197DC4C739C74) } },
+ { { UINT64_C(0x6151BAB1381B3462), UINT64_C(0x27E5A07843DBD344),
+ UINT64_C(0x2CB05BD6A1C3E9FB), UINT64_C(0x2A75976027CF2A11),
+ UINT64_C(0x0ADCF9DBFF43E702), UINT64_C(0x4BBF03E21F484146) },
+ { UINT64_C(0x0E74997F55B6521A), UINT64_C(0x15629231ADE17086),
+ UINT64_C(0x7F143E867493FC58), UINT64_C(0x60869095AF8B9670),
+ UINT64_C(0x482CFCD77E524869), UINT64_C(0x9E8060C31D454756) } },
+ { { UINT64_C(0xE495747AC88B4D3B), UINT64_C(0xB7559835AE8A948F),
+ UINT64_C(0x67EEF3A9DEB56853), UINT64_C(0x0E20E2699DEE5ADF),
+ UINT64_C(0x9031AF6761F0A1AA), UINT64_C(0x76669D32683402BC) },
+ { UINT64_C(0x90BD231306718B16), UINT64_C(0xE1B22A21864EFDAC),
+ UINT64_C(0xE4FFE9096620089F), UINT64_C(0xB84C842E3428E2D9),
+ UINT64_C(0x0E28C880FE3871FC), UINT64_C(0x8932F6983F21C200) } },
+ { { UINT64_C(0x603F00CE6C90EA5D), UINT64_C(0x6473930740A2F693),
+ UINT64_C(0xAF65148B2174E517), UINT64_C(0x162FC2CAF784AE74),
+ UINT64_C(0x0D9A88254D5F6458), UINT64_C(0x0C2D586143AACE93) },
+ { UINT64_C(0xBF1EADDE9F73CBFC), UINT64_C(0xDE9C34C09C68BBCA),
+ UINT64_C(0x6D95602D67EF8A1A), UINT64_C(0x0AF2581BA791B241),
+ UINT64_C(0x14F7736112CAD604), UINT64_C(0x19F2354DE2ACD1AD) } },
+ { { UINT64_C(0x272F78F60D60F263), UINT64_C(0xE7A8F4AF208FD785),
+ UINT64_C(0x10E191C636554F2C), UINT64_C(0x06D88551FD5CD0B3),
+ UINT64_C(0x29BF856857069C27), UINT64_C(0x3CE7ECD828AA6FAD) },
+ { UINT64_C(0x7D8A92D0E9F1A1D8), UINT64_C(0xD40C7FF8D30B5725),
+ UINT64_C(0x16BE6CB2F54CAEB8), UINT64_C(0x14CA471A14CB0A91),
+ UINT64_C(0xD5FF15B802733CAE), UINT64_C(0xCAF88D87DAA76580) } },
+ { { UINT64_C(0x39430E222C046592), UINT64_C(0x6CDAE81F1AD26706),
+ UINT64_C(0x8C102159A25D9106), UINT64_C(0x9A44057227CA9F30),
+ UINT64_C(0x8D34C43070287FBC), UINT64_C(0x9003A45529DB8AFA) },
+ { UINT64_C(0x91364CC37FD971AD), UINT64_C(0x7B3AA0489C60EDB7),
+ UINT64_C(0x58B0E008526F4DD8), UINT64_C(0xB7674454D86D98AE),
+ UINT64_C(0xC25F4051B2B45747), UINT64_C(0x8243BF9CCC043E8F) } },
+ { { UINT64_C(0xA89641C643A0C387), UINT64_C(0x6D92205C87B9AB17),
+ UINT64_C(0x37D691F4DAA0E102), UINT64_C(0xEB3E52D7CDE5312E),
+ UINT64_C(0x60D3C09916F518A2), UINT64_C(0x7854C0518A378EEB) },
+ { UINT64_C(0x7359DB514BBCAAC5), UINT64_C(0xF5B1B68C1713F102),
+ UINT64_C(0xDAEAE645E4398DE5), UINT64_C(0x8C8ACB6CD1ABFB82),
+ UINT64_C(0x2E8B76C3136423E2), UINT64_C(0x509DCB2DA8BA015E) } },
+ { { UINT64_C(0x2FF368159AD9C59C), UINT64_C(0xB189A4E8658E65B9),
+ UINT64_C(0x7D33DDBBEA786AD2), UINT64_C(0x96D0D648C0D2DC05),
+ UINT64_C(0x05E49256BFA03BE9), UINT64_C(0x0EA4E7A68BAF5A1C) },
+ { UINT64_C(0x3DDCE0B09F9AD5A8), UINT64_C(0xF78091959E49C2CB),
+ UINT64_C(0xBFCEF29D21782C2F), UINT64_C(0xE57AD39FC41BFD97),
+ UINT64_C(0xC04B93E81355AD19), UINT64_C(0xAABC9E6E59440F9F) } },
+ { { UINT64_C(0x7AA481035B6459DA), UINT64_C(0x83EF74770166E880),
+ UINT64_C(0x536182B1511CCE80), UINT64_C(0xAFDD2EEE73CA55AA),
+ UINT64_C(0xAB910D0DA8716143), UINT64_C(0x8BEAA42B83707250) },
+ { UINT64_C(0x4BCCFD898DA2AB3D), UINT64_C(0x1DBF68A9EC6AA105),
+ UINT64_C(0x32CE610868EB42DA), UINT64_C(0x5C2C2C858EA62E37),
+ UINT64_C(0x1ED2791FCD3088A7), UINT64_C(0x496B4FEBFF05070C) } },
+ { { UINT64_C(0x9FA9121A0AA629C5), UINT64_C(0xE286CFF157558BEC),
+ UINT64_C(0x4D9D657E59813A4D), UINT64_C(0xC4676A1626103519),
+ UINT64_C(0x616160B32BD4DF80), UINT64_C(0x26FB78CC30FBAE87) },
+ { UINT64_C(0x096070138F0F66BD), UINT64_C(0xDD4E2D0C03D9B90D),
+ UINT64_C(0x5D3A8912600D1B12), UINT64_C(0xF76DD52F4308E126),
+ UINT64_C(0x97CC04099E4FCCA6), UINT64_C(0x0CFBE31104C4DF7B) } },
+ { { UINT64_C(0x6CA62C1228437A23), UINT64_C(0x0DAF335340E7A003),
+ UINT64_C(0x1FD07DF0D20F8079), UINT64_C(0xEAE7969C3BBC9749),
+ UINT64_C(0x55861AFA9ECAD022), UINT64_C(0xEC41DAD91FBC3D4C) },
+ { UINT64_C(0x1FE4CB40DA8B261B), UINT64_C(0xC2671AB6427C5C9D),
+ UINT64_C(0xDFCDA7B8261D4939), UINT64_C(0x9E7B802B2072C0B9),
+ UINT64_C(0x3AFEE900C7828CC2), UINT64_C(0x3488BF28F6DE987F) } },
+ { { UINT64_C(0x33B9F2DE7BE1F89E), UINT64_C(0xD4E80821299B15C9),
+ UINT64_C(0x87A3067A0E13F37F), UINT64_C(0x6D4C09ED55FD239F),
+ UINT64_C(0x48B1042D92EF014F), UINT64_C(0xA382B2E0B385A759) },
+ { UINT64_C(0xBF571BB07F6F84F8), UINT64_C(0x25AFFA370CE87F50),
+ UINT64_C(0x826906D3FE54F1BC), UINT64_C(0x6B0421F4C53AE76A),
+ UINT64_C(0x44F85A3A4855EB3C), UINT64_C(0xF49E21518D1F2B27) } },
+ },
+ {
+ { { UINT64_C(0xC0426B775E3C647B), UINT64_C(0xBFCBD9398CF05348),
+ UINT64_C(0x31D312E3172C0D3D), UINT64_C(0x5F49FDE6EE754737),
+ UINT64_C(0x895530F06DA7EE61), UINT64_C(0xCF281B0AE8B3A5FB) },
+ { UINT64_C(0xFD14973541B8A543), UINT64_C(0x41A625A73080DD30),
+ UINT64_C(0xE2BAAE07653908CF), UINT64_C(0xC3D01436BA02A278),
+ UINT64_C(0xA0D0222E7B21B8F8), UINT64_C(0xFDC270E9D7EC1297) } },
+ { { UINT64_C(0x06A67BD29F101E64), UINT64_C(0xCB6E0AC7E1733A4A),
+ UINT64_C(0xEE0B5D5197BC62D2), UINT64_C(0x52B1703924C51874),
+ UINT64_C(0xFED1F42382A1A0D5), UINT64_C(0x55D90569DB6270AC) },
+ { UINT64_C(0x36BE4A9C5D73D533), UINT64_C(0xBE9266D6976ED4D5),
+ UINT64_C(0xC17436D3B8F8074B), UINT64_C(0x3BB4D399718545C6),
+ UINT64_C(0x8E1EA3555C757D21), UINT64_C(0xF7EDBC978C474366) } },
+ { { UINT64_C(0xEC72C6506EA83242), UINT64_C(0xF7DE7BE51B2D237F),
+ UINT64_C(0x3C5E22001819EFB0), UINT64_C(0xDF5AB6D68CDDE870),
+ UINT64_C(0x75A44E9D92A87AEE), UINT64_C(0xBDDC46F4BCF77F19) },
+ { UINT64_C(0x8191EFBD669B674D), UINT64_C(0x52884DF9ED71768F),
+ UINT64_C(0xE62BE58265CF242C), UINT64_C(0xAE99A3B180B1D17B),
+ UINT64_C(0x48CBB44692DE59A9), UINT64_C(0xD3C226CF2DCB3CE2) } },
+ { { UINT64_C(0x9580CDFB9FD94EC4), UINT64_C(0xED273A6C28631AD9),
+ UINT64_C(0x5D3D5F77C327F3E7), UINT64_C(0x05D5339C35353C5F),
+ UINT64_C(0xC56FB5FE5C258EB1), UINT64_C(0xEFF8425EEDCE1F79) },
+ { UINT64_C(0xAB7AA141CF83CF9C), UINT64_C(0xBD2A690A207D6D4F),
+ UINT64_C(0xE1241491458D9E52), UINT64_C(0xDD2448CCAA7F0F31),
+ UINT64_C(0xEC58D3C7F0FDA7AB), UINT64_C(0x7B6E122DC91BBA4D) } },
+ { { UINT64_C(0x2A2DEDAFB1B48156), UINT64_C(0xA0A2C63ABB93DB87),
+ UINT64_C(0xC655907808ACD99E), UINT64_C(0x03EA42AFFE4AC331),
+ UINT64_C(0x43D2C14AEB180ED6), UINT64_C(0xC2F293DDB1156A1A) },
+ { UINT64_C(0x1FAFABF5A9D81249), UINT64_C(0x39ADDEAD9A8EEE87),
+ UINT64_C(0x21E206F2119E2E92), UINT64_C(0xBC5DCC2ED74DCEB6),
+ UINT64_C(0x86647FA30A73A358), UINT64_C(0xEAD8BEA42F53F642) } },
+ { { UINT64_C(0x636225F591C09091), UINT64_C(0xCCF5070A71BDCFDF),
+ UINT64_C(0x0EF8D625B9668EE2), UINT64_C(0x57BDF6CDB5E04E4F),
+ UINT64_C(0xFC6AB0A67C75EA43), UINT64_C(0xEB6B8AFBF7FD6EF3) },
+ { UINT64_C(0x5B2AEEF02A3DF404), UINT64_C(0x31FD3B48B9823197),
+ UINT64_C(0x56226DB683A7EB23), UINT64_C(0x3772C21E5BB1ED2F),
+ UINT64_C(0x3E833624CD1ABA6A), UINT64_C(0xBAE58FFAAC672DAD) } },
+ { { UINT64_C(0xCE92224D31BA1705), UINT64_C(0x022C6ED2F0197F63),
+ UINT64_C(0x21F18D99A4DC1113), UINT64_C(0x5CD04DE803616BF1),
+ UINT64_C(0x6F9006799FF12E08), UINT64_C(0xF59A331548E61DDF) },
+ { UINT64_C(0x9474D42CB51BD024), UINT64_C(0x11A0A4139051E49D),
+ UINT64_C(0x79C92705DCE70EDB), UINT64_C(0x113CE27834198426),
+ UINT64_C(0x8978396FEA8616D2), UINT64_C(0x9A2A14D0EA894C36) } },
+ { { UINT64_C(0x4F1E1254604F6E4A), UINT64_C(0x4513B0880187D585),
+ UINT64_C(0x9022F25719E0F482), UINT64_C(0x51FB2A80E2239DBF),
+ UINT64_C(0x49940D9E998ED9D5), UINT64_C(0x0583D2416C932C5D) },
+ { UINT64_C(0x1188CEC8F25B73F7), UINT64_C(0xA28788CB3B3D06CD),
+ UINT64_C(0xDEA194ECA083DB5A), UINT64_C(0xD93A4F7E22DF4272),
+ UINT64_C(0x8D84E4BF6A009C49), UINT64_C(0x893D8DD93E3E4A9E) } },
+ { { UINT64_C(0x35E909EA33D31160), UINT64_C(0x5020316857172F1E),
+ UINT64_C(0x2707FC4451F3D866), UINT64_C(0xEB9D2018D2442A5D),
+ UINT64_C(0x904D72095DBFE378), UINT64_C(0x6DB132A35F13CF77) },
+ { UINT64_C(0x9D842BA67A3AF54B), UINT64_C(0x4E16EA195AA5B4F9),
+ UINT64_C(0x2BBA457CAF24228E), UINT64_C(0xCC04B3BB16F3C5FE),
+ UINT64_C(0xBAFAC51677E64944), UINT64_C(0x31580A34F08BCEE0) } },
+ { { UINT64_C(0xC6808DEE20C30ACA), UINT64_C(0xDADD216FA3EA2056),
+ UINT64_C(0xD331394E7A4A9F9D), UINT64_C(0x9E0441AD424C4026),
+ UINT64_C(0xAEED102F0AEB5350), UINT64_C(0xC6697FBBD45B09DA) },
+ { UINT64_C(0x52A2590EDEAC1496), UINT64_C(0x7142B831250B87AF),
+ UINT64_C(0xBEF2E68B6D0784A8), UINT64_C(0x5F62593AA5F71CEF),
+ UINT64_C(0x3B8F7616B5DA51A3), UINT64_C(0xC7A6FA0DB680F5FE) } },
+ { { UINT64_C(0x36C21DE699C8227C), UINT64_C(0xBEE3E867C26813B1),
+ UINT64_C(0x9B05F2E6BDD91549), UINT64_C(0x34FF2B1FA7D1110F),
+ UINT64_C(0x8E6953B937F67FD0), UINT64_C(0x56C7F18BC3183E20) },
+ { UINT64_C(0x48AF46DE9E2019ED), UINT64_C(0xDEAF972EF551BBBF),
+ UINT64_C(0x88EE38F8CC5E3EEF), UINT64_C(0xFB8D7A44392D6BAF),
+ UINT64_C(0x32293BFC0127187D), UINT64_C(0x7689E767E58647CC) } },
+ { { UINT64_C(0x00CE901B52168013), UINT64_C(0xC6BF8E38837AAE71),
+ UINT64_C(0xD6F11EFA167677D8), UINT64_C(0xE53BB48586C8E5CF),
+ UINT64_C(0x671167CEC48E74AB), UINT64_C(0x8A40218C8AD720A7) },
+ { UINT64_C(0x81E827A6E7C1191A), UINT64_C(0x54058F8DADDB153D),
+ UINT64_C(0x0BAF29250D950FA2), UINT64_C(0xC244674D576DDA13),
+ UINT64_C(0x8C4630AE41BCD13B), UINT64_C(0x6C2127BF5A077419) } },
+ { { UINT64_C(0xCF977FD5A83C501F), UINT64_C(0xD7C6DF36B6AB176F),
+ UINT64_C(0x117F6331397BC6B5), UINT64_C(0x72A6078BF7A2D491),
+ UINT64_C(0xE5A2AAED5242FE2E), UINT64_C(0x88ECFFDCFEBDC212) },
+ { UINT64_C(0xF2DBBF50CE33BA21), UINT64_C(0xE1343B76CEB19F07),
+ UINT64_C(0x1F32D4C9D2C28F71), UINT64_C(0x93FC64B418587685),
+ UINT64_C(0x39CEEF9BBA1F8BD1), UINT64_C(0x99C36A788D6D6BB0) } },
+ { { UINT64_C(0x0D0638173E9561CF), UINT64_C(0x1D8646AA3D33704D),
+ UINT64_C(0x8C4513847A08BA33), UINT64_C(0x96446BD3E02D6624),
+ UINT64_C(0x749849F02D6F4166), UINT64_C(0xE364DA0114268BF0) },
+ { UINT64_C(0x7CE4587E9AEBFCFD), UINT64_C(0xD468606456234393),
+ UINT64_C(0x00231D5116DF73B2), UINT64_C(0xF6A969B77279C78C),
+ UINT64_C(0x1FF1F6B66CB4117C), UINT64_C(0x30AEBC39D3EAB680) } },
+ { { UINT64_C(0x5CC97E6493EF00B9), UINT64_C(0xDAE13841972345AE),
+ UINT64_C(0x858391844788F43C), UINT64_C(0xD0FF521EE2E6CF3E),
+ UINT64_C(0xAED14A5B4B707C86), UINT64_C(0x7EAAE4A6D2523CF7) },
+ { UINT64_C(0x266472C5024C8AC6), UINT64_C(0xE47E1522C0170051),
+ UINT64_C(0x7B83DA6173826BAE), UINT64_C(0xE97E19F5CF543F0D),
+ UINT64_C(0x5D5248FA20BF38E2), UINT64_C(0x8A7C2F7DDF56A037) } },
+ { { UINT64_C(0xB04659DD87B0526C), UINT64_C(0x593C604A2307565E),
+ UINT64_C(0x49E522257C630AB8), UINT64_C(0x24C1D0C6DCE9CD23),
+ UINT64_C(0x6FDB241C85177079), UINT64_C(0x5F521D19F250C351) },
+ { UINT64_C(0xFB56134BA6FB61DF), UINT64_C(0xA4E70D69D75C07ED),
+ UINT64_C(0xB7A824487D8825A8), UINT64_C(0xA3AEA7D4DD64BBCC),
+ UINT64_C(0xD53E6E6C8692F539), UINT64_C(0x8DDDA83BF7AA4BC0) } },
+ },
+ {
+ { { UINT64_C(0x140A0F9FDD93D50A), UINT64_C(0x4799FFDE83B7ABAC),
+ UINT64_C(0x78FF7C2304A1F742), UINT64_C(0xC0568F51195BA34E),
+ UINT64_C(0xE97183603B7F78B4), UINT64_C(0x9CFD1FF1F9EFAA53) },
+ { UINT64_C(0xE924D2C5BB06022E), UINT64_C(0x9987FA86FAA2AF6D),
+ UINT64_C(0x4B12E73F6EE37E0F), UINT64_C(0x1836FDFA5E5A1DDE),
+ UINT64_C(0x7F1B92259DCD6416), UINT64_C(0xCB2C1B4D677544D8) } },
+ { { UINT64_C(0x0254486D9C213D95), UINT64_C(0x68A9DB56CB2F6E94),
+ UINT64_C(0xFB5858BA000F5491), UINT64_C(0x1315BDD934009FB6),
+ UINT64_C(0xB18A8E0AC42BDE30), UINT64_C(0xFDCF93D1F1070358) },
+ { UINT64_C(0xBEB1DB753022937E), UINT64_C(0x9B9ECA7ACAC20DB4),
+ UINT64_C(0x152214D4E4122B20), UINT64_C(0xD3E673F2AABCCC7B),
+ UINT64_C(0x94C50F64AED07571), UINT64_C(0xD767059AE66B4F17) } },
+ { { UINT64_C(0x40336B12DCD6D14B), UINT64_C(0xF6BCFF5DE3B4919C),
+ UINT64_C(0xC337048D9C841F0C), UINT64_C(0x4CE6D0251D617F50),
+ UINT64_C(0x00FEF2198117D379), UINT64_C(0x18B7C4E9F95BE243) },
+ { UINT64_C(0x98DE119E38DF08FF), UINT64_C(0xDFD803BD8D772D20),
+ UINT64_C(0x94125B720F9678BD), UINT64_C(0xFC5B57CD334ACE30),
+ UINT64_C(0x09486527B7E86E04), UINT64_C(0xFE9F8BCC6E552039) } },
+ { { UINT64_C(0x3B75C45BD6F5A10E), UINT64_C(0xFD4680F4C1C35F38),
+ UINT64_C(0x5450227DF8E0A113), UINT64_C(0x5E69F1AE73DDBA24),
+ UINT64_C(0x2007B80E57F24645), UINT64_C(0xC63695DC3D159741) },
+ { UINT64_C(0xCBE54D294530F623), UINT64_C(0x986AD5732869586B),
+ UINT64_C(0xE19F70594CC39F73), UINT64_C(0x80F00AB32B1B8DA9),
+ UINT64_C(0xB765AAF973F68D26), UINT64_C(0xBC79A394E993F829) } },
+ { { UINT64_C(0x9C441043F310D2A0), UINT64_C(0x2865EE58DC5EB106),
+ UINT64_C(0x71A959229CB8065C), UINT64_C(0x8EB3A733A052AF0F),
+ UINT64_C(0x56009F42B09D716E), UINT64_C(0xA7F923C5ABCBE6AD) },
+ { UINT64_C(0x263B7669FA375C01), UINT64_C(0x641C47E521EF27A2),
+ UINT64_C(0xA89B474EB08FFD25), UINT64_C(0x5BE8EC3FF0A239F3),
+ UINT64_C(0x0E79957A242A6C5A), UINT64_C(0x1DFB26D00C6C75F5) } },
+ { { UINT64_C(0x2FD97B9B9DFBF22A), UINT64_C(0xDEC16CC85643532D),
+ UINT64_C(0xDF0E6E3960FEE7C3), UINT64_C(0xD09AD7B6545860C8),
+ UINT64_C(0xCC16E98473FC3B7C), UINT64_C(0x6CE734C10D4E1555) },
+ { UINT64_C(0xC6EFE68B4B5F6032), UINT64_C(0x3A64F34C14F54073),
+ UINT64_C(0x25DA689CAC44DC95), UINT64_C(0x990C477E5358AD8A),
+ UINT64_C(0x00E958A5F36DA7DE), UINT64_C(0x902B7360C9B6F161) } },
+ { { UINT64_C(0x454AB42C9347B90A), UINT64_C(0xCAEBE64AA698B02B),
+ UINT64_C(0x119CDC69FB86FA40), UINT64_C(0x2E5CB7ADC3109281),
+ UINT64_C(0x67BB1EC5CD0C3D00), UINT64_C(0x5D430BC783F25BBF) },
+ { UINT64_C(0x69FD84A85CDE0ABB), UINT64_C(0x69DA263E9816B688),
+ UINT64_C(0xE52D93DF0E53CBB8), UINT64_C(0x42CF6F25ADD2D5A7),
+ UINT64_C(0x227BA59DC87CA88F), UINT64_C(0x7A1CA876DA738554) } },
+ { { UINT64_C(0x3FA5C1051CAC82C4), UINT64_C(0x23C760878A78C9BE),
+ UINT64_C(0xE98CDAD61C5CFA42), UINT64_C(0x09C302520A6C0421),
+ UINT64_C(0x149BAC7C42FC61B9), UINT64_C(0x3A1C22AC3004A3E2) },
+ { UINT64_C(0xDE6B0D6E202C7FED), UINT64_C(0xB2457377E7E63052),
+ UINT64_C(0x31725FD43706B3EF), UINT64_C(0xE16A347D2B1AFDBF),
+ UINT64_C(0xBE4850C48C29CF66), UINT64_C(0x8F51CC4D2939F23C) } },
+ { { UINT64_C(0x169E025B219AE6C1), UINT64_C(0x55FF526F116E1CA1),
+ UINT64_C(0x01B810A3B191F55D), UINT64_C(0x2D98127229588A69),
+ UINT64_C(0x53C9377048B92199), UINT64_C(0x8C7DD84E8A85236F) },
+ { UINT64_C(0x293D48B6CAACF958), UINT64_C(0x1F084ACB43572B30),
+ UINT64_C(0x628BFA2DFAD91F28), UINT64_C(0x8D627B11829386AF),
+ UINT64_C(0x3EC1DD00D44A77BE), UINT64_C(0x8D3B0D08649AC7F0) } },
+ { { UINT64_C(0x00A93DAA177513BF), UINT64_C(0x2EF0B96F42AD79E1),
+ UINT64_C(0x81F5AAF1A07129D9), UINT64_C(0xFC04B7EF923F2449),
+ UINT64_C(0x855DA79560CDB1B7), UINT64_C(0xB1EB5DABAD5D61D4) },
+ { UINT64_C(0xD2CEF1AE353FD028), UINT64_C(0xC21D54399EE94847),
+ UINT64_C(0x9ED552BB0380C1A8), UINT64_C(0xB156FE7A2BAC328F),
+ UINT64_C(0xBB7E01967213C6A4), UINT64_C(0x36002A331701ED5B) } },
+ { { UINT64_C(0x20B1632ADDC9EF4D), UINT64_C(0x2A35FF4C272D082B),
+ UINT64_C(0x30D39923F6CC9BD3), UINT64_C(0x6D879BC2E65C9D08),
+ UINT64_C(0xCE8274E16FA9983C), UINT64_C(0x652371E80EB7424F) },
+ { UINT64_C(0x32B77503C5C35282), UINT64_C(0xD7306333C885A931),
+ UINT64_C(0x8A16D71972955AA8), UINT64_C(0x5548F1637D51F882),
+ UINT64_C(0xB311DC66BABA59EF), UINT64_C(0x773D54480DB8F627) } },
+ { { UINT64_C(0x59B1B1347A62EB3B), UINT64_C(0x0F8CE157CCEEFB34),
+ UINT64_C(0x3FE842A8A798CB2B), UINT64_C(0xD01BC6260BF4161D),
+ UINT64_C(0x55EF6E554D016FDB), UINT64_C(0xCB561503B242B201) },
+ { UINT64_C(0x076EBC73AF4199C1), UINT64_C(0x39DEDCBB697244F7),
+ UINT64_C(0x9D184733040162BC), UINT64_C(0x902992C17F6B5FA6),
+ UINT64_C(0xAD1DE754BB4952B5), UINT64_C(0x7ACF1B93A121F6C8) } },
+ { { UINT64_C(0x7A56867C325C9B9A), UINT64_C(0x1A143999F3DC3D6A),
+ UINT64_C(0xCE10959003F5BCB8), UINT64_C(0x034E9035D6EEE5B7),
+ UINT64_C(0x2AFA81C8495DF1BC), UINT64_C(0x5EAB52DC08924D02) },
+ { UINT64_C(0xEE6AA014AA181904), UINT64_C(0xE62DEF09310AD621),
+ UINT64_C(0x6C9792FCC7538A03), UINT64_C(0xA89D3E883E41D789),
+ UINT64_C(0xD60FA11C9F94AE83), UINT64_C(0x5E16A8C2E0D6234A) } },
+ { { UINT64_C(0x87EC053DA9242F3B), UINT64_C(0x99544637F0E03545),
+ UINT64_C(0xEA0633FF6B7019E9), UINT64_C(0x8CB8AE0768DDDB5B),
+ UINT64_C(0x892E7C841A811AC7), UINT64_C(0xC7EF19EB73664249) },
+ { UINT64_C(0xD1B5819ACD1489E3), UINT64_C(0xF9C80FB0DE45D24A),
+ UINT64_C(0x045C21A683BB7491), UINT64_C(0xA65325BE73F7A47D),
+ UINT64_C(0x08D09F0E9C394F0C), UINT64_C(0xE7FB21C6268D4F08) } },
+ { { UINT64_C(0xC4CCAB956CA95C18), UINT64_C(0x563FFD56BC42E040),
+ UINT64_C(0xFA3C64D8E701C604), UINT64_C(0xC88D4426B0ABAFEE),
+ UINT64_C(0x1A353E5E8542E4C3), UINT64_C(0x9A2D8B7CED726186) },
+ { UINT64_C(0xD61CE19042D097FA), UINT64_C(0x6A63E280799A748B),
+ UINT64_C(0x0F48D0633225486B), UINT64_C(0x848F8FE142A3C443),
+ UINT64_C(0x2CCDE2508493CEF4), UINT64_C(0x5450A50845E77E7C) } },
+ { { UINT64_C(0xD0F4E24803112816), UINT64_C(0xFCAD9DDBCCBE9E16),
+ UINT64_C(0x177999BF5AE01EA0), UINT64_C(0xD20C78B9CE832DCE),
+ UINT64_C(0x3CC694FB50C8C646), UINT64_C(0x24D75968C93D4887) },
+ { UINT64_C(0x9F06366A87BC08AF), UINT64_C(0x59FAB50E7FD0DF2A),
+ UINT64_C(0x5FFCC7F76C4CC234), UINT64_C(0x87198DD765F52D86),
+ UINT64_C(0x5B9C94B0A855DF04), UINT64_C(0xD8BA6C738A067AD7) } },
+ },
+ {
+ { { UINT64_C(0x9E9AF3151C4C9D90), UINT64_C(0x8665C5A9D12E0A89),
+ UINT64_C(0x204ABD9258286493), UINT64_C(0x79959889B2E09205),
+ UINT64_C(0x0C727A3DFE56B101), UINT64_C(0xF366244C8B657F26) },
+ { UINT64_C(0xDE35D954CCA65BE2), UINT64_C(0x52EE1230B0FD41CE),
+ UINT64_C(0xFA03261F36019FEE), UINT64_C(0xAFDA42D966511D8F),
+ UINT64_C(0xF63211DD821148B9), UINT64_C(0x7B56AF7E6F13A3E1) } },
+ { { UINT64_C(0x47FE47995913E184), UINT64_C(0x5BBE584C82145900),
+ UINT64_C(0xB76CFA8B9A867173), UINT64_C(0x9BC87BF0514BF471),
+ UINT64_C(0x37392DCE71DCF1FC), UINT64_C(0xEC3EFAE03AD1EFA8) },
+ { UINT64_C(0xBBEA5A3414876451), UINT64_C(0x96E5F5436217090F),
+ UINT64_C(0x5B3D4ECD9B1665A9), UINT64_C(0xE7B0DF26E329DF22),
+ UINT64_C(0x18FB438E0BAA808D), UINT64_C(0x90757EBFDD516FAF) } },
+ { { UINT64_C(0x1E6F9A95D5A98D68), UINT64_C(0x759EA7DF849DA828),
+ UINT64_C(0x365D56256E8B4198), UINT64_C(0xE1B9C53B7A4A53F9),
+ UINT64_C(0x55DC1D50E32B9B16), UINT64_C(0xA4657EBBBB6D5701) },
+ { UINT64_C(0x4C270249EACC76E2), UINT64_C(0xBE49EC75162B1CC7),
+ UINT64_C(0x19A95B610689902B), UINT64_C(0xDD5706BFA4CFC5A8),
+ UINT64_C(0xD33BDB7314E5B424), UINT64_C(0x21311BD1E69EBA87) } },
+ { { UINT64_C(0x75BA2F9B72A21ACC), UINT64_C(0x356688D4A28EDB4C),
+ UINT64_C(0x3C339E0B610D080F), UINT64_C(0x614AC29333A99C2F),
+ UINT64_C(0xA5E23AF2AA580AFF), UINT64_C(0xA6BCB860E1FDBA3A) },
+ { UINT64_C(0xAA603365B43F9425), UINT64_C(0xAE8D7126F7EE4635),
+ UINT64_C(0xA2B2524456330A32), UINT64_C(0xC396B5BB9E025AA3),
+ UINT64_C(0xABBF77FAF8A0D5CF), UINT64_C(0xB322EE30EA31C83B) } },
+ { { UINT64_C(0x048813847890E234), UINT64_C(0x387F1159672E70C6),
+ UINT64_C(0x1468A6147B307F75), UINT64_C(0x56335B52ED85EC96),
+ UINT64_C(0xDA1BB60FD45BCAE9), UINT64_C(0x4D94F3F0F9FAEADD) },
+ { UINT64_C(0x6C6A7183FC78D86B), UINT64_C(0xA425B5C73018DEC6),
+ UINT64_C(0xB1549C332D877399), UINT64_C(0x6C41C50C92B2BC37),
+ UINT64_C(0x3A9F380C83EE0DDB), UINT64_C(0xDED5FEB6C4599E73) } },
+ { { UINT64_C(0x14D34C210B7F8354), UINT64_C(0x1475A1CD9177CE45),
+ UINT64_C(0x9F5F764A9B926E4B), UINT64_C(0x77260D1E05DD21FE),
+ UINT64_C(0x3C882480C4B937F7), UINT64_C(0xC92DCD39722372F2) },
+ { UINT64_C(0xF636A1BEEC6F657E), UINT64_C(0xB0E6C3121D30DD35),
+ UINT64_C(0xFE4B0528E4654EFE), UINT64_C(0x1C4A682021D230D2),
+ UINT64_C(0x615D2E4898FA45AB), UINT64_C(0x1F35D6D801FDBABF) } },
+ { { UINT64_C(0xA636EEB83A7B10D1), UINT64_C(0x4E1AE352F4A29E73),
+ UINT64_C(0x01704F5FE6BB1EC7), UINT64_C(0x75C04F720EF020AE),
+ UINT64_C(0x448D8CEE5A31E6A6), UINT64_C(0xE40A9C29208F994B) },
+ { UINT64_C(0x69E09A30FD8F9D5D), UINT64_C(0xE6A5F7EB449BAB7E),
+ UINT64_C(0xF25BC18A2AA1768B), UINT64_C(0x9449E4043C841234),
+ UINT64_C(0x7A3BF43E016A7BEF), UINT64_C(0xF25803E82A150B60) } },
+ { { UINT64_C(0xE44A2A57B215F9E0), UINT64_C(0x38B34DCE19066F0A),
+ UINT64_C(0x8BB91DAD40BB1BFB), UINT64_C(0x64C9F775E67735FC),
+ UINT64_C(0xDE14241788D613CD), UINT64_C(0xC5014FF51901D88D) },
+ { UINT64_C(0xA250341DF38116B0), UINT64_C(0xF96B9DD49D6CBCB2),
+ UINT64_C(0x15EC6C7276B3FAC2), UINT64_C(0x88F1952F8124C1E9),
+ UINT64_C(0x6B72F8EA975BE4F5), UINT64_C(0x23D288FF061F7530) } },
+ { { UINT64_C(0xEBFE3E5FAFB96CE3), UINT64_C(0x2275EDFBB1979537),
+ UINT64_C(0xC37AB9E8C97BA741), UINT64_C(0x446E4B1063D7C626),
+ UINT64_C(0xB73E2DCED025EB02), UINT64_C(0x1F952B517669EEA7) },
+ { UINT64_C(0xABDD00F66069A424), UINT64_C(0x1C0F9D9BDC298BFB),
+ UINT64_C(0x831B1FD3EB757B33), UINT64_C(0xD7DBE18359D60B32),
+ UINT64_C(0x663D1F369EF094B3), UINT64_C(0x1BD5732E67F7F11A) } },
+ { { UINT64_C(0x3C7FB3F5C75D8892), UINT64_C(0x2CFF9A0CBA68DA69),
+ UINT64_C(0x76455E8B60EC740B), UINT64_C(0x4B8D67FF167B88F0),
+ UINT64_C(0xEDEC0C025A4186B1), UINT64_C(0x127C462DBEBF35AB) },
+ { UINT64_C(0x9159C67E049430FC), UINT64_C(0x86B21DD2E7747320),
+ UINT64_C(0x0E0E01520CF27B89), UINT64_C(0x705F28F5CD1316B6),
+ UINT64_C(0x76751691BEAEA8A8), UINT64_C(0x4C73E282360C5B69) } },
+ { { UINT64_C(0x46BCC0D5FD7B3D74), UINT64_C(0x6F13C20E0DC4F410),
+ UINT64_C(0x98A1AF7D72F11CDF), UINT64_C(0x6099FD837928881C),
+ UINT64_C(0x66976356371BB94B), UINT64_C(0x673FBA7219B945AB) },
+ { UINT64_C(0xE4D8FA6EAED00700), UINT64_C(0xEA2313EC5C71A9F7),
+ UINT64_C(0xF9ED8268F99D4AEA), UINT64_C(0xADD8916442AB59C7),
+ UINT64_C(0xB37EB26F3F3A2D45), UINT64_C(0x0B39BD7AA924841E) } },
+ { { UINT64_C(0xD811EB32E03CDBBB), UINT64_C(0x12055F1D7CC3610E),
+ UINT64_C(0x6B23A1A0A9046E3F), UINT64_C(0x4D7121229DD4A749),
+ UINT64_C(0xB0C2ACA1B1BF0AC3), UINT64_C(0x71EFF575C1B0432F) },
+ { UINT64_C(0x6CD814922B44E285), UINT64_C(0x3088BD9CD87E8D20),
+ UINT64_C(0xACE218E5F567E8FA), UINT64_C(0xB3FA0424CF90CBBB),
+ UINT64_C(0xADBDA751770734D3), UINT64_C(0xBCD78BAD5AD6569A) } },
+ { { UINT64_C(0xCADB31FA7F39641F), UINT64_C(0x3EF3E295825E5562),
+ UINT64_C(0x4893C633F4094C64), UINT64_C(0x52F685F18ADDF432),
+ UINT64_C(0x9FD887AB7FDC9373), UINT64_C(0x47A9ADA0E8680E8B) },
+ { UINT64_C(0x579313B7F0CD44F6), UINT64_C(0xAC4B8668E188AE2E),
+ UINT64_C(0x648F43698FB145BD), UINT64_C(0xE0460AB374629E31),
+ UINT64_C(0xC25F28758FF2B05F), UINT64_C(0x4720C2B62D31EAEA) } },
+ { { UINT64_C(0x4603CDF413D48F80), UINT64_C(0x9ADB50E2A49725DA),
+ UINT64_C(0x8CD3305065DF63F0), UINT64_C(0x58D8B3BBCD643003),
+ UINT64_C(0x170A4F4AB739826B), UINT64_C(0x857772B51EAD0E17) },
+ { UINT64_C(0x01B78152E65320F1), UINT64_C(0xA6B4D845B7503FC0),
+ UINT64_C(0x0F5089B93DD50798), UINT64_C(0x488F200F5690B6BE),
+ UINT64_C(0x220B4ADF9E096F36), UINT64_C(0x474D7C9F8CE5BC7C) } },
+ { { UINT64_C(0xFED8C058C745F8C9), UINT64_C(0xB683179E291262D1),
+ UINT64_C(0x26ABD367D15EE88C), UINT64_C(0x29E8EED3F60A6249),
+ UINT64_C(0xED6008BB1E02D6E1), UINT64_C(0xD82ECF4CA6B12B8D) },
+ { UINT64_C(0x9929D021AAE4FA22), UINT64_C(0xBE4DEF14336A1AB3),
+ UINT64_C(0x529B7E098C80A312), UINT64_C(0xB059188DEE0EB0CE),
+ UINT64_C(0x1E42979A16DEAB7F), UINT64_C(0x2411034984EE9477) } },
+ { { UINT64_C(0xD65246852BE579CC), UINT64_C(0x849316F1C456FDED),
+ UINT64_C(0xC51B7DA42D1B67DA), UINT64_C(0xC25B539E41BC6D6A),
+ UINT64_C(0xE3B7CCA3A9BF8BED), UINT64_C(0x813EF18C045C15E4) },
+ { UINT64_C(0x5F3789A1697982C4), UINT64_C(0x4C1253698C435566),
+ UINT64_C(0x00A7AE6EDC0A92C6), UINT64_C(0x1ABC929B2F64A053),
+ UINT64_C(0xF4925C4C38666B44), UINT64_C(0xA81044B00F3DE7F6) } },
+ },
+ {
+ { { UINT64_C(0xBCC88422C2EC3731), UINT64_C(0x78A3E4D410DC4EC2),
+ UINT64_C(0x745DA1EF2571D6B1), UINT64_C(0xF01C2921739A956E),
+ UINT64_C(0xEFFD8065E4BFFC16), UINT64_C(0x6EFE62A1F36FE72C) },
+ { UINT64_C(0xF49E90D20F4629A4), UINT64_C(0xADD1DCC78CE646F4),
+ UINT64_C(0xCB78B583B7240D91), UINT64_C(0x2E1A7C3C03F8387F),
+ UINT64_C(0x16566C223200F2D9), UINT64_C(0x2361B14BAAF80A84) } },
+ { { UINT64_C(0xDB1CFFD2B5733309), UINT64_C(0x24BC250B0F9DD939),
+ UINT64_C(0xA4181E5AA3C1DB85), UINT64_C(0xE5183E51AC55D391),
+ UINT64_C(0x2793D5EFEFD270D0), UINT64_C(0x7D56F63DC0631546) },
+ { UINT64_C(0xECB40A590C1EE59D), UINT64_C(0xE613A9E4BB5BFA2C),
+ UINT64_C(0xA89B14AB6C5830F9), UINT64_C(0x4DC477DCA03F201E),
+ UINT64_C(0x5604F5DAC88C54F6), UINT64_C(0xD49264DC2ACFC66E) } },
+ { { UINT64_C(0x283DD7F01C4DFA95), UINT64_C(0xB898CC2C62C0B160),
+ UINT64_C(0xBA08C095870282AA), UINT64_C(0xB02B00D8F4E36324),
+ UINT64_C(0x53AADDC0604CECF2), UINT64_C(0xF1F927D384DDD24E) },
+ { UINT64_C(0x34BC00A0E2ABC9E1), UINT64_C(0x2DA1227D60289F88),
+ UINT64_C(0x5228EAAACEF68F74), UINT64_C(0x40A790D23C029351),
+ UINT64_C(0xE0E9AF5C8442E3B7), UINT64_C(0xA3214142A9F141E0) } },
+ { { UINT64_C(0x72F4949EF9A58E3D), UINT64_C(0x738C700BA48660A6),
+ UINT64_C(0x71B04726092A5805), UINT64_C(0xAD5C3C110F5CDB72),
+ UINT64_C(0xD4951F9E554BFC49), UINT64_C(0xEE594EE56131EBE7) },
+ { UINT64_C(0x37DA59F33C1AF0A9), UINT64_C(0xD7AFC73BCB040A63),
+ UINT64_C(0xD020962A4D89FA65), UINT64_C(0x2610C61E71D824F5),
+ UINT64_C(0x9C917DA73C050E31), UINT64_C(0x3840F92FE6E7EBFB) } },
+ { { UINT64_C(0x50FBD7FE8D8B8CED), UINT64_C(0xC7282F7547D240AE),
+ UINT64_C(0x79646A471930FF73), UINT64_C(0x2E0BAC4E2F7F5A77),
+ UINT64_C(0x0EE44FA526127E0B), UINT64_C(0x678881B782BC2AA7) },
+ { UINT64_C(0xB9E5D38467F5F497), UINT64_C(0x8F94A7D4A9B7106B),
+ UINT64_C(0xBF7E0B079D329F68), UINT64_C(0x169B93EA45D192FB),
+ UINT64_C(0xCCAA946720DBE8C0), UINT64_C(0xD4513A50938F9574) } },
+ { { UINT64_C(0x841C96B4054CB874), UINT64_C(0xD75B1AF1A3C26834),
+ UINT64_C(0x7237169DEE6575F0), UINT64_C(0xD71FC7E50322AADC),
+ UINT64_C(0xD7A23F1E949E3A8E), UINT64_C(0x77E2D102DD31D8C7) },
+ { UINT64_C(0x5AD69D09D10F5A1F), UINT64_C(0x526C9CB4B99D9A0B),
+ UINT64_C(0x521BB10B972B237D), UINT64_C(0x1E4CD42FA326F342),
+ UINT64_C(0x5BB6DB27F0F126CA), UINT64_C(0x587AF22CA4A515AD) } },
+ { { UINT64_C(0x1123A531B12E542F), UINT64_C(0x1D01A64DB9EB2811),
+ UINT64_C(0xA4A3515BF2D70F87), UINT64_C(0xFA205234B4BD0270),
+ UINT64_C(0x74B818305EDA26B9), UINT64_C(0x9305D6E656578E75) },
+ { UINT64_C(0xF38E69DE9F11BE19), UINT64_C(0x1E2A5C2344DBE89F),
+ UINT64_C(0x1077E7BCFD286654), UINT64_C(0xD36698940FCA4741),
+ UINT64_C(0x893BF904278F8497), UINT64_C(0xD6AC5F83EB3E14F4) } },
+ { { UINT64_C(0x327B9DAB488F5F74), UINT64_C(0x2B44F4B8CAB7364F),
+ UINT64_C(0xB4A6D22D19B6C6BD), UINT64_C(0xA087E613FC77CD3E),
+ UINT64_C(0x4558E327B0B49BC7), UINT64_C(0x188805BECD835D35) },
+ { UINT64_C(0x592F293CC1DC1007), UINT64_C(0xFAEE660F6AF02B44),
+ UINT64_C(0x5BFBB3BF904035F2), UINT64_C(0xD7C9AE6079C07E70),
+ UINT64_C(0xC5287DD4234896C2), UINT64_C(0xC4CE4523CB0E4121) } },
+ { { UINT64_C(0x3626B40658344831), UINT64_C(0xABCCE3568E55C984),
+ UINT64_C(0x495CC81C77241602), UINT64_C(0x4FB796766D70DF8F),
+ UINT64_C(0x6354B37C5B071DCA), UINT64_C(0x2CAD80A48C0FC0AD) },
+ { UINT64_C(0x18AADD51F68739B4), UINT64_C(0x1BFBB17747F09C6C),
+ UINT64_C(0x9355EA19A8FD51C4), UINT64_C(0x3D512A84EE58DB7B),
+ UINT64_C(0x70842AFDE9237640), UINT64_C(0x36F515CAACAF858D) } },
+ { { UINT64_C(0x3DDEC7C47E768B23), UINT64_C(0x97E13C53036D43ED),
+ UINT64_C(0x871E59253A39AB5F), UINT64_C(0x9AF292DE07E68E2B),
+ UINT64_C(0x411583494A40112E), UINT64_C(0xCDBB46AF3D4D97E6) },
+ { UINT64_C(0x2F8912933C0EBE40), UINT64_C(0x696C7EEE3EBAD1E5),
+ UINT64_C(0x8A5F3B6933B50D99), UINT64_C(0xB7BC48407ED47DDE),
+ UINT64_C(0x3A6F8E6C1E6706D8), UINT64_C(0x6A1479433D84BB8F) } },
+ { { UINT64_C(0xEC3A9C78603AE8D1), UINT64_C(0xBFE07E37228C29E5),
+ UINT64_C(0xB0385C5B396DBC2B), UINT64_C(0x7C14FE83DF85F41F),
+ UINT64_C(0xE2E64676ADFD463E), UINT64_C(0x5BEF10AA8BF9F23D) },
+ { UINT64_C(0xFA83EA0DF6BAB6DA), UINT64_C(0xCD0C8BA5966BF7E3),
+ UINT64_C(0xD62216B498501C2E), UINT64_C(0xB7F298A4C3E69F2D),
+ UINT64_C(0x42CEF13B9C8740F4), UINT64_C(0xBB317E520DD64307) } },
+ { { UINT64_C(0x22B6245C3FFEE775), UINT64_C(0x5C3F60BEB37CE7AA),
+ UINT64_C(0xDE195D40E1FEC0DF), UINT64_C(0x3BFAFBC5A0A82074),
+ UINT64_C(0xC36EC86AC72CA86A), UINT64_C(0x5606285113FD43EA) },
+ { UINT64_C(0x8686BE808E0B03A4), UINT64_C(0xC3BD1F93D540D440),
+ UINT64_C(0x13E4EBC0BF96CEC5), UINT64_C(0xE8E239849190C844),
+ UINT64_C(0x183593A600844802), UINT64_C(0x467168794D206878) } },
+ { { UINT64_C(0x358F394DB6F63D19), UINT64_C(0xA75D48496B052194),
+ UINT64_C(0x584035905C8D7975), UINT64_C(0x86DC9B6B6CBFBD77),
+ UINT64_C(0x2DB04D77647A51E5), UINT64_C(0x5E9A5B02F8950D88) },
+ { UINT64_C(0xCE69A7E5017168B0), UINT64_C(0x94630FACC4843AD3),
+ UINT64_C(0xB3B9D7361EFC44FF), UINT64_C(0xE729E9B6B14D7F93),
+ UINT64_C(0xA071FC60E0ED0ABC), UINT64_C(0xFC1A99718C8D9B83) } },
+ { { UINT64_C(0x49686031D138E975), UINT64_C(0x648640385A8EF0D1),
+ UINT64_C(0x32679713E7F7DE49), UINT64_C(0x5913234929D1CD1D),
+ UINT64_C(0x849AA23A20BE9ED2), UINT64_C(0x15D303E1284B3F33) },
+ { UINT64_C(0x37309475B63F9FE9), UINT64_C(0x327BAC8B45B7256A),
+ UINT64_C(0x291CD227D17FC5D3), UINT64_C(0x8291D8CDA973EDF1),
+ UINT64_C(0xF3843562437ABA09), UINT64_C(0x33FFB704271D0785) } },
+ { { UINT64_C(0x5248D6E447E11E5E), UINT64_C(0x0F66FC3C269C7ED3),
+ UINT64_C(0x18C0D2B9903E346E), UINT64_C(0xD81D9D974BEAE1B8),
+ UINT64_C(0x610326B0FC30FDF3), UINT64_C(0x2B13687019A7DFCD) },
+ { UINT64_C(0xEC75F70AB9527676), UINT64_C(0x90829F5129A3D897),
+ UINT64_C(0x92FE180997980302), UINT64_C(0xA3F2498E68474991),
+ UINT64_C(0x6A66307B0F22BBAD), UINT64_C(0x32014B9120378557) } },
+ { { UINT64_C(0x72CD7D553CD98610), UINT64_C(0xC3D560B074504ADF),
+ UINT64_C(0x23F0A982CEBB5D5D), UINT64_C(0x1431C15BB839DDB8),
+ UINT64_C(0x7E207CD8CEB72207), UINT64_C(0x28E0A848E7EFB28D) },
+ { UINT64_C(0xD22561FE1BD96F6E), UINT64_C(0x04812C1862A8236B),
+ UINT64_C(0xA0BF2334975491FA), UINT64_C(0x294F42A6435DF87F),
+ UINT64_C(0x2772B783A5D6F4F6), UINT64_C(0x348F92ED2724F853) } },
+ },
+ {
+ { { UINT64_C(0xC20FB9111A42E5E7), UINT64_C(0x075A678B81D12863),
+ UINT64_C(0x12BCBC6A5CC0AA89), UINT64_C(0x5279C6AB4FB9F01E),
+ UINT64_C(0xBC8E178911AE1B89), UINT64_C(0xAE74A706C290003C) },
+ { UINT64_C(0x9949D6EC79DF3F45), UINT64_C(0xBA18E26296C8D37F),
+ UINT64_C(0x68DE6EE2DD2275BF), UINT64_C(0xA9E4FFF8C419F1D5),
+ UINT64_C(0xBC759CA4A52B5A40), UINT64_C(0xFF18CBD863B0996D) } },
+ { { UINT64_C(0x73C57FDED7DD47E5), UINT64_C(0xB0FE5479D49A7F5D),
+ UINT64_C(0xD25C71F1CFB9821E), UINT64_C(0x9427E209CF6A1D68),
+ UINT64_C(0xBF3C3916ACD24E64), UINT64_C(0x7E9F5583BDA7B8B5) },
+ { UINT64_C(0xE7C5F7C8CF971E11), UINT64_C(0xEC16D5D73C7F035E),
+ UINT64_C(0x818DC472E66B277C), UINT64_C(0x4413FD47B2816F1E),
+ UINT64_C(0x40F262AF48383C6D), UINT64_C(0xFB0575844F190537) } },
+ { { UINT64_C(0x487EDC0708962F6B), UINT64_C(0x6002F1E7190A7E55),
+ UINT64_C(0x7FC62BEA10FDBA0C), UINT64_C(0xC836BBC52C3DBF33),
+ UINT64_C(0x4FDFB5C34F7D2A46), UINT64_C(0x824654DEDCA0DF71) },
+ { UINT64_C(0x30A076760C23902B), UINT64_C(0x7F1EBB9377FBBF37),
+ UINT64_C(0xD307D49DFACC13DB), UINT64_C(0x148D673AAE1A261A),
+ UINT64_C(0xE008F95B52D98650), UINT64_C(0xC76144409F558FDE) } },
+ { { UINT64_C(0x17CD6AF69CB16650), UINT64_C(0x86CC27C169F4EEBE),
+ UINT64_C(0x7E495B1D78822432), UINT64_C(0xFED338E31B974525),
+ UINT64_C(0x527743D386F3CE21), UINT64_C(0x87948AD3B515C896) },
+ { UINT64_C(0x9FDE7039B17F2FB8), UINT64_C(0xA2FA9A5FD9B89D96),
+ UINT64_C(0x5D46600B36FF74DC), UINT64_C(0x8EA74B048302C3C9),
+ UINT64_C(0xD560F570F744B5EB), UINT64_C(0xC921023BFE762402) } },
+ { { UINT64_C(0xA35AB657FFF4C8ED), UINT64_C(0x017C61248A5FABD7),
+ UINT64_C(0x5646302509ACDA28), UINT64_C(0x6038D36114CF238A),
+ UINT64_C(0x1428B1B6AF1B9F07), UINT64_C(0x5827FF447482E95C) },
+ { UINT64_C(0xCB997E18780FF362), UINT64_C(0x2B89D702E0BCAC1E),
+ UINT64_C(0xC632A0B5A837DDC8), UINT64_C(0xF3EFCF1F59762647),
+ UINT64_C(0xE9BA309A38B0D60A), UINT64_C(0x05DEABDD20B5FB37) } },
+ { { UINT64_C(0xD44E5DBACB8AF047), UINT64_C(0x15400CB4943CFE82),
+ UINT64_C(0xDBD695759DF88B67), UINT64_C(0x8299DB2BB2405A7D),
+ UINT64_C(0x46E3BF770B1D80CD), UINT64_C(0xC50CF66CE82BA3D9) },
+ { UINT64_C(0xB2910A07F2F747A9), UINT64_C(0xF6B669DB5ADC89C1),
+ UINT64_C(0x3B5EF1A09052B081), UINT64_C(0x0F5D5ED3B594ACE2),
+ UINT64_C(0xDA30B8D5D5F01320), UINT64_C(0x0D688C5EAAFCD58F) } },
+ { { UINT64_C(0x5EEE3A312A161074), UINT64_C(0x6BAAAE56EFE2BE37),
+ UINT64_C(0xF9787F61E3D78698), UINT64_C(0xC6836B2650630A30),
+ UINT64_C(0x7445B85D1445DEF1), UINT64_C(0xD72016A2D568A6A5) },
+ { UINT64_C(0x9DD6F533E355614F), UINT64_C(0x637E7E5F91E04588),
+ UINT64_C(0x42E142F3B9FB1391), UINT64_C(0x0D07C05C41AFE5DA),
+ UINT64_C(0xD7CD25C81394EDF1), UINT64_C(0xEBE6A0FCB99288EE) } },
+ { { UINT64_C(0xB8E63B7BBABBAD86), UINT64_C(0x63226A9F90D66766),
+ UINT64_C(0x263818365CF26666), UINT64_C(0xCCBD142D4CADD0BF),
+ UINT64_C(0xA070965E9AC29470), UINT64_C(0x6BDCA26025FF23ED) },
+ { UINT64_C(0xD4E00FD487DCA7B3), UINT64_C(0xA50978339E0E8734),
+ UINT64_C(0xF73F162E048173A4), UINT64_C(0xD23F91969C3C2FA2),
+ UINT64_C(0x9AB98B45E4AC397A), UINT64_C(0x2BAA0300543F2D4B) } },
+ { { UINT64_C(0xBBBE15E7C658C445), UINT64_C(0xB8CBCB20C28941D1),
+ UINT64_C(0x65549BE2027D6540), UINT64_C(0xEBBCA8021E8EF4F4),
+ UINT64_C(0x18214B4BD2ACA397), UINT64_C(0xCBEC7DE2E31784A3) },
+ { UINT64_C(0x96F0533F0116FDF3), UINT64_C(0x68911C905C8F5EE1),
+ UINT64_C(0x7DE9A3AED568603A), UINT64_C(0x3F56C52C6A3AD7B7),
+ UINT64_C(0x5BE9AFCA670B4D0E), UINT64_C(0x628BFEEE375DFE2F) } },
+ { { UINT64_C(0x97DAE81BDD4ADDB3), UINT64_C(0x12D2CF4E8704761B),
+ UINT64_C(0x5E820B403247788D), UINT64_C(0x82234B620051CA80),
+ UINT64_C(0x0C62704D6CB5EA74), UINT64_C(0xDE56042023941593) },
+ { UINT64_C(0xB3912A3CF1B04145), UINT64_C(0xE3967CD7AF93688D),
+ UINT64_C(0x2E2DCD2F58DABB4B), UINT64_C(0x6564836F0E303911),
+ UINT64_C(0x1F10F19BECE07C5C), UINT64_C(0xB47F07EED8919126) } },
+ { { UINT64_C(0xE3545085E9A2EEC9), UINT64_C(0x81866A972C8E51FE),
+ UINT64_C(0xD2BA7DB550027243), UINT64_C(0x29DAEAB54AE87DE4),
+ UINT64_C(0x5EF3D4B8684F9497), UINT64_C(0xE2DACE3B9D5D6873) },
+ { UINT64_C(0xF012C951FFD29C9C), UINT64_C(0x48289445ADBADA14),
+ UINT64_C(0x8751F50D89558C49), UINT64_C(0x75511A4F99E35BEE),
+ UINT64_C(0xEF802D6E7D59AA5F), UINT64_C(0x14FCAD65A2A795E2) } },
+ { { UINT64_C(0xC8EB00E808CB8F2C), UINT64_C(0x686075322B45BD86),
+ UINT64_C(0x7A29B45959969713), UINT64_C(0x5FA15B9BD684201B),
+ UINT64_C(0x1A853190B9E538EE), UINT64_C(0x4150605CD573D043) },
+ { UINT64_C(0xEF011D3BEB9FBB68), UINT64_C(0x6727998266AE32B6),
+ UINT64_C(0x861B86EA445DE5EC), UINT64_C(0x62837D18A34A50E1),
+ UINT64_C(0x228C006ABF5F0663), UINT64_C(0xE007FDE7396DB36A) } },
+ { { UINT64_C(0xDEE4F8815A916A55), UINT64_C(0x20DC0370F39C82CB),
+ UINT64_C(0xD9A7161540F09821), UINT64_C(0xD50AD8BFF7273492),
+ UINT64_C(0xA06F7D1232E7C4BF), UINT64_C(0xFA0F61544C5CEA36) },
+ { UINT64_C(0xF4FD9BED5FC49CFE), UINT64_C(0xD8CB45D1C9291678),
+ UINT64_C(0x94DB86CC7B92C9F2), UINT64_C(0x09CA5F3873C81169),
+ UINT64_C(0x109F40B0AEED06F0), UINT64_C(0x9F0360B214DCAA0A) } },
+ { { UINT64_C(0x4189B70DE12AD3E7), UINT64_C(0x5208ADB210B06607),
+ UINT64_C(0xEBD8E2A2EE8497FA), UINT64_C(0x61B1BD67E04F2ECB),
+ UINT64_C(0x0E2DDA724F3F5F99), UINT64_C(0xD5D96740F747B16D) },
+ { UINT64_C(0x308A48F6A6BF397F), UINT64_C(0x7021C3E523A93595),
+ UINT64_C(0xF10B022936470AA0), UINT64_C(0x7761E8EC4E03295B),
+ UINT64_C(0x16EFEF5807339770), UINT64_C(0x0D55D2DD5DA5DAA2) } },
+ { { UINT64_C(0x915EA6A38A22F87A), UINT64_C(0x191151C12E5A088E),
+ UINT64_C(0x190252F17F1D5CBE), UINT64_C(0xE43F59C33B0EC99B),
+ UINT64_C(0xBE8588D4FF2A6135), UINT64_C(0x103877CC2ECB4B9F) },
+ { UINT64_C(0x8F4147E5023CF92B), UINT64_C(0xC24384CC0CC2085B),
+ UINT64_C(0x6A2DB4A2D082D311), UINT64_C(0x06283811ED7BA9AE),
+ UINT64_C(0xE9A3F5322A8E1592), UINT64_C(0xAC20F0F45A59E894) } },
+ { { UINT64_C(0x788CAA5274AAB4B1), UINT64_C(0xEB84ABA12FEAFC7E),
+ UINT64_C(0x31DA71DAAC04FF77), UINT64_C(0x39D12EB924E4D0BF),
+ UINT64_C(0x4F2F292F87A34EF8), UINT64_C(0x9B324372A237A8ED) },
+ { UINT64_C(0xBB2D04B12EE3A82D), UINT64_C(0xED4FF367D18D36B2),
+ UINT64_C(0x99D231EEA6EA0138), UINT64_C(0x7C2D4F064F92E04A),
+ UINT64_C(0x78A82AB2CA272FD0), UINT64_C(0x7EC41340AB8CDC32) } },
+ },
+ {
+ { { UINT64_C(0xD23658C8D2E15A8C), UINT64_C(0x23F93DF716BA28CA),
+ UINT64_C(0x6DAB10EC082210F1), UINT64_C(0xFB1ADD91BFC36490),
+ UINT64_C(0xEDA8B02F9A4F2D14), UINT64_C(0x9060318C56560443) },
+ { UINT64_C(0x6C01479E64711AB2), UINT64_C(0x41446FC7E337EB85),
+ UINT64_C(0x4DCF3C1D71888397), UINT64_C(0x87A9C04E13C34FD2),
+ UINT64_C(0xFE0E08EC510C15AC), UINT64_C(0xFC0D0413C0F495D2) } },
+ { { UINT64_C(0xEB05C516156636C2), UINT64_C(0x2F613ABA090E93FC),
+ UINT64_C(0xCFD573CD489576F5), UINT64_C(0xE6535380535A8D57),
+ UINT64_C(0x13947314671436C4), UINT64_C(0x1172FB0C5F0A122D) },
+ { UINT64_C(0xAECC7EC1C12F58F6), UINT64_C(0xFE42F9578E41AFD2),
+ UINT64_C(0xDF96F6523D4221AA), UINT64_C(0xFEF5649F2851996B),
+ UINT64_C(0x46FB9F26D5CFB67E), UINT64_C(0xB047BFC7EF5C4052) } },
+ { { UINT64_C(0x5CBDC442F4484374), UINT64_C(0x6B156957F92452EF),
+ UINT64_C(0x58A26886C118D02A), UINT64_C(0x87FF74E675AAF276),
+ UINT64_C(0xB133BE95F65F6EC1), UINT64_C(0xA89B62844B1B8D32) },
+ { UINT64_C(0xDD8A8EF309C81004), UINT64_C(0x7F8225DB0CF21991),
+ UINT64_C(0xD525A6DB26623FAF), UINT64_C(0xF2368D40BAE15453),
+ UINT64_C(0x55D6A84D84F89FC9), UINT64_C(0xAF38358A86021A3E) } },
+ { { UINT64_C(0xBD048BDCFF52E280), UINT64_C(0x8A51D0B2526A1795),
+ UINT64_C(0x40AAA758A985AC0F), UINT64_C(0x6039BCDCF2C7ACE9),
+ UINT64_C(0x712092CC6AEC347D), UINT64_C(0x7976D0906B5ACAB7) },
+ { UINT64_C(0x1EBCF80D6EED9617), UINT64_C(0xB3A63149B0F404A4),
+ UINT64_C(0x3FDD3D1AD0B610EF), UINT64_C(0xDD3F6F9498C28AC7),
+ UINT64_C(0x650B77943A59750F), UINT64_C(0xEC59BAB12D3991AC) } },
+ { { UINT64_C(0x01F40E882E552766), UINT64_C(0x1FE3D50966F5354F),
+ UINT64_C(0x0E46D006B3A8EA7F), UINT64_C(0xF75AB629F831CD6A),
+ UINT64_C(0xDAD808D791465119), UINT64_C(0x442405AF17EF9B10) },
+ { UINT64_C(0xD5FE0A96672BDFCB), UINT64_C(0xA9DFA422355DBDEC),
+ UINT64_C(0xFDB79AA179B25636), UINT64_C(0xE7F26FFDEECE8AEC),
+ UINT64_C(0xB59255507EDD5AA2), UINT64_C(0x2C8F6FF08EB3A6C2) } },
+ { { UINT64_C(0x88887756757D6136), UINT64_C(0xAD9AC18388B92E72),
+ UINT64_C(0x92CB2FC48785D3EB), UINT64_C(0xD1A542FE9319764B),
+ UINT64_C(0xAF4CC78F626A62F8), UINT64_C(0x7F3F5FC926BFFAAE) },
+ { UINT64_C(0x0A203D4340AE2231), UINT64_C(0xA8BFD9E0387898E8),
+ UINT64_C(0x1A0C379C474B7DDD), UINT64_C(0x03855E0A34FD49EA),
+ UINT64_C(0x02B26223B3EF4AE1), UINT64_C(0x804BD8CFE399E0A3) } },
+ { { UINT64_C(0x11A9F3D0DE865713), UINT64_C(0x81E36B6BBDE98821),
+ UINT64_C(0x324996C86AA891D0), UINT64_C(0x7B95BDC1395682B5),
+ UINT64_C(0x47BF2219C1600563), UINT64_C(0x7A473F50643E38B4) },
+ { UINT64_C(0x0911F50AF5738288), UINT64_C(0xDF947A706F9C415B),
+ UINT64_C(0xBDB994F267A067F6), UINT64_C(0x3F4BEC1B88BE96CD),
+ UINT64_C(0x9820E931E56DD6D9), UINT64_C(0xB138F14F0A80F419) } },
+ { { UINT64_C(0xA11A1A8F0429077A), UINT64_C(0x2BB1E33D10351C68),
+ UINT64_C(0x3C25ABFE89459A27), UINT64_C(0x2D0091B86B8AC774),
+ UINT64_C(0xDAFC78533B2415D9), UINT64_C(0xDE713CF19201680D) },
+ { UINT64_C(0x8E5F445D68889D57), UINT64_C(0x608B209C60EABF5B),
+ UINT64_C(0x10EC0ACCF9CFA408), UINT64_C(0xD5256B9D4D1EE754),
+ UINT64_C(0xFF866BAB0AA6C18D), UINT64_C(0x9D196DB8ACB90A45) } },
+ { { UINT64_C(0xA46D76A9B9B081B2), UINT64_C(0xFC743A1062163C25),
+ UINT64_C(0xCD2A5C8D7761C392), UINT64_C(0x39BDDE0BBE808583),
+ UINT64_C(0x7C416021B98E4DFE), UINT64_C(0xF930E56365913A44) },
+ { UINT64_C(0xC3555F7E7585CF3C), UINT64_C(0xC737E3833D6333D5),
+ UINT64_C(0x5B60DBA4B430B03D), UINT64_C(0x42B715EBE7555404),
+ UINT64_C(0x571BDF5B7C7796E3), UINT64_C(0x33DC62C66DB6331F) } },
+ { { UINT64_C(0x3FB9CCB0E61DEE59), UINT64_C(0xC5185F2318B14DB9),
+ UINT64_C(0x1B2ADC4F845EF36C), UINT64_C(0x195D5B505C1A33AB),
+ UINT64_C(0x8CEA528E421F59D2), UINT64_C(0x7DFCCECFD2931CEA) },
+ { UINT64_C(0x51FFA1D58CF7E3F7), UINT64_C(0xF01B7886BDC9FB43),
+ UINT64_C(0xD65AB610261A0D35), UINT64_C(0x84BCBAFD7574A554),
+ UINT64_C(0x4B119956FAD70208), UINT64_C(0xDDC329C24FAB5243) } },
+ { { UINT64_C(0x1A08AA579CE92177), UINT64_C(0x3395E557DC2B5C36),
+ UINT64_C(0xFDFE7041394ED04E), UINT64_C(0xB797EB24C6DFCDDE),
+ UINT64_C(0x284A6B2ACB9DE5D6), UINT64_C(0xE0BD95C807222765) },
+ { UINT64_C(0x114A951B9FE678A7), UINT64_C(0xE7ECD0BD9E4954EC),
+ UINT64_C(0x7D4096FE79F0B8A9), UINT64_C(0xBDB26E9A09724FE2),
+ UINT64_C(0x08741AD8F787AF95), UINT64_C(0x2BF9727224045AD8) } },
+ { { UINT64_C(0xAB1FEDD9A9451D57), UINT64_C(0xDF4D91DF483E38C9),
+ UINT64_C(0x2D54D31124E9CF8E), UINT64_C(0x9C2A5AF87A22EEB6),
+ UINT64_C(0xBD9861EF0A43F123), UINT64_C(0x581EA6A238A18B7B) },
+ { UINT64_C(0xAF339C85296470A3), UINT64_C(0xF9603FCDAFD8203E),
+ UINT64_C(0x95D0535096763C28), UINT64_C(0x15445C16860EC831),
+ UINT64_C(0x2AFB87286867A323), UINT64_C(0x4B152D6D0C4838BF) } },
+ { { UINT64_C(0x45BA0E4F837CACBA), UINT64_C(0x7ADB38AEC0725275),
+ UINT64_C(0x19C82831942D3C28), UINT64_C(0x94F4731D6D0FE7DD),
+ UINT64_C(0xC3C07E134898F1E6), UINT64_C(0x76350EACED410B51) },
+ { UINT64_C(0x0FA8BECAF99AACFC), UINT64_C(0x2834D86F65FAF9CF),
+ UINT64_C(0x8E62846A6F3866AF), UINT64_C(0xDAA9BD4F3DFD6A2B),
+ UINT64_C(0xC27115BBA6132655), UINT64_C(0x83972DF7BD5A32C2) } },
+ { { UINT64_C(0xA330CB5BD513B825), UINT64_C(0xAE18B2D3EE37BEC3),
+ UINT64_C(0xFC3AB80AF780A902), UINT64_C(0xD7835BE2D607DDF1),
+ UINT64_C(0x8120F7675B6E4C2B), UINT64_C(0xAA8C385967E78CCB) },
+ { UINT64_C(0xA8DA8CE2AA0ED321), UINT64_C(0xCB8846FDD766341A),
+ UINT64_C(0xF2A342EE33DC9D9A), UINT64_C(0xA519E0BED0A18A80),
+ UINT64_C(0x9CDAA39CAF48DF4C), UINT64_C(0xA4B500CA7E0C19EE) } },
+ { { UINT64_C(0x83A7FD2F8217001B), UINT64_C(0x4F6FCF064296A8BA),
+ UINT64_C(0x7D74864391619927), UINT64_C(0x174C1075941E4D41),
+ UINT64_C(0x037EDEBDA64F5A6C), UINT64_C(0xCF64DB3A6E29DC56) },
+ { UINT64_C(0x150B3ACE37C0B9F4), UINT64_C(0x1323234A7168178B),
+ UINT64_C(0x1CE47014EF4D1879), UINT64_C(0xA22E374217FB4D5C),
+ UINT64_C(0x69B81822D985F794), UINT64_C(0x199C21C4081D7214) } },
+ { { UINT64_C(0x160BC7A18F04B4D2), UINT64_C(0x79CA81DDB10DE174),
+ UINT64_C(0xE2A280B02DA1E9C7), UINT64_C(0xB4F6BD991D6A0A29),
+ UINT64_C(0x57CF3EDD1C5B8F27), UINT64_C(0x7E34FC57158C2FD4) },
+ { UINT64_C(0x828CFD89CAC93459), UINT64_C(0x9E631B6FB7AF499F),
+ UINT64_C(0xF4DC8BC0DA26C135), UINT64_C(0x6128ED3937186735),
+ UINT64_C(0xBB45538B67BF0BA5), UINT64_C(0x1ADDD4C10064A3AB) } },
+ },
+ {
+ { { UINT64_C(0xC32730E8DD14D47E), UINT64_C(0xCDC1FD42C0F01E0F),
+ UINT64_C(0x2BACFDBF3F5CD846), UINT64_C(0x45F364167272D4DD),
+ UINT64_C(0xDD813A795EB75776), UINT64_C(0xB57885E450997BE2) },
+ { UINT64_C(0xDA054E2BDB8C9829), UINT64_C(0x4161D820AAB5A594),
+ UINT64_C(0x4C428F31026116A3), UINT64_C(0x372AF9A0DCD85E91),
+ UINT64_C(0xFDA6E903673ADC2D), UINT64_C(0x4526B8ACA8DB59E6) } },
+ { { UINT64_C(0x68FE359DE23A8472), UINT64_C(0x43EB12BD4CE3C101),
+ UINT64_C(0x0EC652C3FC704935), UINT64_C(0x1EEFF1F952E4E22D),
+ UINT64_C(0xBA6777CB083E3ADA), UINT64_C(0xAB52D7DC8BEFC871) },
+ { UINT64_C(0x4EDE689F497CBD59), UINT64_C(0xC8AE42B927577DD9),
+ UINT64_C(0xE0F080517AB83C27), UINT64_C(0x1F3D5F252C8C1F48),
+ UINT64_C(0x57991607AF241AAC), UINT64_C(0xC4458B0AB8A337E0) } },
+ { { UINT64_C(0x3DBB3FA651DD1BA9), UINT64_C(0xE53C1C4D545E960B),
+ UINT64_C(0x35AC6574793CE803), UINT64_C(0xB2697DC783DBCE4F),
+ UINT64_C(0xE35C5BF2E13CF6B0), UINT64_C(0x35034280B0C4A164) },
+ { UINT64_C(0xAA490908D9C0D3C1), UINT64_C(0x2CCE614DCB4D2E90),
+ UINT64_C(0xF646E96C54D504E4), UINT64_C(0xD74E7541B73310A3),
+ UINT64_C(0xEAD7159618BDE5DA), UINT64_C(0x96E7F4A8AA09AEF7) } },
+ { { UINT64_C(0xA8393A245D6E5F48), UINT64_C(0x2C8D7EA2F9175CE8),
+ UINT64_C(0xD8824E0255A20268), UINT64_C(0x9DD9A272A446BCC6),
+ UINT64_C(0xC929CDED5351499B), UINT64_C(0xEA5AD9ECCFE76535) },
+ { UINT64_C(0x26F3D7D9DC32D001), UINT64_C(0x51C3BE8343EB9689),
+ UINT64_C(0x91FDCC06759E6DDB), UINT64_C(0xAC2E1904E302B891),
+ UINT64_C(0xAD25C645C207E1F7), UINT64_C(0x28A70F0DAB3DEB4A) } },
+ { { UINT64_C(0x922D7F9703BEA8F1), UINT64_C(0x3AD820D4584570BE),
+ UINT64_C(0x0CE0A8503CD46B43), UINT64_C(0x4C07911FAE66743D),
+ UINT64_C(0x66519EB9FDA60023), UINT64_C(0x7F83004BEC2ACD9C) },
+ { UINT64_C(0x001E0B80C3117EAD), UINT64_C(0xBB72D5410722BA25),
+ UINT64_C(0x3AF7DB966E9A5078), UINT64_C(0x86C5774E701B6B4C),
+ UINT64_C(0xBD2C0E8E37824DB5), UINT64_C(0x3AE3028CBFAC286D) } },
+ { { UINT64_C(0x83D4D4A8A33E071B), UINT64_C(0x881C0A9261444BB5),
+ UINT64_C(0xEEA1E292520E3BC3), UINT64_C(0x5A5F4C3C2AAAB729),
+ UINT64_C(0x0B766C5EE63C7C94), UINT64_C(0x62BB8A9FBB2CC79C) },
+ { UINT64_C(0x97ADC7D2AA5DC49D), UINT64_C(0x30CC26B331718681),
+ UINT64_C(0xAC86E6FF56E86EDE), UINT64_C(0x37BCA7A2CD52F7F2),
+ UINT64_C(0x734D2C949CE6D87F), UINT64_C(0x06A71D71C2F7E0CA) } },
+ { { UINT64_C(0x559DCF75C6357D33), UINT64_C(0x4616D940652517DE),
+ UINT64_C(0x3D576B981CCF207B), UINT64_C(0x51E2D1EF1979F631),
+ UINT64_C(0x57517DDD06AE8296), UINT64_C(0x309A3D7FD6E7151F) },
+ { UINT64_C(0xBA2A23E60E3A6FE5), UINT64_C(0x76CF674AD28B22C3),
+ UINT64_C(0xD235AD07F8B808C3), UINT64_C(0x7BBF4C586B71213A),
+ UINT64_C(0x0676792E93271EBB), UINT64_C(0x2CFD2C7605B1FC31) } },
+ { { UINT64_C(0x4258E5C037A450F5), UINT64_C(0xC3245F1B52D2B118),
+ UINT64_C(0x6DF7B48482BC5963), UINT64_C(0xE520DA4D9C273D1E),
+ UINT64_C(0xED78E0122C3010E5), UINT64_C(0x112229483C1D4C05) },
+ { UINT64_C(0xE3DAE5AFC692B490), UINT64_C(0x3272BD10C197F793),
+ UINT64_C(0xF7EAE411E709ACAA), UINT64_C(0x00B0C95F778270A6),
+ UINT64_C(0x4DA76EE1220D4350), UINT64_C(0x521E1461AB71E308) } },
+ { { UINT64_C(0x7B654323343196A3), UINT64_C(0x35D442ADB0C95250),
+ UINT64_C(0x38AF50E6E264FF17), UINT64_C(0x28397A412030D2EA),
+ UINT64_C(0x8F1D84E9F74EEDA1), UINT64_C(0xD521F92DE6FB3C52) },
+ { UINT64_C(0xAF358D7795733811), UINT64_C(0xEBFDDD0193ABFE94),
+ UINT64_C(0x05D8A028D18D99DE), UINT64_C(0x5A664019B5D5BDD9),
+ UINT64_C(0x3DF172822AA12FE8), UINT64_C(0xB42E006FB889A28E) } },
+ { { UINT64_C(0xCF10E97DBC35CB1A), UINT64_C(0xC70A7BBD994DEDC5),
+ UINT64_C(0x76A5327C37D04FB9), UINT64_C(0x87539F76A76E0CDA),
+ UINT64_C(0xE9FE493FCD60A6B1), UINT64_C(0xA4574796132F01C0) },
+ { UINT64_C(0xC43B85EBDB70B167), UINT64_C(0x81D5039A98551DFA),
+ UINT64_C(0x6B56FBE91D979FA4), UINT64_C(0x49714FD78615098F),
+ UINT64_C(0xB10E1CEA94DECAB5), UINT64_C(0x8342EBA3480EF6E3) } },
+ { { UINT64_C(0xE1E030B0B3677288), UINT64_C(0x2978174C8D5CE3AF),
+ UINT64_C(0xAFC0271CF7B2DE98), UINT64_C(0x745BC6F3B99C20B5),
+ UINT64_C(0x9F6EDCED1E3BB4E5), UINT64_C(0x58D3EE4E73C8C1FC) },
+ { UINT64_C(0x1F3535F47FD30124), UINT64_C(0xF366AC705FA62502),
+ UINT64_C(0x4C4C1FDD965363FE), UINT64_C(0x8B2C77771DE2CA2B),
+ UINT64_C(0x0CB54743882F1173), UINT64_C(0x94B6B8C071343331) } },
+ { { UINT64_C(0x75AF014165B8B35B), UINT64_C(0x6D7B84854670A1F5),
+ UINT64_C(0x6EAA3A47A3B6D376), UINT64_C(0xD7E673D2CB3E5B66),
+ UINT64_C(0xC0338E6C9589AB38), UINT64_C(0x4BE26CB309440FAA) },
+ { UINT64_C(0x82CB05E7394F9AA3), UINT64_C(0xC45C8A8A7F7792EA),
+ UINT64_C(0x37E5E33BB687DC70), UINT64_C(0x63853219DFE48E49),
+ UINT64_C(0x087951C16D0E5C8C), UINT64_C(0x7696A8C72BC27310) } },
+ { { UINT64_C(0xA05736D5B67E834A), UINT64_C(0xDD2AA0F29098D42A),
+ UINT64_C(0x09F0C1D849C69DDC), UINT64_C(0x81F8BC1C8FF0F0F3),
+ UINT64_C(0x36FD3A4F03037775), UINT64_C(0x8286717D4B06DF5C) },
+ { UINT64_C(0xB878F496A9079EA2), UINT64_C(0xA5642426D7DC796D),
+ UINT64_C(0x29B9351A67FDAC2B), UINT64_C(0x93774C0E1D543CDE),
+ UINT64_C(0x4F8793BA1A8E31C4), UINT64_C(0x7C9F3F3A6C94798A) } },
+ { { UINT64_C(0x23C5AD11CB8ECDB8), UINT64_C(0x1E88D25E485A6A02),
+ UINT64_C(0xB27CBE84F1E268AE), UINT64_C(0xDDA80238F4CD0475),
+ UINT64_C(0x4F88857B49F8EB1B), UINT64_C(0x91B1221F52FB07F9) },
+ { UINT64_C(0x7CE974608637FA67), UINT64_C(0x528B3CF4632198D8),
+ UINT64_C(0x33365AB3F6623769), UINT64_C(0x6FEBCFFF3A83A30F),
+ UINT64_C(0x398F4C999BD341EB), UINT64_C(0x180712BBB33A333C) } },
+ { { UINT64_C(0x2B8655A2D93429E7), UINT64_C(0x99D600BB75C8B9EE),
+ UINT64_C(0x9FC1AF8B88FCA6CD), UINT64_C(0x2FB533867C311F80),
+ UINT64_C(0x20743ECBE8A71EEE), UINT64_C(0xEC3713C4E848B49E) },
+ { UINT64_C(0x5B2037B5BB886817), UINT64_C(0x40EF5AC2307DBAF4),
+ UINT64_C(0xC2888AF21B3F643D), UINT64_C(0x0D8252E19D5A4190),
+ UINT64_C(0x06CC0BEC2DB52A8A), UINT64_C(0xB84B98EAAB94E969) } },
+ { { UINT64_C(0x2E7AC078A0321E0E), UINT64_C(0x5C5A1168EF3DAAB6),
+ UINT64_C(0xD2D573CBADDD454A), UINT64_C(0x27E149E236259CC7),
+ UINT64_C(0x1EDFD469A63F47F1), UINT64_C(0x039AD674F1BD2CFD) },
+ { UINT64_C(0xBFA633FC3077D3CC), UINT64_C(0x14A7C82F2FD64E9F),
+ UINT64_C(0xAAA650149D824999), UINT64_C(0x41AB113B21760F2E),
+ UINT64_C(0x23E646C51CAE260A), UINT64_C(0x08062C8F68DC5159) } },
+ },
+ {
+ { { UINT64_C(0x2E7D0A16204BE028), UINT64_C(0x4F1D082ED0E41851),
+ UINT64_C(0x15F1DDC63EB317F9), UINT64_C(0xF02750715ADF71D7),
+ UINT64_C(0x2CE33C2EEE858BC3), UINT64_C(0xA24C76D1DA73B71A) },
+ { UINT64_C(0x9EF6A70A6C70C483), UINT64_C(0xEFCF170505CF9612),
+ UINT64_C(0x9F5BF5A67502DE64), UINT64_C(0xD11122A1A4701973),
+ UINT64_C(0x82CFAAC2A2EA7B24), UINT64_C(0x6CAD67CC0A4582E1) } },
+ { { UINT64_C(0x597A26FFB4DC8600), UINT64_C(0x264A09F3F9288555),
+ UINT64_C(0x0B06AFF65C27F5F6), UINT64_C(0xCE5AB665D8D544E6),
+ UINT64_C(0x92F031BE99275C32), UINT64_C(0xAF51C5BBF42E0E7C) },
+ { UINT64_C(0x5BB28B061E37B36D), UINT64_C(0x583FBA6A8473543A),
+ UINT64_C(0xE73FD299F93FB7DC), UINT64_C(0xFCD999A86E2CCAD9),
+ UINT64_C(0xB8C8A6DF334D4F57), UINT64_C(0x5ADB28DD9A2ACC9B) } },
+ { { UINT64_C(0x5ADF3D9A111792B9), UINT64_C(0x1C77A3054F1E0D09),
+ UINT64_C(0xF9FBCE33A82D3736), UINT64_C(0xF307823E718C8AA3),
+ UINT64_C(0x860578CF416CCF69), UINT64_C(0xB942ADD81EF8465B) },
+ { UINT64_C(0x9EE0CF97CD9472E1), UINT64_C(0xE6792EEFB01528A8),
+ UINT64_C(0xF99B9A8DC09DA90B), UINT64_C(0x1F521C2DCBF3CCB8),
+ UINT64_C(0x6BF6694891A62632), UINT64_C(0xCC7A9CEB854FE9DA) } },
+ { { UINT64_C(0x46303171491CCB92), UINT64_C(0xA80A8C0D2771235B),
+ UINT64_C(0xD8E497FFF172C7CF), UINT64_C(0x7F7009D735B193CF),
+ UINT64_C(0x6B9FD3F7F19DF4BC), UINT64_C(0xADA548C3B46F1E37) },
+ { UINT64_C(0x87C6EAA9C7A20270), UINT64_C(0xEF2245D6AE78EF99),
+ UINT64_C(0x2A121042539EAB95), UINT64_C(0x29A6D5D779B8F5CC),
+ UINT64_C(0x33803A10B77840DC), UINT64_C(0xFEDD3A7011A6A30F) } },
+ { { UINT64_C(0xFA070E22142403D1), UINT64_C(0x68FF316015C6F7F5),
+ UINT64_C(0xE09F04E6223A0CE8), UINT64_C(0x22BBD01853E14183),
+ UINT64_C(0x35D9FAFCCF45B75B), UINT64_C(0x3A34819D7ECEEC88) },
+ { UINT64_C(0xD9CF7568D33262D2), UINT64_C(0x431036D5841D1505),
+ UINT64_C(0x0C8005659EB2A79A), UINT64_C(0x8E77D9F05F7EDC6A),
+ UINT64_C(0x19E12D0565E800AA), UINT64_C(0x335C8D36B7784E7C) } },
+ { { UINT64_C(0x8B2FC4E96484FD40), UINT64_C(0xEE702764A35D24EA),
+ UINT64_C(0x15B28AC7B871C3F3), UINT64_C(0x805B4048E097047F),
+ UINT64_C(0xD6F1B8DF647CAD2F), UINT64_C(0xF1D5B458DC7DD67F) },
+ { UINT64_C(0x324C529C25148803), UINT64_C(0xF6185EBE21274FAF),
+ UINT64_C(0xAF14751E95148B55), UINT64_C(0x283ED89D28F284F4),
+ UINT64_C(0x93AD20E74CBEBF1A), UINT64_C(0x5F6EC65D882935E1) } },
+ { { UINT64_C(0xE222EBA4A4DCEFE9), UINT64_C(0x63AD235FEC1CEB74),
+ UINT64_C(0x2E0BF749E05B18E7), UINT64_C(0x547BD050B48BDD87),
+ UINT64_C(0x0490C970F5AA2FC4), UINT64_C(0xCED5E4CF2B431390) },
+ { UINT64_C(0x07D8270451D2898E), UINT64_C(0x44B72442083B57D4),
+ UINT64_C(0xA4ADA2305037FCE8), UINT64_C(0x55F7905E50510DA6),
+ UINT64_C(0xD8EE724F8D890A98), UINT64_C(0x925A8E7C11B85640) } },
+ { { UINT64_C(0x5BFA10CD1CA459ED), UINT64_C(0x593F085A6DCF56BF),
+ UINT64_C(0xE6F0AD9BC0579C3E), UINT64_C(0xC11C95A22527C1AD),
+ UINT64_C(0x7CFA71E1CF1CB8B3), UINT64_C(0xEDCFF8331D6DC79D) },
+ { UINT64_C(0x581C4BBE432521C9), UINT64_C(0xBF620096144E11A0),
+ UINT64_C(0x54C38B71BE3A107B), UINT64_C(0xED555E37E2606EC0),
+ UINT64_C(0x3FB148B8D721D034), UINT64_C(0x79D53DAD0091BC90) } },
+ { { UINT64_C(0xE32068C5B7082C80), UINT64_C(0x4140FFD27A144E22),
+ UINT64_C(0x5811D2F09EDD9E86), UINT64_C(0xCDD79B5FC572C465),
+ UINT64_C(0x3563FED1C97BF450), UINT64_C(0x985C1444F2CE5C9C) },
+ { UINT64_C(0x260AE79799950F1C), UINT64_C(0x659F4F40765E9DED),
+ UINT64_C(0x2A412D662E3BC286), UINT64_C(0xE865E62CF87E0C82),
+ UINT64_C(0xD63D3A9A6C05E7D7), UINT64_C(0x96725D678686F89A) } },
+ { { UINT64_C(0xC99A5E4CAB7EA0F5), UINT64_C(0xC9860A1AC5393FA9),
+ UINT64_C(0x9ED83CEE8FDEEFC0), UINT64_C(0xE3EA8B4C5ED6869A),
+ UINT64_C(0x89A85463D2EED3A9), UINT64_C(0x2CD91B6DE421A622) },
+ { UINT64_C(0x6FEC1EF32C91C41D), UINT64_C(0xB1540D1F8171037D),
+ UINT64_C(0x4FE4991A1C010E5B), UINT64_C(0x28A3469FFC1C7368),
+ UINT64_C(0xE1EEECD1AF118781), UINT64_C(0x1BCCB97799EF3531) } },
+ { { UINT64_C(0x63D3B638C4DAB7B8), UINT64_C(0xD92133B63F7F5BAB),
+ UINT64_C(0x2573EE2009FB6069), UINT64_C(0x771FABDF890A1686),
+ UINT64_C(0x1D0BA21FA77AFFF5), UINT64_C(0x83145FCCBA3DD2C0) },
+ { UINT64_C(0xFA073A812D115C20), UINT64_C(0x6AB7A9D319176F27),
+ UINT64_C(0xAF62CF939AC639EE), UINT64_C(0xF73848B92CCD1319),
+ UINT64_C(0x3B6132343C71659D), UINT64_C(0xF8E0011C10AB3826) } },
+ { { UINT64_C(0x0501F0360282FFA5), UINT64_C(0xC39A5CF4D9E0F15A),
+ UINT64_C(0x48D8C7299A3D1F3C), UINT64_C(0xB5FC136B64E18EDA),
+ UINT64_C(0xE81B53D97E58FEF0), UINT64_C(0x0D534055F7B0F28D) },
+ { UINT64_C(0x47B8DE127A80619B), UINT64_C(0x60E2A2B381F9E55D),
+ UINT64_C(0x6E9624D7CF564CC5), UINT64_C(0xFDF18A216BDEDFFF),
+ UINT64_C(0x3787DE38C0D5FC82), UINT64_C(0xCBCAA347497A6B11) } },
+ { { UINT64_C(0x6E7EF35EB226465A), UINT64_C(0x4B4699195F8A2BAF),
+ UINT64_C(0x44B3A3CF1120D93F), UINT64_C(0xB052C8B668F34AD1),
+ UINT64_C(0x27EC574BEF7632DD), UINT64_C(0xAEBEA108685DE26F) },
+ { UINT64_C(0xDA33236BE39424B6), UINT64_C(0xB1BD94A9EBCC22AD),
+ UINT64_C(0x6DDEE6CC2CDFB5D5), UINT64_C(0xBDAED9276F14069A),
+ UINT64_C(0x2ADE427C2A247CB7), UINT64_C(0xCE96B436ED156A40) } },
+ { { UINT64_C(0xDDDCA36081F3F819), UINT64_C(0x4AF4A49FD419B96A),
+ UINT64_C(0x746C65257CB966B9), UINT64_C(0x01E390886F610023),
+ UINT64_C(0x05ECB38D98DD33FC), UINT64_C(0x962B971B8F84EDF4) },
+ { UINT64_C(0xEB32C0A56A6F2602), UINT64_C(0xF026AF71562D60F2),
+ UINT64_C(0xA9E246BF84615FAB), UINT64_C(0xAD96709275DBAE01),
+ UINT64_C(0xBF97C79B3ECE5D07), UINT64_C(0xE06266C774EAA3D3) } },
+ { { UINT64_C(0x161A01572E6DBB6E), UINT64_C(0xB8AF490460FA8F47),
+ UINT64_C(0xE4336C4400197F22), UINT64_C(0xF811AFFA9CEDCE0E),
+ UINT64_C(0xB1DD7685F94C2EF1), UINT64_C(0xEEDC0F4BCA957BB0) },
+ { UINT64_C(0xD319FD574AA76BB1), UINT64_C(0xB3525D7C16CD7CCB),
+ UINT64_C(0x7B22DA9CA97DD072), UINT64_C(0x99DB84BD38A83E71),
+ UINT64_C(0x4939BC8DC0EDD8BE), UINT64_C(0x06D524EA903A932C) } },
+ { { UINT64_C(0x4BC950EC0E31F639), UINT64_C(0xB7ABD3DC6016BE30),
+ UINT64_C(0x3B0F44736703DAD0), UINT64_C(0xCC405F8B0AC1C4EA),
+ UINT64_C(0x9BED5E57176C3FEE), UINT64_C(0xF452481036AE36C2) },
+ { UINT64_C(0xC1EDBB8315D7B503), UINT64_C(0x943B1156E30F3657),
+ UINT64_C(0x984E9EEF98377805), UINT64_C(0x291AE7AC36CF1DEB),
+ UINT64_C(0xFED8748CA9F66DF3), UINT64_C(0xECA758BBFEA8FA5D) } },
+ },
+ {
+ { { UINT64_C(0xACC787EF2DD1B249), UINT64_C(0x736E1030D82976F1),
+ UINT64_C(0x0A6940FAA01B3649), UINT64_C(0xE00B926BC42341E7),
+ UINT64_C(0x911508D0DE8FFD6C), UINT64_C(0x4DCF8D465276B0CB) },
+ { UINT64_C(0x23AD0A90CC3CAD8D), UINT64_C(0x2A92E54CADED962A),
+ UINT64_C(0x93FBEC4DF231BFAF), UINT64_C(0x9544BC774798987A),
+ UINT64_C(0x48084E2508E29F60), UINT64_C(0x0C0D2F4332DE5869) } },
+ { { UINT64_C(0x6778F9703A9ABC13), UINT64_C(0xFD014FAC3D2B166B),
+ UINT64_C(0x1FE4FC783C6FED60), UINT64_C(0x04295FA8AA7C69C5),
+ UINT64_C(0xA01DE56D7C123175), UINT64_C(0x0FA0D3A83D9A713A) },
+ { UINT64_C(0xA7A6E5E3E3E08ADD), UINT64_C(0xBD77E94B1AC58F85),
+ UINT64_C(0x078F6FD2B7321A9C), UINT64_C(0x9564601E911EF6D9),
+ UINT64_C(0x31C5C1B2415C6BEF), UINT64_C(0xE6C0C91ED3212C62) } },
+ { { UINT64_C(0xBA7BD23C0D16022F), UINT64_C(0xE9CF4750198BE288),
+ UINT64_C(0x304E316947DEEC65), UINT64_C(0xCF65B41F96EEB288),
+ UINT64_C(0x17E99C17927E9E3B), UINT64_C(0x82225546F6630A80) },
+ { UINT64_C(0x15122B8ACA067BD9), UINT64_C(0xE2673205B77B4E98),
+ UINT64_C(0x130375659407CA63), UINT64_C(0x53624F548B621602),
+ UINT64_C(0x96AF2CB1EAE4BD06), UINT64_C(0x576ECD1C8FA20829) } },
+ { { UINT64_C(0xA551CE107E02D2D0), UINT64_C(0x1584ED249D13DBC7),
+ UINT64_C(0x082017AD4DA7B6D8), UINT64_C(0x81918A8FE054BC48),
+ UINT64_C(0x677DB48E572DC384), UINT64_C(0x2EF822966155484C) },
+ { UINT64_C(0xC3DB14C641B9C231), UINT64_C(0x910A87D14A766192),
+ UINT64_C(0x93D5CC8610AB8E0F), UINT64_C(0x4194D548AE57CA1B),
+ UINT64_C(0xFAF3A1D6267FC37A), UINT64_C(0x70EC236413B87C97) } },
+ { { UINT64_C(0x064B565B5E12756A), UINT64_C(0x953B7BD1AE49C98E),
+ UINT64_C(0xE0CE8284F7001D91), UINT64_C(0x1546060BF31108D0),
+ UINT64_C(0xDBC2C3F46779B6E2), UINT64_C(0x157AA47DE0DD07CF) },
+ { UINT64_C(0xBF4A1C6FF23B261E), UINT64_C(0x5B8EED30654F4BE5),
+ UINT64_C(0xDF5896D36B20CCD8), UINT64_C(0x56920E2C559ED23D),
+ UINT64_C(0x901F342EFA6E3E27), UINT64_C(0x745C747C896CA082) } },
+ { { UINT64_C(0xDBCCD5752944EC84), UINT64_C(0x54A2A935A5FF65FE),
+ UINT64_C(0x88C92A5E1A1319B6), UINT64_C(0x9537C28F82DA96C1),
+ UINT64_C(0xB683647435F93C46), UINT64_C(0xEC526A1D65B0846C) },
+ { UINT64_C(0x6F12AFBDF382C412), UINT64_C(0x5EBC81D89E99FA06),
+ UINT64_C(0x97B5D672869B93BD), UINT64_C(0x2983C310377E12AA),
+ UINT64_C(0x4875968124D681EA), UINT64_C(0x1E0BD106287FD767) } },
+ { { UINT64_C(0x0AC75A3E7231247F), UINT64_C(0x65C20DE6EF27AD3A),
+ UINT64_C(0x87EB6CF1BD02EEE5), UINT64_C(0x264ACA7A00147E03),
+ UINT64_C(0xEBC78581AE2A9437), UINT64_C(0x9929964E6316BFA5) },
+ { UINT64_C(0xDC09E0409AF207EF), UINT64_C(0x3ECFFE2D0C9D8658),
+ UINT64_C(0x547EA735DFB43D38), UINT64_C(0x5485247BD04B1B20),
+ UINT64_C(0xB18D3F02BFD8B609), UINT64_C(0xEEB3E805CCE73705) } },
+ { { UINT64_C(0xDAB1A525DB93850F), UINT64_C(0x18ADAA238365B7D5),
+ UINT64_C(0x58485C90113FC8C7), UINT64_C(0x80C3DBB9348AD323),
+ UINT64_C(0xAF892FB5E16ADCA1), UINT64_C(0x2183C879979F005A) },
+ { UINT64_C(0x20FA1A940643A99E), UINT64_C(0x2741221C1A1609CB),
+ UINT64_C(0x1C1687E53C2FBDDC), UINT64_C(0xDCCF329ED420D6CF),
+ UINT64_C(0x75D5577D2B7197D1), UINT64_C(0x4C3C3875C8729D9C) } },
+ { { UINT64_C(0x5E79F995E5CBDCB9), UINT64_C(0x03139824A742FCC7),
+ UINT64_C(0x6D0C214A239EF4A1), UINT64_C(0x53A27952401A2944),
+ UINT64_C(0xF42A1B34C10BCDF0), UINT64_C(0x426BAA437CF38061) },
+ { UINT64_C(0x16A53139A96AD0C8), UINT64_C(0x627F1D316BAD5301),
+ UINT64_C(0x5AF748774ACCD627), UINT64_C(0x3C58A1C5B55B0FB8),
+ UINT64_C(0xFAA57B91F4399A6A), UINT64_C(0xBAD283FBC28094B8) } },
+ { { UINT64_C(0xBA32AC6183E10A93), UINT64_C(0x1C91F6B4EC06BDB0),
+ UINT64_C(0x42E6CFBC65F60C93), UINT64_C(0xEFE33BC82C0CDCBE),
+ UINT64_C(0xE0FE1D094D6414F2), UINT64_C(0x4C11231676FA5C5B) },
+ { UINT64_C(0x812C1DC62E26200A), UINT64_C(0xD6C413C5EE879D25),
+ UINT64_C(0xBEADE255BCA8BAFE), UINT64_C(0x0EAF4AE2CE2BA0E7),
+ UINT64_C(0x66E9FFB0C4F4408A), UINT64_C(0xB36A86D79782C7AD) } },
+ { { UINT64_C(0x10FCD1F4BAD8D1C7), UINT64_C(0xC903816A4502F645),
+ UINT64_C(0x7FAC1CC1A503B895), UINT64_C(0x8BCD60410778900C),
+ UINT64_C(0x5A5F22025BCF2784), UINT64_C(0x9B157E8710EDB896) },
+ { UINT64_C(0x4C58DA69F602A8B1), UINT64_C(0xD55132F859EC9D7E),
+ UINT64_C(0x155B719AA26D4870), UINT64_C(0x25AAFCA336441746),
+ UINT64_C(0x01F83338DD3B6B30), UINT64_C(0xD52BB5C1551917CC) } },
+ { { UINT64_C(0xA0B6207B6135066A), UINT64_C(0xB3409F842AEC8CBD),
+ UINT64_C(0x5EBFD43619D87DF0), UINT64_C(0xCB4C209BE8526DE2),
+ UINT64_C(0xD764085B21E1A230), UINT64_C(0x96F915540899964A) },
+ { UINT64_C(0xB0BEC8EFA57D122A), UINT64_C(0xC572EC565D9D0B33),
+ UINT64_C(0xEBE2A780CFA7C72C), UINT64_C(0x52D40CDB9EF3295C),
+ UINT64_C(0x640045840DE74DFE), UINT64_C(0xA6846432C0809716) } },
+ { { UINT64_C(0x0D09E8CD02C979BC), UINT64_C(0xEC4B21F6409F4F2A),
+ UINT64_C(0x68125C7013FB07CA), UINT64_C(0x1C4CFC176FDFA72A),
+ UINT64_C(0xC9E71B9E04539FCD), UINT64_C(0x94B7103D8BA70797) },
+ { UINT64_C(0x6B81E82FB33FDE83), UINT64_C(0x7CA9A8CAEABAFD4B),
+ UINT64_C(0xADD85A67EAB819CE), UINT64_C(0xAEC2548398E99FFC),
+ UINT64_C(0x938D6440274A07B6), UINT64_C(0x0A5C7097564A6AA0) } },
+ { { UINT64_C(0x7284FF502F4FCEB6), UINT64_C(0x0A28715A78D0D5CB),
+ UINT64_C(0xE70B7014BFCE187C), UINT64_C(0xA6B538F57A17148D),
+ UINT64_C(0x1DAB07C9DD427166), UINT64_C(0x5C5578B0149D23CA) },
+ { UINT64_C(0x875E2056875B5EDE), UINT64_C(0xCBF44B6D02C893B9),
+ UINT64_C(0x5715A77E5C2993FB), UINT64_C(0xAF3281463410597E),
+ UINT64_C(0x65DF418F42DC49DF), UINT64_C(0x7AC9C720A9EE52F6) } },
+ { { UINT64_C(0xB1C9AA0762955486), UINT64_C(0xCBF35BE3245061D7),
+ UINT64_C(0x811E1BD38CF4DDC0), UINT64_C(0xD9D4589C948F7C84),
+ UINT64_C(0x30D09A0FCB0F996D), UINT64_C(0x1A1B3B7A590E7704) },
+ { UINT64_C(0xA848E3492082768D), UINT64_C(0x9FEBD4929A249DF4),
+ UINT64_C(0x503420AF5F20439A), UINT64_C(0x0CBE52B68E2BFCD4),
+ UINT64_C(0xB1D5E261118C91B2), UINT64_C(0x93CFF6DA71D8F2BC) } },
+ { { UINT64_C(0x5F5BC06B8AB58944), UINT64_C(0xE4BED5384979882D),
+ UINT64_C(0x57C30362D79B0EB1), UINT64_C(0x391AE2C1EF7C56D8),
+ UINT64_C(0x28BC2E97ADD98625), UINT64_C(0xFA8E86B81B257107) },
+ { UINT64_C(0x5E4859F86118C715), UINT64_C(0x91C83324524C71DD),
+ UINT64_C(0xFB2092436D2F5E6D), UINT64_C(0x6B4FE21F2A900A43),
+ UINT64_C(0x241F75D632A73C1F), UINT64_C(0xF5BC46295AE89613) } },
+ }
+};
+
+/*-
+ * Finite field inversion.
+ * Computed with Bernstein-Yang algorithm.
+ * https://tches.iacr.org/index.php/TCHES/article/view/8298
+ * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ *
+ * output: receives the result; it is also used as working storage (via
+ *         r2) while the divstep loop runs.
+ * t1:     the field element to invert (copied into g1, never written).
+ *
+ * Two state sets (d1, f1, g1, v1, r1) and (d2, f2, g2, v2, r2) are used
+ * alternately: each divstep call reads one set and writes the other.
+ * After the loop the result is derived from v1: its negation is computed,
+ * one of the two is selected from the top bit of f1[LIMB_CNT], and the
+ * selection is multiplied by const_divstep.
+ * NOTE(review): presumably the top bit of f1 is its sign and const_divstep
+ * is the precomputed scaling factor for 1110 divsteps -- confirm against
+ * the definitions of fiat_secp384r1_selectznz and const_divstep.
+ */
+static void
+fiat_secp384r1_inv(fe_t output, const fe_t t1)
+{
+ int i;
+ fe_t v1, r1, v2;
+ limb_t *r2 = output; /* r2 aliases output, so output is scratch until the end */
+ limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1],
+ g2[LIMB_CNT + 1]; /* LIMB_CNT + 1 limbs each; top limb of f1/g1 is cleared below */
+ limb_t d2, d1 = 1;
+
+ fe_copy(g1, t1);
+ g1[LIMB_CNT] = 0;
+ fe_copy(f1, const_psat);
+ f1[LIMB_CNT] = 0;
+ fe_copy(r1, const_one);
+ fe_set_zero(v1);
+
+ /* 1110 divstep iterations: 555 rounds of two calls each, the second
+ * call feeding the first call's outputs back into the "1" state set */
+ for (i = 0; i < 555; i++) {
+ fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1);
+ fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2);
+ }
+
+ fiat_secp384r1_opp(output, v1);
+ fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1,
+ output);
+ fiat_secp384r1_mul(output, output, const_divstep);
+}
+
+/*-
+ * Q := 2P, both projective, Q and P same pointers OK
+ * Autogenerated: op3/dbl_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 6
+ * ASSERT: a = -3
+ *
+ * Q: output point; its X, Y, Z are written in place and also serve as
+ *    scratch while the formula runs.
+ * P: input point; only its X, Y, Z are read.
+ *
+ * Aliasing works because every read of P's coordinates happens no later
+ * than the first call that writes into Q (Z3 = X * Z); after that only
+ * t0..t4, const_b and Q's own coordinates are used. Keep the statement
+ * order intact when editing.
+ */
+static void
+point_double(pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X = P->X;
+ const limb_t *Y = P->Y;
+ const limb_t *Z = P->Z;
+ limb_t *X3 = Q->X;
+ limb_t *Y3 = Q->Y;
+ limb_t *Z3 = Q->Z;
+
+ /* the curve arith formula */
+ fiat_secp384r1_square(t0, X);
+ fiat_secp384r1_square(t1, Y);
+ fiat_secp384r1_square(t2, Z);
+ fiat_secp384r1_mul(t3, X, Y);
+ fiat_secp384r1_add(t3, t3, t3);
+ fiat_secp384r1_mul(t4, Y, Z);
+ fiat_secp384r1_mul(Z3, X, Z); /* last read of P; first write into Q */
+ fiat_secp384r1_add(Z3, Z3, Z3);
+ fiat_secp384r1_mul(Y3, b, t2);
+ fiat_secp384r1_sub(Y3, Y3, Z3);
+ fiat_secp384r1_add(X3, Y3, Y3);
+ fiat_secp384r1_add(Y3, X3, Y3);
+ fiat_secp384r1_sub(X3, t1, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3);
+ fiat_secp384r1_mul(Y3, X3, Y3);
+ fiat_secp384r1_mul(X3, X3, t3);
+ fiat_secp384r1_add(t3, t2, t2);
+ fiat_secp384r1_add(t2, t2, t3);
+ fiat_secp384r1_mul(Z3, b, Z3);
+ fiat_secp384r1_sub(Z3, Z3, t2);
+ fiat_secp384r1_sub(Z3, Z3, t0);
+ fiat_secp384r1_add(t3, Z3, Z3);
+ fiat_secp384r1_add(Z3, Z3, t3);
+ fiat_secp384r1_add(t3, t0, t0);
+ fiat_secp384r1_add(t0, t3, t0);
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t0, t0, Z3);
+ fiat_secp384r1_add(Y3, Y3, t0);
+ fiat_secp384r1_add(t0, t4, t4);
+ fiat_secp384r1_mul(Z3, t0, Z3);
+ fiat_secp384r1_sub(X3, X3, Z3);
+ fiat_secp384r1_mul(Z3, t0, t1);
+ fiat_secp384r1_add(Z3, Z3, Z3);
+ fiat_secp384r1_add(Z3, Z3, Z3);
+}
+
+/*-
+ * R := Q + P where R and Q are projective, P affine.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_mixed.op3
+ * https://eprint.iacr.org/2015/1060 Alg 5
+ * ASSERT: a = -3
+ *
+ * R: output point; written only by the three final select calls.
+ * Q: projective input point.
+ * P: affine input point; P->Y is tested with fiat_secp384r1_nonzero and
+ *    the result decides whether the computed sum or Q is stored in R.
+ *
+ * The sum is built in the local X3, Y3, Z3 rather than in R, so Q is
+ * still intact for the final selection even when R and Q alias.
+ * NOTE(review): this treats an affine P with Y == 0 as the point at
+ * infinity -- confirm that matches how callers encode it.
+ */
+static void
+point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ fe_t X3; /* local result buffers; copied out by the selects below */
+ fe_t Y3;
+ fe_t Z3;
+ limb_t nz;
+
+ /* check P for affine inf */
+ fiat_secp384r1_nonzero(&nz, P->Y);
+
+ /* the curve arith formula */
+ fiat_secp384r1_mul(t0, X1, X2);
+ fiat_secp384r1_mul(t1, Y1, Y2);
+ fiat_secp384r1_add(t3, X2, Y2);
+ fiat_secp384r1_add(t4, X1, Y1);
+ fiat_secp384r1_mul(t3, t3, t4);
+ fiat_secp384r1_add(t4, t0, t1);
+ fiat_secp384r1_sub(t3, t3, t4);
+ fiat_secp384r1_mul(t4, Y2, Z1);
+ fiat_secp384r1_add(t4, t4, Y1);
+ fiat_secp384r1_mul(Y3, X2, Z1);
+ fiat_secp384r1_add(Y3, Y3, X1);
+ fiat_secp384r1_mul(Z3, b, Z1);
+ fiat_secp384r1_sub(X3, Y3, Z3);
+ fiat_secp384r1_add(Z3, X3, X3);
+ fiat_secp384r1_add(X3, X3, Z3);
+ fiat_secp384r1_sub(Z3, t1, X3);
+ fiat_secp384r1_add(X3, t1, X3);
+ fiat_secp384r1_mul(Y3, b, Y3);
+ fiat_secp384r1_add(t1, Z1, Z1);
+ fiat_secp384r1_add(t2, t1, Z1);
+ fiat_secp384r1_sub(Y3, Y3, t2);
+ fiat_secp384r1_sub(Y3, Y3, t0);
+ fiat_secp384r1_add(t1, Y3, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3);
+ fiat_secp384r1_add(t1, t0, t0);
+ fiat_secp384r1_add(t0, t1, t0);
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t1, t4, Y3);
+ fiat_secp384r1_mul(t2, t0, Y3);
+ fiat_secp384r1_mul(Y3, X3, Z3);
+ fiat_secp384r1_add(Y3, Y3, t2);
+ fiat_secp384r1_mul(X3, t3, X3);
+ fiat_secp384r1_sub(X3, X3, t1);
+ fiat_secp384r1_mul(Z3, t4, Z3);
+ fiat_secp384r1_mul(t1, t3, t0);
+ fiat_secp384r1_add(Z3, Z3, t1);
+
+ /* if P is inf, throw all that away and take Q */
+ fiat_secp384r1_selectznz(R->X, nz, Q->X, X3);
+ fiat_secp384r1_selectznz(R->Y, nz, Q->Y, Y3);
+ fiat_secp384r1_selectznz(R->Z, nz, Q->Z, Z3);
+}
+
+/*-
+ * R := Q + P all projective.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 4
+ * ASSERT: a = -3
+ *
+ * R: output point; its X, Y, Z are written in place and double as scratch.
+ * Q, P: projective input points.
+ *
+ * Q may alias R because Q's coordinates are last read by the first call
+ * that writes into R (X3 = X1 + Z1). P may not: P's coordinates are read
+ * again (Y3 = X2 + Z2) after R->X has already been overwritten.
+ */
+static void
+point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4, t5;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ const limb_t *Z2 = P->Z;
+ limb_t *X3 = R->X;
+ limb_t *Y3 = R->Y;
+ limb_t *Z3 = R->Z;
+
+ /* the curve arith formula */
+ fiat_secp384r1_mul(t0, X1, X2);
+ fiat_secp384r1_mul(t1, Y1, Y2);
+ fiat_secp384r1_mul(t2, Z1, Z2);
+ fiat_secp384r1_add(t3, X1, Y1);
+ fiat_secp384r1_add(t4, X2, Y2);
+ fiat_secp384r1_mul(t3, t3, t4);
+ fiat_secp384r1_add(t4, t0, t1);
+ fiat_secp384r1_sub(t3, t3, t4);
+ fiat_secp384r1_add(t4, Y1, Z1);
+ fiat_secp384r1_add(t5, Y2, Z2);
+ fiat_secp384r1_mul(t4, t4, t5);
+ fiat_secp384r1_add(t5, t1, t2);
+ fiat_secp384r1_sub(t4, t4, t5);
+ fiat_secp384r1_add(X3, X1, Z1); /* last read of Q; first write into R */
+ fiat_secp384r1_add(Y3, X2, Z2); /* last read of P, after R->X was written */
+ fiat_secp384r1_mul(X3, X3, Y3);
+ fiat_secp384r1_add(Y3, t0, t2);
+ fiat_secp384r1_sub(Y3, X3, Y3);
+ fiat_secp384r1_mul(Z3, b, t2);
+ fiat_secp384r1_sub(X3, Y3, Z3);
+ fiat_secp384r1_add(Z3, X3, X3);
+ fiat_secp384r1_add(X3, X3, Z3);
+ fiat_secp384r1_sub(Z3, t1, X3);
+ fiat_secp384r1_add(X3, t1, X3);
+ fiat_secp384r1_mul(Y3, b, Y3);
+ fiat_secp384r1_add(t1, t2, t2);
+ fiat_secp384r1_add(t2, t1, t2);
+ fiat_secp384r1_sub(Y3, Y3, t2);
+ fiat_secp384r1_sub(Y3, Y3, t0);
+ fiat_secp384r1_add(t1, Y3, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3);
+ fiat_secp384r1_add(t1, t0, t0);
+ fiat_secp384r1_add(t0, t1, t0);
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t1, t4, Y3);
+ fiat_secp384r1_mul(t2, t0, Y3);
+ fiat_secp384r1_mul(Y3, X3, Z3);
+ fiat_secp384r1_add(Y3, Y3, t2);
+ fiat_secp384r1_mul(X3, t3, X3);
+ fiat_secp384r1_sub(X3, X3, t1);
+ fiat_secp384r1_mul(Z3, t4, Z3);
+ fiat_secp384r1_mul(t1, t3, t0);
+ fiat_secp384r1_add(Z3, Z3, t1);
+}
+
+/*-
+ * constants
+ * RADIX: window width, in bits, of the scalar recodings below
+ * DRADIX: 2^RADIX; the wnaf precomputation table holds DRADIX / 2 points
+ * DRADIX_WNAF: 2^(RADIX + 1); DRADIX_WNAF - 1 masks one recoding window
+ */
+#define RADIX 5
+#define DRADIX (1 << RADIX)
+#define DRADIX_WNAF ((DRADIX) << 1)
+
+/*-
+ * Build the table of odd multiples of P used by wnaf scalar multiplication:
+ *   precomp[i] = (2i + 1)P   for i = 0 .. DRADIX / 2 - 1
+ * The last slot temporarily holds 2P; it is overwritten with its own
+ * table value by the final addition.
+ */
+static void
+precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P)
+{
+    pt_prj_t *const first = &precomp[0];
+    pt_prj_t *const twice = &precomp[DRADIX / 2 - 1];
+    pt_prj_t *cur;
+
+    /* lift the affine point to projective coordinates (Z = 1) */
+    fe_copy(first->X, P->X);
+    fe_copy(first->Y, P->Y);
+    fe_copy(first->Z, const_one);
+
+    /* stash 2P in the last slot */
+    point_double(twice, first);
+
+    /* each entry is the previous one plus 2P */
+    for (cur = first + 1; cur <= twice; cur++) {
+        point_add_proj(cur, twice, cur - 1);
+    }
+}
+
+/*
+ * Fetch one bit of a 48-byte scalar. Bit 0 is the least significant bit of
+ * in[0]. Positions outside the scalar (negative, or 384 and above) read as 0.
+ */
+static int
+scalar_get_bit(const unsigned char in[48], int idx)
+{
+    const int byte_pos = idx >> 3;
+
+    if (idx >= 0 && byte_pos < 48) {
+        return (in[byte_pos] >> (idx & 0x7)) & 0x1;
+    }
+    return 0;
+}
+
+/*-
+ * Compute "regular" wnaf representation of a scalar.
+ * See "Exponent Recoding and Regular Exponentiation Algorithms",
+ * Tunstall et al., AfricaCrypt 2009, Alg 6.
+ * It forces an odd scalar and outputs digits in
+ * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...}
+ * i.e. signed odd digits with _no zeroes_ -- that makes it "regular".
+ *
+ * in: 48-byte scalar, read through scalar_get_bit (bit 0 of in[0] is lowest).
+ * out: 77 digits; out[0] is the least significant and each later digit
+ * carries RADIX more bits of weight.
+ * Bit 0 is forced to 1 before recoding, so an even scalar k is recoded as
+ * k + 1; the multipliers in this file compensate with a conditional
+ * subtraction keyed on scalar[0] & 1.
+ * The loop count and the bit positions fetched do not depend on the scalar.
+ */
+static void
+scalar_rwnaf(int8_t out[77], const unsigned char in[48])
+{
+ int i;
+ int8_t window, d;
+
+ window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low 6 bits, forced odd */
+ for (i = 0; i < 76; i++) {
+ d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* recentre the 6-bit window around zero */
+ out[i] = d;
+ window = (window - d) >> RADIX; /* what is left over moves down to the next digit */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; /* refill with the next RADIX scalar bits */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5;
+ }
+ out[i] = window; /* i == 76 here: the remaining window is the top digit */
+}
+
+/*-
+ * Compute "textbook" wnaf representation of a scalar.
+ * out[0] is the least significant digit; a digit is either zero or the odd
+ * window value mapped into the signed range by subtracting DRADIX_WNAF when
+ * its DRADIX bit is set.
+ * NB: not constant time
+ */
+static void
+scalar_wnaf(int8_t out[385], const unsigned char in[48])
+{
+    int pos;
+    int8_t acc, digit;
+
+    acc = in[0] & (DRADIX_WNAF - 1);
+    for (pos = 0; pos < 385; pos++) {
+        digit = 0;
+        if (acc & 1) {
+            /* odd window: emit it as a digit, folded into the signed range */
+            digit = acc & (DRADIX_WNAF - 1);
+            if (digit & DRADIX) {
+                digit -= DRADIX_WNAF;
+            }
+        }
+        out[pos] = digit;
+        /* drop the emitted digit, slide by one bit and pull in the next one */
+        acc = (acc - digit) >> 1;
+        acc += scalar_get_bit(in, pos + 1 + RADIX) << RADIX;
+    }
+}
+
+/*-
+ * Simultaneous scalar multiplication: interleaved "textbook" wnaf.
+ * NB: not constant time
+ *
+ * Digits of a index lut_cmb[0][] and are added with point_add_mixed; digits
+ * of b index a table built from P by precomp_wnaf and are added with
+ * point_add_proj. The wrapper point_mul_two_secp384r1 documents the result
+ * as a * G + b * P.
+ * Negative digits are handled by negating the accumulator's Y rather than
+ * the table entry: "flipped" records whether Q currently holds the negated
+ * running value, and the sign is put right once after the loop.
+ * out may alias P: P is only read by precomp_wnaf, before out is written.
+ */
+static void
+var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48],
+ const unsigned char b[48], const pt_aff_t *P)
+{
+ int i, d, is_neg, is_inf = 1, flipped = 0; /* is_inf: Q has not been given a value yet */
+ int8_t anaf[385] = { 0 };
+ int8_t bnaf[385] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P);
+ scalar_wnaf(anaf, a);
+ scalar_wnaf(bnaf, b);
+
+ for (i = 384; i >= 0; i--) { /* most significant digit first */
+ if (!is_inf)
+ point_double(&Q, &Q);
+ if ((d = bnaf[i])) { /* nonzero digit of b: add a multiple of P */
+ if ((is_neg = d < 0) != flipped) { /* bring the accumulator's sign in line with the digit's */
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &precomp[d].X);
+ fe_copy(Q.Y, &precomp[d].Y);
+ fe_copy(Q.Z, &precomp[d].Z);
+ is_inf = 0;
+ } else
+ point_add_proj(&Q, &Q, &precomp[d]);
+ }
+ if ((d = anaf[i])) { /* nonzero digit of a: add an entry of lut_cmb[0] */
+ if ((is_neg = d < 0) != flipped) {
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &lut_cmb[0][d].X);
+ fe_copy(Q.Y, &lut_cmb[0][d].Y);
+ fe_copy(Q.Z, const_one); /* table entries are affine, so Z is set to const_one */
+ is_inf = 0;
+ } else
+ point_add_mixed(&Q, &Q, &lut_cmb[0][d]);
+ }
+ }
+
+ if (is_inf) {
+ /* initialize accumulator to inf: all-zero scalars */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+ }
+
+ if (flipped) {
+ /* correct sign */
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ }
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z);
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* X times the inverted Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* Y times the inverted Z */
+}
+
+/*-
+ * Variable point scalar multiplication with "regular" wnaf.
+ * Here "regular" means _no zeroes_, so the sequence of
+ * EC arithmetic ops is fixed.
+ *
+ * Each digit lookup walks the whole precomp table and keeps the wanted
+ * entry with fiat_secp384r1_selectznz instead of indexing by the digit.
+ * The sign tests and the selection flag rely on >> of a negative int being
+ * an arithmetic shift.
+ * out->Y doubles as scratch space inside the loop; out may alias P because
+ * P is only read by precomp_wnaf.
+ */
+static void
+var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48],
+ const pt_aff_t *P)
+{
+ int i, j, d, diff, is_neg;
+ int8_t rnaf[77] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P);
+ scalar_rwnaf(rnaf, scalar);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ /* initialize accumulator to high digit */
+ d = (rnaf[76] - 1) >> 1; /* table index of the top digit; no sign handling is applied to it */
+ for (j = 0; j < DRADIX / 2; j++) {
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 only when j == d, else 0 */
+ fiat_secp384r1_selectznz(Q.X, diff, Q.X, precomp[j].X); /* keep Q unless this is entry d */
+ fiat_secp384r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
+ fiat_secp384r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
+ }
+
+ for (i = 75; i >= 0; i--) { /* remaining digits, high to low */
+ for (j = 0; j < RADIX; j++)
+ point_double(&Q, &Q);
+ d = rnaf[i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* table index (|digit| - 1) / 2 */
+ for (j = 0; j < DRADIX / 2; j++) {
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 only when j == d, else 0 */
+ fiat_secp384r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
+ fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
+ fiat_secp384r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp384r1_opp(out->Y, lut.Y); /* the negated Y is always computed, into scratch */
+ fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); /* and only kept when is_neg is set */
+ point_add_proj(&Q, &Q, &lut);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ fe_copy(lut.X, precomp[0].X);
+ fiat_secp384r1_opp(lut.Y, precomp[0].Y); /* lut = -P */
+ fe_copy(lut.Z, precomp[0].Z);
+ point_add_proj(&lut, &lut, &Q); /* lut = Q - P, computed regardless of parity */
+ fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); /* odd scalar keeps Q, even scalar takes Q - P */
+ fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
+ fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z);
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* X times the inverted Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* Y times the inverted Z */
+}
+
+/*-
+ * Fixed scalar multiplication: comb with interleaving.
+ *
+ * The scalar is recoded by scalar_rwnaf into 77 digits; digit (4 * j + i)
+ * is looked up in row j of lut_cmb. Digits are consumed in four passes
+ * (i = 3 down to 0) with RADIX doublings of the accumulator between passes.
+ * Each lookup walks the whole row and keeps the wanted entry with
+ * fiat_secp384r1_selectznz instead of indexing by the digit.
+ * out->Y doubles as scratch space inside the loop.
+ */
+static void
+fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48])
+{
+ int i, j, k, d, diff, is_neg = 0;
+ int8_t rnaf[77] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } };
+ pt_aff_t lut = { { 0 }, { 0 } };
+
+ scalar_rwnaf(rnaf, scalar);
+
+ /* initialize accumulator to inf */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ for (i = 3; i >= 0; i--) {
+ for (j = 0; i != 3 && j < RADIX; j++) /* no doublings before the first pass */
+ point_double(&Q, &Q);
+ for (j = 0; j < 21; j++) {
+ if (j * 4 + i > 76) /* rnaf only has digits 0 .. 76 */
+ continue;
+ d = rnaf[j * 4 + i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* table index (|digit| - 1) / 2 */
+ for (k = 0; k < DRADIX / 2; k++) {
+ diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* 1 only when k == d, else 0 */
+ fiat_secp384r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X);
+ fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp384r1_opp(out->Y, lut.Y); /* the negated Y is always computed, into scratch */
+ fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); /* and only kept when is_neg is set */
+ point_add_mixed(&Q, &Q, &lut);
+ }
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ fe_copy(lut.X, lut_cmb[0][0].X);
+ fiat_secp384r1_opp(lut.Y, lut_cmb[0][0].Y); /* lut = negated first table entry */
+ point_add_mixed(&R, &Q, &lut); /* R = Q plus that negated entry, computed regardless of parity */
+ fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); /* odd scalar keeps Q, even scalar takes R */
+ fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y);
+ fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z);
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* X times the inverted Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* Y times the inverted Z */
+}
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48],
+                        const unsigned char a[48],
+                        const unsigned char b[48],
+                        const unsigned char inx[48],
+                        const unsigned char iny[48])
+{
+    pt_aff_t pt;
+
+    /* decode each input coordinate and move it into the Montgomery domain */
+    fiat_secp384r1_from_bytes(pt.X, inx);
+    fiat_secp384r1_to_montgomery(pt.X, pt.X);
+    fiat_secp384r1_from_bytes(pt.Y, iny);
+    fiat_secp384r1_to_montgomery(pt.Y, pt.Y);
+
+    /* simultaneous scalar multiplication; input and output share storage */
+    var_smul_wnaf_two(&pt, a, b, &pt);
+
+    /* leave the Montgomery domain and serialize each coordinate */
+    fiat_secp384r1_from_montgomery(pt.X, pt.X);
+    fiat_secp384r1_to_bytes(outx, pt.X);
+    fiat_secp384r1_from_montgomery(pt.Y, pt.Y);
+    fiat_secp384r1_to_bytes(outy, pt.Y);
+}
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48],
+                      const unsigned char scalar[48])
+{
+    pt_aff_t res;
+
+    /* fixed scmul function */
+    fixed_smul_cmb(&res, scalar);
+
+    /* leave the Montgomery domain and serialize each coordinate */
+    fiat_secp384r1_from_montgomery(res.X, res.X);
+    fiat_secp384r1_to_bytes(outx, res.X);
+    fiat_secp384r1_from_montgomery(res.Y, res.Y);
+    fiat_secp384r1_to_bytes(outy, res.Y);
+}
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48],
+                    const unsigned char scalar[48],
+                    const unsigned char inx[48],
+                    const unsigned char iny[48])
+{
+    pt_aff_t pt;
+
+    /* decode each input coordinate and move it into the Montgomery domain */
+    fiat_secp384r1_from_bytes(pt.X, inx);
+    fiat_secp384r1_to_montgomery(pt.X, pt.X);
+    fiat_secp384r1_from_bytes(pt.Y, iny);
+    fiat_secp384r1_to_montgomery(pt.Y, pt.Y);
+
+    /* var scmul function; input and output share storage */
+    var_smul_rwnaf(&pt, scalar, &pt);
+
+    /* leave the Montgomery domain and serialize each coordinate */
+    fiat_secp384r1_from_montgomery(pt.X, pt.X);
+    fiat_secp384r1_to_bytes(outx, pt.X);
+    fiat_secp384r1_from_montgomery(pt.Y, pt.Y);
+    fiat_secp384r1_to_bytes(outy, pt.Y);
+}
+
+#else /* __SIZEOF_INT128__ */
+
+#include "ecp_secp384r1.h"
+#include <stdint.h>
+#include <string.h>
+#define LIMB_BITS 32 /* bits per limb in this branch (the #else of the __SIZEOF_INT128__ test) */
+#define LIMB_CNT 12 /* 12 limbs of 32 bits = 384 bits per field element */
+/* Field elements: an array of LIMB_CNT limbs */
+typedef uint32_t fe_t[LIMB_CNT];
+typedef uint32_t limb_t; /* a single limb */
+
+#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) /* copy one whole field element from s to d */
+#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) /* set every limb of d to zero */
+
+/* Projective points: three field-element coordinates X, Y, Z */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+ fe_t Z;
+} pt_prj_t;
+
+/* Affine points: two field-element coordinates X, Y */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+} pt_aff_t;
+
+/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */
+/*-
+ * MIT License
+ *
+ * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file).
+ * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Autogenerated: word_by_word_montgomery --static --use-value-barrier secp384r1 32 '2^384 - 2^128 - 2^96 + 2^32 - 1' */
+/* curve description: secp384r1 */
+/* machine_wordsize = 32 (from "32") */
+/* requested operations: (all) */
+/* m = 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff (from "2^384 - 2^128 - 2^96 + 2^32 - 1") */
+/* */
+/* NOTE: In addition to the bounds specified above each function, all */
+/* functions synthesized for this Montgomery arithmetic require the */
+/* input to be strictly less than the prime modulus (m), and also */
+/* require the input to be in the unique saturated representation. */
+/* All functions also ensure that these two properties are true of */
+/* return values. */
+/* */
+/* Computed values: */
+/* eval z = z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) */
+/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) */
+/* twos_complement_eval z = let x1 := z[0] + (z[1] << 32) + (z[2] << 64) + (z[3] << 96) + (z[4] << 128) + (z[5] << 160) + (z[6] << 192) + (z[7] << 224) + (z[8] << 256) + (z[9] << 0x120) + (z[10] << 0x140) + (z[11] << 0x160) in */
+/* if x1 & (2^384-1) < 2^383 then x1 & (2^384-1) else (x1 & (2^384-1)) - 2^384 */
+
+#include <stdint.h>
+typedef unsigned char fiat_secp384r1_uint1; /* carry, borrow and condition flags; documented bounds are [0x0 ~> 0x1] */
+typedef signed char fiat_secp384r1_int1; /* signed counterpart, used below for the borrow and for building the select mask */
+#ifdef __GNUC__
+#define FIAT_SECP384R1_FIAT_INLINE __inline__
+#else
+#define FIAT_SECP384R1_FIAT_INLINE /* expands to nothing when __GNUC__ is not defined */
+#endif
+
+/* The type fiat_secp384r1_montgomery_domain_field_element is a field element in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */
+typedef uint32_t fiat_secp384r1_montgomery_domain_field_element[12];
+
+/* The type fiat_secp384r1_non_montgomery_domain_field_element is a field element NOT in the Montgomery domain. */
+/* Bounds: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]] */
+typedef uint32_t fiat_secp384r1_non_montgomery_domain_field_element[12];
+
+#if (-1 & 3) != 3 /* preprocessor-time check that -1 has its two low bits set */
+#error "This code only works on a two's complement system"
+#endif
+
+#if !defined(FIAT_SECP384R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint32_t
+fiat_secp384r1_value_barrier_u32(uint32_t a)
+{
+ __asm__("" /* empty template: the asm statement itself contains no instructions */
+ : "+r"(a) /* a is a read-write register operand, so the compiler must treat it as possibly modified */
+ : /* no inputs */);
+ return a; /* the value itself is passed through; NOTE(review): presumably keeps the mask logic in cmovznz from being turned into a branch -- confirm against fiat-crypto docs */
+}
+#else
+#define fiat_secp384r1_value_barrier_u32(x) (x) /* fallback without inline asm: plain identity */
+#endif
+
+/*
+ * The function fiat_secp384r1_addcarryx_u32 is an addition with carry.
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^32
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^32⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffff]
+ * arg3: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp384r1_addcarryx_u32(uint32_t *out1,
+ fiat_secp384r1_uint1 *out2,
+ fiat_secp384r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ uint64_t x1;
+ uint32_t x2;
+ fiat_secp384r1_uint1 x3;
+ x1 = ((arg1 + (uint64_t)arg2) + arg3); /* sum formed in 64 bits, so it cannot wrap */
+ x2 = (uint32_t)(x1 & UINT32_C(0xffffffff)); /* low 32 bits: the sum word */
+ x3 = (fiat_secp384r1_uint1)(x1 >> 32); /* bit 32: the carry out */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp384r1_subborrowx_u32 is a subtraction with borrow.
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^32
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^32⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffff]
+ * arg3: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp384r1_subborrowx_u32(uint32_t *out1,
+ fiat_secp384r1_uint1 *out2,
+ fiat_secp384r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ int64_t x1;
+ fiat_secp384r1_int1 x2;
+ uint32_t x3;
+ x1 = ((arg2 - (int64_t)arg1) - arg3); /* signed 64-bit difference; negative exactly when a borrow is needed */
+ x2 = (fiat_secp384r1_int1)(x1 >> 32); /* 0, or -1 on borrow (assumes >> on a negative value is arithmetic) */
+ x3 = (uint32_t)(x1 & UINT32_C(0xffffffff)); /* low 32 bits: the difference word */
+ *out1 = x3;
+ *out2 = (fiat_secp384r1_uint1)(0x0 - x2); /* negate so the borrow is reported as 0 or 1 */
+}
+
+/*
+ * The function fiat_secp384r1_mulx_u32 is a multiplication, returning the full double-width result.
+ *
+ * Postconditions:
+ * out1 = (arg1 * arg2) mod 2^32
+ * out2 = ⌊arg1 * arg2 / 2^32⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0xffffffff]
+ * arg2: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ * out2: [0x0 ~> 0xffffffff]
+ */
+static void
+fiat_secp384r1_mulx_u32(uint32_t *out1, uint32_t *out2,
+ uint32_t arg1, uint32_t arg2)
+{
+ uint64_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ x1 = ((uint64_t)arg1 * arg2); /* widen first so the full 64-bit product is kept */
+ x2 = (uint32_t)(x1 & UINT32_C(0xffffffff)); /* low half of the product */
+ x3 = (uint32_t)(x1 >> 32); /* high half of the product */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp384r1_cmovznz_u32 is a single-word conditional move.
+ *
+ * Postconditions:
+ * out1 = (if arg1 = 0 then arg2 else arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffff]
+ * arg3: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ */
+static void
+fiat_secp384r1_cmovznz_u32(uint32_t *out1,
+ fiat_secp384r1_uint1 arg1, uint32_t arg2,
+ uint32_t arg3)
+{
+ fiat_secp384r1_uint1 x1;
+ uint32_t x2;
+ uint32_t x3;
+ x1 = (!(!arg1)); /* normalize the condition to exactly 0 or 1 */
+ x2 = ((fiat_secp384r1_int1)(0x0 - x1) & UINT32_C(0xffffffff)); /* mask: all ones when arg1 != 0, otherwise zero */
+ x3 = ((fiat_secp384r1_value_barrier_u32(x2) & arg3) | /* arg3 passes through where the mask is set */
+ (fiat_secp384r1_value_barrier_u32((~x2)) & arg2)); /* arg2 passes through where it is clear; the select uses masks only, no branch on arg1 */
+ *out1 = x3;
+}
+
+/*
+ * The function fiat_secp384r1_mul multiplies two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_mul(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ fiat_secp384r1_uint1 x38;
+ uint32_t x39;
+ fiat_secp384r1_uint1 x40;
+ uint32_t x41;
+ fiat_secp384r1_uint1 x42;
+ uint32_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint32_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint32_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint32_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint32_t x51;
+ fiat_secp384r1_uint1 x52;
+ uint32_t x53;
+ fiat_secp384r1_uint1 x54;
+ uint32_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint32_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ uint32_t x63;
+ uint32_t x64;
+ uint32_t x65;
+ uint32_t x66;
+ uint32_t x67;
+ uint32_t x68;
+ uint32_t x69;
+ uint32_t x70;
+ uint32_t x71;
+ uint32_t x72;
+ uint32_t x73;
+ uint32_t x74;
+ uint32_t x75;
+ uint32_t x76;
+ uint32_t x77;
+ uint32_t x78;
+ uint32_t x79;
+ uint32_t x80;
+ fiat_secp384r1_uint1 x81;
+ uint32_t x82;
+ fiat_secp384r1_uint1 x83;
+ uint32_t x84;
+ fiat_secp384r1_uint1 x85;
+ uint32_t x86;
+ fiat_secp384r1_uint1 x87;
+ uint32_t x88;
+ fiat_secp384r1_uint1 x89;
+ uint32_t x90;
+ fiat_secp384r1_uint1 x91;
+ uint32_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint32_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint32_t x96;
+ uint32_t x97;
+ fiat_secp384r1_uint1 x98;
+ uint32_t x99;
+ fiat_secp384r1_uint1 x100;
+ uint32_t x101;
+ fiat_secp384r1_uint1 x102;
+ uint32_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint32_t x105;
+ fiat_secp384r1_uint1 x106;
+ uint32_t x107;
+ fiat_secp384r1_uint1 x108;
+ uint32_t x109;
+ fiat_secp384r1_uint1 x110;
+ uint32_t x111;
+ fiat_secp384r1_uint1 x112;
+ uint32_t x113;
+ fiat_secp384r1_uint1 x114;
+ uint32_t x115;
+ fiat_secp384r1_uint1 x116;
+ uint32_t x117;
+ fiat_secp384r1_uint1 x118;
+ uint32_t x119;
+ fiat_secp384r1_uint1 x120;
+ uint32_t x121;
+ fiat_secp384r1_uint1 x122;
+ uint32_t x123;
+ uint32_t x124;
+ uint32_t x125;
+ uint32_t x126;
+ uint32_t x127;
+ uint32_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint32_t x133;
+ uint32_t x134;
+ uint32_t x135;
+ uint32_t x136;
+ uint32_t x137;
+ uint32_t x138;
+ uint32_t x139;
+ uint32_t x140;
+ uint32_t x141;
+ uint32_t x142;
+ uint32_t x143;
+ uint32_t x144;
+ uint32_t x145;
+ uint32_t x146;
+ uint32_t x147;
+ fiat_secp384r1_uint1 x148;
+ uint32_t x149;
+ fiat_secp384r1_uint1 x150;
+ uint32_t x151;
+ fiat_secp384r1_uint1 x152;
+ uint32_t x153;
+ fiat_secp384r1_uint1 x154;
+ uint32_t x155;
+ fiat_secp384r1_uint1 x156;
+ uint32_t x157;
+ fiat_secp384r1_uint1 x158;
+ uint32_t x159;
+ fiat_secp384r1_uint1 x160;
+ uint32_t x161;
+ fiat_secp384r1_uint1 x162;
+ uint32_t x163;
+ fiat_secp384r1_uint1 x164;
+ uint32_t x165;
+ fiat_secp384r1_uint1 x166;
+ uint32_t x167;
+ fiat_secp384r1_uint1 x168;
+ uint32_t x169;
+ uint32_t x170;
+ fiat_secp384r1_uint1 x171;
+ uint32_t x172;
+ fiat_secp384r1_uint1 x173;
+ uint32_t x174;
+ fiat_secp384r1_uint1 x175;
+ uint32_t x176;
+ fiat_secp384r1_uint1 x177;
+ uint32_t x178;
+ fiat_secp384r1_uint1 x179;
+ uint32_t x180;
+ fiat_secp384r1_uint1 x181;
+ uint32_t x182;
+ fiat_secp384r1_uint1 x183;
+ uint32_t x184;
+ fiat_secp384r1_uint1 x185;
+ uint32_t x186;
+ fiat_secp384r1_uint1 x187;
+ uint32_t x188;
+ fiat_secp384r1_uint1 x189;
+ uint32_t x190;
+ fiat_secp384r1_uint1 x191;
+ uint32_t x192;
+ fiat_secp384r1_uint1 x193;
+ uint32_t x194;
+ fiat_secp384r1_uint1 x195;
+ uint32_t x196;
+ uint32_t x197;
+ uint32_t x198;
+ uint32_t x199;
+ uint32_t x200;
+ uint32_t x201;
+ uint32_t x202;
+ uint32_t x203;
+ uint32_t x204;
+ uint32_t x205;
+ uint32_t x206;
+ uint32_t x207;
+ uint32_t x208;
+ uint32_t x209;
+ uint32_t x210;
+ uint32_t x211;
+ uint32_t x212;
+ uint32_t x213;
+ uint32_t x214;
+ uint32_t x215;
+ uint32_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint32_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint32_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint32_t x222;
+ fiat_secp384r1_uint1 x223;
+ uint32_t x224;
+ fiat_secp384r1_uint1 x225;
+ uint32_t x226;
+ fiat_secp384r1_uint1 x227;
+ uint32_t x228;
+ fiat_secp384r1_uint1 x229;
+ uint32_t x230;
+ fiat_secp384r1_uint1 x231;
+ uint32_t x232;
+ uint32_t x233;
+ fiat_secp384r1_uint1 x234;
+ uint32_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint32_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint32_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint32_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint32_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint32_t x245;
+ fiat_secp384r1_uint1 x246;
+ uint32_t x247;
+ fiat_secp384r1_uint1 x248;
+ uint32_t x249;
+ fiat_secp384r1_uint1 x250;
+ uint32_t x251;
+ fiat_secp384r1_uint1 x252;
+ uint32_t x253;
+ fiat_secp384r1_uint1 x254;
+ uint32_t x255;
+ fiat_secp384r1_uint1 x256;
+ uint32_t x257;
+ fiat_secp384r1_uint1 x258;
+ uint32_t x259;
+ uint32_t x260;
+ uint32_t x261;
+ uint32_t x262;
+ uint32_t x263;
+ uint32_t x264;
+ uint32_t x265;
+ uint32_t x266;
+ uint32_t x267;
+ uint32_t x268;
+ uint32_t x269;
+ uint32_t x270;
+ uint32_t x271;
+ uint32_t x272;
+ uint32_t x273;
+ uint32_t x274;
+ uint32_t x275;
+ uint32_t x276;
+ uint32_t x277;
+ uint32_t x278;
+ uint32_t x279;
+ uint32_t x280;
+ uint32_t x281;
+ uint32_t x282;
+ uint32_t x283;
+ uint32_t x284;
+ fiat_secp384r1_uint1 x285;
+ uint32_t x286;
+ fiat_secp384r1_uint1 x287;
+ uint32_t x288;
+ fiat_secp384r1_uint1 x289;
+ uint32_t x290;
+ fiat_secp384r1_uint1 x291;
+ uint32_t x292;
+ fiat_secp384r1_uint1 x293;
+ uint32_t x294;
+ fiat_secp384r1_uint1 x295;
+ uint32_t x296;
+ fiat_secp384r1_uint1 x297;
+ uint32_t x298;
+ fiat_secp384r1_uint1 x299;
+ uint32_t x300;
+ fiat_secp384r1_uint1 x301;
+ uint32_t x302;
+ fiat_secp384r1_uint1 x303;
+ uint32_t x304;
+ fiat_secp384r1_uint1 x305;
+ uint32_t x306;
+ uint32_t x307;
+ fiat_secp384r1_uint1 x308;
+ uint32_t x309;
+ fiat_secp384r1_uint1 x310;
+ uint32_t x311;
+ fiat_secp384r1_uint1 x312;
+ uint32_t x313;
+ fiat_secp384r1_uint1 x314;
+ uint32_t x315;
+ fiat_secp384r1_uint1 x316;
+ uint32_t x317;
+ fiat_secp384r1_uint1 x318;
+ uint32_t x319;
+ fiat_secp384r1_uint1 x320;
+ uint32_t x321;
+ fiat_secp384r1_uint1 x322;
+ uint32_t x323;
+ fiat_secp384r1_uint1 x324;
+ uint32_t x325;
+ fiat_secp384r1_uint1 x326;
+ uint32_t x327;
+ fiat_secp384r1_uint1 x328;
+ uint32_t x329;
+ fiat_secp384r1_uint1 x330;
+ uint32_t x331;
+ fiat_secp384r1_uint1 x332;
+ uint32_t x333;
+ uint32_t x334;
+ uint32_t x335;
+ uint32_t x336;
+ uint32_t x337;
+ uint32_t x338;
+ uint32_t x339;
+ uint32_t x340;
+ uint32_t x341;
+ uint32_t x342;
+ uint32_t x343;
+ uint32_t x344;
+ uint32_t x345;
+ uint32_t x346;
+ uint32_t x347;
+ uint32_t x348;
+ uint32_t x349;
+ uint32_t x350;
+ uint32_t x351;
+ uint32_t x352;
+ uint32_t x353;
+ fiat_secp384r1_uint1 x354;
+ uint32_t x355;
+ fiat_secp384r1_uint1 x356;
+ uint32_t x357;
+ fiat_secp384r1_uint1 x358;
+ uint32_t x359;
+ fiat_secp384r1_uint1 x360;
+ uint32_t x361;
+ fiat_secp384r1_uint1 x362;
+ uint32_t x363;
+ fiat_secp384r1_uint1 x364;
+ uint32_t x365;
+ fiat_secp384r1_uint1 x366;
+ uint32_t x367;
+ fiat_secp384r1_uint1 x368;
+ uint32_t x369;
+ uint32_t x370;
+ fiat_secp384r1_uint1 x371;
+ uint32_t x372;
+ fiat_secp384r1_uint1 x373;
+ uint32_t x374;
+ fiat_secp384r1_uint1 x375;
+ uint32_t x376;
+ fiat_secp384r1_uint1 x377;
+ uint32_t x378;
+ fiat_secp384r1_uint1 x379;
+ uint32_t x380;
+ fiat_secp384r1_uint1 x381;
+ uint32_t x382;
+ fiat_secp384r1_uint1 x383;
+ uint32_t x384;
+ fiat_secp384r1_uint1 x385;
+ uint32_t x386;
+ fiat_secp384r1_uint1 x387;
+ uint32_t x388;
+ fiat_secp384r1_uint1 x389;
+ uint32_t x390;
+ fiat_secp384r1_uint1 x391;
+ uint32_t x392;
+ fiat_secp384r1_uint1 x393;
+ uint32_t x394;
+ fiat_secp384r1_uint1 x395;
+ uint32_t x396;
+ uint32_t x397;
+ uint32_t x398;
+ uint32_t x399;
+ uint32_t x400;
+ uint32_t x401;
+ uint32_t x402;
+ uint32_t x403;
+ uint32_t x404;
+ uint32_t x405;
+ uint32_t x406;
+ uint32_t x407;
+ uint32_t x408;
+ uint32_t x409;
+ uint32_t x410;
+ uint32_t x411;
+ uint32_t x412;
+ uint32_t x413;
+ uint32_t x414;
+ uint32_t x415;
+ uint32_t x416;
+ uint32_t x417;
+ uint32_t x418;
+ uint32_t x419;
+ uint32_t x420;
+ uint32_t x421;
+ fiat_secp384r1_uint1 x422;
+ uint32_t x423;
+ fiat_secp384r1_uint1 x424;
+ uint32_t x425;
+ fiat_secp384r1_uint1 x426;
+ uint32_t x427;
+ fiat_secp384r1_uint1 x428;
+ uint32_t x429;
+ fiat_secp384r1_uint1 x430;
+ uint32_t x431;
+ fiat_secp384r1_uint1 x432;
+ uint32_t x433;
+ fiat_secp384r1_uint1 x434;
+ uint32_t x435;
+ fiat_secp384r1_uint1 x436;
+ uint32_t x437;
+ fiat_secp384r1_uint1 x438;
+ uint32_t x439;
+ fiat_secp384r1_uint1 x440;
+ uint32_t x441;
+ fiat_secp384r1_uint1 x442;
+ uint32_t x443;
+ uint32_t x444;
+ fiat_secp384r1_uint1 x445;
+ uint32_t x446;
+ fiat_secp384r1_uint1 x447;
+ uint32_t x448;
+ fiat_secp384r1_uint1 x449;
+ uint32_t x450;
+ fiat_secp384r1_uint1 x451;
+ uint32_t x452;
+ fiat_secp384r1_uint1 x453;
+ uint32_t x454;
+ fiat_secp384r1_uint1 x455;
+ uint32_t x456;
+ fiat_secp384r1_uint1 x457;
+ uint32_t x458;
+ fiat_secp384r1_uint1 x459;
+ uint32_t x460;
+ fiat_secp384r1_uint1 x461;
+ uint32_t x462;
+ fiat_secp384r1_uint1 x463;
+ uint32_t x464;
+ fiat_secp384r1_uint1 x465;
+ uint32_t x466;
+ fiat_secp384r1_uint1 x467;
+ uint32_t x468;
+ fiat_secp384r1_uint1 x469;
+ uint32_t x470;
+ uint32_t x471;
+ uint32_t x472;
+ uint32_t x473;
+ uint32_t x474;
+ uint32_t x475;
+ uint32_t x476;
+ uint32_t x477;
+ uint32_t x478;
+ uint32_t x479;
+ uint32_t x480;
+ uint32_t x481;
+ uint32_t x482;
+ uint32_t x483;
+ uint32_t x484;
+ uint32_t x485;
+ uint32_t x486;
+ uint32_t x487;
+ uint32_t x488;
+ uint32_t x489;
+ uint32_t x490;
+ fiat_secp384r1_uint1 x491;
+ uint32_t x492;
+ fiat_secp384r1_uint1 x493;
+ uint32_t x494;
+ fiat_secp384r1_uint1 x495;
+ uint32_t x496;
+ fiat_secp384r1_uint1 x497;
+ uint32_t x498;
+ fiat_secp384r1_uint1 x499;
+ uint32_t x500;
+ fiat_secp384r1_uint1 x501;
+ uint32_t x502;
+ fiat_secp384r1_uint1 x503;
+ uint32_t x504;
+ fiat_secp384r1_uint1 x505;
+ uint32_t x506;
+ uint32_t x507;
+ fiat_secp384r1_uint1 x508;
+ uint32_t x509;
+ fiat_secp384r1_uint1 x510;
+ uint32_t x511;
+ fiat_secp384r1_uint1 x512;
+ uint32_t x513;
+ fiat_secp384r1_uint1 x514;
+ uint32_t x515;
+ fiat_secp384r1_uint1 x516;
+ uint32_t x517;
+ fiat_secp384r1_uint1 x518;
+ uint32_t x519;
+ fiat_secp384r1_uint1 x520;
+ uint32_t x521;
+ fiat_secp384r1_uint1 x522;
+ uint32_t x523;
+ fiat_secp384r1_uint1 x524;
+ uint32_t x525;
+ fiat_secp384r1_uint1 x526;
+ uint32_t x527;
+ fiat_secp384r1_uint1 x528;
+ uint32_t x529;
+ fiat_secp384r1_uint1 x530;
+ uint32_t x531;
+ fiat_secp384r1_uint1 x532;
+ uint32_t x533;
+ uint32_t x534;
+ uint32_t x535;
+ uint32_t x536;
+ uint32_t x537;
+ uint32_t x538;
+ uint32_t x539;
+ uint32_t x540;
+ uint32_t x541;
+ uint32_t x542;
+ uint32_t x543;
+ uint32_t x544;
+ uint32_t x545;
+ uint32_t x546;
+ uint32_t x547;
+ uint32_t x548;
+ uint32_t x549;
+ uint32_t x550;
+ uint32_t x551;
+ uint32_t x552;
+ uint32_t x553;
+ uint32_t x554;
+ uint32_t x555;
+ uint32_t x556;
+ uint32_t x557;
+ uint32_t x558;
+ fiat_secp384r1_uint1 x559;
+ uint32_t x560;
+ fiat_secp384r1_uint1 x561;
+ uint32_t x562;
+ fiat_secp384r1_uint1 x563;
+ uint32_t x564;
+ fiat_secp384r1_uint1 x565;
+ uint32_t x566;
+ fiat_secp384r1_uint1 x567;
+ uint32_t x568;
+ fiat_secp384r1_uint1 x569;
+ uint32_t x570;
+ fiat_secp384r1_uint1 x571;
+ uint32_t x572;
+ fiat_secp384r1_uint1 x573;
+ uint32_t x574;
+ fiat_secp384r1_uint1 x575;
+ uint32_t x576;
+ fiat_secp384r1_uint1 x577;
+ uint32_t x578;
+ fiat_secp384r1_uint1 x579;
+ uint32_t x580;
+ uint32_t x581;
+ fiat_secp384r1_uint1 x582;
+ uint32_t x583;
+ fiat_secp384r1_uint1 x584;
+ uint32_t x585;
+ fiat_secp384r1_uint1 x586;
+ uint32_t x587;
+ fiat_secp384r1_uint1 x588;
+ uint32_t x589;
+ fiat_secp384r1_uint1 x590;
+ uint32_t x591;
+ fiat_secp384r1_uint1 x592;
+ uint32_t x593;
+ fiat_secp384r1_uint1 x594;
+ uint32_t x595;
+ fiat_secp384r1_uint1 x596;
+ uint32_t x597;
+ fiat_secp384r1_uint1 x598;
+ uint32_t x599;
+ fiat_secp384r1_uint1 x600;
+ uint32_t x601;
+ fiat_secp384r1_uint1 x602;
+ uint32_t x603;
+ fiat_secp384r1_uint1 x604;
+ uint32_t x605;
+ fiat_secp384r1_uint1 x606;
+ uint32_t x607;
+ uint32_t x608;
+ uint32_t x609;
+ uint32_t x610;
+ uint32_t x611;
+ uint32_t x612;
+ uint32_t x613;
+ uint32_t x614;
+ uint32_t x615;
+ uint32_t x616;
+ uint32_t x617;
+ uint32_t x618;
+ uint32_t x619;
+ uint32_t x620;
+ uint32_t x621;
+ uint32_t x622;
+ uint32_t x623;
+ uint32_t x624;
+ uint32_t x625;
+ uint32_t x626;
+ uint32_t x627;
+ fiat_secp384r1_uint1 x628;
+ uint32_t x629;
+ fiat_secp384r1_uint1 x630;
+ uint32_t x631;
+ fiat_secp384r1_uint1 x632;
+ uint32_t x633;
+ fiat_secp384r1_uint1 x634;
+ uint32_t x635;
+ fiat_secp384r1_uint1 x636;
+ uint32_t x637;
+ fiat_secp384r1_uint1 x638;
+ uint32_t x639;
+ fiat_secp384r1_uint1 x640;
+ uint32_t x641;
+ fiat_secp384r1_uint1 x642;
+ uint32_t x643;
+ uint32_t x644;
+ fiat_secp384r1_uint1 x645;
+ uint32_t x646;
+ fiat_secp384r1_uint1 x647;
+ uint32_t x648;
+ fiat_secp384r1_uint1 x649;
+ uint32_t x650;
+ fiat_secp384r1_uint1 x651;
+ uint32_t x652;
+ fiat_secp384r1_uint1 x653;
+ uint32_t x654;
+ fiat_secp384r1_uint1 x655;
+ uint32_t x656;
+ fiat_secp384r1_uint1 x657;
+ uint32_t x658;
+ fiat_secp384r1_uint1 x659;
+ uint32_t x660;
+ fiat_secp384r1_uint1 x661;
+ uint32_t x662;
+ fiat_secp384r1_uint1 x663;
+ uint32_t x664;
+ fiat_secp384r1_uint1 x665;
+ uint32_t x666;
+ fiat_secp384r1_uint1 x667;
+ uint32_t x668;
+ fiat_secp384r1_uint1 x669;
+ uint32_t x670;
+ uint32_t x671;
+ uint32_t x672;
+ uint32_t x673;
+ uint32_t x674;
+ uint32_t x675;
+ uint32_t x676;
+ uint32_t x677;
+ uint32_t x678;
+ uint32_t x679;
+ uint32_t x680;
+ uint32_t x681;
+ uint32_t x682;
+ uint32_t x683;
+ uint32_t x684;
+ uint32_t x685;
+ uint32_t x686;
+ uint32_t x687;
+ uint32_t x688;
+ uint32_t x689;
+ uint32_t x690;
+ uint32_t x691;
+ uint32_t x692;
+ uint32_t x693;
+ uint32_t x694;
+ uint32_t x695;
+ fiat_secp384r1_uint1 x696;
+ uint32_t x697;
+ fiat_secp384r1_uint1 x698;
+ uint32_t x699;
+ fiat_secp384r1_uint1 x700;
+ uint32_t x701;
+ fiat_secp384r1_uint1 x702;
+ uint32_t x703;
+ fiat_secp384r1_uint1 x704;
+ uint32_t x705;
+ fiat_secp384r1_uint1 x706;
+ uint32_t x707;
+ fiat_secp384r1_uint1 x708;
+ uint32_t x709;
+ fiat_secp384r1_uint1 x710;
+ uint32_t x711;
+ fiat_secp384r1_uint1 x712;
+ uint32_t x713;
+ fiat_secp384r1_uint1 x714;
+ uint32_t x715;
+ fiat_secp384r1_uint1 x716;
+ uint32_t x717;
+ uint32_t x718;
+ fiat_secp384r1_uint1 x719;
+ uint32_t x720;
+ fiat_secp384r1_uint1 x721;
+ uint32_t x722;
+ fiat_secp384r1_uint1 x723;
+ uint32_t x724;
+ fiat_secp384r1_uint1 x725;
+ uint32_t x726;
+ fiat_secp384r1_uint1 x727;
+ uint32_t x728;
+ fiat_secp384r1_uint1 x729;
+ uint32_t x730;
+ fiat_secp384r1_uint1 x731;
+ uint32_t x732;
+ fiat_secp384r1_uint1 x733;
+ uint32_t x734;
+ fiat_secp384r1_uint1 x735;
+ uint32_t x736;
+ fiat_secp384r1_uint1 x737;
+ uint32_t x738;
+ fiat_secp384r1_uint1 x739;
+ uint32_t x740;
+ fiat_secp384r1_uint1 x741;
+ uint32_t x742;
+ fiat_secp384r1_uint1 x743;
+ uint32_t x744;
+ uint32_t x745;
+ uint32_t x746;
+ uint32_t x747;
+ uint32_t x748;
+ uint32_t x749;
+ uint32_t x750;
+ uint32_t x751;
+ uint32_t x752;
+ uint32_t x753;
+ uint32_t x754;
+ uint32_t x755;
+ uint32_t x756;
+ uint32_t x757;
+ uint32_t x758;
+ uint32_t x759;
+ uint32_t x760;
+ uint32_t x761;
+ uint32_t x762;
+ uint32_t x763;
+ uint32_t x764;
+ fiat_secp384r1_uint1 x765;
+ uint32_t x766;
+ fiat_secp384r1_uint1 x767;
+ uint32_t x768;
+ fiat_secp384r1_uint1 x769;
+ uint32_t x770;
+ fiat_secp384r1_uint1 x771;
+ uint32_t x772;
+ fiat_secp384r1_uint1 x773;
+ uint32_t x774;
+ fiat_secp384r1_uint1 x775;
+ uint32_t x776;
+ fiat_secp384r1_uint1 x777;
+ uint32_t x778;
+ fiat_secp384r1_uint1 x779;
+ uint32_t x780;
+ uint32_t x781;
+ fiat_secp384r1_uint1 x782;
+ uint32_t x783;
+ fiat_secp384r1_uint1 x784;
+ uint32_t x785;
+ fiat_secp384r1_uint1 x786;
+ uint32_t x787;
+ fiat_secp384r1_uint1 x788;
+ uint32_t x789;
+ fiat_secp384r1_uint1 x790;
+ uint32_t x791;
+ fiat_secp384r1_uint1 x792;
+ uint32_t x793;
+ fiat_secp384r1_uint1 x794;
+ uint32_t x795;
+ fiat_secp384r1_uint1 x796;
+ uint32_t x797;
+ fiat_secp384r1_uint1 x798;
+ uint32_t x799;
+ fiat_secp384r1_uint1 x800;
+ uint32_t x801;
+ fiat_secp384r1_uint1 x802;
+ uint32_t x803;
+ fiat_secp384r1_uint1 x804;
+ uint32_t x805;
+ fiat_secp384r1_uint1 x806;
+ uint32_t x807;
+ uint32_t x808;
+ uint32_t x809;
+ uint32_t x810;
+ uint32_t x811;
+ uint32_t x812;
+ uint32_t x813;
+ uint32_t x814;
+ uint32_t x815;
+ uint32_t x816;
+ uint32_t x817;
+ uint32_t x818;
+ uint32_t x819;
+ uint32_t x820;
+ uint32_t x821;
+ uint32_t x822;
+ uint32_t x823;
+ uint32_t x824;
+ uint32_t x825;
+ uint32_t x826;
+ uint32_t x827;
+ uint32_t x828;
+ uint32_t x829;
+ uint32_t x830;
+ uint32_t x831;
+ uint32_t x832;
+ fiat_secp384r1_uint1 x833;
+ uint32_t x834;
+ fiat_secp384r1_uint1 x835;
+ uint32_t x836;
+ fiat_secp384r1_uint1 x837;
+ uint32_t x838;
+ fiat_secp384r1_uint1 x839;
+ uint32_t x840;
+ fiat_secp384r1_uint1 x841;
+ uint32_t x842;
+ fiat_secp384r1_uint1 x843;
+ uint32_t x844;
+ fiat_secp384r1_uint1 x845;
+ uint32_t x846;
+ fiat_secp384r1_uint1 x847;
+ uint32_t x848;
+ fiat_secp384r1_uint1 x849;
+ uint32_t x850;
+ fiat_secp384r1_uint1 x851;
+ uint32_t x852;
+ fiat_secp384r1_uint1 x853;
+ uint32_t x854;
+ uint32_t x855;
+ fiat_secp384r1_uint1 x856;
+ uint32_t x857;
+ fiat_secp384r1_uint1 x858;
+ uint32_t x859;
+ fiat_secp384r1_uint1 x860;
+ uint32_t x861;
+ fiat_secp384r1_uint1 x862;
+ uint32_t x863;
+ fiat_secp384r1_uint1 x864;
+ uint32_t x865;
+ fiat_secp384r1_uint1 x866;
+ uint32_t x867;
+ fiat_secp384r1_uint1 x868;
+ uint32_t x869;
+ fiat_secp384r1_uint1 x870;
+ uint32_t x871;
+ fiat_secp384r1_uint1 x872;
+ uint32_t x873;
+ fiat_secp384r1_uint1 x874;
+ uint32_t x875;
+ fiat_secp384r1_uint1 x876;
+ uint32_t x877;
+ fiat_secp384r1_uint1 x878;
+ uint32_t x879;
+ fiat_secp384r1_uint1 x880;
+ uint32_t x881;
+ uint32_t x882;
+ uint32_t x883;
+ uint32_t x884;
+ uint32_t x885;
+ uint32_t x886;
+ uint32_t x887;
+ uint32_t x888;
+ uint32_t x889;
+ uint32_t x890;
+ uint32_t x891;
+ uint32_t x892;
+ uint32_t x893;
+ uint32_t x894;
+ uint32_t x895;
+ uint32_t x896;
+ uint32_t x897;
+ uint32_t x898;
+ uint32_t x899;
+ uint32_t x900;
+ uint32_t x901;
+ fiat_secp384r1_uint1 x902;
+ uint32_t x903;
+ fiat_secp384r1_uint1 x904;
+ uint32_t x905;
+ fiat_secp384r1_uint1 x906;
+ uint32_t x907;
+ fiat_secp384r1_uint1 x908;
+ uint32_t x909;
+ fiat_secp384r1_uint1 x910;
+ uint32_t x911;
+ fiat_secp384r1_uint1 x912;
+ uint32_t x913;
+ fiat_secp384r1_uint1 x914;
+ uint32_t x915;
+ fiat_secp384r1_uint1 x916;
+ uint32_t x917;
+ uint32_t x918;
+ fiat_secp384r1_uint1 x919;
+ uint32_t x920;
+ fiat_secp384r1_uint1 x921;
+ uint32_t x922;
+ fiat_secp384r1_uint1 x923;
+ uint32_t x924;
+ fiat_secp384r1_uint1 x925;
+ uint32_t x926;
+ fiat_secp384r1_uint1 x927;
+ uint32_t x928;
+ fiat_secp384r1_uint1 x929;
+ uint32_t x930;
+ fiat_secp384r1_uint1 x931;
+ uint32_t x932;
+ fiat_secp384r1_uint1 x933;
+ uint32_t x934;
+ fiat_secp384r1_uint1 x935;
+ uint32_t x936;
+ fiat_secp384r1_uint1 x937;
+ uint32_t x938;
+ fiat_secp384r1_uint1 x939;
+ uint32_t x940;
+ fiat_secp384r1_uint1 x941;
+ uint32_t x942;
+ fiat_secp384r1_uint1 x943;
+ uint32_t x944;
+ uint32_t x945;
+ uint32_t x946;
+ uint32_t x947;
+ uint32_t x948;
+ uint32_t x949;
+ uint32_t x950;
+ uint32_t x951;
+ uint32_t x952;
+ uint32_t x953;
+ uint32_t x954;
+ uint32_t x955;
+ uint32_t x956;
+ uint32_t x957;
+ uint32_t x958;
+ uint32_t x959;
+ uint32_t x960;
+ uint32_t x961;
+ uint32_t x962;
+ uint32_t x963;
+ uint32_t x964;
+ uint32_t x965;
+ uint32_t x966;
+ uint32_t x967;
+ uint32_t x968;
+ uint32_t x969;
+ fiat_secp384r1_uint1 x970;
+ uint32_t x971;
+ fiat_secp384r1_uint1 x972;
+ uint32_t x973;
+ fiat_secp384r1_uint1 x974;
+ uint32_t x975;
+ fiat_secp384r1_uint1 x976;
+ uint32_t x977;
+ fiat_secp384r1_uint1 x978;
+ uint32_t x979;
+ fiat_secp384r1_uint1 x980;
+ uint32_t x981;
+ fiat_secp384r1_uint1 x982;
+ uint32_t x983;
+ fiat_secp384r1_uint1 x984;
+ uint32_t x985;
+ fiat_secp384r1_uint1 x986;
+ uint32_t x987;
+ fiat_secp384r1_uint1 x988;
+ uint32_t x989;
+ fiat_secp384r1_uint1 x990;
+ uint32_t x991;
+ uint32_t x992;
+ fiat_secp384r1_uint1 x993;
+ uint32_t x994;
+ fiat_secp384r1_uint1 x995;
+ uint32_t x996;
+ fiat_secp384r1_uint1 x997;
+ uint32_t x998;
+ fiat_secp384r1_uint1 x999;
+ uint32_t x1000;
+ fiat_secp384r1_uint1 x1001;
+ uint32_t x1002;
+ fiat_secp384r1_uint1 x1003;
+ uint32_t x1004;
+ fiat_secp384r1_uint1 x1005;
+ uint32_t x1006;
+ fiat_secp384r1_uint1 x1007;
+ uint32_t x1008;
+ fiat_secp384r1_uint1 x1009;
+ uint32_t x1010;
+ fiat_secp384r1_uint1 x1011;
+ uint32_t x1012;
+ fiat_secp384r1_uint1 x1013;
+ uint32_t x1014;
+ fiat_secp384r1_uint1 x1015;
+ uint32_t x1016;
+ fiat_secp384r1_uint1 x1017;
+ uint32_t x1018;
+ uint32_t x1019;
+ uint32_t x1020;
+ uint32_t x1021;
+ uint32_t x1022;
+ uint32_t x1023;
+ uint32_t x1024;
+ uint32_t x1025;
+ uint32_t x1026;
+ uint32_t x1027;
+ uint32_t x1028;
+ uint32_t x1029;
+ uint32_t x1030;
+ uint32_t x1031;
+ uint32_t x1032;
+ uint32_t x1033;
+ uint32_t x1034;
+ uint32_t x1035;
+ uint32_t x1036;
+ uint32_t x1037;
+ uint32_t x1038;
+ fiat_secp384r1_uint1 x1039;
+ uint32_t x1040;
+ fiat_secp384r1_uint1 x1041;
+ uint32_t x1042;
+ fiat_secp384r1_uint1 x1043;
+ uint32_t x1044;
+ fiat_secp384r1_uint1 x1045;
+ uint32_t x1046;
+ fiat_secp384r1_uint1 x1047;
+ uint32_t x1048;
+ fiat_secp384r1_uint1 x1049;
+ uint32_t x1050;
+ fiat_secp384r1_uint1 x1051;
+ uint32_t x1052;
+ fiat_secp384r1_uint1 x1053;
+ uint32_t x1054;
+ uint32_t x1055;
+ fiat_secp384r1_uint1 x1056;
+ uint32_t x1057;
+ fiat_secp384r1_uint1 x1058;
+ uint32_t x1059;
+ fiat_secp384r1_uint1 x1060;
+ uint32_t x1061;
+ fiat_secp384r1_uint1 x1062;
+ uint32_t x1063;
+ fiat_secp384r1_uint1 x1064;
+ uint32_t x1065;
+ fiat_secp384r1_uint1 x1066;
+ uint32_t x1067;
+ fiat_secp384r1_uint1 x1068;
+ uint32_t x1069;
+ fiat_secp384r1_uint1 x1070;
+ uint32_t x1071;
+ fiat_secp384r1_uint1 x1072;
+ uint32_t x1073;
+ fiat_secp384r1_uint1 x1074;
+ uint32_t x1075;
+ fiat_secp384r1_uint1 x1076;
+ uint32_t x1077;
+ fiat_secp384r1_uint1 x1078;
+ uint32_t x1079;
+ fiat_secp384r1_uint1 x1080;
+ uint32_t x1081;
+ uint32_t x1082;
+ uint32_t x1083;
+ uint32_t x1084;
+ uint32_t x1085;
+ uint32_t x1086;
+ uint32_t x1087;
+ uint32_t x1088;
+ uint32_t x1089;
+ uint32_t x1090;
+ uint32_t x1091;
+ uint32_t x1092;
+ uint32_t x1093;
+ uint32_t x1094;
+ uint32_t x1095;
+ uint32_t x1096;
+ uint32_t x1097;
+ uint32_t x1098;
+ uint32_t x1099;
+ uint32_t x1100;
+ uint32_t x1101;
+ uint32_t x1102;
+ uint32_t x1103;
+ uint32_t x1104;
+ uint32_t x1105;
+ uint32_t x1106;
+ fiat_secp384r1_uint1 x1107;
+ uint32_t x1108;
+ fiat_secp384r1_uint1 x1109;
+ uint32_t x1110;
+ fiat_secp384r1_uint1 x1111;
+ uint32_t x1112;
+ fiat_secp384r1_uint1 x1113;
+ uint32_t x1114;
+ fiat_secp384r1_uint1 x1115;
+ uint32_t x1116;
+ fiat_secp384r1_uint1 x1117;
+ uint32_t x1118;
+ fiat_secp384r1_uint1 x1119;
+ uint32_t x1120;
+ fiat_secp384r1_uint1 x1121;
+ uint32_t x1122;
+ fiat_secp384r1_uint1 x1123;
+ uint32_t x1124;
+ fiat_secp384r1_uint1 x1125;
+ uint32_t x1126;
+ fiat_secp384r1_uint1 x1127;
+ uint32_t x1128;
+ uint32_t x1129;
+ fiat_secp384r1_uint1 x1130;
+ uint32_t x1131;
+ fiat_secp384r1_uint1 x1132;
+ uint32_t x1133;
+ fiat_secp384r1_uint1 x1134;
+ uint32_t x1135;
+ fiat_secp384r1_uint1 x1136;
+ uint32_t x1137;
+ fiat_secp384r1_uint1 x1138;
+ uint32_t x1139;
+ fiat_secp384r1_uint1 x1140;
+ uint32_t x1141;
+ fiat_secp384r1_uint1 x1142;
+ uint32_t x1143;
+ fiat_secp384r1_uint1 x1144;
+ uint32_t x1145;
+ fiat_secp384r1_uint1 x1146;
+ uint32_t x1147;
+ fiat_secp384r1_uint1 x1148;
+ uint32_t x1149;
+ fiat_secp384r1_uint1 x1150;
+ uint32_t x1151;
+ fiat_secp384r1_uint1 x1152;
+ uint32_t x1153;
+ fiat_secp384r1_uint1 x1154;
+ uint32_t x1155;
+ uint32_t x1156;
+ uint32_t x1157;
+ uint32_t x1158;
+ uint32_t x1159;
+ uint32_t x1160;
+ uint32_t x1161;
+ uint32_t x1162;
+ uint32_t x1163;
+ uint32_t x1164;
+ uint32_t x1165;
+ uint32_t x1166;
+ uint32_t x1167;
+ uint32_t x1168;
+ uint32_t x1169;
+ uint32_t x1170;
+ uint32_t x1171;
+ uint32_t x1172;
+ uint32_t x1173;
+ uint32_t x1174;
+ uint32_t x1175;
+ fiat_secp384r1_uint1 x1176;
+ uint32_t x1177;
+ fiat_secp384r1_uint1 x1178;
+ uint32_t x1179;
+ fiat_secp384r1_uint1 x1180;
+ uint32_t x1181;
+ fiat_secp384r1_uint1 x1182;
+ uint32_t x1183;
+ fiat_secp384r1_uint1 x1184;
+ uint32_t x1185;
+ fiat_secp384r1_uint1 x1186;
+ uint32_t x1187;
+ fiat_secp384r1_uint1 x1188;
+ uint32_t x1189;
+ fiat_secp384r1_uint1 x1190;
+ uint32_t x1191;
+ uint32_t x1192;
+ fiat_secp384r1_uint1 x1193;
+ uint32_t x1194;
+ fiat_secp384r1_uint1 x1195;
+ uint32_t x1196;
+ fiat_secp384r1_uint1 x1197;
+ uint32_t x1198;
+ fiat_secp384r1_uint1 x1199;
+ uint32_t x1200;
+ fiat_secp384r1_uint1 x1201;
+ uint32_t x1202;
+ fiat_secp384r1_uint1 x1203;
+ uint32_t x1204;
+ fiat_secp384r1_uint1 x1205;
+ uint32_t x1206;
+ fiat_secp384r1_uint1 x1207;
+ uint32_t x1208;
+ fiat_secp384r1_uint1 x1209;
+ uint32_t x1210;
+ fiat_secp384r1_uint1 x1211;
+ uint32_t x1212;
+ fiat_secp384r1_uint1 x1213;
+ uint32_t x1214;
+ fiat_secp384r1_uint1 x1215;
+ uint32_t x1216;
+ fiat_secp384r1_uint1 x1217;
+ uint32_t x1218;
+ uint32_t x1219;
+ uint32_t x1220;
+ uint32_t x1221;
+ uint32_t x1222;
+ uint32_t x1223;
+ uint32_t x1224;
+ uint32_t x1225;
+ uint32_t x1226;
+ uint32_t x1227;
+ uint32_t x1228;
+ uint32_t x1229;
+ uint32_t x1230;
+ uint32_t x1231;
+ uint32_t x1232;
+ uint32_t x1233;
+ uint32_t x1234;
+ uint32_t x1235;
+ uint32_t x1236;
+ uint32_t x1237;
+ uint32_t x1238;
+ uint32_t x1239;
+ uint32_t x1240;
+ uint32_t x1241;
+ uint32_t x1242;
+ uint32_t x1243;
+ fiat_secp384r1_uint1 x1244;
+ uint32_t x1245;
+ fiat_secp384r1_uint1 x1246;
+ uint32_t x1247;
+ fiat_secp384r1_uint1 x1248;
+ uint32_t x1249;
+ fiat_secp384r1_uint1 x1250;
+ uint32_t x1251;
+ fiat_secp384r1_uint1 x1252;
+ uint32_t x1253;
+ fiat_secp384r1_uint1 x1254;
+ uint32_t x1255;
+ fiat_secp384r1_uint1 x1256;
+ uint32_t x1257;
+ fiat_secp384r1_uint1 x1258;
+ uint32_t x1259;
+ fiat_secp384r1_uint1 x1260;
+ uint32_t x1261;
+ fiat_secp384r1_uint1 x1262;
+ uint32_t x1263;
+ fiat_secp384r1_uint1 x1264;
+ uint32_t x1265;
+ uint32_t x1266;
+ fiat_secp384r1_uint1 x1267;
+ uint32_t x1268;
+ fiat_secp384r1_uint1 x1269;
+ uint32_t x1270;
+ fiat_secp384r1_uint1 x1271;
+ uint32_t x1272;
+ fiat_secp384r1_uint1 x1273;
+ uint32_t x1274;
+ fiat_secp384r1_uint1 x1275;
+ uint32_t x1276;
+ fiat_secp384r1_uint1 x1277;
+ uint32_t x1278;
+ fiat_secp384r1_uint1 x1279;
+ uint32_t x1280;
+ fiat_secp384r1_uint1 x1281;
+ uint32_t x1282;
+ fiat_secp384r1_uint1 x1283;
+ uint32_t x1284;
+ fiat_secp384r1_uint1 x1285;
+ uint32_t x1286;
+ fiat_secp384r1_uint1 x1287;
+ uint32_t x1288;
+ fiat_secp384r1_uint1 x1289;
+ uint32_t x1290;
+ fiat_secp384r1_uint1 x1291;
+ uint32_t x1292;
+ uint32_t x1293;
+ uint32_t x1294;
+ uint32_t x1295;
+ uint32_t x1296;
+ uint32_t x1297;
+ uint32_t x1298;
+ uint32_t x1299;
+ uint32_t x1300;
+ uint32_t x1301;
+ uint32_t x1302;
+ uint32_t x1303;
+ uint32_t x1304;
+ uint32_t x1305;
+ uint32_t x1306;
+ uint32_t x1307;
+ uint32_t x1308;
+ uint32_t x1309;
+ uint32_t x1310;
+ uint32_t x1311;
+ uint32_t x1312;
+ fiat_secp384r1_uint1 x1313;
+ uint32_t x1314;
+ fiat_secp384r1_uint1 x1315;
+ uint32_t x1316;
+ fiat_secp384r1_uint1 x1317;
+ uint32_t x1318;
+ fiat_secp384r1_uint1 x1319;
+ uint32_t x1320;
+ fiat_secp384r1_uint1 x1321;
+ uint32_t x1322;
+ fiat_secp384r1_uint1 x1323;
+ uint32_t x1324;
+ fiat_secp384r1_uint1 x1325;
+ uint32_t x1326;
+ fiat_secp384r1_uint1 x1327;
+ uint32_t x1328;
+ uint32_t x1329;
+ fiat_secp384r1_uint1 x1330;
+ uint32_t x1331;
+ fiat_secp384r1_uint1 x1332;
+ uint32_t x1333;
+ fiat_secp384r1_uint1 x1334;
+ uint32_t x1335;
+ fiat_secp384r1_uint1 x1336;
+ uint32_t x1337;
+ fiat_secp384r1_uint1 x1338;
+ uint32_t x1339;
+ fiat_secp384r1_uint1 x1340;
+ uint32_t x1341;
+ fiat_secp384r1_uint1 x1342;
+ uint32_t x1343;
+ fiat_secp384r1_uint1 x1344;
+ uint32_t x1345;
+ fiat_secp384r1_uint1 x1346;
+ uint32_t x1347;
+ fiat_secp384r1_uint1 x1348;
+ uint32_t x1349;
+ fiat_secp384r1_uint1 x1350;
+ uint32_t x1351;
+ fiat_secp384r1_uint1 x1352;
+ uint32_t x1353;
+ fiat_secp384r1_uint1 x1354;
+ uint32_t x1355;
+ uint32_t x1356;
+ uint32_t x1357;
+ uint32_t x1358;
+ uint32_t x1359;
+ uint32_t x1360;
+ uint32_t x1361;
+ uint32_t x1362;
+ uint32_t x1363;
+ uint32_t x1364;
+ uint32_t x1365;
+ uint32_t x1366;
+ uint32_t x1367;
+ uint32_t x1368;
+ uint32_t x1369;
+ uint32_t x1370;
+ uint32_t x1371;
+ uint32_t x1372;
+ uint32_t x1373;
+ uint32_t x1374;
+ uint32_t x1375;
+ uint32_t x1376;
+ uint32_t x1377;
+ uint32_t x1378;
+ uint32_t x1379;
+ uint32_t x1380;
+ fiat_secp384r1_uint1 x1381;
+ uint32_t x1382;
+ fiat_secp384r1_uint1 x1383;
+ uint32_t x1384;
+ fiat_secp384r1_uint1 x1385;
+ uint32_t x1386;
+ fiat_secp384r1_uint1 x1387;
+ uint32_t x1388;
+ fiat_secp384r1_uint1 x1389;
+ uint32_t x1390;
+ fiat_secp384r1_uint1 x1391;
+ uint32_t x1392;
+ fiat_secp384r1_uint1 x1393;
+ uint32_t x1394;
+ fiat_secp384r1_uint1 x1395;
+ uint32_t x1396;
+ fiat_secp384r1_uint1 x1397;
+ uint32_t x1398;
+ fiat_secp384r1_uint1 x1399;
+ uint32_t x1400;
+ fiat_secp384r1_uint1 x1401;
+ uint32_t x1402;
+ uint32_t x1403;
+ fiat_secp384r1_uint1 x1404;
+ uint32_t x1405;
+ fiat_secp384r1_uint1 x1406;
+ uint32_t x1407;
+ fiat_secp384r1_uint1 x1408;
+ uint32_t x1409;
+ fiat_secp384r1_uint1 x1410;
+ uint32_t x1411;
+ fiat_secp384r1_uint1 x1412;
+ uint32_t x1413;
+ fiat_secp384r1_uint1 x1414;
+ uint32_t x1415;
+ fiat_secp384r1_uint1 x1416;
+ uint32_t x1417;
+ fiat_secp384r1_uint1 x1418;
+ uint32_t x1419;
+ fiat_secp384r1_uint1 x1420;
+ uint32_t x1421;
+ fiat_secp384r1_uint1 x1422;
+ uint32_t x1423;
+ fiat_secp384r1_uint1 x1424;
+ uint32_t x1425;
+ fiat_secp384r1_uint1 x1426;
+ uint32_t x1427;
+ fiat_secp384r1_uint1 x1428;
+ uint32_t x1429;
+ uint32_t x1430;
+ uint32_t x1431;
+ uint32_t x1432;
+ uint32_t x1433;
+ uint32_t x1434;
+ uint32_t x1435;
+ uint32_t x1436;
+ uint32_t x1437;
+ uint32_t x1438;
+ uint32_t x1439;
+ uint32_t x1440;
+ uint32_t x1441;
+ uint32_t x1442;
+ uint32_t x1443;
+ uint32_t x1444;
+ uint32_t x1445;
+ uint32_t x1446;
+ uint32_t x1447;
+ uint32_t x1448;
+ uint32_t x1449;
+ fiat_secp384r1_uint1 x1450;
+ uint32_t x1451;
+ fiat_secp384r1_uint1 x1452;
+ uint32_t x1453;
+ fiat_secp384r1_uint1 x1454;
+ uint32_t x1455;
+ fiat_secp384r1_uint1 x1456;
+ uint32_t x1457;
+ fiat_secp384r1_uint1 x1458;
+ uint32_t x1459;
+ fiat_secp384r1_uint1 x1460;
+ uint32_t x1461;
+ fiat_secp384r1_uint1 x1462;
+ uint32_t x1463;
+ fiat_secp384r1_uint1 x1464;
+ uint32_t x1465;
+ uint32_t x1466;
+ fiat_secp384r1_uint1 x1467;
+ uint32_t x1468;
+ fiat_secp384r1_uint1 x1469;
+ uint32_t x1470;
+ fiat_secp384r1_uint1 x1471;
+ uint32_t x1472;
+ fiat_secp384r1_uint1 x1473;
+ uint32_t x1474;
+ fiat_secp384r1_uint1 x1475;
+ uint32_t x1476;
+ fiat_secp384r1_uint1 x1477;
+ uint32_t x1478;
+ fiat_secp384r1_uint1 x1479;
+ uint32_t x1480;
+ fiat_secp384r1_uint1 x1481;
+ uint32_t x1482;
+ fiat_secp384r1_uint1 x1483;
+ uint32_t x1484;
+ fiat_secp384r1_uint1 x1485;
+ uint32_t x1486;
+ fiat_secp384r1_uint1 x1487;
+ uint32_t x1488;
+ fiat_secp384r1_uint1 x1489;
+ uint32_t x1490;
+ fiat_secp384r1_uint1 x1491;
+ uint32_t x1492;
+ uint32_t x1493;
+ uint32_t x1494;
+ uint32_t x1495;
+ uint32_t x1496;
+ uint32_t x1497;
+ uint32_t x1498;
+ uint32_t x1499;
+ uint32_t x1500;
+ uint32_t x1501;
+ uint32_t x1502;
+ uint32_t x1503;
+ uint32_t x1504;
+ uint32_t x1505;
+ uint32_t x1506;
+ uint32_t x1507;
+ uint32_t x1508;
+ uint32_t x1509;
+ uint32_t x1510;
+ uint32_t x1511;
+ uint32_t x1512;
+ uint32_t x1513;
+ uint32_t x1514;
+ uint32_t x1515;
+ uint32_t x1516;
+ uint32_t x1517;
+ fiat_secp384r1_uint1 x1518;
+ uint32_t x1519;
+ fiat_secp384r1_uint1 x1520;
+ uint32_t x1521;
+ fiat_secp384r1_uint1 x1522;
+ uint32_t x1523;
+ fiat_secp384r1_uint1 x1524;
+ uint32_t x1525;
+ fiat_secp384r1_uint1 x1526;
+ uint32_t x1527;
+ fiat_secp384r1_uint1 x1528;
+ uint32_t x1529;
+ fiat_secp384r1_uint1 x1530;
+ uint32_t x1531;
+ fiat_secp384r1_uint1 x1532;
+ uint32_t x1533;
+ fiat_secp384r1_uint1 x1534;
+ uint32_t x1535;
+ fiat_secp384r1_uint1 x1536;
+ uint32_t x1537;
+ fiat_secp384r1_uint1 x1538;
+ uint32_t x1539;
+ uint32_t x1540;
+ fiat_secp384r1_uint1 x1541;
+ uint32_t x1542;
+ fiat_secp384r1_uint1 x1543;
+ uint32_t x1544;
+ fiat_secp384r1_uint1 x1545;
+ uint32_t x1546;
+ fiat_secp384r1_uint1 x1547;
+ uint32_t x1548;
+ fiat_secp384r1_uint1 x1549;
+ uint32_t x1550;
+ fiat_secp384r1_uint1 x1551;
+ uint32_t x1552;
+ fiat_secp384r1_uint1 x1553;
+ uint32_t x1554;
+ fiat_secp384r1_uint1 x1555;
+ uint32_t x1556;
+ fiat_secp384r1_uint1 x1557;
+ uint32_t x1558;
+ fiat_secp384r1_uint1 x1559;
+ uint32_t x1560;
+ fiat_secp384r1_uint1 x1561;
+ uint32_t x1562;
+ fiat_secp384r1_uint1 x1563;
+ uint32_t x1564;
+ fiat_secp384r1_uint1 x1565;
+ uint32_t x1566;
+ uint32_t x1567;
+ uint32_t x1568;
+ uint32_t x1569;
+ uint32_t x1570;
+ uint32_t x1571;
+ uint32_t x1572;
+ uint32_t x1573;
+ uint32_t x1574;
+ uint32_t x1575;
+ uint32_t x1576;
+ uint32_t x1577;
+ uint32_t x1578;
+ uint32_t x1579;
+ uint32_t x1580;
+ uint32_t x1581;
+ uint32_t x1582;
+ uint32_t x1583;
+ uint32_t x1584;
+ uint32_t x1585;
+ uint32_t x1586;
+ fiat_secp384r1_uint1 x1587;
+ uint32_t x1588;
+ fiat_secp384r1_uint1 x1589;
+ uint32_t x1590;
+ fiat_secp384r1_uint1 x1591;
+ uint32_t x1592;
+ fiat_secp384r1_uint1 x1593;
+ uint32_t x1594;
+ fiat_secp384r1_uint1 x1595;
+ uint32_t x1596;
+ fiat_secp384r1_uint1 x1597;
+ uint32_t x1598;
+ fiat_secp384r1_uint1 x1599;
+ uint32_t x1600;
+ fiat_secp384r1_uint1 x1601;
+ uint32_t x1602;
+ uint32_t x1603;
+ fiat_secp384r1_uint1 x1604;
+ uint32_t x1605;
+ fiat_secp384r1_uint1 x1606;
+ uint32_t x1607;
+ fiat_secp384r1_uint1 x1608;
+ uint32_t x1609;
+ fiat_secp384r1_uint1 x1610;
+ uint32_t x1611;
+ fiat_secp384r1_uint1 x1612;
+ uint32_t x1613;
+ fiat_secp384r1_uint1 x1614;
+ uint32_t x1615;
+ fiat_secp384r1_uint1 x1616;
+ uint32_t x1617;
+ fiat_secp384r1_uint1 x1618;
+ uint32_t x1619;
+ fiat_secp384r1_uint1 x1620;
+ uint32_t x1621;
+ fiat_secp384r1_uint1 x1622;
+ uint32_t x1623;
+ fiat_secp384r1_uint1 x1624;
+ uint32_t x1625;
+ fiat_secp384r1_uint1 x1626;
+ uint32_t x1627;
+ fiat_secp384r1_uint1 x1628;
+ uint32_t x1629;
+ uint32_t x1630;
+ fiat_secp384r1_uint1 x1631;
+ uint32_t x1632;
+ fiat_secp384r1_uint1 x1633;
+ uint32_t x1634;
+ fiat_secp384r1_uint1 x1635;
+ uint32_t x1636;
+ fiat_secp384r1_uint1 x1637;
+ uint32_t x1638;
+ fiat_secp384r1_uint1 x1639;
+ uint32_t x1640;
+ fiat_secp384r1_uint1 x1641;
+ uint32_t x1642;
+ fiat_secp384r1_uint1 x1643;
+ uint32_t x1644;
+ fiat_secp384r1_uint1 x1645;
+ uint32_t x1646;
+ fiat_secp384r1_uint1 x1647;
+ uint32_t x1648;
+ fiat_secp384r1_uint1 x1649;
+ uint32_t x1650;
+ fiat_secp384r1_uint1 x1651;
+ uint32_t x1652;
+ fiat_secp384r1_uint1 x1653;
+ uint32_t x1654;
+ fiat_secp384r1_uint1 x1655;
+ uint32_t x1656;
+ uint32_t x1657;
+ uint32_t x1658;
+ uint32_t x1659;
+ uint32_t x1660;
+ uint32_t x1661;
+ uint32_t x1662;
+ uint32_t x1663;
+ uint32_t x1664;
+ uint32_t x1665;
+ uint32_t x1666;
+ uint32_t x1667;
+ x1 = (arg1[1]);
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[6]);
+ x7 = (arg1[7]);
+ x8 = (arg1[8]);
+ x9 = (arg1[9]);
+ x10 = (arg1[10]);
+ x11 = (arg1[11]);
+ x12 = (arg1[0]);
+ fiat_secp384r1_mulx_u32(&x13, &x14, x12, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x15, &x16, x12, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x17, &x18, x12, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x19, &x20, x12, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x21, &x22, x12, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x23, &x24, x12, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x25, &x26, x12, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x27, &x28, x12, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x29, &x30, x12, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x31, &x32, x12, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x33, &x34, x12, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x35, &x36, x12, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x37, &x38, 0x0, x36, x33);
+ fiat_secp384r1_addcarryx_u32(&x39, &x40, x38, x34, x31);
+ fiat_secp384r1_addcarryx_u32(&x41, &x42, x40, x32, x29);
+ fiat_secp384r1_addcarryx_u32(&x43, &x44, x42, x30, x27);
+ fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x28, x25);
+ fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x26, x23);
+ fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x24, x21);
+ fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x22, x19);
+ fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x20, x17);
+ fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x18, x15);
+ fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x16, x13);
+ x59 = (x58 + x14);
+ fiat_secp384r1_mulx_u32(&x60, &x61, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x62, &x63, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x64, &x65, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x66, &x67, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x68, &x69, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x70, &x71, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x72, &x73, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x74, &x75, x35, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x76, &x77, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x78, &x79, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x80, &x81, 0x0, x77, x74);
+ fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x75, x72);
+ fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x73, x70);
+ fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x71, x68);
+ fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x69, x66);
+ fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x67, x64);
+ fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x65, x62);
+ fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x63, x60);
+ x96 = (x95 + x61);
+ fiat_secp384r1_addcarryx_u32(&x97, &x98, 0x0, x35, x78);
+ fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x37, x79);
+ fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x39, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x41, x76);
+ fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x43, x80);
+ fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x45, x82);
+ fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x47, x84);
+ fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x49, x86);
+ fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x51, x88);
+ fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x53, x90);
+ fiat_secp384r1_addcarryx_u32(&x117, &x118, x116, x55, x92);
+ fiat_secp384r1_addcarryx_u32(&x119, &x120, x118, x57, x94);
+ fiat_secp384r1_addcarryx_u32(&x121, &x122, x120, x59, x96);
+ fiat_secp384r1_mulx_u32(&x123, &x124, x1, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x125, &x126, x1, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x127, &x128, x1, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x129, &x130, x1, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x131, &x132, x1, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x133, &x134, x1, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x135, &x136, x1, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x137, &x138, x1, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x139, &x140, x1, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x141, &x142, x1, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x143, &x144, x1, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x145, &x146, x1, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x147, &x148, 0x0, x146, x143);
+ fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x144, x141);
+ fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x142, x139);
+ fiat_secp384r1_addcarryx_u32(&x153, &x154, x152, x140, x137);
+ fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x138, x135);
+ fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x136, x133);
+ fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x134, x131);
+ fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x132, x129);
+ fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x130, x127);
+ fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x128, x125);
+ fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x126, x123);
+ x169 = (x168 + x124);
+ fiat_secp384r1_addcarryx_u32(&x170, &x171, 0x0, x99, x145);
+ fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x101, x147);
+ fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x103, x149);
+ fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x105, x151);
+ fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x107, x153);
+ fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x109, x155);
+ fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x111, x157);
+ fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x113, x159);
+ fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x115, x161);
+ fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, x117, x163);
+ fiat_secp384r1_addcarryx_u32(&x190, &x191, x189, x119, x165);
+ fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x121, x167);
+ fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x122, x169);
+ fiat_secp384r1_mulx_u32(&x196, &x197, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x198, &x199, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x200, &x201, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x202, &x203, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x204, &x205, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x206, &x207, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x208, &x209, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x210, &x211, x170, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x212, &x213, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x214, &x215, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x216, &x217, 0x0, x213, x210);
+ fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x211, x208);
+ fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x209, x206);
+ fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x207, x204);
+ fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x205, x202);
+ fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x203, x200);
+ fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x201, x198);
+ fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x199, x196);
+ x232 = (x231 + x197);
+ fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x170, x214);
+ fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x172, x215);
+ fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x174, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x176, x212);
+ fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x178, x216);
+ fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x180, x218);
+ fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x182, x220);
+ fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x184, x222);
+ fiat_secp384r1_addcarryx_u32(&x249, &x250, x248, x186, x224);
+ fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x188, x226);
+ fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x190, x228);
+ fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x192, x230);
+ fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x194, x232);
+ x259 = ((uint32_t)x258 + x195);
+ fiat_secp384r1_mulx_u32(&x260, &x261, x2, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x262, &x263, x2, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x264, &x265, x2, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x266, &x267, x2, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x268, &x269, x2, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x270, &x271, x2, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x272, &x273, x2, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x274, &x275, x2, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x276, &x277, x2, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x278, &x279, x2, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x280, &x281, x2, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x282, &x283, x2, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x284, &x285, 0x0, x283, x280);
+ fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x281, x278);
+ fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x279, x276);
+ fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x277, x274);
+ fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x275, x272);
+ fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x273, x270);
+ fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x271, x268);
+ fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x269, x266);
+ fiat_secp384r1_addcarryx_u32(&x300, &x301, x299, x267, x264);
+ fiat_secp384r1_addcarryx_u32(&x302, &x303, x301, x265, x262);
+ fiat_secp384r1_addcarryx_u32(&x304, &x305, x303, x263, x260);
+ x306 = (x305 + x261);
+ fiat_secp384r1_addcarryx_u32(&x307, &x308, 0x0, x235, x282);
+ fiat_secp384r1_addcarryx_u32(&x309, &x310, x308, x237, x284);
+ fiat_secp384r1_addcarryx_u32(&x311, &x312, x310, x239, x286);
+ fiat_secp384r1_addcarryx_u32(&x313, &x314, x312, x241, x288);
+ fiat_secp384r1_addcarryx_u32(&x315, &x316, x314, x243, x290);
+ fiat_secp384r1_addcarryx_u32(&x317, &x318, x316, x245, x292);
+ fiat_secp384r1_addcarryx_u32(&x319, &x320, x318, x247, x294);
+ fiat_secp384r1_addcarryx_u32(&x321, &x322, x320, x249, x296);
+ fiat_secp384r1_addcarryx_u32(&x323, &x324, x322, x251, x298);
+ fiat_secp384r1_addcarryx_u32(&x325, &x326, x324, x253, x300);
+ fiat_secp384r1_addcarryx_u32(&x327, &x328, x326, x255, x302);
+ fiat_secp384r1_addcarryx_u32(&x329, &x330, x328, x257, x304);
+ fiat_secp384r1_addcarryx_u32(&x331, &x332, x330, x259, x306);
+ fiat_secp384r1_mulx_u32(&x333, &x334, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x335, &x336, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x337, &x338, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x339, &x340, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x341, &x342, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x343, &x344, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x345, &x346, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x347, &x348, x307, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x349, &x350, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x351, &x352, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x353, &x354, 0x0, x350, x347);
+ fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x348, x345);
+ fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x346, x343);
+ fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x344, x341);
+ fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x342, x339);
+ fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x340, x337);
+ fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x338, x335);
+ fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x336, x333);
+ x369 = (x368 + x334);
+ fiat_secp384r1_addcarryx_u32(&x370, &x371, 0x0, x307, x351);
+ fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x309, x352);
+ fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x311, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x313, x349);
+ fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x315, x353);
+ fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x317, x355);
+ fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x319, x357);
+ fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x321, x359);
+ fiat_secp384r1_addcarryx_u32(&x386, &x387, x385, x323, x361);
+ fiat_secp384r1_addcarryx_u32(&x388, &x389, x387, x325, x363);
+ fiat_secp384r1_addcarryx_u32(&x390, &x391, x389, x327, x365);
+ fiat_secp384r1_addcarryx_u32(&x392, &x393, x391, x329, x367);
+ fiat_secp384r1_addcarryx_u32(&x394, &x395, x393, x331, x369);
+ x396 = ((uint32_t)x395 + x332);
+ fiat_secp384r1_mulx_u32(&x397, &x398, x3, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x399, &x400, x3, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x401, &x402, x3, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x403, &x404, x3, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x405, &x406, x3, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x407, &x408, x3, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x409, &x410, x3, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x411, &x412, x3, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x413, &x414, x3, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x415, &x416, x3, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x417, &x418, x3, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x419, &x420, x3, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x421, &x422, 0x0, x420, x417);
+ fiat_secp384r1_addcarryx_u32(&x423, &x424, x422, x418, x415);
+ fiat_secp384r1_addcarryx_u32(&x425, &x426, x424, x416, x413);
+ fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x414, x411);
+ fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x412, x409);
+ fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x410, x407);
+ fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x408, x405);
+ fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x406, x403);
+ fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x404, x401);
+ fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x402, x399);
+ fiat_secp384r1_addcarryx_u32(&x441, &x442, x440, x400, x397);
+ x443 = (x442 + x398);
+ fiat_secp384r1_addcarryx_u32(&x444, &x445, 0x0, x372, x419);
+ fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, x374, x421);
+ fiat_secp384r1_addcarryx_u32(&x448, &x449, x447, x376, x423);
+ fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x378, x425);
+ fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x380, x427);
+ fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x382, x429);
+ fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x384, x431);
+ fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x386, x433);
+ fiat_secp384r1_addcarryx_u32(&x460, &x461, x459, x388, x435);
+ fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x390, x437);
+ fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x392, x439);
+ fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x394, x441);
+ fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x396, x443);
+ fiat_secp384r1_mulx_u32(&x470, &x471, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x472, &x473, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x474, &x475, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x476, &x477, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x478, &x479, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x480, &x481, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x482, &x483, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x484, &x485, x444, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x486, &x487, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x488, &x489, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x490, &x491, 0x0, x487, x484);
+ fiat_secp384r1_addcarryx_u32(&x492, &x493, x491, x485, x482);
+ fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x483, x480);
+ fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x481, x478);
+ fiat_secp384r1_addcarryx_u32(&x498, &x499, x497, x479, x476);
+ fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x477, x474);
+ fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x475, x472);
+ fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x473, x470);
+ x506 = (x505 + x471);
+ fiat_secp384r1_addcarryx_u32(&x507, &x508, 0x0, x444, x488);
+ fiat_secp384r1_addcarryx_u32(&x509, &x510, x508, x446, x489);
+ fiat_secp384r1_addcarryx_u32(&x511, &x512, x510, x448, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x513, &x514, x512, x450, x486);
+ fiat_secp384r1_addcarryx_u32(&x515, &x516, x514, x452, x490);
+ fiat_secp384r1_addcarryx_u32(&x517, &x518, x516, x454, x492);
+ fiat_secp384r1_addcarryx_u32(&x519, &x520, x518, x456, x494);
+ fiat_secp384r1_addcarryx_u32(&x521, &x522, x520, x458, x496);
+ fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x460, x498);
+ fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x462, x500);
+ fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x464, x502);
+ fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x466, x504);
+ fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x468, x506);
+ x533 = ((uint32_t)x532 + x469);
+ fiat_secp384r1_mulx_u32(&x534, &x535, x4, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x536, &x537, x4, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x538, &x539, x4, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x540, &x541, x4, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x542, &x543, x4, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x544, &x545, x4, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x546, &x547, x4, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x548, &x549, x4, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x550, &x551, x4, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x552, &x553, x4, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x554, &x555, x4, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x556, &x557, x4, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x558, &x559, 0x0, x557, x554);
+ fiat_secp384r1_addcarryx_u32(&x560, &x561, x559, x555, x552);
+ fiat_secp384r1_addcarryx_u32(&x562, &x563, x561, x553, x550);
+ fiat_secp384r1_addcarryx_u32(&x564, &x565, x563, x551, x548);
+ fiat_secp384r1_addcarryx_u32(&x566, &x567, x565, x549, x546);
+ fiat_secp384r1_addcarryx_u32(&x568, &x569, x567, x547, x544);
+ fiat_secp384r1_addcarryx_u32(&x570, &x571, x569, x545, x542);
+ fiat_secp384r1_addcarryx_u32(&x572, &x573, x571, x543, x540);
+ fiat_secp384r1_addcarryx_u32(&x574, &x575, x573, x541, x538);
+ fiat_secp384r1_addcarryx_u32(&x576, &x577, x575, x539, x536);
+ fiat_secp384r1_addcarryx_u32(&x578, &x579, x577, x537, x534);
+ x580 = (x579 + x535);
+ fiat_secp384r1_addcarryx_u32(&x581, &x582, 0x0, x509, x556);
+ fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x511, x558);
+ fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x513, x560);
+ fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x515, x562);
+ fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x517, x564);
+ fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x519, x566);
+ fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x521, x568);
+ fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x523, x570);
+ fiat_secp384r1_addcarryx_u32(&x597, &x598, x596, x525, x572);
+ fiat_secp384r1_addcarryx_u32(&x599, &x600, x598, x527, x574);
+ fiat_secp384r1_addcarryx_u32(&x601, &x602, x600, x529, x576);
+ fiat_secp384r1_addcarryx_u32(&x603, &x604, x602, x531, x578);
+ fiat_secp384r1_addcarryx_u32(&x605, &x606, x604, x533, x580);
+ fiat_secp384r1_mulx_u32(&x607, &x608, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x609, &x610, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x611, &x612, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x613, &x614, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x615, &x616, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x617, &x618, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x619, &x620, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x621, &x622, x581, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x623, &x624, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x625, &x626, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x627, &x628, 0x0, x624, x621);
+ fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x622, x619);
+ fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x620, x617);
+ fiat_secp384r1_addcarryx_u32(&x633, &x634, x632, x618, x615);
+ fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x616, x613);
+ fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x614, x611);
+ fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x612, x609);
+ fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x610, x607);
+ x643 = (x642 + x608);
+ fiat_secp384r1_addcarryx_u32(&x644, &x645, 0x0, x581, x625);
+ fiat_secp384r1_addcarryx_u32(&x646, &x647, x645, x583, x626);
+ fiat_secp384r1_addcarryx_u32(&x648, &x649, x647, x585, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x650, &x651, x649, x587, x623);
+ fiat_secp384r1_addcarryx_u32(&x652, &x653, x651, x589, x627);
+ fiat_secp384r1_addcarryx_u32(&x654, &x655, x653, x591, x629);
+ fiat_secp384r1_addcarryx_u32(&x656, &x657, x655, x593, x631);
+ fiat_secp384r1_addcarryx_u32(&x658, &x659, x657, x595, x633);
+ fiat_secp384r1_addcarryx_u32(&x660, &x661, x659, x597, x635);
+ fiat_secp384r1_addcarryx_u32(&x662, &x663, x661, x599, x637);
+ fiat_secp384r1_addcarryx_u32(&x664, &x665, x663, x601, x639);
+ fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x603, x641);
+ fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x605, x643);
+ x670 = ((uint32_t)x669 + x606);
+ fiat_secp384r1_mulx_u32(&x671, &x672, x5, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x673, &x674, x5, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x675, &x676, x5, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x677, &x678, x5, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x679, &x680, x5, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x681, &x682, x5, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x683, &x684, x5, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x685, &x686, x5, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x687, &x688, x5, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x689, &x690, x5, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x691, &x692, x5, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x693, &x694, x5, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x695, &x696, 0x0, x694, x691);
+ fiat_secp384r1_addcarryx_u32(&x697, &x698, x696, x692, x689);
+ fiat_secp384r1_addcarryx_u32(&x699, &x700, x698, x690, x687);
+ fiat_secp384r1_addcarryx_u32(&x701, &x702, x700, x688, x685);
+ fiat_secp384r1_addcarryx_u32(&x703, &x704, x702, x686, x683);
+ fiat_secp384r1_addcarryx_u32(&x705, &x706, x704, x684, x681);
+ fiat_secp384r1_addcarryx_u32(&x707, &x708, x706, x682, x679);
+ fiat_secp384r1_addcarryx_u32(&x709, &x710, x708, x680, x677);
+ fiat_secp384r1_addcarryx_u32(&x711, &x712, x710, x678, x675);
+ fiat_secp384r1_addcarryx_u32(&x713, &x714, x712, x676, x673);
+ fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x674, x671);
+ x717 = (x716 + x672);
+ fiat_secp384r1_addcarryx_u32(&x718, &x719, 0x0, x646, x693);
+ fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x648, x695);
+ fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x650, x697);
+ fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x652, x699);
+ fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x654, x701);
+ fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x656, x703);
+ fiat_secp384r1_addcarryx_u32(&x730, &x731, x729, x658, x705);
+ fiat_secp384r1_addcarryx_u32(&x732, &x733, x731, x660, x707);
+ fiat_secp384r1_addcarryx_u32(&x734, &x735, x733, x662, x709);
+ fiat_secp384r1_addcarryx_u32(&x736, &x737, x735, x664, x711);
+ fiat_secp384r1_addcarryx_u32(&x738, &x739, x737, x666, x713);
+ fiat_secp384r1_addcarryx_u32(&x740, &x741, x739, x668, x715);
+ fiat_secp384r1_addcarryx_u32(&x742, &x743, x741, x670, x717);
+ fiat_secp384r1_mulx_u32(&x744, &x745, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x746, &x747, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x748, &x749, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x750, &x751, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x752, &x753, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x754, &x755, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x756, &x757, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x758, &x759, x718, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x760, &x761, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x762, &x763, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x764, &x765, 0x0, x761, x758);
+ fiat_secp384r1_addcarryx_u32(&x766, &x767, x765, x759, x756);
+ fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x757, x754);
+ fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x755, x752);
+ fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x753, x750);
+ fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x751, x748);
+ fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x749, x746);
+ fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x747, x744);
+ x780 = (x779 + x745);
+ fiat_secp384r1_addcarryx_u32(&x781, &x782, 0x0, x718, x762);
+ fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x720, x763);
+ fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x722, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x724, x760);
+ fiat_secp384r1_addcarryx_u32(&x789, &x790, x788, x726, x764);
+ fiat_secp384r1_addcarryx_u32(&x791, &x792, x790, x728, x766);
+ fiat_secp384r1_addcarryx_u32(&x793, &x794, x792, x730, x768);
+ fiat_secp384r1_addcarryx_u32(&x795, &x796, x794, x732, x770);
+ fiat_secp384r1_addcarryx_u32(&x797, &x798, x796, x734, x772);
+ fiat_secp384r1_addcarryx_u32(&x799, &x800, x798, x736, x774);
+ fiat_secp384r1_addcarryx_u32(&x801, &x802, x800, x738, x776);
+ fiat_secp384r1_addcarryx_u32(&x803, &x804, x802, x740, x778);
+ fiat_secp384r1_addcarryx_u32(&x805, &x806, x804, x742, x780);
+ x807 = ((uint32_t)x806 + x743);
+ fiat_secp384r1_mulx_u32(&x808, &x809, x6, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x810, &x811, x6, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x812, &x813, x6, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x814, &x815, x6, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x816, &x817, x6, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x818, &x819, x6, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x820, &x821, x6, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x822, &x823, x6, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x824, &x825, x6, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x826, &x827, x6, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x828, &x829, x6, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x830, &x831, x6, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x832, &x833, 0x0, x831, x828);
+ fiat_secp384r1_addcarryx_u32(&x834, &x835, x833, x829, x826);
+ fiat_secp384r1_addcarryx_u32(&x836, &x837, x835, x827, x824);
+ fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x825, x822);
+ fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x823, x820);
+ fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x821, x818);
+ fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x819, x816);
+ fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x817, x814);
+ fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x815, x812);
+ fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x813, x810);
+ fiat_secp384r1_addcarryx_u32(&x852, &x853, x851, x811, x808);
+ x854 = (x853 + x809);
+ fiat_secp384r1_addcarryx_u32(&x855, &x856, 0x0, x783, x830);
+ fiat_secp384r1_addcarryx_u32(&x857, &x858, x856, x785, x832);
+ fiat_secp384r1_addcarryx_u32(&x859, &x860, x858, x787, x834);
+ fiat_secp384r1_addcarryx_u32(&x861, &x862, x860, x789, x836);
+ fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x791, x838);
+ fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x793, x840);
+ fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x795, x842);
+ fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x797, x844);
+ fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x799, x846);
+ fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x801, x848);
+ fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x803, x850);
+ fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x805, x852);
+ fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x807, x854);
+ fiat_secp384r1_mulx_u32(&x881, &x882, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x883, &x884, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x885, &x886, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x887, &x888, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x889, &x890, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x891, &x892, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x893, &x894, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x895, &x896, x855, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x897, &x898, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x899, &x900, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x901, &x902, 0x0, x898, x895);
+ fiat_secp384r1_addcarryx_u32(&x903, &x904, x902, x896, x893);
+ fiat_secp384r1_addcarryx_u32(&x905, &x906, x904, x894, x891);
+ fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x892, x889);
+ fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x890, x887);
+ fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x888, x885);
+ fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x886, x883);
+ fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x884, x881);
+ x917 = (x916 + x882);
+ fiat_secp384r1_addcarryx_u32(&x918, &x919, 0x0, x855, x899);
+ fiat_secp384r1_addcarryx_u32(&x920, &x921, x919, x857, x900);
+ fiat_secp384r1_addcarryx_u32(&x922, &x923, x921, x859, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x861, x897);
+ fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x863, x901);
+ fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x865, x903);
+ fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x867, x905);
+ fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x869, x907);
+ fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x871, x909);
+ fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x873, x911);
+ fiat_secp384r1_addcarryx_u32(&x938, &x939, x937, x875, x913);
+ fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x877, x915);
+ fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x879, x917);
+ x944 = ((uint32_t)x943 + x880);
+ fiat_secp384r1_mulx_u32(&x945, &x946, x7, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x947, &x948, x7, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x949, &x950, x7, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x951, &x952, x7, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x953, &x954, x7, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x955, &x956, x7, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x957, &x958, x7, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x959, &x960, x7, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x961, &x962, x7, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x963, &x964, x7, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x965, &x966, x7, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x967, &x968, x7, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x969, &x970, 0x0, x968, x965);
+ fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x966, x963);
+ fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x964, x961);
+ fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x962, x959);
+ fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x960, x957);
+ fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x958, x955);
+ fiat_secp384r1_addcarryx_u32(&x981, &x982, x980, x956, x953);
+ fiat_secp384r1_addcarryx_u32(&x983, &x984, x982, x954, x951);
+ fiat_secp384r1_addcarryx_u32(&x985, &x986, x984, x952, x949);
+ fiat_secp384r1_addcarryx_u32(&x987, &x988, x986, x950, x947);
+ fiat_secp384r1_addcarryx_u32(&x989, &x990, x988, x948, x945);
+ x991 = (x990 + x946);
+ fiat_secp384r1_addcarryx_u32(&x992, &x993, 0x0, x920, x967);
+ fiat_secp384r1_addcarryx_u32(&x994, &x995, x993, x922, x969);
+ fiat_secp384r1_addcarryx_u32(&x996, &x997, x995, x924, x971);
+ fiat_secp384r1_addcarryx_u32(&x998, &x999, x997, x926, x973);
+ fiat_secp384r1_addcarryx_u32(&x1000, &x1001, x999, x928, x975);
+ fiat_secp384r1_addcarryx_u32(&x1002, &x1003, x1001, x930, x977);
+ fiat_secp384r1_addcarryx_u32(&x1004, &x1005, x1003, x932, x979);
+ fiat_secp384r1_addcarryx_u32(&x1006, &x1007, x1005, x934, x981);
+ fiat_secp384r1_addcarryx_u32(&x1008, &x1009, x1007, x936, x983);
+ fiat_secp384r1_addcarryx_u32(&x1010, &x1011, x1009, x938, x985);
+ fiat_secp384r1_addcarryx_u32(&x1012, &x1013, x1011, x940, x987);
+ fiat_secp384r1_addcarryx_u32(&x1014, &x1015, x1013, x942, x989);
+ fiat_secp384r1_addcarryx_u32(&x1016, &x1017, x1015, x944, x991);
+ fiat_secp384r1_mulx_u32(&x1018, &x1019, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1020, &x1021, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1022, &x1023, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1024, &x1025, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1026, &x1027, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1028, &x1029, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1030, &x1031, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1032, &x1033, x992, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1034, &x1035, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1036, &x1037, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1038, &x1039, 0x0, x1035, x1032);
+ fiat_secp384r1_addcarryx_u32(&x1040, &x1041, x1039, x1033, x1030);
+ fiat_secp384r1_addcarryx_u32(&x1042, &x1043, x1041, x1031, x1028);
+ fiat_secp384r1_addcarryx_u32(&x1044, &x1045, x1043, x1029, x1026);
+ fiat_secp384r1_addcarryx_u32(&x1046, &x1047, x1045, x1027, x1024);
+ fiat_secp384r1_addcarryx_u32(&x1048, &x1049, x1047, x1025, x1022);
+ fiat_secp384r1_addcarryx_u32(&x1050, &x1051, x1049, x1023, x1020);
+ fiat_secp384r1_addcarryx_u32(&x1052, &x1053, x1051, x1021, x1018);
+ x1054 = (x1053 + x1019);
+ fiat_secp384r1_addcarryx_u32(&x1055, &x1056, 0x0, x992, x1036);
+ fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x994, x1037);
+ fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x996, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x998, x1034);
+ fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1000, x1038);
+ fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1002, x1040);
+ fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1004, x1042);
+ fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1006, x1044);
+ fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1008, x1046);
+ fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1010, x1048);
+ fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1012, x1050);
+ fiat_secp384r1_addcarryx_u32(&x1077, &x1078, x1076, x1014, x1052);
+ fiat_secp384r1_addcarryx_u32(&x1079, &x1080, x1078, x1016, x1054);
+ x1081 = ((uint32_t)x1080 + x1017);
+ fiat_secp384r1_mulx_u32(&x1082, &x1083, x8, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x1084, &x1085, x8, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x1086, &x1087, x8, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x1088, &x1089, x8, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x1090, &x1091, x8, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x1092, &x1093, x8, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x1094, &x1095, x8, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x1096, &x1097, x8, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x1098, &x1099, x8, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x1100, &x1101, x8, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x1102, &x1103, x8, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x1104, &x1105, x8, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x1106, &x1107, 0x0, x1105, x1102);
+ fiat_secp384r1_addcarryx_u32(&x1108, &x1109, x1107, x1103, x1100);
+ fiat_secp384r1_addcarryx_u32(&x1110, &x1111, x1109, x1101, x1098);
+ fiat_secp384r1_addcarryx_u32(&x1112, &x1113, x1111, x1099, x1096);
+ fiat_secp384r1_addcarryx_u32(&x1114, &x1115, x1113, x1097, x1094);
+ fiat_secp384r1_addcarryx_u32(&x1116, &x1117, x1115, x1095, x1092);
+ fiat_secp384r1_addcarryx_u32(&x1118, &x1119, x1117, x1093, x1090);
+ fiat_secp384r1_addcarryx_u32(&x1120, &x1121, x1119, x1091, x1088);
+ fiat_secp384r1_addcarryx_u32(&x1122, &x1123, x1121, x1089, x1086);
+ fiat_secp384r1_addcarryx_u32(&x1124, &x1125, x1123, x1087, x1084);
+ fiat_secp384r1_addcarryx_u32(&x1126, &x1127, x1125, x1085, x1082);
+ x1128 = (x1127 + x1083);
+ fiat_secp384r1_addcarryx_u32(&x1129, &x1130, 0x0, x1057, x1104);
+ fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1059, x1106);
+ fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1061, x1108);
+ fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1063, x1110);
+ fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136, x1065, x1112);
+ fiat_secp384r1_addcarryx_u32(&x1139, &x1140, x1138, x1067, x1114);
+ fiat_secp384r1_addcarryx_u32(&x1141, &x1142, x1140, x1069, x1116);
+ fiat_secp384r1_addcarryx_u32(&x1143, &x1144, x1142, x1071, x1118);
+ fiat_secp384r1_addcarryx_u32(&x1145, &x1146, x1144, x1073, x1120);
+ fiat_secp384r1_addcarryx_u32(&x1147, &x1148, x1146, x1075, x1122);
+ fiat_secp384r1_addcarryx_u32(&x1149, &x1150, x1148, x1077, x1124);
+ fiat_secp384r1_addcarryx_u32(&x1151, &x1152, x1150, x1079, x1126);
+ fiat_secp384r1_addcarryx_u32(&x1153, &x1154, x1152, x1081, x1128);
+ fiat_secp384r1_mulx_u32(&x1155, &x1156, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1157, &x1158, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1159, &x1160, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1161, &x1162, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1163, &x1164, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1165, &x1166, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1167, &x1168, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1169, &x1170, x1129, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1171, &x1172, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1173, &x1174, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1175, &x1176, 0x0, x1172, x1169);
+ fiat_secp384r1_addcarryx_u32(&x1177, &x1178, x1176, x1170, x1167);
+ fiat_secp384r1_addcarryx_u32(&x1179, &x1180, x1178, x1168, x1165);
+ fiat_secp384r1_addcarryx_u32(&x1181, &x1182, x1180, x1166, x1163);
+ fiat_secp384r1_addcarryx_u32(&x1183, &x1184, x1182, x1164, x1161);
+ fiat_secp384r1_addcarryx_u32(&x1185, &x1186, x1184, x1162, x1159);
+ fiat_secp384r1_addcarryx_u32(&x1187, &x1188, x1186, x1160, x1157);
+ fiat_secp384r1_addcarryx_u32(&x1189, &x1190, x1188, x1158, x1155);
+ x1191 = (x1190 + x1156);
+ fiat_secp384r1_addcarryx_u32(&x1192, &x1193, 0x0, x1129, x1173);
+ fiat_secp384r1_addcarryx_u32(&x1194, &x1195, x1193, x1131, x1174);
+ fiat_secp384r1_addcarryx_u32(&x1196, &x1197, x1195, x1133, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1198, &x1199, x1197, x1135, x1171);
+ fiat_secp384r1_addcarryx_u32(&x1200, &x1201, x1199, x1137, x1175);
+ fiat_secp384r1_addcarryx_u32(&x1202, &x1203, x1201, x1139, x1177);
+ fiat_secp384r1_addcarryx_u32(&x1204, &x1205, x1203, x1141, x1179);
+ fiat_secp384r1_addcarryx_u32(&x1206, &x1207, x1205, x1143, x1181);
+ fiat_secp384r1_addcarryx_u32(&x1208, &x1209, x1207, x1145, x1183);
+ fiat_secp384r1_addcarryx_u32(&x1210, &x1211, x1209, x1147, x1185);
+ fiat_secp384r1_addcarryx_u32(&x1212, &x1213, x1211, x1149, x1187);
+ fiat_secp384r1_addcarryx_u32(&x1214, &x1215, x1213, x1151, x1189);
+ fiat_secp384r1_addcarryx_u32(&x1216, &x1217, x1215, x1153, x1191);
+ x1218 = ((uint32_t)x1217 + x1154);
+ fiat_secp384r1_mulx_u32(&x1219, &x1220, x9, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x1221, &x1222, x9, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x1223, &x1224, x9, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x1225, &x1226, x9, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x1227, &x1228, x9, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x1229, &x1230, x9, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x1231, &x1232, x9, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x1233, &x1234, x9, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x1235, &x1236, x9, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x1237, &x1238, x9, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x1239, &x1240, x9, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x1241, &x1242, x9, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x1243, &x1244, 0x0, x1242, x1239);
+ fiat_secp384r1_addcarryx_u32(&x1245, &x1246, x1244, x1240, x1237);
+ fiat_secp384r1_addcarryx_u32(&x1247, &x1248, x1246, x1238, x1235);
+ fiat_secp384r1_addcarryx_u32(&x1249, &x1250, x1248, x1236, x1233);
+ fiat_secp384r1_addcarryx_u32(&x1251, &x1252, x1250, x1234, x1231);
+ fiat_secp384r1_addcarryx_u32(&x1253, &x1254, x1252, x1232, x1229);
+ fiat_secp384r1_addcarryx_u32(&x1255, &x1256, x1254, x1230, x1227);
+ fiat_secp384r1_addcarryx_u32(&x1257, &x1258, x1256, x1228, x1225);
+ fiat_secp384r1_addcarryx_u32(&x1259, &x1260, x1258, x1226, x1223);
+ fiat_secp384r1_addcarryx_u32(&x1261, &x1262, x1260, x1224, x1221);
+ fiat_secp384r1_addcarryx_u32(&x1263, &x1264, x1262, x1222, x1219);
+ x1265 = (x1264 + x1220);
+ fiat_secp384r1_addcarryx_u32(&x1266, &x1267, 0x0, x1194, x1241);
+ fiat_secp384r1_addcarryx_u32(&x1268, &x1269, x1267, x1196, x1243);
+ fiat_secp384r1_addcarryx_u32(&x1270, &x1271, x1269, x1198, x1245);
+ fiat_secp384r1_addcarryx_u32(&x1272, &x1273, x1271, x1200, x1247);
+ fiat_secp384r1_addcarryx_u32(&x1274, &x1275, x1273, x1202, x1249);
+ fiat_secp384r1_addcarryx_u32(&x1276, &x1277, x1275, x1204, x1251);
+ fiat_secp384r1_addcarryx_u32(&x1278, &x1279, x1277, x1206, x1253);
+ fiat_secp384r1_addcarryx_u32(&x1280, &x1281, x1279, x1208, x1255);
+ fiat_secp384r1_addcarryx_u32(&x1282, &x1283, x1281, x1210, x1257);
+ fiat_secp384r1_addcarryx_u32(&x1284, &x1285, x1283, x1212, x1259);
+ fiat_secp384r1_addcarryx_u32(&x1286, &x1287, x1285, x1214, x1261);
+ fiat_secp384r1_addcarryx_u32(&x1288, &x1289, x1287, x1216, x1263);
+ fiat_secp384r1_addcarryx_u32(&x1290, &x1291, x1289, x1218, x1265);
+ fiat_secp384r1_mulx_u32(&x1292, &x1293, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1294, &x1295, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1296, &x1297, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1298, &x1299, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1300, &x1301, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1302, &x1303, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1304, &x1305, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1306, &x1307, x1266, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1308, &x1309, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1310, &x1311, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1312, &x1313, 0x0, x1309, x1306);
+ fiat_secp384r1_addcarryx_u32(&x1314, &x1315, x1313, x1307, x1304);
+ fiat_secp384r1_addcarryx_u32(&x1316, &x1317, x1315, x1305, x1302);
+ fiat_secp384r1_addcarryx_u32(&x1318, &x1319, x1317, x1303, x1300);
+ fiat_secp384r1_addcarryx_u32(&x1320, &x1321, x1319, x1301, x1298);
+ fiat_secp384r1_addcarryx_u32(&x1322, &x1323, x1321, x1299, x1296);
+ fiat_secp384r1_addcarryx_u32(&x1324, &x1325, x1323, x1297, x1294);
+ fiat_secp384r1_addcarryx_u32(&x1326, &x1327, x1325, x1295, x1292);
+ x1328 = (x1327 + x1293);
+ fiat_secp384r1_addcarryx_u32(&x1329, &x1330, 0x0, x1266, x1310);
+ fiat_secp384r1_addcarryx_u32(&x1331, &x1332, x1330, x1268, x1311);
+ fiat_secp384r1_addcarryx_u32(&x1333, &x1334, x1332, x1270, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1335, &x1336, x1334, x1272, x1308);
+ fiat_secp384r1_addcarryx_u32(&x1337, &x1338, x1336, x1274, x1312);
+ fiat_secp384r1_addcarryx_u32(&x1339, &x1340, x1338, x1276, x1314);
+ fiat_secp384r1_addcarryx_u32(&x1341, &x1342, x1340, x1278, x1316);
+ fiat_secp384r1_addcarryx_u32(&x1343, &x1344, x1342, x1280, x1318);
+ fiat_secp384r1_addcarryx_u32(&x1345, &x1346, x1344, x1282, x1320);
+ fiat_secp384r1_addcarryx_u32(&x1347, &x1348, x1346, x1284, x1322);
+ fiat_secp384r1_addcarryx_u32(&x1349, &x1350, x1348, x1286, x1324);
+ fiat_secp384r1_addcarryx_u32(&x1351, &x1352, x1350, x1288, x1326);
+ fiat_secp384r1_addcarryx_u32(&x1353, &x1354, x1352, x1290, x1328);
+ x1355 = ((uint32_t)x1354 + x1291);
+ fiat_secp384r1_mulx_u32(&x1356, &x1357, x10, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x1358, &x1359, x10, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x1360, &x1361, x10, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x1362, &x1363, x10, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x1364, &x1365, x10, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x1366, &x1367, x10, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x1368, &x1369, x10, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x1370, &x1371, x10, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x1372, &x1373, x10, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x1374, &x1375, x10, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x1376, &x1377, x10, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x1378, &x1379, x10, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x1380, &x1381, 0x0, x1379, x1376);
+ fiat_secp384r1_addcarryx_u32(&x1382, &x1383, x1381, x1377, x1374);
+ fiat_secp384r1_addcarryx_u32(&x1384, &x1385, x1383, x1375, x1372);
+ fiat_secp384r1_addcarryx_u32(&x1386, &x1387, x1385, x1373, x1370);
+ fiat_secp384r1_addcarryx_u32(&x1388, &x1389, x1387, x1371, x1368);
+ fiat_secp384r1_addcarryx_u32(&x1390, &x1391, x1389, x1369, x1366);
+ fiat_secp384r1_addcarryx_u32(&x1392, &x1393, x1391, x1367, x1364);
+ fiat_secp384r1_addcarryx_u32(&x1394, &x1395, x1393, x1365, x1362);
+ fiat_secp384r1_addcarryx_u32(&x1396, &x1397, x1395, x1363, x1360);
+ fiat_secp384r1_addcarryx_u32(&x1398, &x1399, x1397, x1361, x1358);
+ fiat_secp384r1_addcarryx_u32(&x1400, &x1401, x1399, x1359, x1356);
+ x1402 = (x1401 + x1357);
+ fiat_secp384r1_addcarryx_u32(&x1403, &x1404, 0x0, x1331, x1378);
+ fiat_secp384r1_addcarryx_u32(&x1405, &x1406, x1404, x1333, x1380);
+ fiat_secp384r1_addcarryx_u32(&x1407, &x1408, x1406, x1335, x1382);
+ fiat_secp384r1_addcarryx_u32(&x1409, &x1410, x1408, x1337, x1384);
+ fiat_secp384r1_addcarryx_u32(&x1411, &x1412, x1410, x1339, x1386);
+ fiat_secp384r1_addcarryx_u32(&x1413, &x1414, x1412, x1341, x1388);
+ fiat_secp384r1_addcarryx_u32(&x1415, &x1416, x1414, x1343, x1390);
+ fiat_secp384r1_addcarryx_u32(&x1417, &x1418, x1416, x1345, x1392);
+ fiat_secp384r1_addcarryx_u32(&x1419, &x1420, x1418, x1347, x1394);
+ fiat_secp384r1_addcarryx_u32(&x1421, &x1422, x1420, x1349, x1396);
+ fiat_secp384r1_addcarryx_u32(&x1423, &x1424, x1422, x1351, x1398);
+ fiat_secp384r1_addcarryx_u32(&x1425, &x1426, x1424, x1353, x1400);
+ fiat_secp384r1_addcarryx_u32(&x1427, &x1428, x1426, x1355, x1402);
+ fiat_secp384r1_mulx_u32(&x1429, &x1430, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1431, &x1432, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1433, &x1434, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1435, &x1436, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1437, &x1438, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1439, &x1440, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1441, &x1442, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1443, &x1444, x1403, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1445, &x1446, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1447, &x1448, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1449, &x1450, 0x0, x1446, x1443);
+ fiat_secp384r1_addcarryx_u32(&x1451, &x1452, x1450, x1444, x1441);
+ fiat_secp384r1_addcarryx_u32(&x1453, &x1454, x1452, x1442, x1439);
+ fiat_secp384r1_addcarryx_u32(&x1455, &x1456, x1454, x1440, x1437);
+ fiat_secp384r1_addcarryx_u32(&x1457, &x1458, x1456, x1438, x1435);
+ fiat_secp384r1_addcarryx_u32(&x1459, &x1460, x1458, x1436, x1433);
+ fiat_secp384r1_addcarryx_u32(&x1461, &x1462, x1460, x1434, x1431);
+ fiat_secp384r1_addcarryx_u32(&x1463, &x1464, x1462, x1432, x1429);
+ x1465 = (x1464 + x1430);
+ fiat_secp384r1_addcarryx_u32(&x1466, &x1467, 0x0, x1403, x1447);
+ fiat_secp384r1_addcarryx_u32(&x1468, &x1469, x1467, x1405, x1448);
+ fiat_secp384r1_addcarryx_u32(&x1470, &x1471, x1469, x1407, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1472, &x1473, x1471, x1409, x1445);
+ fiat_secp384r1_addcarryx_u32(&x1474, &x1475, x1473, x1411, x1449);
+ fiat_secp384r1_addcarryx_u32(&x1476, &x1477, x1475, x1413, x1451);
+ fiat_secp384r1_addcarryx_u32(&x1478, &x1479, x1477, x1415, x1453);
+ fiat_secp384r1_addcarryx_u32(&x1480, &x1481, x1479, x1417, x1455);
+ fiat_secp384r1_addcarryx_u32(&x1482, &x1483, x1481, x1419, x1457);
+ fiat_secp384r1_addcarryx_u32(&x1484, &x1485, x1483, x1421, x1459);
+ fiat_secp384r1_addcarryx_u32(&x1486, &x1487, x1485, x1423, x1461);
+ fiat_secp384r1_addcarryx_u32(&x1488, &x1489, x1487, x1425, x1463);
+ fiat_secp384r1_addcarryx_u32(&x1490, &x1491, x1489, x1427, x1465);
+ x1492 = ((uint32_t)x1491 + x1428);
+ fiat_secp384r1_mulx_u32(&x1493, &x1494, x11, (arg2[11]));
+ fiat_secp384r1_mulx_u32(&x1495, &x1496, x11, (arg2[10]));
+ fiat_secp384r1_mulx_u32(&x1497, &x1498, x11, (arg2[9]));
+ fiat_secp384r1_mulx_u32(&x1499, &x1500, x11, (arg2[8]));
+ fiat_secp384r1_mulx_u32(&x1501, &x1502, x11, (arg2[7]));
+ fiat_secp384r1_mulx_u32(&x1503, &x1504, x11, (arg2[6]));
+ fiat_secp384r1_mulx_u32(&x1505, &x1506, x11, (arg2[5]));
+ fiat_secp384r1_mulx_u32(&x1507, &x1508, x11, (arg2[4]));
+ fiat_secp384r1_mulx_u32(&x1509, &x1510, x11, (arg2[3]));
+ fiat_secp384r1_mulx_u32(&x1511, &x1512, x11, (arg2[2]));
+ fiat_secp384r1_mulx_u32(&x1513, &x1514, x11, (arg2[1]));
+ fiat_secp384r1_mulx_u32(&x1515, &x1516, x11, (arg2[0]));
+ fiat_secp384r1_addcarryx_u32(&x1517, &x1518, 0x0, x1516, x1513);
+ fiat_secp384r1_addcarryx_u32(&x1519, &x1520, x1518, x1514, x1511);
+ fiat_secp384r1_addcarryx_u32(&x1521, &x1522, x1520, x1512, x1509);
+ fiat_secp384r1_addcarryx_u32(&x1523, &x1524, x1522, x1510, x1507);
+ fiat_secp384r1_addcarryx_u32(&x1525, &x1526, x1524, x1508, x1505);
+ fiat_secp384r1_addcarryx_u32(&x1527, &x1528, x1526, x1506, x1503);
+ fiat_secp384r1_addcarryx_u32(&x1529, &x1530, x1528, x1504, x1501);
+ fiat_secp384r1_addcarryx_u32(&x1531, &x1532, x1530, x1502, x1499);
+ fiat_secp384r1_addcarryx_u32(&x1533, &x1534, x1532, x1500, x1497);
+ fiat_secp384r1_addcarryx_u32(&x1535, &x1536, x1534, x1498, x1495);
+ fiat_secp384r1_addcarryx_u32(&x1537, &x1538, x1536, x1496, x1493);
+ x1539 = (x1538 + x1494);
+ fiat_secp384r1_addcarryx_u32(&x1540, &x1541, 0x0, x1468, x1515);
+ fiat_secp384r1_addcarryx_u32(&x1542, &x1543, x1541, x1470, x1517);
+ fiat_secp384r1_addcarryx_u32(&x1544, &x1545, x1543, x1472, x1519);
+ fiat_secp384r1_addcarryx_u32(&x1546, &x1547, x1545, x1474, x1521);
+ fiat_secp384r1_addcarryx_u32(&x1548, &x1549, x1547, x1476, x1523);
+ fiat_secp384r1_addcarryx_u32(&x1550, &x1551, x1549, x1478, x1525);
+ fiat_secp384r1_addcarryx_u32(&x1552, &x1553, x1551, x1480, x1527);
+ fiat_secp384r1_addcarryx_u32(&x1554, &x1555, x1553, x1482, x1529);
+ fiat_secp384r1_addcarryx_u32(&x1556, &x1557, x1555, x1484, x1531);
+ fiat_secp384r1_addcarryx_u32(&x1558, &x1559, x1557, x1486, x1533);
+ fiat_secp384r1_addcarryx_u32(&x1560, &x1561, x1559, x1488, x1535);
+ fiat_secp384r1_addcarryx_u32(&x1562, &x1563, x1561, x1490, x1537);
+ fiat_secp384r1_addcarryx_u32(&x1564, &x1565, x1563, x1492, x1539);
+ fiat_secp384r1_mulx_u32(&x1566, &x1567, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1568, &x1569, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1570, &x1571, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1572, &x1573, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1574, &x1575, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1576, &x1577, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1578, &x1579, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1580, &x1581, x1540, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1582, &x1583, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1584, &x1585, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1586, &x1587, 0x0, x1583, x1580);
+ fiat_secp384r1_addcarryx_u32(&x1588, &x1589, x1587, x1581, x1578);
+ fiat_secp384r1_addcarryx_u32(&x1590, &x1591, x1589, x1579, x1576);
+ fiat_secp384r1_addcarryx_u32(&x1592, &x1593, x1591, x1577, x1574);
+ fiat_secp384r1_addcarryx_u32(&x1594, &x1595, x1593, x1575, x1572);
+ fiat_secp384r1_addcarryx_u32(&x1596, &x1597, x1595, x1573, x1570);
+ fiat_secp384r1_addcarryx_u32(&x1598, &x1599, x1597, x1571, x1568);
+ fiat_secp384r1_addcarryx_u32(&x1600, &x1601, x1599, x1569, x1566);
+ x1602 = (x1601 + x1567);
+ fiat_secp384r1_addcarryx_u32(&x1603, &x1604, 0x0, x1540, x1584);
+ fiat_secp384r1_addcarryx_u32(&x1605, &x1606, x1604, x1542, x1585);
+ fiat_secp384r1_addcarryx_u32(&x1607, &x1608, x1606, x1544, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1609, &x1610, x1608, x1546, x1582);
+ fiat_secp384r1_addcarryx_u32(&x1611, &x1612, x1610, x1548, x1586);
+ fiat_secp384r1_addcarryx_u32(&x1613, &x1614, x1612, x1550, x1588);
+ fiat_secp384r1_addcarryx_u32(&x1615, &x1616, x1614, x1552, x1590);
+ fiat_secp384r1_addcarryx_u32(&x1617, &x1618, x1616, x1554, x1592);
+ fiat_secp384r1_addcarryx_u32(&x1619, &x1620, x1618, x1556, x1594);
+ fiat_secp384r1_addcarryx_u32(&x1621, &x1622, x1620, x1558, x1596);
+ fiat_secp384r1_addcarryx_u32(&x1623, &x1624, x1622, x1560, x1598);
+ fiat_secp384r1_addcarryx_u32(&x1625, &x1626, x1624, x1562, x1600);
+ fiat_secp384r1_addcarryx_u32(&x1627, &x1628, x1626, x1564, x1602);
+ x1629 = ((uint32_t)x1628 + x1565);
+ fiat_secp384r1_subborrowx_u32(&x1630, &x1631, 0x0, x1605,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1632, &x1633, x1631, x1607, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1634, &x1635, x1633, x1609, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1636, &x1637, x1635, x1611,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1638, &x1639, x1637, x1613,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x1640, &x1641, x1639, x1615,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1642, &x1643, x1641, x1617,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1644, &x1645, x1643, x1619,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1646, &x1647, x1645, x1621,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1648, &x1649, x1647, x1623,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1650, &x1651, x1649, x1625,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1652, &x1653, x1651, x1627,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1654, &x1655, x1653, x1629, 0x0);
+ fiat_secp384r1_cmovznz_u32(&x1656, x1655, x1630, x1605);
+ fiat_secp384r1_cmovznz_u32(&x1657, x1655, x1632, x1607);
+ fiat_secp384r1_cmovznz_u32(&x1658, x1655, x1634, x1609);
+ fiat_secp384r1_cmovznz_u32(&x1659, x1655, x1636, x1611);
+ fiat_secp384r1_cmovznz_u32(&x1660, x1655, x1638, x1613);
+ fiat_secp384r1_cmovznz_u32(&x1661, x1655, x1640, x1615);
+ fiat_secp384r1_cmovznz_u32(&x1662, x1655, x1642, x1617);
+ fiat_secp384r1_cmovznz_u32(&x1663, x1655, x1644, x1619);
+ fiat_secp384r1_cmovznz_u32(&x1664, x1655, x1646, x1621);
+ fiat_secp384r1_cmovznz_u32(&x1665, x1655, x1648, x1623);
+ fiat_secp384r1_cmovznz_u32(&x1666, x1655, x1650, x1625);
+ fiat_secp384r1_cmovznz_u32(&x1667, x1655, x1652, x1627);
+ out1[0] = x1656;
+ out1[1] = x1657;
+ out1[2] = x1658;
+ out1[3] = x1659;
+ out1[4] = x1660;
+ out1[5] = x1661;
+ out1[6] = x1662;
+ out1[7] = x1663;
+ out1[8] = x1664;
+ out1[9] = x1665;
+ out1[10] = x1666;
+ out1[11] = x1667;
+}
+
+/*
+ * The function fiat_secp384r1_square squares a field element in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_square(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ fiat_secp384r1_uint1 x38;
+ uint32_t x39;
+ fiat_secp384r1_uint1 x40;
+ uint32_t x41;
+ fiat_secp384r1_uint1 x42;
+ uint32_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint32_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint32_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint32_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint32_t x51;
+ fiat_secp384r1_uint1 x52;
+ uint32_t x53;
+ fiat_secp384r1_uint1 x54;
+ uint32_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint32_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ uint32_t x63;
+ uint32_t x64;
+ uint32_t x65;
+ uint32_t x66;
+ uint32_t x67;
+ uint32_t x68;
+ uint32_t x69;
+ uint32_t x70;
+ uint32_t x71;
+ uint32_t x72;
+ uint32_t x73;
+ uint32_t x74;
+ uint32_t x75;
+ uint32_t x76;
+ uint32_t x77;
+ uint32_t x78;
+ uint32_t x79;
+ uint32_t x80;
+ fiat_secp384r1_uint1 x81;
+ uint32_t x82;
+ fiat_secp384r1_uint1 x83;
+ uint32_t x84;
+ fiat_secp384r1_uint1 x85;
+ uint32_t x86;
+ fiat_secp384r1_uint1 x87;
+ uint32_t x88;
+ fiat_secp384r1_uint1 x89;
+ uint32_t x90;
+ fiat_secp384r1_uint1 x91;
+ uint32_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint32_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint32_t x96;
+ uint32_t x97;
+ fiat_secp384r1_uint1 x98;
+ uint32_t x99;
+ fiat_secp384r1_uint1 x100;
+ uint32_t x101;
+ fiat_secp384r1_uint1 x102;
+ uint32_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint32_t x105;
+ fiat_secp384r1_uint1 x106;
+ uint32_t x107;
+ fiat_secp384r1_uint1 x108;
+ uint32_t x109;
+ fiat_secp384r1_uint1 x110;
+ uint32_t x111;
+ fiat_secp384r1_uint1 x112;
+ uint32_t x113;
+ fiat_secp384r1_uint1 x114;
+ uint32_t x115;
+ fiat_secp384r1_uint1 x116;
+ uint32_t x117;
+ fiat_secp384r1_uint1 x118;
+ uint32_t x119;
+ fiat_secp384r1_uint1 x120;
+ uint32_t x121;
+ fiat_secp384r1_uint1 x122;
+ uint32_t x123;
+ uint32_t x124;
+ uint32_t x125;
+ uint32_t x126;
+ uint32_t x127;
+ uint32_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint32_t x133;
+ uint32_t x134;
+ uint32_t x135;
+ uint32_t x136;
+ uint32_t x137;
+ uint32_t x138;
+ uint32_t x139;
+ uint32_t x140;
+ uint32_t x141;
+ uint32_t x142;
+ uint32_t x143;
+ uint32_t x144;
+ uint32_t x145;
+ uint32_t x146;
+ uint32_t x147;
+ fiat_secp384r1_uint1 x148;
+ uint32_t x149;
+ fiat_secp384r1_uint1 x150;
+ uint32_t x151;
+ fiat_secp384r1_uint1 x152;
+ uint32_t x153;
+ fiat_secp384r1_uint1 x154;
+ uint32_t x155;
+ fiat_secp384r1_uint1 x156;
+ uint32_t x157;
+ fiat_secp384r1_uint1 x158;
+ uint32_t x159;
+ fiat_secp384r1_uint1 x160;
+ uint32_t x161;
+ fiat_secp384r1_uint1 x162;
+ uint32_t x163;
+ fiat_secp384r1_uint1 x164;
+ uint32_t x165;
+ fiat_secp384r1_uint1 x166;
+ uint32_t x167;
+ fiat_secp384r1_uint1 x168;
+ uint32_t x169;
+ uint32_t x170;
+ fiat_secp384r1_uint1 x171;
+ uint32_t x172;
+ fiat_secp384r1_uint1 x173;
+ uint32_t x174;
+ fiat_secp384r1_uint1 x175;
+ uint32_t x176;
+ fiat_secp384r1_uint1 x177;
+ uint32_t x178;
+ fiat_secp384r1_uint1 x179;
+ uint32_t x180;
+ fiat_secp384r1_uint1 x181;
+ uint32_t x182;
+ fiat_secp384r1_uint1 x183;
+ uint32_t x184;
+ fiat_secp384r1_uint1 x185;
+ uint32_t x186;
+ fiat_secp384r1_uint1 x187;
+ uint32_t x188;
+ fiat_secp384r1_uint1 x189;
+ uint32_t x190;
+ fiat_secp384r1_uint1 x191;
+ uint32_t x192;
+ fiat_secp384r1_uint1 x193;
+ uint32_t x194;
+ fiat_secp384r1_uint1 x195;
+ uint32_t x196;
+ uint32_t x197;
+ uint32_t x198;
+ uint32_t x199;
+ uint32_t x200;
+ uint32_t x201;
+ uint32_t x202;
+ uint32_t x203;
+ uint32_t x204;
+ uint32_t x205;
+ uint32_t x206;
+ uint32_t x207;
+ uint32_t x208;
+ uint32_t x209;
+ uint32_t x210;
+ uint32_t x211;
+ uint32_t x212;
+ uint32_t x213;
+ uint32_t x214;
+ uint32_t x215;
+ uint32_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint32_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint32_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint32_t x222;
+ fiat_secp384r1_uint1 x223;
+ uint32_t x224;
+ fiat_secp384r1_uint1 x225;
+ uint32_t x226;
+ fiat_secp384r1_uint1 x227;
+ uint32_t x228;
+ fiat_secp384r1_uint1 x229;
+ uint32_t x230;
+ fiat_secp384r1_uint1 x231;
+ uint32_t x232;
+ uint32_t x233;
+ fiat_secp384r1_uint1 x234;
+ uint32_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint32_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint32_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint32_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint32_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint32_t x245;
+ fiat_secp384r1_uint1 x246;
+ uint32_t x247;
+ fiat_secp384r1_uint1 x248;
+ uint32_t x249;
+ fiat_secp384r1_uint1 x250;
+ uint32_t x251;
+ fiat_secp384r1_uint1 x252;
+ uint32_t x253;
+ fiat_secp384r1_uint1 x254;
+ uint32_t x255;
+ fiat_secp384r1_uint1 x256;
+ uint32_t x257;
+ fiat_secp384r1_uint1 x258;
+ uint32_t x259;
+ uint32_t x260;
+ uint32_t x261;
+ uint32_t x262;
+ uint32_t x263;
+ uint32_t x264;
+ uint32_t x265;
+ uint32_t x266;
+ uint32_t x267;
+ uint32_t x268;
+ uint32_t x269;
+ uint32_t x270;
+ uint32_t x271;
+ uint32_t x272;
+ uint32_t x273;
+ uint32_t x274;
+ uint32_t x275;
+ uint32_t x276;
+ uint32_t x277;
+ uint32_t x278;
+ uint32_t x279;
+ uint32_t x280;
+ uint32_t x281;
+ uint32_t x282;
+ uint32_t x283;
+ uint32_t x284;
+ fiat_secp384r1_uint1 x285;
+ uint32_t x286;
+ fiat_secp384r1_uint1 x287;
+ uint32_t x288;
+ fiat_secp384r1_uint1 x289;
+ uint32_t x290;
+ fiat_secp384r1_uint1 x291;
+ uint32_t x292;
+ fiat_secp384r1_uint1 x293;
+ uint32_t x294;
+ fiat_secp384r1_uint1 x295;
+ uint32_t x296;
+ fiat_secp384r1_uint1 x297;
+ uint32_t x298;
+ fiat_secp384r1_uint1 x299;
+ uint32_t x300;
+ fiat_secp384r1_uint1 x301;
+ uint32_t x302;
+ fiat_secp384r1_uint1 x303;
+ uint32_t x304;
+ fiat_secp384r1_uint1 x305;
+ uint32_t x306;
+ uint32_t x307;
+ fiat_secp384r1_uint1 x308;
+ uint32_t x309;
+ fiat_secp384r1_uint1 x310;
+ uint32_t x311;
+ fiat_secp384r1_uint1 x312;
+ uint32_t x313;
+ fiat_secp384r1_uint1 x314;
+ uint32_t x315;
+ fiat_secp384r1_uint1 x316;
+ uint32_t x317;
+ fiat_secp384r1_uint1 x318;
+ uint32_t x319;
+ fiat_secp384r1_uint1 x320;
+ uint32_t x321;
+ fiat_secp384r1_uint1 x322;
+ uint32_t x323;
+ fiat_secp384r1_uint1 x324;
+ uint32_t x325;
+ fiat_secp384r1_uint1 x326;
+ uint32_t x327;
+ fiat_secp384r1_uint1 x328;
+ uint32_t x329;
+ fiat_secp384r1_uint1 x330;
+ uint32_t x331;
+ fiat_secp384r1_uint1 x332;
+ uint32_t x333;
+ uint32_t x334;
+ uint32_t x335;
+ uint32_t x336;
+ uint32_t x337;
+ uint32_t x338;
+ uint32_t x339;
+ uint32_t x340;
+ uint32_t x341;
+ uint32_t x342;
+ uint32_t x343;
+ uint32_t x344;
+ uint32_t x345;
+ uint32_t x346;
+ uint32_t x347;
+ uint32_t x348;
+ uint32_t x349;
+ uint32_t x350;
+ uint32_t x351;
+ uint32_t x352;
+ uint32_t x353;
+ fiat_secp384r1_uint1 x354;
+ uint32_t x355;
+ fiat_secp384r1_uint1 x356;
+ uint32_t x357;
+ fiat_secp384r1_uint1 x358;
+ uint32_t x359;
+ fiat_secp384r1_uint1 x360;
+ uint32_t x361;
+ fiat_secp384r1_uint1 x362;
+ uint32_t x363;
+ fiat_secp384r1_uint1 x364;
+ uint32_t x365;
+ fiat_secp384r1_uint1 x366;
+ uint32_t x367;
+ fiat_secp384r1_uint1 x368;
+ uint32_t x369;
+ uint32_t x370;
+ fiat_secp384r1_uint1 x371;
+ uint32_t x372;
+ fiat_secp384r1_uint1 x373;
+ uint32_t x374;
+ fiat_secp384r1_uint1 x375;
+ uint32_t x376;
+ fiat_secp384r1_uint1 x377;
+ uint32_t x378;
+ fiat_secp384r1_uint1 x379;
+ uint32_t x380;
+ fiat_secp384r1_uint1 x381;
+ uint32_t x382;
+ fiat_secp384r1_uint1 x383;
+ uint32_t x384;
+ fiat_secp384r1_uint1 x385;
+ uint32_t x386;
+ fiat_secp384r1_uint1 x387;
+ uint32_t x388;
+ fiat_secp384r1_uint1 x389;
+ uint32_t x390;
+ fiat_secp384r1_uint1 x391;
+ uint32_t x392;
+ fiat_secp384r1_uint1 x393;
+ uint32_t x394;
+ fiat_secp384r1_uint1 x395;
+ uint32_t x396;
+ uint32_t x397;
+ uint32_t x398;
+ uint32_t x399;
+ uint32_t x400;
+ uint32_t x401;
+ uint32_t x402;
+ uint32_t x403;
+ uint32_t x404;
+ uint32_t x405;
+ uint32_t x406;
+ uint32_t x407;
+ uint32_t x408;
+ uint32_t x409;
+ uint32_t x410;
+ uint32_t x411;
+ uint32_t x412;
+ uint32_t x413;
+ uint32_t x414;
+ uint32_t x415;
+ uint32_t x416;
+ uint32_t x417;
+ uint32_t x418;
+ uint32_t x419;
+ uint32_t x420;
+ uint32_t x421;
+ fiat_secp384r1_uint1 x422;
+ uint32_t x423;
+ fiat_secp384r1_uint1 x424;
+ uint32_t x425;
+ fiat_secp384r1_uint1 x426;
+ uint32_t x427;
+ fiat_secp384r1_uint1 x428;
+ uint32_t x429;
+ fiat_secp384r1_uint1 x430;
+ uint32_t x431;
+ fiat_secp384r1_uint1 x432;
+ uint32_t x433;
+ fiat_secp384r1_uint1 x434;
+ uint32_t x435;
+ fiat_secp384r1_uint1 x436;
+ uint32_t x437;
+ fiat_secp384r1_uint1 x438;
+ uint32_t x439;
+ fiat_secp384r1_uint1 x440;
+ uint32_t x441;
+ fiat_secp384r1_uint1 x442;
+ uint32_t x443;
+ uint32_t x444;
+ fiat_secp384r1_uint1 x445;
+ uint32_t x446;
+ fiat_secp384r1_uint1 x447;
+ uint32_t x448;
+ fiat_secp384r1_uint1 x449;
+ uint32_t x450;
+ fiat_secp384r1_uint1 x451;
+ uint32_t x452;
+ fiat_secp384r1_uint1 x453;
+ uint32_t x454;
+ fiat_secp384r1_uint1 x455;
+ uint32_t x456;
+ fiat_secp384r1_uint1 x457;
+ uint32_t x458;
+ fiat_secp384r1_uint1 x459;
+ uint32_t x460;
+ fiat_secp384r1_uint1 x461;
+ uint32_t x462;
+ fiat_secp384r1_uint1 x463;
+ uint32_t x464;
+ fiat_secp384r1_uint1 x465;
+ uint32_t x466;
+ fiat_secp384r1_uint1 x467;
+ uint32_t x468;
+ fiat_secp384r1_uint1 x469;
+ uint32_t x470;
+ uint32_t x471;
+ uint32_t x472;
+ uint32_t x473;
+ uint32_t x474;
+ uint32_t x475;
+ uint32_t x476;
+ uint32_t x477;
+ uint32_t x478;
+ uint32_t x479;
+ uint32_t x480;
+ uint32_t x481;
+ uint32_t x482;
+ uint32_t x483;
+ uint32_t x484;
+ uint32_t x485;
+ uint32_t x486;
+ uint32_t x487;
+ uint32_t x488;
+ uint32_t x489;
+ uint32_t x490;
+ fiat_secp384r1_uint1 x491;
+ uint32_t x492;
+ fiat_secp384r1_uint1 x493;
+ uint32_t x494;
+ fiat_secp384r1_uint1 x495;
+ uint32_t x496;
+ fiat_secp384r1_uint1 x497;
+ uint32_t x498;
+ fiat_secp384r1_uint1 x499;
+ uint32_t x500;
+ fiat_secp384r1_uint1 x501;
+ uint32_t x502;
+ fiat_secp384r1_uint1 x503;
+ uint32_t x504;
+ fiat_secp384r1_uint1 x505;
+ uint32_t x506;
+ uint32_t x507;
+ fiat_secp384r1_uint1 x508;
+ uint32_t x509;
+ fiat_secp384r1_uint1 x510;
+ uint32_t x511;
+ fiat_secp384r1_uint1 x512;
+ uint32_t x513;
+ fiat_secp384r1_uint1 x514;
+ uint32_t x515;
+ fiat_secp384r1_uint1 x516;
+ uint32_t x517;
+ fiat_secp384r1_uint1 x518;
+ uint32_t x519;
+ fiat_secp384r1_uint1 x520;
+ uint32_t x521;
+ fiat_secp384r1_uint1 x522;
+ uint32_t x523;
+ fiat_secp384r1_uint1 x524;
+ uint32_t x525;
+ fiat_secp384r1_uint1 x526;
+ uint32_t x527;
+ fiat_secp384r1_uint1 x528;
+ uint32_t x529;
+ fiat_secp384r1_uint1 x530;
+ uint32_t x531;
+ fiat_secp384r1_uint1 x532;
+ uint32_t x533;
+ uint32_t x534;
+ uint32_t x535;
+ uint32_t x536;
+ uint32_t x537;
+ uint32_t x538;
+ uint32_t x539;
+ uint32_t x540;
+ uint32_t x541;
+ uint32_t x542;
+ uint32_t x543;
+ uint32_t x544;
+ uint32_t x545;
+ uint32_t x546;
+ uint32_t x547;
+ uint32_t x548;
+ uint32_t x549;
+ uint32_t x550;
+ uint32_t x551;
+ uint32_t x552;
+ uint32_t x553;
+ uint32_t x554;
+ uint32_t x555;
+ uint32_t x556;
+ uint32_t x557;
+ uint32_t x558;
+ fiat_secp384r1_uint1 x559;
+ uint32_t x560;
+ fiat_secp384r1_uint1 x561;
+ uint32_t x562;
+ fiat_secp384r1_uint1 x563;
+ uint32_t x564;
+ fiat_secp384r1_uint1 x565;
+ uint32_t x566;
+ fiat_secp384r1_uint1 x567;
+ uint32_t x568;
+ fiat_secp384r1_uint1 x569;
+ uint32_t x570;
+ fiat_secp384r1_uint1 x571;
+ uint32_t x572;
+ fiat_secp384r1_uint1 x573;
+ uint32_t x574;
+ fiat_secp384r1_uint1 x575;
+ uint32_t x576;
+ fiat_secp384r1_uint1 x577;
+ uint32_t x578;
+ fiat_secp384r1_uint1 x579;
+ uint32_t x580;
+ uint32_t x581;
+ fiat_secp384r1_uint1 x582;
+ uint32_t x583;
+ fiat_secp384r1_uint1 x584;
+ uint32_t x585;
+ fiat_secp384r1_uint1 x586;
+ uint32_t x587;
+ fiat_secp384r1_uint1 x588;
+ uint32_t x589;
+ fiat_secp384r1_uint1 x590;
+ uint32_t x591;
+ fiat_secp384r1_uint1 x592;
+ uint32_t x593;
+ fiat_secp384r1_uint1 x594;
+ uint32_t x595;
+ fiat_secp384r1_uint1 x596;
+ uint32_t x597;
+ fiat_secp384r1_uint1 x598;
+ uint32_t x599;
+ fiat_secp384r1_uint1 x600;
+ uint32_t x601;
+ fiat_secp384r1_uint1 x602;
+ uint32_t x603;
+ fiat_secp384r1_uint1 x604;
+ uint32_t x605;
+ fiat_secp384r1_uint1 x606;
+ uint32_t x607;
+ uint32_t x608;
+ uint32_t x609;
+ uint32_t x610;
+ uint32_t x611;
+ uint32_t x612;
+ uint32_t x613;
+ uint32_t x614;
+ uint32_t x615;
+ uint32_t x616;
+ uint32_t x617;
+ uint32_t x618;
+ uint32_t x619;
+ uint32_t x620;
+ uint32_t x621;
+ uint32_t x622;
+ uint32_t x623;
+ uint32_t x624;
+ uint32_t x625;
+ uint32_t x626;
+ uint32_t x627;
+ fiat_secp384r1_uint1 x628;
+ uint32_t x629;
+ fiat_secp384r1_uint1 x630;
+ uint32_t x631;
+ fiat_secp384r1_uint1 x632;
+ uint32_t x633;
+ fiat_secp384r1_uint1 x634;
+ uint32_t x635;
+ fiat_secp384r1_uint1 x636;
+ uint32_t x637;
+ fiat_secp384r1_uint1 x638;
+ uint32_t x639;
+ fiat_secp384r1_uint1 x640;
+ uint32_t x641;
+ fiat_secp384r1_uint1 x642;
+ uint32_t x643;
+ uint32_t x644;
+ fiat_secp384r1_uint1 x645;
+ uint32_t x646;
+ fiat_secp384r1_uint1 x647;
+ uint32_t x648;
+ fiat_secp384r1_uint1 x649;
+ uint32_t x650;
+ fiat_secp384r1_uint1 x651;
+ uint32_t x652;
+ fiat_secp384r1_uint1 x653;
+ uint32_t x654;
+ fiat_secp384r1_uint1 x655;
+ uint32_t x656;
+ fiat_secp384r1_uint1 x657;
+ uint32_t x658;
+ fiat_secp384r1_uint1 x659;
+ uint32_t x660;
+ fiat_secp384r1_uint1 x661;
+ uint32_t x662;
+ fiat_secp384r1_uint1 x663;
+ uint32_t x664;
+ fiat_secp384r1_uint1 x665;
+ uint32_t x666;
+ fiat_secp384r1_uint1 x667;
+ uint32_t x668;
+ fiat_secp384r1_uint1 x669;
+ uint32_t x670;
+ uint32_t x671;
+ uint32_t x672;
+ uint32_t x673;
+ uint32_t x674;
+ uint32_t x675;
+ uint32_t x676;
+ uint32_t x677;
+ uint32_t x678;
+ uint32_t x679;
+ uint32_t x680;
+ uint32_t x681;
+ uint32_t x682;
+ uint32_t x683;
+ uint32_t x684;
+ uint32_t x685;
+ uint32_t x686;
+ uint32_t x687;
+ uint32_t x688;
+ uint32_t x689;
+ uint32_t x690;
+ uint32_t x691;
+ uint32_t x692;
+ uint32_t x693;
+ uint32_t x694;
+ uint32_t x695;
+ fiat_secp384r1_uint1 x696;
+ uint32_t x697;
+ fiat_secp384r1_uint1 x698;
+ uint32_t x699;
+ fiat_secp384r1_uint1 x700;
+ uint32_t x701;
+ fiat_secp384r1_uint1 x702;
+ uint32_t x703;
+ fiat_secp384r1_uint1 x704;
+ uint32_t x705;
+ fiat_secp384r1_uint1 x706;
+ uint32_t x707;
+ fiat_secp384r1_uint1 x708;
+ uint32_t x709;
+ fiat_secp384r1_uint1 x710;
+ uint32_t x711;
+ fiat_secp384r1_uint1 x712;
+ uint32_t x713;
+ fiat_secp384r1_uint1 x714;
+ uint32_t x715;
+ fiat_secp384r1_uint1 x716;
+ uint32_t x717;
+ uint32_t x718;
+ fiat_secp384r1_uint1 x719;
+ uint32_t x720;
+ fiat_secp384r1_uint1 x721;
+ uint32_t x722;
+ fiat_secp384r1_uint1 x723;
+ uint32_t x724;
+ fiat_secp384r1_uint1 x725;
+ uint32_t x726;
+ fiat_secp384r1_uint1 x727;
+ uint32_t x728;
+ fiat_secp384r1_uint1 x729;
+ uint32_t x730;
+ fiat_secp384r1_uint1 x731;
+ uint32_t x732;
+ fiat_secp384r1_uint1 x733;
+ uint32_t x734;
+ fiat_secp384r1_uint1 x735;
+ uint32_t x736;
+ fiat_secp384r1_uint1 x737;
+ uint32_t x738;
+ fiat_secp384r1_uint1 x739;
+ uint32_t x740;
+ fiat_secp384r1_uint1 x741;
+ uint32_t x742;
+ fiat_secp384r1_uint1 x743;
+ uint32_t x744;
+ uint32_t x745;
+ uint32_t x746;
+ uint32_t x747;
+ uint32_t x748;
+ uint32_t x749;
+ uint32_t x750;
+ uint32_t x751;
+ uint32_t x752;
+ uint32_t x753;
+ uint32_t x754;
+ uint32_t x755;
+ uint32_t x756;
+ uint32_t x757;
+ uint32_t x758;
+ uint32_t x759;
+ uint32_t x760;
+ uint32_t x761;
+ uint32_t x762;
+ uint32_t x763;
+ uint32_t x764;
+ fiat_secp384r1_uint1 x765;
+ uint32_t x766;
+ fiat_secp384r1_uint1 x767;
+ uint32_t x768;
+ fiat_secp384r1_uint1 x769;
+ uint32_t x770;
+ fiat_secp384r1_uint1 x771;
+ uint32_t x772;
+ fiat_secp384r1_uint1 x773;
+ uint32_t x774;
+ fiat_secp384r1_uint1 x775;
+ uint32_t x776;
+ fiat_secp384r1_uint1 x777;
+ uint32_t x778;
+ fiat_secp384r1_uint1 x779;
+ uint32_t x780;
+ uint32_t x781;
+ fiat_secp384r1_uint1 x782;
+ uint32_t x783;
+ fiat_secp384r1_uint1 x784;
+ uint32_t x785;
+ fiat_secp384r1_uint1 x786;
+ uint32_t x787;
+ fiat_secp384r1_uint1 x788;
+ uint32_t x789;
+ fiat_secp384r1_uint1 x790;
+ uint32_t x791;
+ fiat_secp384r1_uint1 x792;
+ uint32_t x793;
+ fiat_secp384r1_uint1 x794;
+ uint32_t x795;
+ fiat_secp384r1_uint1 x796;
+ uint32_t x797;
+ fiat_secp384r1_uint1 x798;
+ uint32_t x799;
+ fiat_secp384r1_uint1 x800;
+ uint32_t x801;
+ fiat_secp384r1_uint1 x802;
+ uint32_t x803;
+ fiat_secp384r1_uint1 x804;
+ uint32_t x805;
+ fiat_secp384r1_uint1 x806;
+ uint32_t x807;
+ uint32_t x808;
+ uint32_t x809;
+ uint32_t x810;
+ uint32_t x811;
+ uint32_t x812;
+ uint32_t x813;
+ uint32_t x814;
+ uint32_t x815;
+ uint32_t x816;
+ uint32_t x817;
+ uint32_t x818;
+ uint32_t x819;
+ uint32_t x820;
+ uint32_t x821;
+ uint32_t x822;
+ uint32_t x823;
+ uint32_t x824;
+ uint32_t x825;
+ uint32_t x826;
+ uint32_t x827;
+ uint32_t x828;
+ uint32_t x829;
+ uint32_t x830;
+ uint32_t x831;
+ uint32_t x832;
+ fiat_secp384r1_uint1 x833;
+ uint32_t x834;
+ fiat_secp384r1_uint1 x835;
+ uint32_t x836;
+ fiat_secp384r1_uint1 x837;
+ uint32_t x838;
+ fiat_secp384r1_uint1 x839;
+ uint32_t x840;
+ fiat_secp384r1_uint1 x841;
+ uint32_t x842;
+ fiat_secp384r1_uint1 x843;
+ uint32_t x844;
+ fiat_secp384r1_uint1 x845;
+ uint32_t x846;
+ fiat_secp384r1_uint1 x847;
+ uint32_t x848;
+ fiat_secp384r1_uint1 x849;
+ uint32_t x850;
+ fiat_secp384r1_uint1 x851;
+ uint32_t x852;
+ fiat_secp384r1_uint1 x853;
+ uint32_t x854;
+ uint32_t x855;
+ fiat_secp384r1_uint1 x856;
+ uint32_t x857;
+ fiat_secp384r1_uint1 x858;
+ uint32_t x859;
+ fiat_secp384r1_uint1 x860;
+ uint32_t x861;
+ fiat_secp384r1_uint1 x862;
+ uint32_t x863;
+ fiat_secp384r1_uint1 x864;
+ uint32_t x865;
+ fiat_secp384r1_uint1 x866;
+ uint32_t x867;
+ fiat_secp384r1_uint1 x868;
+ uint32_t x869;
+ fiat_secp384r1_uint1 x870;
+ uint32_t x871;
+ fiat_secp384r1_uint1 x872;
+ uint32_t x873;
+ fiat_secp384r1_uint1 x874;
+ uint32_t x875;
+ fiat_secp384r1_uint1 x876;
+ uint32_t x877;
+ fiat_secp384r1_uint1 x878;
+ uint32_t x879;
+ fiat_secp384r1_uint1 x880;
+ uint32_t x881;
+ uint32_t x882;
+ uint32_t x883;
+ uint32_t x884;
+ uint32_t x885;
+ uint32_t x886;
+ uint32_t x887;
+ uint32_t x888;
+ uint32_t x889;
+ uint32_t x890;
+ uint32_t x891;
+ uint32_t x892;
+ uint32_t x893;
+ uint32_t x894;
+ uint32_t x895;
+ uint32_t x896;
+ uint32_t x897;
+ uint32_t x898;
+ uint32_t x899;
+ uint32_t x900;
+ uint32_t x901;
+ fiat_secp384r1_uint1 x902;
+ uint32_t x903;
+ fiat_secp384r1_uint1 x904;
+ uint32_t x905;
+ fiat_secp384r1_uint1 x906;
+ uint32_t x907;
+ fiat_secp384r1_uint1 x908;
+ uint32_t x909;
+ fiat_secp384r1_uint1 x910;
+ uint32_t x911;
+ fiat_secp384r1_uint1 x912;
+ uint32_t x913;
+ fiat_secp384r1_uint1 x914;
+ uint32_t x915;
+ fiat_secp384r1_uint1 x916;
+ uint32_t x917;
+ uint32_t x918;
+ fiat_secp384r1_uint1 x919;
+ uint32_t x920;
+ fiat_secp384r1_uint1 x921;
+ uint32_t x922;
+ fiat_secp384r1_uint1 x923;
+ uint32_t x924;
+ fiat_secp384r1_uint1 x925;
+ uint32_t x926;
+ fiat_secp384r1_uint1 x927;
+ uint32_t x928;
+ fiat_secp384r1_uint1 x929;
+ uint32_t x930;
+ fiat_secp384r1_uint1 x931;
+ uint32_t x932;
+ fiat_secp384r1_uint1 x933;
+ uint32_t x934;
+ fiat_secp384r1_uint1 x935;
+ uint32_t x936;
+ fiat_secp384r1_uint1 x937;
+ uint32_t x938;
+ fiat_secp384r1_uint1 x939;
+ uint32_t x940;
+ fiat_secp384r1_uint1 x941;
+ uint32_t x942;
+ fiat_secp384r1_uint1 x943;
+ uint32_t x944;
+ uint32_t x945;
+ uint32_t x946;
+ uint32_t x947;
+ uint32_t x948;
+ uint32_t x949;
+ uint32_t x950;
+ uint32_t x951;
+ uint32_t x952;
+ uint32_t x953;
+ uint32_t x954;
+ uint32_t x955;
+ uint32_t x956;
+ uint32_t x957;
+ uint32_t x958;
+ uint32_t x959;
+ uint32_t x960;
+ uint32_t x961;
+ uint32_t x962;
+ uint32_t x963;
+ uint32_t x964;
+ uint32_t x965;
+ uint32_t x966;
+ uint32_t x967;
+ uint32_t x968;
+ uint32_t x969;
+ fiat_secp384r1_uint1 x970;
+ uint32_t x971;
+ fiat_secp384r1_uint1 x972;
+ uint32_t x973;
+ fiat_secp384r1_uint1 x974;
+ uint32_t x975;
+ fiat_secp384r1_uint1 x976;
+ uint32_t x977;
+ fiat_secp384r1_uint1 x978;
+ uint32_t x979;
+ fiat_secp384r1_uint1 x980;
+ uint32_t x981;
+ fiat_secp384r1_uint1 x982;
+ uint32_t x983;
+ fiat_secp384r1_uint1 x984;
+ uint32_t x985;
+ fiat_secp384r1_uint1 x986;
+ uint32_t x987;
+ fiat_secp384r1_uint1 x988;
+ uint32_t x989;
+ fiat_secp384r1_uint1 x990;
+ uint32_t x991;
+ uint32_t x992;
+ fiat_secp384r1_uint1 x993;
+ uint32_t x994;
+ fiat_secp384r1_uint1 x995;
+ uint32_t x996;
+ fiat_secp384r1_uint1 x997;
+ uint32_t x998;
+ fiat_secp384r1_uint1 x999;
+ uint32_t x1000;
+ fiat_secp384r1_uint1 x1001;
+ uint32_t x1002;
+ fiat_secp384r1_uint1 x1003;
+ uint32_t x1004;
+ fiat_secp384r1_uint1 x1005;
+ uint32_t x1006;
+ fiat_secp384r1_uint1 x1007;
+ uint32_t x1008;
+ fiat_secp384r1_uint1 x1009;
+ uint32_t x1010;
+ fiat_secp384r1_uint1 x1011;
+ uint32_t x1012;
+ fiat_secp384r1_uint1 x1013;
+ uint32_t x1014;
+ fiat_secp384r1_uint1 x1015;
+ uint32_t x1016;
+ fiat_secp384r1_uint1 x1017;
+ uint32_t x1018;
+ uint32_t x1019;
+ uint32_t x1020;
+ uint32_t x1021;
+ uint32_t x1022;
+ uint32_t x1023;
+ uint32_t x1024;
+ uint32_t x1025;
+ uint32_t x1026;
+ uint32_t x1027;
+ uint32_t x1028;
+ uint32_t x1029;
+ uint32_t x1030;
+ uint32_t x1031;
+ uint32_t x1032;
+ uint32_t x1033;
+ uint32_t x1034;
+ uint32_t x1035;
+ uint32_t x1036;
+ uint32_t x1037;
+ uint32_t x1038;
+ fiat_secp384r1_uint1 x1039;
+ uint32_t x1040;
+ fiat_secp384r1_uint1 x1041;
+ uint32_t x1042;
+ fiat_secp384r1_uint1 x1043;
+ uint32_t x1044;
+ fiat_secp384r1_uint1 x1045;
+ uint32_t x1046;
+ fiat_secp384r1_uint1 x1047;
+ uint32_t x1048;
+ fiat_secp384r1_uint1 x1049;
+ uint32_t x1050;
+ fiat_secp384r1_uint1 x1051;
+ uint32_t x1052;
+ fiat_secp384r1_uint1 x1053;
+ uint32_t x1054;
+ uint32_t x1055;
+ fiat_secp384r1_uint1 x1056;
+ uint32_t x1057;
+ fiat_secp384r1_uint1 x1058;
+ uint32_t x1059;
+ fiat_secp384r1_uint1 x1060;
+ uint32_t x1061;
+ fiat_secp384r1_uint1 x1062;
+ uint32_t x1063;
+ fiat_secp384r1_uint1 x1064;
+ uint32_t x1065;
+ fiat_secp384r1_uint1 x1066;
+ uint32_t x1067;
+ fiat_secp384r1_uint1 x1068;
+ uint32_t x1069;
+ fiat_secp384r1_uint1 x1070;
+ uint32_t x1071;
+ fiat_secp384r1_uint1 x1072;
+ uint32_t x1073;
+ fiat_secp384r1_uint1 x1074;
+ uint32_t x1075;
+ fiat_secp384r1_uint1 x1076;
+ uint32_t x1077;
+ fiat_secp384r1_uint1 x1078;
+ uint32_t x1079;
+ fiat_secp384r1_uint1 x1080;
+ uint32_t x1081;
+ uint32_t x1082;
+ uint32_t x1083;
+ uint32_t x1084;
+ uint32_t x1085;
+ uint32_t x1086;
+ uint32_t x1087;
+ uint32_t x1088;
+ uint32_t x1089;
+ uint32_t x1090;
+ uint32_t x1091;
+ uint32_t x1092;
+ uint32_t x1093;
+ uint32_t x1094;
+ uint32_t x1095;
+ uint32_t x1096;
+ uint32_t x1097;
+ uint32_t x1098;
+ uint32_t x1099;
+ uint32_t x1100;
+ uint32_t x1101;
+ uint32_t x1102;
+ uint32_t x1103;
+ uint32_t x1104;
+ uint32_t x1105;
+ uint32_t x1106;
+ fiat_secp384r1_uint1 x1107;
+ uint32_t x1108;
+ fiat_secp384r1_uint1 x1109;
+ uint32_t x1110;
+ fiat_secp384r1_uint1 x1111;
+ uint32_t x1112;
+ fiat_secp384r1_uint1 x1113;
+ uint32_t x1114;
+ fiat_secp384r1_uint1 x1115;
+ uint32_t x1116;
+ fiat_secp384r1_uint1 x1117;
+ uint32_t x1118;
+ fiat_secp384r1_uint1 x1119;
+ uint32_t x1120;
+ fiat_secp384r1_uint1 x1121;
+ uint32_t x1122;
+ fiat_secp384r1_uint1 x1123;
+ uint32_t x1124;
+ fiat_secp384r1_uint1 x1125;
+ uint32_t x1126;
+ fiat_secp384r1_uint1 x1127;
+ uint32_t x1128;
+ uint32_t x1129;
+ fiat_secp384r1_uint1 x1130;
+ uint32_t x1131;
+ fiat_secp384r1_uint1 x1132;
+ uint32_t x1133;
+ fiat_secp384r1_uint1 x1134;
+ uint32_t x1135;
+ fiat_secp384r1_uint1 x1136;
+ uint32_t x1137;
+ fiat_secp384r1_uint1 x1138;
+ uint32_t x1139;
+ fiat_secp384r1_uint1 x1140;
+ uint32_t x1141;
+ fiat_secp384r1_uint1 x1142;
+ uint32_t x1143;
+ fiat_secp384r1_uint1 x1144;
+ uint32_t x1145;
+ fiat_secp384r1_uint1 x1146;
+ uint32_t x1147;
+ fiat_secp384r1_uint1 x1148;
+ uint32_t x1149;
+ fiat_secp384r1_uint1 x1150;
+ uint32_t x1151;
+ fiat_secp384r1_uint1 x1152;
+ uint32_t x1153;
+ fiat_secp384r1_uint1 x1154;
+ uint32_t x1155;
+ uint32_t x1156;
+ uint32_t x1157;
+ uint32_t x1158;
+ uint32_t x1159;
+ uint32_t x1160;
+ uint32_t x1161;
+ uint32_t x1162;
+ uint32_t x1163;
+ uint32_t x1164;
+ uint32_t x1165;
+ uint32_t x1166;
+ uint32_t x1167;
+ uint32_t x1168;
+ uint32_t x1169;
+ uint32_t x1170;
+ uint32_t x1171;
+ uint32_t x1172;
+ uint32_t x1173;
+ uint32_t x1174;
+ uint32_t x1175;
+ fiat_secp384r1_uint1 x1176;
+ uint32_t x1177;
+ fiat_secp384r1_uint1 x1178;
+ uint32_t x1179;
+ fiat_secp384r1_uint1 x1180;
+ uint32_t x1181;
+ fiat_secp384r1_uint1 x1182;
+ uint32_t x1183;
+ fiat_secp384r1_uint1 x1184;
+ uint32_t x1185;
+ fiat_secp384r1_uint1 x1186;
+ uint32_t x1187;
+ fiat_secp384r1_uint1 x1188;
+ uint32_t x1189;
+ fiat_secp384r1_uint1 x1190;
+ uint32_t x1191;
+ uint32_t x1192;
+ fiat_secp384r1_uint1 x1193;
+ uint32_t x1194;
+ fiat_secp384r1_uint1 x1195;
+ uint32_t x1196;
+ fiat_secp384r1_uint1 x1197;
+ uint32_t x1198;
+ fiat_secp384r1_uint1 x1199;
+ uint32_t x1200;
+ fiat_secp384r1_uint1 x1201;
+ uint32_t x1202;
+ fiat_secp384r1_uint1 x1203;
+ uint32_t x1204;
+ fiat_secp384r1_uint1 x1205;
+ uint32_t x1206;
+ fiat_secp384r1_uint1 x1207;
+ uint32_t x1208;
+ fiat_secp384r1_uint1 x1209;
+ uint32_t x1210;
+ fiat_secp384r1_uint1 x1211;
+ uint32_t x1212;
+ fiat_secp384r1_uint1 x1213;
+ uint32_t x1214;
+ fiat_secp384r1_uint1 x1215;
+ uint32_t x1216;
+ fiat_secp384r1_uint1 x1217;
+ uint32_t x1218;
+ uint32_t x1219;
+ uint32_t x1220;
+ uint32_t x1221;
+ uint32_t x1222;
+ uint32_t x1223;
+ uint32_t x1224;
+ uint32_t x1225;
+ uint32_t x1226;
+ uint32_t x1227;
+ uint32_t x1228;
+ uint32_t x1229;
+ uint32_t x1230;
+ uint32_t x1231;
+ uint32_t x1232;
+ uint32_t x1233;
+ uint32_t x1234;
+ uint32_t x1235;
+ uint32_t x1236;
+ uint32_t x1237;
+ uint32_t x1238;
+ uint32_t x1239;
+ uint32_t x1240;
+ uint32_t x1241;
+ uint32_t x1242;
+ uint32_t x1243;
+ fiat_secp384r1_uint1 x1244;
+ uint32_t x1245;
+ fiat_secp384r1_uint1 x1246;
+ uint32_t x1247;
+ fiat_secp384r1_uint1 x1248;
+ uint32_t x1249;
+ fiat_secp384r1_uint1 x1250;
+ uint32_t x1251;
+ fiat_secp384r1_uint1 x1252;
+ uint32_t x1253;
+ fiat_secp384r1_uint1 x1254;
+ uint32_t x1255;
+ fiat_secp384r1_uint1 x1256;
+ uint32_t x1257;
+ fiat_secp384r1_uint1 x1258;
+ uint32_t x1259;
+ fiat_secp384r1_uint1 x1260;
+ uint32_t x1261;
+ fiat_secp384r1_uint1 x1262;
+ uint32_t x1263;
+ fiat_secp384r1_uint1 x1264;
+ uint32_t x1265;
+ uint32_t x1266;
+ fiat_secp384r1_uint1 x1267;
+ uint32_t x1268;
+ fiat_secp384r1_uint1 x1269;
+ uint32_t x1270;
+ fiat_secp384r1_uint1 x1271;
+ uint32_t x1272;
+ fiat_secp384r1_uint1 x1273;
+ uint32_t x1274;
+ fiat_secp384r1_uint1 x1275;
+ uint32_t x1276;
+ fiat_secp384r1_uint1 x1277;
+ uint32_t x1278;
+ fiat_secp384r1_uint1 x1279;
+ uint32_t x1280;
+ fiat_secp384r1_uint1 x1281;
+ uint32_t x1282;
+ fiat_secp384r1_uint1 x1283;
+ uint32_t x1284;
+ fiat_secp384r1_uint1 x1285;
+ uint32_t x1286;
+ fiat_secp384r1_uint1 x1287;
+ uint32_t x1288;
+ fiat_secp384r1_uint1 x1289;
+ uint32_t x1290;
+ fiat_secp384r1_uint1 x1291;
+ uint32_t x1292;
+ uint32_t x1293;
+ uint32_t x1294;
+ uint32_t x1295;
+ uint32_t x1296;
+ uint32_t x1297;
+ uint32_t x1298;
+ uint32_t x1299;
+ uint32_t x1300;
+ uint32_t x1301;
+ uint32_t x1302;
+ uint32_t x1303;
+ uint32_t x1304;
+ uint32_t x1305;
+ uint32_t x1306;
+ uint32_t x1307;
+ uint32_t x1308;
+ uint32_t x1309;
+ uint32_t x1310;
+ uint32_t x1311;
+ uint32_t x1312;
+ fiat_secp384r1_uint1 x1313;
+ uint32_t x1314;
+ fiat_secp384r1_uint1 x1315;
+ uint32_t x1316;
+ fiat_secp384r1_uint1 x1317;
+ uint32_t x1318;
+ fiat_secp384r1_uint1 x1319;
+ uint32_t x1320;
+ fiat_secp384r1_uint1 x1321;
+ uint32_t x1322;
+ fiat_secp384r1_uint1 x1323;
+ uint32_t x1324;
+ fiat_secp384r1_uint1 x1325;
+ uint32_t x1326;
+ fiat_secp384r1_uint1 x1327;
+ uint32_t x1328;
+ uint32_t x1329;
+ fiat_secp384r1_uint1 x1330;
+ uint32_t x1331;
+ fiat_secp384r1_uint1 x1332;
+ uint32_t x1333;
+ fiat_secp384r1_uint1 x1334;
+ uint32_t x1335;
+ fiat_secp384r1_uint1 x1336;
+ uint32_t x1337;
+ fiat_secp384r1_uint1 x1338;
+ uint32_t x1339;
+ fiat_secp384r1_uint1 x1340;
+ uint32_t x1341;
+ fiat_secp384r1_uint1 x1342;
+ uint32_t x1343;
+ fiat_secp384r1_uint1 x1344;
+ uint32_t x1345;
+ fiat_secp384r1_uint1 x1346;
+ uint32_t x1347;
+ fiat_secp384r1_uint1 x1348;
+ uint32_t x1349;
+ fiat_secp384r1_uint1 x1350;
+ uint32_t x1351;
+ fiat_secp384r1_uint1 x1352;
+ uint32_t x1353;
+ fiat_secp384r1_uint1 x1354;
+ uint32_t x1355;
+ uint32_t x1356;
+ uint32_t x1357;
+ uint32_t x1358;
+ uint32_t x1359;
+ uint32_t x1360;
+ uint32_t x1361;
+ uint32_t x1362;
+ uint32_t x1363;
+ uint32_t x1364;
+ uint32_t x1365;
+ uint32_t x1366;
+ uint32_t x1367;
+ uint32_t x1368;
+ uint32_t x1369;
+ uint32_t x1370;
+ uint32_t x1371;
+ uint32_t x1372;
+ uint32_t x1373;
+ uint32_t x1374;
+ uint32_t x1375;
+ uint32_t x1376;
+ uint32_t x1377;
+ uint32_t x1378;
+ uint32_t x1379;
+ uint32_t x1380;
+ fiat_secp384r1_uint1 x1381;
+ uint32_t x1382;
+ fiat_secp384r1_uint1 x1383;
+ uint32_t x1384;
+ fiat_secp384r1_uint1 x1385;
+ uint32_t x1386;
+ fiat_secp384r1_uint1 x1387;
+ uint32_t x1388;
+ fiat_secp384r1_uint1 x1389;
+ uint32_t x1390;
+ fiat_secp384r1_uint1 x1391;
+ uint32_t x1392;
+ fiat_secp384r1_uint1 x1393;
+ uint32_t x1394;
+ fiat_secp384r1_uint1 x1395;
+ uint32_t x1396;
+ fiat_secp384r1_uint1 x1397;
+ uint32_t x1398;
+ fiat_secp384r1_uint1 x1399;
+ uint32_t x1400;
+ fiat_secp384r1_uint1 x1401;
+ uint32_t x1402;
+ uint32_t x1403;
+ fiat_secp384r1_uint1 x1404;
+ uint32_t x1405;
+ fiat_secp384r1_uint1 x1406;
+ uint32_t x1407;
+ fiat_secp384r1_uint1 x1408;
+ uint32_t x1409;
+ fiat_secp384r1_uint1 x1410;
+ uint32_t x1411;
+ fiat_secp384r1_uint1 x1412;
+ uint32_t x1413;
+ fiat_secp384r1_uint1 x1414;
+ uint32_t x1415;
+ fiat_secp384r1_uint1 x1416;
+ uint32_t x1417;
+ fiat_secp384r1_uint1 x1418;
+ uint32_t x1419;
+ fiat_secp384r1_uint1 x1420;
+ uint32_t x1421;
+ fiat_secp384r1_uint1 x1422;
+ uint32_t x1423;
+ fiat_secp384r1_uint1 x1424;
+ uint32_t x1425;
+ fiat_secp384r1_uint1 x1426;
+ uint32_t x1427;
+ fiat_secp384r1_uint1 x1428;
+ uint32_t x1429;
+ uint32_t x1430;
+ uint32_t x1431;
+ uint32_t x1432;
+ uint32_t x1433;
+ uint32_t x1434;
+ uint32_t x1435;
+ uint32_t x1436;
+ uint32_t x1437;
+ uint32_t x1438;
+ uint32_t x1439;
+ uint32_t x1440;
+ uint32_t x1441;
+ uint32_t x1442;
+ uint32_t x1443;
+ uint32_t x1444;
+ uint32_t x1445;
+ uint32_t x1446;
+ uint32_t x1447;
+ uint32_t x1448;
+ uint32_t x1449;
+ fiat_secp384r1_uint1 x1450;
+ uint32_t x1451;
+ fiat_secp384r1_uint1 x1452;
+ uint32_t x1453;
+ fiat_secp384r1_uint1 x1454;
+ uint32_t x1455;
+ fiat_secp384r1_uint1 x1456;
+ uint32_t x1457;
+ fiat_secp384r1_uint1 x1458;
+ uint32_t x1459;
+ fiat_secp384r1_uint1 x1460;
+ uint32_t x1461;
+ fiat_secp384r1_uint1 x1462;
+ uint32_t x1463;
+ fiat_secp384r1_uint1 x1464;
+ uint32_t x1465;
+ uint32_t x1466;
+ fiat_secp384r1_uint1 x1467;
+ uint32_t x1468;
+ fiat_secp384r1_uint1 x1469;
+ uint32_t x1470;
+ fiat_secp384r1_uint1 x1471;
+ uint32_t x1472;
+ fiat_secp384r1_uint1 x1473;
+ uint32_t x1474;
+ fiat_secp384r1_uint1 x1475;
+ uint32_t x1476;
+ fiat_secp384r1_uint1 x1477;
+ uint32_t x1478;
+ fiat_secp384r1_uint1 x1479;
+ uint32_t x1480;
+ fiat_secp384r1_uint1 x1481;
+ uint32_t x1482;
+ fiat_secp384r1_uint1 x1483;
+ uint32_t x1484;
+ fiat_secp384r1_uint1 x1485;
+ uint32_t x1486;
+ fiat_secp384r1_uint1 x1487;
+ uint32_t x1488;
+ fiat_secp384r1_uint1 x1489;
+ uint32_t x1490;
+ fiat_secp384r1_uint1 x1491;
+ uint32_t x1492;
+ uint32_t x1493;
+ uint32_t x1494;
+ uint32_t x1495;
+ uint32_t x1496;
+ uint32_t x1497;
+ uint32_t x1498;
+ uint32_t x1499;
+ uint32_t x1500;
+ uint32_t x1501;
+ uint32_t x1502;
+ uint32_t x1503;
+ uint32_t x1504;
+ uint32_t x1505;
+ uint32_t x1506;
+ uint32_t x1507;
+ uint32_t x1508;
+ uint32_t x1509;
+ uint32_t x1510;
+ uint32_t x1511;
+ uint32_t x1512;
+ uint32_t x1513;
+ uint32_t x1514;
+ uint32_t x1515;
+ uint32_t x1516;
+ uint32_t x1517;
+ fiat_secp384r1_uint1 x1518;
+ uint32_t x1519;
+ fiat_secp384r1_uint1 x1520;
+ uint32_t x1521;
+ fiat_secp384r1_uint1 x1522;
+ uint32_t x1523;
+ fiat_secp384r1_uint1 x1524;
+ uint32_t x1525;
+ fiat_secp384r1_uint1 x1526;
+ uint32_t x1527;
+ fiat_secp384r1_uint1 x1528;
+ uint32_t x1529;
+ fiat_secp384r1_uint1 x1530;
+ uint32_t x1531;
+ fiat_secp384r1_uint1 x1532;
+ uint32_t x1533;
+ fiat_secp384r1_uint1 x1534;
+ uint32_t x1535;
+ fiat_secp384r1_uint1 x1536;
+ uint32_t x1537;
+ fiat_secp384r1_uint1 x1538;
+ uint32_t x1539;
+ uint32_t x1540;
+ fiat_secp384r1_uint1 x1541;
+ uint32_t x1542;
+ fiat_secp384r1_uint1 x1543;
+ uint32_t x1544;
+ fiat_secp384r1_uint1 x1545;
+ uint32_t x1546;
+ fiat_secp384r1_uint1 x1547;
+ uint32_t x1548;
+ fiat_secp384r1_uint1 x1549;
+ uint32_t x1550;
+ fiat_secp384r1_uint1 x1551;
+ uint32_t x1552;
+ fiat_secp384r1_uint1 x1553;
+ uint32_t x1554;
+ fiat_secp384r1_uint1 x1555;
+ uint32_t x1556;
+ fiat_secp384r1_uint1 x1557;
+ uint32_t x1558;
+ fiat_secp384r1_uint1 x1559;
+ uint32_t x1560;
+ fiat_secp384r1_uint1 x1561;
+ uint32_t x1562;
+ fiat_secp384r1_uint1 x1563;
+ uint32_t x1564;
+ fiat_secp384r1_uint1 x1565;
+ uint32_t x1566;
+ uint32_t x1567;
+ uint32_t x1568;
+ uint32_t x1569;
+ uint32_t x1570;
+ uint32_t x1571;
+ uint32_t x1572;
+ uint32_t x1573;
+ uint32_t x1574;
+ uint32_t x1575;
+ uint32_t x1576;
+ uint32_t x1577;
+ uint32_t x1578;
+ uint32_t x1579;
+ uint32_t x1580;
+ uint32_t x1581;
+ uint32_t x1582;
+ uint32_t x1583;
+ uint32_t x1584;
+ uint32_t x1585;
+ uint32_t x1586;
+ fiat_secp384r1_uint1 x1587;
+ uint32_t x1588;
+ fiat_secp384r1_uint1 x1589;
+ uint32_t x1590;
+ fiat_secp384r1_uint1 x1591;
+ uint32_t x1592;
+ fiat_secp384r1_uint1 x1593;
+ uint32_t x1594;
+ fiat_secp384r1_uint1 x1595;
+ uint32_t x1596;
+ fiat_secp384r1_uint1 x1597;
+ uint32_t x1598;
+ fiat_secp384r1_uint1 x1599;
+ uint32_t x1600;
+ fiat_secp384r1_uint1 x1601;
+ uint32_t x1602;
+ uint32_t x1603;
+ fiat_secp384r1_uint1 x1604;
+ uint32_t x1605;
+ fiat_secp384r1_uint1 x1606;
+ uint32_t x1607;
+ fiat_secp384r1_uint1 x1608;
+ uint32_t x1609;
+ fiat_secp384r1_uint1 x1610;
+ uint32_t x1611;
+ fiat_secp384r1_uint1 x1612;
+ uint32_t x1613;
+ fiat_secp384r1_uint1 x1614;
+ uint32_t x1615;
+ fiat_secp384r1_uint1 x1616;
+ uint32_t x1617;
+ fiat_secp384r1_uint1 x1618;
+ uint32_t x1619;
+ fiat_secp384r1_uint1 x1620;
+ uint32_t x1621;
+ fiat_secp384r1_uint1 x1622;
+ uint32_t x1623;
+ fiat_secp384r1_uint1 x1624;
+ uint32_t x1625;
+ fiat_secp384r1_uint1 x1626;
+ uint32_t x1627;
+ fiat_secp384r1_uint1 x1628;
+ uint32_t x1629;
+ uint32_t x1630;
+ fiat_secp384r1_uint1 x1631;
+ uint32_t x1632;
+ fiat_secp384r1_uint1 x1633;
+ uint32_t x1634;
+ fiat_secp384r1_uint1 x1635;
+ uint32_t x1636;
+ fiat_secp384r1_uint1 x1637;
+ uint32_t x1638;
+ fiat_secp384r1_uint1 x1639;
+ uint32_t x1640;
+ fiat_secp384r1_uint1 x1641;
+ uint32_t x1642;
+ fiat_secp384r1_uint1 x1643;
+ uint32_t x1644;
+ fiat_secp384r1_uint1 x1645;
+ uint32_t x1646;
+ fiat_secp384r1_uint1 x1647;
+ uint32_t x1648;
+ fiat_secp384r1_uint1 x1649;
+ uint32_t x1650;
+ fiat_secp384r1_uint1 x1651;
+ uint32_t x1652;
+ fiat_secp384r1_uint1 x1653;
+ uint32_t x1654;
+ fiat_secp384r1_uint1 x1655;
+ uint32_t x1656;
+ uint32_t x1657;
+ uint32_t x1658;
+ uint32_t x1659;
+ uint32_t x1660;
+ uint32_t x1661;
+ uint32_t x1662;
+ uint32_t x1663;
+ uint32_t x1664;
+ uint32_t x1665;
+ uint32_t x1666;
+ uint32_t x1667;
+ x1 = (arg1[1]);
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[6]);
+ x7 = (arg1[7]);
+ x8 = (arg1[8]);
+ x9 = (arg1[9]);
+ x10 = (arg1[10]);
+ x11 = (arg1[11]);
+ x12 = (arg1[0]);
+ fiat_secp384r1_mulx_u32(&x13, &x14, x12, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x15, &x16, x12, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x17, &x18, x12, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x19, &x20, x12, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x21, &x22, x12, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x23, &x24, x12, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x25, &x26, x12, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x27, &x28, x12, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x29, &x30, x12, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x31, &x32, x12, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x33, &x34, x12, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x35, &x36, x12, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x37, &x38, 0x0, x36, x33);
+ fiat_secp384r1_addcarryx_u32(&x39, &x40, x38, x34, x31);
+ fiat_secp384r1_addcarryx_u32(&x41, &x42, x40, x32, x29);
+ fiat_secp384r1_addcarryx_u32(&x43, &x44, x42, x30, x27);
+ fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x28, x25);
+ fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x26, x23);
+ fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x24, x21);
+ fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x22, x19);
+ fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x20, x17);
+ fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x18, x15);
+ fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x16, x13);
+ x59 = (x58 + x14);
+ fiat_secp384r1_mulx_u32(&x60, &x61, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x62, &x63, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x64, &x65, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x66, &x67, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x68, &x69, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x70, &x71, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x72, &x73, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x74, &x75, x35, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x76, &x77, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x78, &x79, x35, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x80, &x81, 0x0, x77, x74);
+ fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x75, x72);
+ fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x73, x70);
+ fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x71, x68);
+ fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x69, x66);
+ fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x67, x64);
+ fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x65, x62);
+ fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x63, x60);
+ x96 = (x95 + x61);
+ fiat_secp384r1_addcarryx_u32(&x97, &x98, 0x0, x35, x78);
+ fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x37, x79);
+ fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x39, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x41, x76);
+ fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x43, x80);
+ fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x45, x82);
+ fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x47, x84);
+ fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x49, x86);
+ fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x51, x88);
+ fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x53, x90);
+ fiat_secp384r1_addcarryx_u32(&x117, &x118, x116, x55, x92);
+ fiat_secp384r1_addcarryx_u32(&x119, &x120, x118, x57, x94);
+ fiat_secp384r1_addcarryx_u32(&x121, &x122, x120, x59, x96);
+ fiat_secp384r1_mulx_u32(&x123, &x124, x1, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x125, &x126, x1, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x127, &x128, x1, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x129, &x130, x1, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x131, &x132, x1, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x133, &x134, x1, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x135, &x136, x1, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x137, &x138, x1, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x139, &x140, x1, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x141, &x142, x1, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x143, &x144, x1, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x145, &x146, x1, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x147, &x148, 0x0, x146, x143);
+ fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x144, x141);
+ fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x142, x139);
+ fiat_secp384r1_addcarryx_u32(&x153, &x154, x152, x140, x137);
+ fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x138, x135);
+ fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x136, x133);
+ fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x134, x131);
+ fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x132, x129);
+ fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x130, x127);
+ fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x128, x125);
+ fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x126, x123);
+ x169 = (x168 + x124);
+ fiat_secp384r1_addcarryx_u32(&x170, &x171, 0x0, x99, x145);
+ fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x101, x147);
+ fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x103, x149);
+ fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x105, x151);
+ fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x107, x153);
+ fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x109, x155);
+ fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x111, x157);
+ fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x113, x159);
+ fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x115, x161);
+ fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, x117, x163);
+ fiat_secp384r1_addcarryx_u32(&x190, &x191, x189, x119, x165);
+ fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x121, x167);
+ fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x122, x169);
+ fiat_secp384r1_mulx_u32(&x196, &x197, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x198, &x199, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x200, &x201, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x202, &x203, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x204, &x205, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x206, &x207, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x208, &x209, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x210, &x211, x170, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x212, &x213, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x214, &x215, x170, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x216, &x217, 0x0, x213, x210);
+ fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x211, x208);
+ fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x209, x206);
+ fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x207, x204);
+ fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x205, x202);
+ fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x203, x200);
+ fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x201, x198);
+ fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x199, x196);
+ x232 = (x231 + x197);
+ fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x170, x214);
+ fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x172, x215);
+ fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x174, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x176, x212);
+ fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x178, x216);
+ fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x180, x218);
+ fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x182, x220);
+ fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x184, x222);
+ fiat_secp384r1_addcarryx_u32(&x249, &x250, x248, x186, x224);
+ fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x188, x226);
+ fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x190, x228);
+ fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x192, x230);
+ fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x194, x232);
+ x259 = ((uint32_t)x258 + x195);
+ fiat_secp384r1_mulx_u32(&x260, &x261, x2, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x262, &x263, x2, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x264, &x265, x2, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x266, &x267, x2, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x268, &x269, x2, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x270, &x271, x2, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x272, &x273, x2, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x274, &x275, x2, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x276, &x277, x2, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x278, &x279, x2, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x280, &x281, x2, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x282, &x283, x2, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x284, &x285, 0x0, x283, x280);
+ fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x281, x278);
+ fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x279, x276);
+ fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x277, x274);
+ fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x275, x272);
+ fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x273, x270);
+ fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x271, x268);
+ fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x269, x266);
+ fiat_secp384r1_addcarryx_u32(&x300, &x301, x299, x267, x264);
+ fiat_secp384r1_addcarryx_u32(&x302, &x303, x301, x265, x262);
+ fiat_secp384r1_addcarryx_u32(&x304, &x305, x303, x263, x260);
+ x306 = (x305 + x261);
+ fiat_secp384r1_addcarryx_u32(&x307, &x308, 0x0, x235, x282);
+ fiat_secp384r1_addcarryx_u32(&x309, &x310, x308, x237, x284);
+ fiat_secp384r1_addcarryx_u32(&x311, &x312, x310, x239, x286);
+ fiat_secp384r1_addcarryx_u32(&x313, &x314, x312, x241, x288);
+ fiat_secp384r1_addcarryx_u32(&x315, &x316, x314, x243, x290);
+ fiat_secp384r1_addcarryx_u32(&x317, &x318, x316, x245, x292);
+ fiat_secp384r1_addcarryx_u32(&x319, &x320, x318, x247, x294);
+ fiat_secp384r1_addcarryx_u32(&x321, &x322, x320, x249, x296);
+ fiat_secp384r1_addcarryx_u32(&x323, &x324, x322, x251, x298);
+ fiat_secp384r1_addcarryx_u32(&x325, &x326, x324, x253, x300);
+ fiat_secp384r1_addcarryx_u32(&x327, &x328, x326, x255, x302);
+ fiat_secp384r1_addcarryx_u32(&x329, &x330, x328, x257, x304);
+ fiat_secp384r1_addcarryx_u32(&x331, &x332, x330, x259, x306);
+ fiat_secp384r1_mulx_u32(&x333, &x334, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x335, &x336, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x337, &x338, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x339, &x340, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x341, &x342, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x343, &x344, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x345, &x346, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x347, &x348, x307, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x349, &x350, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x351, &x352, x307, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x353, &x354, 0x0, x350, x347);
+ fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x348, x345);
+ fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x346, x343);
+ fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x344, x341);
+ fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x342, x339);
+ fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x340, x337);
+ fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x338, x335);
+ fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x336, x333);
+ x369 = (x368 + x334);
+ fiat_secp384r1_addcarryx_u32(&x370, &x371, 0x0, x307, x351);
+ fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x309, x352);
+ fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x311, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x313, x349);
+ fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x315, x353);
+ fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x317, x355);
+ fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x319, x357);
+ fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x321, x359);
+ fiat_secp384r1_addcarryx_u32(&x386, &x387, x385, x323, x361);
+ fiat_secp384r1_addcarryx_u32(&x388, &x389, x387, x325, x363);
+ fiat_secp384r1_addcarryx_u32(&x390, &x391, x389, x327, x365);
+ fiat_secp384r1_addcarryx_u32(&x392, &x393, x391, x329, x367);
+ fiat_secp384r1_addcarryx_u32(&x394, &x395, x393, x331, x369);
+ x396 = ((uint32_t)x395 + x332);
+ fiat_secp384r1_mulx_u32(&x397, &x398, x3, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x399, &x400, x3, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x401, &x402, x3, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x403, &x404, x3, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x405, &x406, x3, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x407, &x408, x3, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x409, &x410, x3, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x411, &x412, x3, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x413, &x414, x3, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x415, &x416, x3, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x417, &x418, x3, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x419, &x420, x3, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x421, &x422, 0x0, x420, x417);
+ fiat_secp384r1_addcarryx_u32(&x423, &x424, x422, x418, x415);
+ fiat_secp384r1_addcarryx_u32(&x425, &x426, x424, x416, x413);
+ fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x414, x411);
+ fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x412, x409);
+ fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x410, x407);
+ fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x408, x405);
+ fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x406, x403);
+ fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x404, x401);
+ fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x402, x399);
+ fiat_secp384r1_addcarryx_u32(&x441, &x442, x440, x400, x397);
+ x443 = (x442 + x398);
+ fiat_secp384r1_addcarryx_u32(&x444, &x445, 0x0, x372, x419);
+ fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, x374, x421);
+ fiat_secp384r1_addcarryx_u32(&x448, &x449, x447, x376, x423);
+ fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x378, x425);
+ fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x380, x427);
+ fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x382, x429);
+ fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x384, x431);
+ fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x386, x433);
+ fiat_secp384r1_addcarryx_u32(&x460, &x461, x459, x388, x435);
+ fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x390, x437);
+ fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x392, x439);
+ fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x394, x441);
+ fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x396, x443);
+ fiat_secp384r1_mulx_u32(&x470, &x471, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x472, &x473, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x474, &x475, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x476, &x477, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x478, &x479, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x480, &x481, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x482, &x483, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x484, &x485, x444, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x486, &x487, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x488, &x489, x444, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x490, &x491, 0x0, x487, x484);
+ fiat_secp384r1_addcarryx_u32(&x492, &x493, x491, x485, x482);
+ fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x483, x480);
+ fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x481, x478);
+ fiat_secp384r1_addcarryx_u32(&x498, &x499, x497, x479, x476);
+ fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x477, x474);
+ fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x475, x472);
+ fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x473, x470);
+ x506 = (x505 + x471);
+ fiat_secp384r1_addcarryx_u32(&x507, &x508, 0x0, x444, x488);
+ fiat_secp384r1_addcarryx_u32(&x509, &x510, x508, x446, x489);
+ fiat_secp384r1_addcarryx_u32(&x511, &x512, x510, x448, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x513, &x514, x512, x450, x486);
+ fiat_secp384r1_addcarryx_u32(&x515, &x516, x514, x452, x490);
+ fiat_secp384r1_addcarryx_u32(&x517, &x518, x516, x454, x492);
+ fiat_secp384r1_addcarryx_u32(&x519, &x520, x518, x456, x494);
+ fiat_secp384r1_addcarryx_u32(&x521, &x522, x520, x458, x496);
+ fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x460, x498);
+ fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x462, x500);
+ fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x464, x502);
+ fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x466, x504);
+ fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x468, x506);
+ x533 = ((uint32_t)x532 + x469);
+ fiat_secp384r1_mulx_u32(&x534, &x535, x4, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x536, &x537, x4, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x538, &x539, x4, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x540, &x541, x4, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x542, &x543, x4, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x544, &x545, x4, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x546, &x547, x4, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x548, &x549, x4, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x550, &x551, x4, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x552, &x553, x4, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x554, &x555, x4, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x556, &x557, x4, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x558, &x559, 0x0, x557, x554);
+ fiat_secp384r1_addcarryx_u32(&x560, &x561, x559, x555, x552);
+ fiat_secp384r1_addcarryx_u32(&x562, &x563, x561, x553, x550);
+ fiat_secp384r1_addcarryx_u32(&x564, &x565, x563, x551, x548);
+ fiat_secp384r1_addcarryx_u32(&x566, &x567, x565, x549, x546);
+ fiat_secp384r1_addcarryx_u32(&x568, &x569, x567, x547, x544);
+ fiat_secp384r1_addcarryx_u32(&x570, &x571, x569, x545, x542);
+ fiat_secp384r1_addcarryx_u32(&x572, &x573, x571, x543, x540);
+ fiat_secp384r1_addcarryx_u32(&x574, &x575, x573, x541, x538);
+ fiat_secp384r1_addcarryx_u32(&x576, &x577, x575, x539, x536);
+ fiat_secp384r1_addcarryx_u32(&x578, &x579, x577, x537, x534);
+ x580 = (x579 + x535);
+ fiat_secp384r1_addcarryx_u32(&x581, &x582, 0x0, x509, x556);
+ fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x511, x558);
+ fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x513, x560);
+ fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x515, x562);
+ fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x517, x564);
+ fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x519, x566);
+ fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x521, x568);
+ fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x523, x570);
+ fiat_secp384r1_addcarryx_u32(&x597, &x598, x596, x525, x572);
+ fiat_secp384r1_addcarryx_u32(&x599, &x600, x598, x527, x574);
+ fiat_secp384r1_addcarryx_u32(&x601, &x602, x600, x529, x576);
+ fiat_secp384r1_addcarryx_u32(&x603, &x604, x602, x531, x578);
+ fiat_secp384r1_addcarryx_u32(&x605, &x606, x604, x533, x580);
+ fiat_secp384r1_mulx_u32(&x607, &x608, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x609, &x610, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x611, &x612, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x613, &x614, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x615, &x616, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x617, &x618, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x619, &x620, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x621, &x622, x581, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x623, &x624, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x625, &x626, x581, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x627, &x628, 0x0, x624, x621);
+ fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x622, x619);
+ fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x620, x617);
+ fiat_secp384r1_addcarryx_u32(&x633, &x634, x632, x618, x615);
+ fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x616, x613);
+ fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x614, x611);
+ fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x612, x609);
+ fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x610, x607);
+ x643 = (x642 + x608);
+ fiat_secp384r1_addcarryx_u32(&x644, &x645, 0x0, x581, x625);
+ fiat_secp384r1_addcarryx_u32(&x646, &x647, x645, x583, x626);
+ fiat_secp384r1_addcarryx_u32(&x648, &x649, x647, x585, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x650, &x651, x649, x587, x623);
+ fiat_secp384r1_addcarryx_u32(&x652, &x653, x651, x589, x627);
+ fiat_secp384r1_addcarryx_u32(&x654, &x655, x653, x591, x629);
+ fiat_secp384r1_addcarryx_u32(&x656, &x657, x655, x593, x631);
+ fiat_secp384r1_addcarryx_u32(&x658, &x659, x657, x595, x633);
+ fiat_secp384r1_addcarryx_u32(&x660, &x661, x659, x597, x635);
+ fiat_secp384r1_addcarryx_u32(&x662, &x663, x661, x599, x637);
+ fiat_secp384r1_addcarryx_u32(&x664, &x665, x663, x601, x639);
+ fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x603, x641);
+ fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x605, x643);
+ x670 = ((uint32_t)x669 + x606);
+ fiat_secp384r1_mulx_u32(&x671, &x672, x5, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x673, &x674, x5, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x675, &x676, x5, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x677, &x678, x5, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x679, &x680, x5, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x681, &x682, x5, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x683, &x684, x5, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x685, &x686, x5, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x687, &x688, x5, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x689, &x690, x5, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x691, &x692, x5, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x693, &x694, x5, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x695, &x696, 0x0, x694, x691);
+ fiat_secp384r1_addcarryx_u32(&x697, &x698, x696, x692, x689);
+ fiat_secp384r1_addcarryx_u32(&x699, &x700, x698, x690, x687);
+ fiat_secp384r1_addcarryx_u32(&x701, &x702, x700, x688, x685);
+ fiat_secp384r1_addcarryx_u32(&x703, &x704, x702, x686, x683);
+ fiat_secp384r1_addcarryx_u32(&x705, &x706, x704, x684, x681);
+ fiat_secp384r1_addcarryx_u32(&x707, &x708, x706, x682, x679);
+ fiat_secp384r1_addcarryx_u32(&x709, &x710, x708, x680, x677);
+ fiat_secp384r1_addcarryx_u32(&x711, &x712, x710, x678, x675);
+ fiat_secp384r1_addcarryx_u32(&x713, &x714, x712, x676, x673);
+ fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x674, x671);
+ x717 = (x716 + x672);
+ fiat_secp384r1_addcarryx_u32(&x718, &x719, 0x0, x646, x693);
+ fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x648, x695);
+ fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x650, x697);
+ fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x652, x699);
+ fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x654, x701);
+ fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x656, x703);
+ fiat_secp384r1_addcarryx_u32(&x730, &x731, x729, x658, x705);
+ fiat_secp384r1_addcarryx_u32(&x732, &x733, x731, x660, x707);
+ fiat_secp384r1_addcarryx_u32(&x734, &x735, x733, x662, x709);
+ fiat_secp384r1_addcarryx_u32(&x736, &x737, x735, x664, x711);
+ fiat_secp384r1_addcarryx_u32(&x738, &x739, x737, x666, x713);
+ fiat_secp384r1_addcarryx_u32(&x740, &x741, x739, x668, x715);
+ fiat_secp384r1_addcarryx_u32(&x742, &x743, x741, x670, x717);
+ fiat_secp384r1_mulx_u32(&x744, &x745, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x746, &x747, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x748, &x749, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x750, &x751, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x752, &x753, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x754, &x755, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x756, &x757, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x758, &x759, x718, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x760, &x761, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x762, &x763, x718, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x764, &x765, 0x0, x761, x758);
+ fiat_secp384r1_addcarryx_u32(&x766, &x767, x765, x759, x756);
+ fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x757, x754);
+ fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x755, x752);
+ fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x753, x750);
+ fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x751, x748);
+ fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x749, x746);
+ fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x747, x744);
+ x780 = (x779 + x745);
+ fiat_secp384r1_addcarryx_u32(&x781, &x782, 0x0, x718, x762);
+ fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x720, x763);
+ fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x722, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x724, x760);
+ fiat_secp384r1_addcarryx_u32(&x789, &x790, x788, x726, x764);
+ fiat_secp384r1_addcarryx_u32(&x791, &x792, x790, x728, x766);
+ fiat_secp384r1_addcarryx_u32(&x793, &x794, x792, x730, x768);
+ fiat_secp384r1_addcarryx_u32(&x795, &x796, x794, x732, x770);
+ fiat_secp384r1_addcarryx_u32(&x797, &x798, x796, x734, x772);
+ fiat_secp384r1_addcarryx_u32(&x799, &x800, x798, x736, x774);
+ fiat_secp384r1_addcarryx_u32(&x801, &x802, x800, x738, x776);
+ fiat_secp384r1_addcarryx_u32(&x803, &x804, x802, x740, x778);
+ fiat_secp384r1_addcarryx_u32(&x805, &x806, x804, x742, x780);
+ x807 = ((uint32_t)x806 + x743);
+ fiat_secp384r1_mulx_u32(&x808, &x809, x6, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x810, &x811, x6, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x812, &x813, x6, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x814, &x815, x6, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x816, &x817, x6, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x818, &x819, x6, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x820, &x821, x6, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x822, &x823, x6, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x824, &x825, x6, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x826, &x827, x6, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x828, &x829, x6, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x830, &x831, x6, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x832, &x833, 0x0, x831, x828);
+ fiat_secp384r1_addcarryx_u32(&x834, &x835, x833, x829, x826);
+ fiat_secp384r1_addcarryx_u32(&x836, &x837, x835, x827, x824);
+ fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x825, x822);
+ fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x823, x820);
+ fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x821, x818);
+ fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x819, x816);
+ fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x817, x814);
+ fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x815, x812);
+ fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x813, x810);
+ fiat_secp384r1_addcarryx_u32(&x852, &x853, x851, x811, x808);
+ x854 = (x853 + x809);
+ fiat_secp384r1_addcarryx_u32(&x855, &x856, 0x0, x783, x830);
+ fiat_secp384r1_addcarryx_u32(&x857, &x858, x856, x785, x832);
+ fiat_secp384r1_addcarryx_u32(&x859, &x860, x858, x787, x834);
+ fiat_secp384r1_addcarryx_u32(&x861, &x862, x860, x789, x836);
+ fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x791, x838);
+ fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x793, x840);
+ fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x795, x842);
+ fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x797, x844);
+ fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x799, x846);
+ fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x801, x848);
+ fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x803, x850);
+ fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x805, x852);
+ fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x807, x854);
+ fiat_secp384r1_mulx_u32(&x881, &x882, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x883, &x884, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x885, &x886, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x887, &x888, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x889, &x890, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x891, &x892, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x893, &x894, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x895, &x896, x855, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x897, &x898, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x899, &x900, x855, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x901, &x902, 0x0, x898, x895);
+ fiat_secp384r1_addcarryx_u32(&x903, &x904, x902, x896, x893);
+ fiat_secp384r1_addcarryx_u32(&x905, &x906, x904, x894, x891);
+ fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x892, x889);
+ fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x890, x887);
+ fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x888, x885);
+ fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x886, x883);
+ fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x884, x881);
+ x917 = (x916 + x882);
+ fiat_secp384r1_addcarryx_u32(&x918, &x919, 0x0, x855, x899);
+ fiat_secp384r1_addcarryx_u32(&x920, &x921, x919, x857, x900);
+ fiat_secp384r1_addcarryx_u32(&x922, &x923, x921, x859, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x861, x897);
+ fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x863, x901);
+ fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x865, x903);
+ fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x867, x905);
+ fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x869, x907);
+ fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x871, x909);
+ fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x873, x911);
+ fiat_secp384r1_addcarryx_u32(&x938, &x939, x937, x875, x913);
+ fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x877, x915);
+ fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x879, x917);
+ x944 = ((uint32_t)x943 + x880);
+ fiat_secp384r1_mulx_u32(&x945, &x946, x7, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x947, &x948, x7, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x949, &x950, x7, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x951, &x952, x7, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x953, &x954, x7, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x955, &x956, x7, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x957, &x958, x7, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x959, &x960, x7, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x961, &x962, x7, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x963, &x964, x7, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x965, &x966, x7, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x967, &x968, x7, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x969, &x970, 0x0, x968, x965);
+ fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x966, x963);
+ fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x964, x961);
+ fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x962, x959);
+ fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x960, x957);
+ fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x958, x955);
+ fiat_secp384r1_addcarryx_u32(&x981, &x982, x980, x956, x953);
+ fiat_secp384r1_addcarryx_u32(&x983, &x984, x982, x954, x951);
+ fiat_secp384r1_addcarryx_u32(&x985, &x986, x984, x952, x949);
+ fiat_secp384r1_addcarryx_u32(&x987, &x988, x986, x950, x947);
+ fiat_secp384r1_addcarryx_u32(&x989, &x990, x988, x948, x945);
+ x991 = (x990 + x946);
+ fiat_secp384r1_addcarryx_u32(&x992, &x993, 0x0, x920, x967);
+ fiat_secp384r1_addcarryx_u32(&x994, &x995, x993, x922, x969);
+ fiat_secp384r1_addcarryx_u32(&x996, &x997, x995, x924, x971);
+ fiat_secp384r1_addcarryx_u32(&x998, &x999, x997, x926, x973);
+ fiat_secp384r1_addcarryx_u32(&x1000, &x1001, x999, x928, x975);
+ fiat_secp384r1_addcarryx_u32(&x1002, &x1003, x1001, x930, x977);
+ fiat_secp384r1_addcarryx_u32(&x1004, &x1005, x1003, x932, x979);
+ fiat_secp384r1_addcarryx_u32(&x1006, &x1007, x1005, x934, x981);
+ fiat_secp384r1_addcarryx_u32(&x1008, &x1009, x1007, x936, x983);
+ fiat_secp384r1_addcarryx_u32(&x1010, &x1011, x1009, x938, x985);
+ fiat_secp384r1_addcarryx_u32(&x1012, &x1013, x1011, x940, x987);
+ fiat_secp384r1_addcarryx_u32(&x1014, &x1015, x1013, x942, x989);
+ fiat_secp384r1_addcarryx_u32(&x1016, &x1017, x1015, x944, x991);
+ fiat_secp384r1_mulx_u32(&x1018, &x1019, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1020, &x1021, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1022, &x1023, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1024, &x1025, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1026, &x1027, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1028, &x1029, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1030, &x1031, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1032, &x1033, x992, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1034, &x1035, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1036, &x1037, x992, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1038, &x1039, 0x0, x1035, x1032);
+ fiat_secp384r1_addcarryx_u32(&x1040, &x1041, x1039, x1033, x1030);
+ fiat_secp384r1_addcarryx_u32(&x1042, &x1043, x1041, x1031, x1028);
+ fiat_secp384r1_addcarryx_u32(&x1044, &x1045, x1043, x1029, x1026);
+ fiat_secp384r1_addcarryx_u32(&x1046, &x1047, x1045, x1027, x1024);
+ fiat_secp384r1_addcarryx_u32(&x1048, &x1049, x1047, x1025, x1022);
+ fiat_secp384r1_addcarryx_u32(&x1050, &x1051, x1049, x1023, x1020);
+ fiat_secp384r1_addcarryx_u32(&x1052, &x1053, x1051, x1021, x1018);
+ x1054 = (x1053 + x1019);
+ fiat_secp384r1_addcarryx_u32(&x1055, &x1056, 0x0, x992, x1036);
+ fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x994, x1037);
+ fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x996, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x998, x1034);
+ fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1000, x1038);
+ fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1002, x1040);
+ fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1004, x1042);
+ fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1006, x1044);
+ fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1008, x1046);
+ fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1010, x1048);
+ fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1012, x1050);
+ fiat_secp384r1_addcarryx_u32(&x1077, &x1078, x1076, x1014, x1052);
+ fiat_secp384r1_addcarryx_u32(&x1079, &x1080, x1078, x1016, x1054);
+ x1081 = ((uint32_t)x1080 + x1017);
+ fiat_secp384r1_mulx_u32(&x1082, &x1083, x8, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x1084, &x1085, x8, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x1086, &x1087, x8, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x1088, &x1089, x8, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x1090, &x1091, x8, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x1092, &x1093, x8, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x1094, &x1095, x8, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x1096, &x1097, x8, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x1098, &x1099, x8, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x1100, &x1101, x8, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x1102, &x1103, x8, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x1104, &x1105, x8, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x1106, &x1107, 0x0, x1105, x1102);
+ fiat_secp384r1_addcarryx_u32(&x1108, &x1109, x1107, x1103, x1100);
+ fiat_secp384r1_addcarryx_u32(&x1110, &x1111, x1109, x1101, x1098);
+ fiat_secp384r1_addcarryx_u32(&x1112, &x1113, x1111, x1099, x1096);
+ fiat_secp384r1_addcarryx_u32(&x1114, &x1115, x1113, x1097, x1094);
+ fiat_secp384r1_addcarryx_u32(&x1116, &x1117, x1115, x1095, x1092);
+ fiat_secp384r1_addcarryx_u32(&x1118, &x1119, x1117, x1093, x1090);
+ fiat_secp384r1_addcarryx_u32(&x1120, &x1121, x1119, x1091, x1088);
+ fiat_secp384r1_addcarryx_u32(&x1122, &x1123, x1121, x1089, x1086);
+ fiat_secp384r1_addcarryx_u32(&x1124, &x1125, x1123, x1087, x1084);
+ fiat_secp384r1_addcarryx_u32(&x1126, &x1127, x1125, x1085, x1082);
+ x1128 = (x1127 + x1083);
+ fiat_secp384r1_addcarryx_u32(&x1129, &x1130, 0x0, x1057, x1104);
+ fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1059, x1106);
+ fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1061, x1108);
+ fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1063, x1110);
+ fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136, x1065, x1112);
+ fiat_secp384r1_addcarryx_u32(&x1139, &x1140, x1138, x1067, x1114);
+ fiat_secp384r1_addcarryx_u32(&x1141, &x1142, x1140, x1069, x1116);
+ fiat_secp384r1_addcarryx_u32(&x1143, &x1144, x1142, x1071, x1118);
+ fiat_secp384r1_addcarryx_u32(&x1145, &x1146, x1144, x1073, x1120);
+ fiat_secp384r1_addcarryx_u32(&x1147, &x1148, x1146, x1075, x1122);
+ fiat_secp384r1_addcarryx_u32(&x1149, &x1150, x1148, x1077, x1124);
+ fiat_secp384r1_addcarryx_u32(&x1151, &x1152, x1150, x1079, x1126);
+ fiat_secp384r1_addcarryx_u32(&x1153, &x1154, x1152, x1081, x1128);
+ fiat_secp384r1_mulx_u32(&x1155, &x1156, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1157, &x1158, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1159, &x1160, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1161, &x1162, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1163, &x1164, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1165, &x1166, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1167, &x1168, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1169, &x1170, x1129, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1171, &x1172, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1173, &x1174, x1129, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1175, &x1176, 0x0, x1172, x1169);
+ fiat_secp384r1_addcarryx_u32(&x1177, &x1178, x1176, x1170, x1167);
+ fiat_secp384r1_addcarryx_u32(&x1179, &x1180, x1178, x1168, x1165);
+ fiat_secp384r1_addcarryx_u32(&x1181, &x1182, x1180, x1166, x1163);
+ fiat_secp384r1_addcarryx_u32(&x1183, &x1184, x1182, x1164, x1161);
+ fiat_secp384r1_addcarryx_u32(&x1185, &x1186, x1184, x1162, x1159);
+ fiat_secp384r1_addcarryx_u32(&x1187, &x1188, x1186, x1160, x1157);
+ fiat_secp384r1_addcarryx_u32(&x1189, &x1190, x1188, x1158, x1155);
+ x1191 = (x1190 + x1156);
+ fiat_secp384r1_addcarryx_u32(&x1192, &x1193, 0x0, x1129, x1173);
+ fiat_secp384r1_addcarryx_u32(&x1194, &x1195, x1193, x1131, x1174);
+ fiat_secp384r1_addcarryx_u32(&x1196, &x1197, x1195, x1133, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1198, &x1199, x1197, x1135, x1171);
+ fiat_secp384r1_addcarryx_u32(&x1200, &x1201, x1199, x1137, x1175);
+ fiat_secp384r1_addcarryx_u32(&x1202, &x1203, x1201, x1139, x1177);
+ fiat_secp384r1_addcarryx_u32(&x1204, &x1205, x1203, x1141, x1179);
+ fiat_secp384r1_addcarryx_u32(&x1206, &x1207, x1205, x1143, x1181);
+ fiat_secp384r1_addcarryx_u32(&x1208, &x1209, x1207, x1145, x1183);
+ fiat_secp384r1_addcarryx_u32(&x1210, &x1211, x1209, x1147, x1185);
+ fiat_secp384r1_addcarryx_u32(&x1212, &x1213, x1211, x1149, x1187);
+ fiat_secp384r1_addcarryx_u32(&x1214, &x1215, x1213, x1151, x1189);
+ fiat_secp384r1_addcarryx_u32(&x1216, &x1217, x1215, x1153, x1191);
+ x1218 = ((uint32_t)x1217 + x1154);
+ fiat_secp384r1_mulx_u32(&x1219, &x1220, x9, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x1221, &x1222, x9, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x1223, &x1224, x9, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x1225, &x1226, x9, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x1227, &x1228, x9, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x1229, &x1230, x9, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x1231, &x1232, x9, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x1233, &x1234, x9, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x1235, &x1236, x9, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x1237, &x1238, x9, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x1239, &x1240, x9, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x1241, &x1242, x9, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x1243, &x1244, 0x0, x1242, x1239);
+ fiat_secp384r1_addcarryx_u32(&x1245, &x1246, x1244, x1240, x1237);
+ fiat_secp384r1_addcarryx_u32(&x1247, &x1248, x1246, x1238, x1235);
+ fiat_secp384r1_addcarryx_u32(&x1249, &x1250, x1248, x1236, x1233);
+ fiat_secp384r1_addcarryx_u32(&x1251, &x1252, x1250, x1234, x1231);
+ fiat_secp384r1_addcarryx_u32(&x1253, &x1254, x1252, x1232, x1229);
+ fiat_secp384r1_addcarryx_u32(&x1255, &x1256, x1254, x1230, x1227);
+ fiat_secp384r1_addcarryx_u32(&x1257, &x1258, x1256, x1228, x1225);
+ fiat_secp384r1_addcarryx_u32(&x1259, &x1260, x1258, x1226, x1223);
+ fiat_secp384r1_addcarryx_u32(&x1261, &x1262, x1260, x1224, x1221);
+ fiat_secp384r1_addcarryx_u32(&x1263, &x1264, x1262, x1222, x1219);
+ x1265 = (x1264 + x1220);
+ fiat_secp384r1_addcarryx_u32(&x1266, &x1267, 0x0, x1194, x1241);
+ fiat_secp384r1_addcarryx_u32(&x1268, &x1269, x1267, x1196, x1243);
+ fiat_secp384r1_addcarryx_u32(&x1270, &x1271, x1269, x1198, x1245);
+ fiat_secp384r1_addcarryx_u32(&x1272, &x1273, x1271, x1200, x1247);
+ fiat_secp384r1_addcarryx_u32(&x1274, &x1275, x1273, x1202, x1249);
+ fiat_secp384r1_addcarryx_u32(&x1276, &x1277, x1275, x1204, x1251);
+ fiat_secp384r1_addcarryx_u32(&x1278, &x1279, x1277, x1206, x1253);
+ fiat_secp384r1_addcarryx_u32(&x1280, &x1281, x1279, x1208, x1255);
+ fiat_secp384r1_addcarryx_u32(&x1282, &x1283, x1281, x1210, x1257);
+ fiat_secp384r1_addcarryx_u32(&x1284, &x1285, x1283, x1212, x1259);
+ fiat_secp384r1_addcarryx_u32(&x1286, &x1287, x1285, x1214, x1261);
+ fiat_secp384r1_addcarryx_u32(&x1288, &x1289, x1287, x1216, x1263);
+ fiat_secp384r1_addcarryx_u32(&x1290, &x1291, x1289, x1218, x1265);
+ fiat_secp384r1_mulx_u32(&x1292, &x1293, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1294, &x1295, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1296, &x1297, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1298, &x1299, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1300, &x1301, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1302, &x1303, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1304, &x1305, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1306, &x1307, x1266, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1308, &x1309, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1310, &x1311, x1266, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1312, &x1313, 0x0, x1309, x1306);
+ fiat_secp384r1_addcarryx_u32(&x1314, &x1315, x1313, x1307, x1304);
+ fiat_secp384r1_addcarryx_u32(&x1316, &x1317, x1315, x1305, x1302);
+ fiat_secp384r1_addcarryx_u32(&x1318, &x1319, x1317, x1303, x1300);
+ fiat_secp384r1_addcarryx_u32(&x1320, &x1321, x1319, x1301, x1298);
+ fiat_secp384r1_addcarryx_u32(&x1322, &x1323, x1321, x1299, x1296);
+ fiat_secp384r1_addcarryx_u32(&x1324, &x1325, x1323, x1297, x1294);
+ fiat_secp384r1_addcarryx_u32(&x1326, &x1327, x1325, x1295, x1292);
+ x1328 = (x1327 + x1293);
+ fiat_secp384r1_addcarryx_u32(&x1329, &x1330, 0x0, x1266, x1310);
+ fiat_secp384r1_addcarryx_u32(&x1331, &x1332, x1330, x1268, x1311);
+ fiat_secp384r1_addcarryx_u32(&x1333, &x1334, x1332, x1270, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1335, &x1336, x1334, x1272, x1308);
+ fiat_secp384r1_addcarryx_u32(&x1337, &x1338, x1336, x1274, x1312);
+ fiat_secp384r1_addcarryx_u32(&x1339, &x1340, x1338, x1276, x1314);
+ fiat_secp384r1_addcarryx_u32(&x1341, &x1342, x1340, x1278, x1316);
+ fiat_secp384r1_addcarryx_u32(&x1343, &x1344, x1342, x1280, x1318);
+ fiat_secp384r1_addcarryx_u32(&x1345, &x1346, x1344, x1282, x1320);
+ fiat_secp384r1_addcarryx_u32(&x1347, &x1348, x1346, x1284, x1322);
+ fiat_secp384r1_addcarryx_u32(&x1349, &x1350, x1348, x1286, x1324);
+ fiat_secp384r1_addcarryx_u32(&x1351, &x1352, x1350, x1288, x1326);
+ fiat_secp384r1_addcarryx_u32(&x1353, &x1354, x1352, x1290, x1328);
+ x1355 = ((uint32_t)x1354 + x1291);
+ fiat_secp384r1_mulx_u32(&x1356, &x1357, x10, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x1358, &x1359, x10, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x1360, &x1361, x10, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x1362, &x1363, x10, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x1364, &x1365, x10, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x1366, &x1367, x10, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x1368, &x1369, x10, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x1370, &x1371, x10, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x1372, &x1373, x10, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x1374, &x1375, x10, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x1376, &x1377, x10, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x1378, &x1379, x10, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x1380, &x1381, 0x0, x1379, x1376);
+ fiat_secp384r1_addcarryx_u32(&x1382, &x1383, x1381, x1377, x1374);
+ fiat_secp384r1_addcarryx_u32(&x1384, &x1385, x1383, x1375, x1372);
+ fiat_secp384r1_addcarryx_u32(&x1386, &x1387, x1385, x1373, x1370);
+ fiat_secp384r1_addcarryx_u32(&x1388, &x1389, x1387, x1371, x1368);
+ fiat_secp384r1_addcarryx_u32(&x1390, &x1391, x1389, x1369, x1366);
+ fiat_secp384r1_addcarryx_u32(&x1392, &x1393, x1391, x1367, x1364);
+ fiat_secp384r1_addcarryx_u32(&x1394, &x1395, x1393, x1365, x1362);
+ fiat_secp384r1_addcarryx_u32(&x1396, &x1397, x1395, x1363, x1360);
+ fiat_secp384r1_addcarryx_u32(&x1398, &x1399, x1397, x1361, x1358);
+ fiat_secp384r1_addcarryx_u32(&x1400, &x1401, x1399, x1359, x1356);
+ x1402 = (x1401 + x1357);
+ fiat_secp384r1_addcarryx_u32(&x1403, &x1404, 0x0, x1331, x1378);
+ fiat_secp384r1_addcarryx_u32(&x1405, &x1406, x1404, x1333, x1380);
+ fiat_secp384r1_addcarryx_u32(&x1407, &x1408, x1406, x1335, x1382);
+ fiat_secp384r1_addcarryx_u32(&x1409, &x1410, x1408, x1337, x1384);
+ fiat_secp384r1_addcarryx_u32(&x1411, &x1412, x1410, x1339, x1386);
+ fiat_secp384r1_addcarryx_u32(&x1413, &x1414, x1412, x1341, x1388);
+ fiat_secp384r1_addcarryx_u32(&x1415, &x1416, x1414, x1343, x1390);
+ fiat_secp384r1_addcarryx_u32(&x1417, &x1418, x1416, x1345, x1392);
+ fiat_secp384r1_addcarryx_u32(&x1419, &x1420, x1418, x1347, x1394);
+ fiat_secp384r1_addcarryx_u32(&x1421, &x1422, x1420, x1349, x1396);
+ fiat_secp384r1_addcarryx_u32(&x1423, &x1424, x1422, x1351, x1398);
+ fiat_secp384r1_addcarryx_u32(&x1425, &x1426, x1424, x1353, x1400);
+ fiat_secp384r1_addcarryx_u32(&x1427, &x1428, x1426, x1355, x1402);
+ fiat_secp384r1_mulx_u32(&x1429, &x1430, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1431, &x1432, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1433, &x1434, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1435, &x1436, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1437, &x1438, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1439, &x1440, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1441, &x1442, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1443, &x1444, x1403, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1445, &x1446, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1447, &x1448, x1403, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1449, &x1450, 0x0, x1446, x1443);
+ fiat_secp384r1_addcarryx_u32(&x1451, &x1452, x1450, x1444, x1441);
+ fiat_secp384r1_addcarryx_u32(&x1453, &x1454, x1452, x1442, x1439);
+ fiat_secp384r1_addcarryx_u32(&x1455, &x1456, x1454, x1440, x1437);
+ fiat_secp384r1_addcarryx_u32(&x1457, &x1458, x1456, x1438, x1435);
+ fiat_secp384r1_addcarryx_u32(&x1459, &x1460, x1458, x1436, x1433);
+ fiat_secp384r1_addcarryx_u32(&x1461, &x1462, x1460, x1434, x1431);
+ fiat_secp384r1_addcarryx_u32(&x1463, &x1464, x1462, x1432, x1429);
+ x1465 = (x1464 + x1430);
+ fiat_secp384r1_addcarryx_u32(&x1466, &x1467, 0x0, x1403, x1447);
+ fiat_secp384r1_addcarryx_u32(&x1468, &x1469, x1467, x1405, x1448);
+ fiat_secp384r1_addcarryx_u32(&x1470, &x1471, x1469, x1407, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1472, &x1473, x1471, x1409, x1445);
+ fiat_secp384r1_addcarryx_u32(&x1474, &x1475, x1473, x1411, x1449);
+ fiat_secp384r1_addcarryx_u32(&x1476, &x1477, x1475, x1413, x1451);
+ fiat_secp384r1_addcarryx_u32(&x1478, &x1479, x1477, x1415, x1453);
+ fiat_secp384r1_addcarryx_u32(&x1480, &x1481, x1479, x1417, x1455);
+ fiat_secp384r1_addcarryx_u32(&x1482, &x1483, x1481, x1419, x1457);
+ fiat_secp384r1_addcarryx_u32(&x1484, &x1485, x1483, x1421, x1459);
+ fiat_secp384r1_addcarryx_u32(&x1486, &x1487, x1485, x1423, x1461);
+ fiat_secp384r1_addcarryx_u32(&x1488, &x1489, x1487, x1425, x1463);
+ fiat_secp384r1_addcarryx_u32(&x1490, &x1491, x1489, x1427, x1465);
+ x1492 = ((uint32_t)x1491 + x1428);
+ fiat_secp384r1_mulx_u32(&x1493, &x1494, x11, (arg1[11]));
+ fiat_secp384r1_mulx_u32(&x1495, &x1496, x11, (arg1[10]));
+ fiat_secp384r1_mulx_u32(&x1497, &x1498, x11, (arg1[9]));
+ fiat_secp384r1_mulx_u32(&x1499, &x1500, x11, (arg1[8]));
+ fiat_secp384r1_mulx_u32(&x1501, &x1502, x11, (arg1[7]));
+ fiat_secp384r1_mulx_u32(&x1503, &x1504, x11, (arg1[6]));
+ fiat_secp384r1_mulx_u32(&x1505, &x1506, x11, (arg1[5]));
+ fiat_secp384r1_mulx_u32(&x1507, &x1508, x11, (arg1[4]));
+ fiat_secp384r1_mulx_u32(&x1509, &x1510, x11, (arg1[3]));
+ fiat_secp384r1_mulx_u32(&x1511, &x1512, x11, (arg1[2]));
+ fiat_secp384r1_mulx_u32(&x1513, &x1514, x11, (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x1515, &x1516, x11, (arg1[0]));
+ fiat_secp384r1_addcarryx_u32(&x1517, &x1518, 0x0, x1516, x1513);
+ fiat_secp384r1_addcarryx_u32(&x1519, &x1520, x1518, x1514, x1511);
+ fiat_secp384r1_addcarryx_u32(&x1521, &x1522, x1520, x1512, x1509);
+ fiat_secp384r1_addcarryx_u32(&x1523, &x1524, x1522, x1510, x1507);
+ fiat_secp384r1_addcarryx_u32(&x1525, &x1526, x1524, x1508, x1505);
+ fiat_secp384r1_addcarryx_u32(&x1527, &x1528, x1526, x1506, x1503);
+ fiat_secp384r1_addcarryx_u32(&x1529, &x1530, x1528, x1504, x1501);
+ fiat_secp384r1_addcarryx_u32(&x1531, &x1532, x1530, x1502, x1499);
+ fiat_secp384r1_addcarryx_u32(&x1533, &x1534, x1532, x1500, x1497);
+ fiat_secp384r1_addcarryx_u32(&x1535, &x1536, x1534, x1498, x1495);
+ fiat_secp384r1_addcarryx_u32(&x1537, &x1538, x1536, x1496, x1493);
+ x1539 = (x1538 + x1494);
+ fiat_secp384r1_addcarryx_u32(&x1540, &x1541, 0x0, x1468, x1515);
+ fiat_secp384r1_addcarryx_u32(&x1542, &x1543, x1541, x1470, x1517);
+ fiat_secp384r1_addcarryx_u32(&x1544, &x1545, x1543, x1472, x1519);
+ fiat_secp384r1_addcarryx_u32(&x1546, &x1547, x1545, x1474, x1521);
+ fiat_secp384r1_addcarryx_u32(&x1548, &x1549, x1547, x1476, x1523);
+ fiat_secp384r1_addcarryx_u32(&x1550, &x1551, x1549, x1478, x1525);
+ fiat_secp384r1_addcarryx_u32(&x1552, &x1553, x1551, x1480, x1527);
+ fiat_secp384r1_addcarryx_u32(&x1554, &x1555, x1553, x1482, x1529);
+ fiat_secp384r1_addcarryx_u32(&x1556, &x1557, x1555, x1484, x1531);
+ fiat_secp384r1_addcarryx_u32(&x1558, &x1559, x1557, x1486, x1533);
+ fiat_secp384r1_addcarryx_u32(&x1560, &x1561, x1559, x1488, x1535);
+ fiat_secp384r1_addcarryx_u32(&x1562, &x1563, x1561, x1490, x1537);
+ fiat_secp384r1_addcarryx_u32(&x1564, &x1565, x1563, x1492, x1539);
+ fiat_secp384r1_mulx_u32(&x1566, &x1567, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1568, &x1569, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1570, &x1571, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1572, &x1573, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1574, &x1575, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1576, &x1577, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1578, &x1579, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1580, &x1581, x1540, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1582, &x1583, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1584, &x1585, x1540, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1586, &x1587, 0x0, x1583, x1580);
+ fiat_secp384r1_addcarryx_u32(&x1588, &x1589, x1587, x1581, x1578);
+ fiat_secp384r1_addcarryx_u32(&x1590, &x1591, x1589, x1579, x1576);
+ fiat_secp384r1_addcarryx_u32(&x1592, &x1593, x1591, x1577, x1574);
+ fiat_secp384r1_addcarryx_u32(&x1594, &x1595, x1593, x1575, x1572);
+ fiat_secp384r1_addcarryx_u32(&x1596, &x1597, x1595, x1573, x1570);
+ fiat_secp384r1_addcarryx_u32(&x1598, &x1599, x1597, x1571, x1568);
+ fiat_secp384r1_addcarryx_u32(&x1600, &x1601, x1599, x1569, x1566);
+ x1602 = (x1601 + x1567);
+ fiat_secp384r1_addcarryx_u32(&x1603, &x1604, 0x0, x1540, x1584);
+ fiat_secp384r1_addcarryx_u32(&x1605, &x1606, x1604, x1542, x1585);
+ fiat_secp384r1_addcarryx_u32(&x1607, &x1608, x1606, x1544, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1609, &x1610, x1608, x1546, x1582);
+ fiat_secp384r1_addcarryx_u32(&x1611, &x1612, x1610, x1548, x1586);
+ fiat_secp384r1_addcarryx_u32(&x1613, &x1614, x1612, x1550, x1588);
+ fiat_secp384r1_addcarryx_u32(&x1615, &x1616, x1614, x1552, x1590);
+ fiat_secp384r1_addcarryx_u32(&x1617, &x1618, x1616, x1554, x1592);
+ fiat_secp384r1_addcarryx_u32(&x1619, &x1620, x1618, x1556, x1594);
+ fiat_secp384r1_addcarryx_u32(&x1621, &x1622, x1620, x1558, x1596);
+ fiat_secp384r1_addcarryx_u32(&x1623, &x1624, x1622, x1560, x1598);
+ fiat_secp384r1_addcarryx_u32(&x1625, &x1626, x1624, x1562, x1600);
+ fiat_secp384r1_addcarryx_u32(&x1627, &x1628, x1626, x1564, x1602);
+ x1629 = ((uint32_t)x1628 + x1565);
+ fiat_secp384r1_subborrowx_u32(&x1630, &x1631, 0x0, x1605,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1632, &x1633, x1631, x1607, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1634, &x1635, x1633, x1609, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1636, &x1637, x1635, x1611,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1638, &x1639, x1637, x1613,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x1640, &x1641, x1639, x1615,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1642, &x1643, x1641, x1617,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1644, &x1645, x1643, x1619,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1646, &x1647, x1645, x1621,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1648, &x1649, x1647, x1623,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1650, &x1651, x1649, x1625,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1652, &x1653, x1651, x1627,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1654, &x1655, x1653, x1629, 0x0);
+ fiat_secp384r1_cmovznz_u32(&x1656, x1655, x1630, x1605);
+ fiat_secp384r1_cmovznz_u32(&x1657, x1655, x1632, x1607);
+ fiat_secp384r1_cmovznz_u32(&x1658, x1655, x1634, x1609);
+ fiat_secp384r1_cmovznz_u32(&x1659, x1655, x1636, x1611);
+ fiat_secp384r1_cmovznz_u32(&x1660, x1655, x1638, x1613);
+ fiat_secp384r1_cmovznz_u32(&x1661, x1655, x1640, x1615);
+ fiat_secp384r1_cmovznz_u32(&x1662, x1655, x1642, x1617);
+ fiat_secp384r1_cmovznz_u32(&x1663, x1655, x1644, x1619);
+ fiat_secp384r1_cmovznz_u32(&x1664, x1655, x1646, x1621);
+ fiat_secp384r1_cmovznz_u32(&x1665, x1655, x1648, x1623);
+ fiat_secp384r1_cmovznz_u32(&x1666, x1655, x1650, x1625);
+ fiat_secp384r1_cmovznz_u32(&x1667, x1655, x1652, x1627);
+ out1[0] = x1656;
+ out1[1] = x1657;
+ out1[2] = x1658;
+ out1[3] = x1659;
+ out1[4] = x1660;
+ out1[5] = x1661;
+ out1[6] = x1662;
+ out1[7] = x1663;
+ out1[8] = x1664;
+ out1[9] = x1665;
+ out1[10] = x1666;
+ out1[11] = x1667;
+}
+
+/*
+ * The function fiat_secp384r1_add adds two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the twelve 32-bit result limbs; index 0 is written first.
+ * arg1, arg2 - the two operands, twelve 32-bit limbs each; only read.
+ *
+ * Outline of the body (straight-line code in this function, no loops):
+ * 1. x1, x3, ..., x23 = arg1 + arg2 limb by limb, with the carry passed
+ *    through x2, x4, ..., x22; x24 is the carry out of the last limb.
+ * 2. x25, x27, ..., x47 = that sum minus the twelve-limb constant
+ *    { ffffffff, 0, 0, ffffffff, fffffffe, ffffffff x 7 } (limb 0 first),
+ *    which encodes 2^384 - 2^128 - 2^96 + 2^32 - 1; presumably this is m.
+ *    x24 takes part as a thirteenth limb and x50 is the final borrow.
+ * 3. Every output limb is picked by fiat_secp384r1_cmovznz_u32, keyed on
+ *    x50, from the step-2 value and the step-1 value.
+ *
+ * All reads of arg1 and arg2 happen in step 1, before the first store to out1.
+ *
+ * NOTE(review): the three helpers used here are defined outside this excerpt.
+ * Presumably cmovznz yields its third argument when the condition is zero
+ * and its fourth otherwise, i.e. the reduced value is kept when the
+ * subtraction did not borrow. Confirm against the helper definitions.
+ *
+ */
+static void
+fiat_secp384r1_add(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint32_t x1; /* x1..x50: each uint32_t limb is followed by the one-bit carry/borrow produced with it */
+ fiat_secp384r1_uint1 x2;
+ uint32_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint32_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint32_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint32_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint32_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint32_t x13;
+ fiat_secp384r1_uint1 x14;
+ uint32_t x15;
+ fiat_secp384r1_uint1 x16;
+ uint32_t x17;
+ fiat_secp384r1_uint1 x18;
+ uint32_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint32_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint32_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint32_t x25;
+ fiat_secp384r1_uint1 x26;
+ uint32_t x27;
+ fiat_secp384r1_uint1 x28;
+ uint32_t x29;
+ fiat_secp384r1_uint1 x30;
+ uint32_t x31;
+ fiat_secp384r1_uint1 x32;
+ uint32_t x33;
+ fiat_secp384r1_uint1 x34;
+ uint32_t x35;
+ fiat_secp384r1_uint1 x36;
+ uint32_t x37;
+ fiat_secp384r1_uint1 x38;
+ uint32_t x39;
+ fiat_secp384r1_uint1 x40;
+ uint32_t x41;
+ fiat_secp384r1_uint1 x42;
+ uint32_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint32_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint32_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint32_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint32_t x51; /* x51..x62: the twelve selected output limbs */
+ uint32_t x52;
+ uint32_t x53;
+ uint32_t x54;
+ uint32_t x55;
+ uint32_t x56;
+ uint32_t x57;
+ uint32_t x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ fiat_secp384r1_addcarryx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); /* step 1: limb-wise arg1 + arg2, carry-in 0 for limb 0 */
+ fiat_secp384r1_addcarryx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+ fiat_secp384r1_addcarryx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+ fiat_secp384r1_addcarryx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+ fiat_secp384r1_addcarryx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+ fiat_secp384r1_addcarryx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+ fiat_secp384r1_addcarryx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
+ fiat_secp384r1_addcarryx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
+ fiat_secp384r1_addcarryx_u32(&x17, &x18, x16, (arg1[8]), (arg2[8]));
+ fiat_secp384r1_addcarryx_u32(&x19, &x20, x18, (arg1[9]), (arg2[9]));
+ fiat_secp384r1_addcarryx_u32(&x21, &x22, x20, (arg1[10]), (arg2[10]));
+ fiat_secp384r1_addcarryx_u32(&x23, &x24, x22, (arg1[11]), (arg2[11])); /* x24 = carry out of the top limb */
+ fiat_secp384r1_subborrowx_u32(&x25, &x26, 0x0, x1, UINT32_C(0xffffffff)); /* step 2: subtract the constant limb by limb, borrow-in 0 for limb 0 */
+ fiat_secp384r1_subborrowx_u32(&x27, &x28, x26, x3, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x29, &x30, x28, x5, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x31, &x32, x30, x7, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x33, &x34, x32, x9, UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x35, &x36, x34, x11, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x37, &x38, x36, x13, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x39, &x40, x38, x15, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x41, &x42, x40, x17, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x43, &x44, x42, x19, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x45, &x46, x44, x21, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x47, &x48, x46, x23, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x49, &x50, x48, x24, 0x0); /* fold the step-1 carry x24 into the borrow chain; only the borrow x50 is used, x49 is not read */
+ fiat_secp384r1_cmovznz_u32(&x51, x50, x25, x1); /* step 3: per-limb select between step-2 and step-1 values, all keyed on x50 */
+ fiat_secp384r1_cmovznz_u32(&x52, x50, x27, x3);
+ fiat_secp384r1_cmovznz_u32(&x53, x50, x29, x5);
+ fiat_secp384r1_cmovznz_u32(&x54, x50, x31, x7);
+ fiat_secp384r1_cmovznz_u32(&x55, x50, x33, x9);
+ fiat_secp384r1_cmovznz_u32(&x56, x50, x35, x11);
+ fiat_secp384r1_cmovznz_u32(&x57, x50, x37, x13);
+ fiat_secp384r1_cmovznz_u32(&x58, x50, x39, x15);
+ fiat_secp384r1_cmovznz_u32(&x59, x50, x41, x17);
+ fiat_secp384r1_cmovznz_u32(&x60, x50, x43, x19);
+ fiat_secp384r1_cmovznz_u32(&x61, x50, x45, x21);
+ fiat_secp384r1_cmovznz_u32(&x62, x50, x47, x23);
+ out1[0] = x51; /* first store to out1; arg1/arg2 are no longer read from here on */
+ out1[1] = x52;
+ out1[2] = x53;
+ out1[3] = x54;
+ out1[4] = x55;
+ out1[5] = x56;
+ out1[6] = x57;
+ out1[7] = x58;
+ out1[8] = x59;
+ out1[9] = x60;
+ out1[10] = x61;
+ out1[11] = x62;
+}
+
+/*
+ * The function fiat_secp384r1_sub subtracts two field elements in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * 0 ≤ eval arg2 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the twelve 32-bit result limbs; index 0 is written first.
+ * arg1 - the minuend, twelve 32-bit limbs; only read.
+ * arg2 - the subtrahend, twelve 32-bit limbs; only read.
+ *
+ * Outline of the body (straight-line code in this function, no loops):
+ * 1. x1, x3, ..., x23 = arg1 - arg2 limb by limb, with the borrow passed
+ *    through x2, x4, ..., x22; x24 is the borrow out of the last limb.
+ * 2. x25 is a 32-bit mask derived from x24 by fiat_secp384r1_cmovznz_u32
+ *    with the candidates 0 and 0xffffffff.
+ * 3. The masked constant { ffffffff, 0, 0, ffffffff, fffffffe, ffffffff x 7 }
+ *    (limb 0 first; it encodes 2^384 - 2^128 - 2^96 + 2^32 - 1, presumably m)
+ *    is added to the difference with a carry chain. Limbs whose constant
+ *    is 0 add a literal 0, limb 4 adds x25 & 0xfffffffe, all others add x25.
+ *
+ * All reads of arg1 and arg2 happen in step 1, before the first store to out1.
+ *
+ * NOTE(review): the helpers are defined outside this excerpt. Presumably
+ * cmovznz yields 0 when x24 is zero and 0xffffffff otherwise, so the
+ * constant is added back only when the subtraction borrowed. Confirm
+ * against the helper definitions.
+ *
+ */
+static void
+fiat_secp384r1_sub(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1,
+ const fiat_secp384r1_montgomery_domain_field_element arg2)
+{
+ uint32_t x1; /* x1..x24: difference limbs, each followed by the one-bit borrow produced with it */
+ fiat_secp384r1_uint1 x2;
+ uint32_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint32_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint32_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint32_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint32_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint32_t x13;
+ fiat_secp384r1_uint1 x14;
+ uint32_t x15;
+ fiat_secp384r1_uint1 x16;
+ uint32_t x17;
+ fiat_secp384r1_uint1 x18;
+ uint32_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint32_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint32_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint32_t x25; /* mask selected from the final borrow x24 */
+ uint32_t x26; /* x26..x49: output limbs, each followed by the one-bit carry produced with it */
+ fiat_secp384r1_uint1 x27;
+ uint32_t x28;
+ fiat_secp384r1_uint1 x29;
+ uint32_t x30;
+ fiat_secp384r1_uint1 x31;
+ uint32_t x32;
+ fiat_secp384r1_uint1 x33;
+ uint32_t x34;
+ fiat_secp384r1_uint1 x35;
+ uint32_t x36;
+ fiat_secp384r1_uint1 x37;
+ uint32_t x38;
+ fiat_secp384r1_uint1 x39;
+ uint32_t x40;
+ fiat_secp384r1_uint1 x41;
+ uint32_t x42;
+ fiat_secp384r1_uint1 x43;
+ uint32_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint32_t x46;
+ fiat_secp384r1_uint1 x47;
+ uint32_t x48;
+ fiat_secp384r1_uint1 x49;
+ fiat_secp384r1_subborrowx_u32(&x1, &x2, 0x0, (arg1[0]), (arg2[0])); /* step 1: limb-wise arg1 - arg2, borrow-in 0 for limb 0 */
+ fiat_secp384r1_subborrowx_u32(&x3, &x4, x2, (arg1[1]), (arg2[1]));
+ fiat_secp384r1_subborrowx_u32(&x5, &x6, x4, (arg1[2]), (arg2[2]));
+ fiat_secp384r1_subborrowx_u32(&x7, &x8, x6, (arg1[3]), (arg2[3]));
+ fiat_secp384r1_subborrowx_u32(&x9, &x10, x8, (arg1[4]), (arg2[4]));
+ fiat_secp384r1_subborrowx_u32(&x11, &x12, x10, (arg1[5]), (arg2[5]));
+ fiat_secp384r1_subborrowx_u32(&x13, &x14, x12, (arg1[6]), (arg2[6]));
+ fiat_secp384r1_subborrowx_u32(&x15, &x16, x14, (arg1[7]), (arg2[7]));
+ fiat_secp384r1_subborrowx_u32(&x17, &x18, x16, (arg1[8]), (arg2[8]));
+ fiat_secp384r1_subborrowx_u32(&x19, &x20, x18, (arg1[9]), (arg2[9]));
+ fiat_secp384r1_subborrowx_u32(&x21, &x22, x20, (arg1[10]), (arg2[10]));
+ fiat_secp384r1_subborrowx_u32(&x23, &x24, x22, (arg1[11]), (arg2[11])); /* x24 = borrow out of the top limb */
+ fiat_secp384r1_cmovznz_u32(&x25, x24, 0x0, UINT32_C(0xffffffff)); /* step 2: turn the borrow into a 0 / 0xffffffff mask */
+ fiat_secp384r1_addcarryx_u32(&x26, &x27, 0x0, x1, x25); /* step 3: add the masked constant, carry-in 0 for limb 0 */
+ fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x3, 0x0); /* constant limb is 0: only the carry is propagated */
+ fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x5, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x7, x25);
+ fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x9,
+ (x25 & UINT32_C(0xfffffffe))); /* constant limb 4 is 0xfffffffe, so the mask is ANDed down */
+ fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x11, x25);
+ fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, x13, x25);
+ fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, x15, x25);
+ fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, x17, x25);
+ fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, x19, x25);
+ fiat_secp384r1_addcarryx_u32(&x46, &x47, x45, x21, x25);
+ fiat_secp384r1_addcarryx_u32(&x48, &x49, x47, x23, x25); /* the final carry x49 is computed but never read */
+ out1[0] = x26; /* first store to out1; arg1/arg2 are no longer read from here on */
+ out1[1] = x28;
+ out1[2] = x30;
+ out1[3] = x32;
+ out1[4] = x34;
+ out1[5] = x36;
+ out1[6] = x38;
+ out1[7] = x40;
+ out1[8] = x42;
+ out1[9] = x44;
+ out1[10] = x46;
+ out1[11] = x48;
+}
+
+/*
+ * The function fiat_secp384r1_opp negates a field element in the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Parameters:
+ * out1 - receives the twelve 32-bit result limbs; index 0 is written first.
+ * arg1 - the element to negate, twelve 32-bit limbs; only read.
+ *
+ * Outline of the body (straight-line code in this function, no loops):
+ * 1. x1, x3, ..., x23 = 0 - arg1 limb by limb, with the borrow passed
+ *    through x2, x4, ..., x22; x24 is the borrow out of the last limb.
+ * 2. x25 is a 32-bit mask derived from x24 by fiat_secp384r1_cmovznz_u32
+ *    with the candidates 0 and 0xffffffff.
+ * 3. The masked constant { ffffffff, 0, 0, ffffffff, fffffffe, ffffffff x 7 }
+ *    (limb 0 first; it encodes 2^384 - 2^128 - 2^96 + 2^32 - 1, presumably m)
+ *    is added to the difference with a carry chain. Limbs whose constant
+ *    is 0 add a literal 0, limb 4 adds x25 & 0xfffffffe, all others add x25.
+ *
+ * All reads of arg1 happen in step 1, before the first store to out1.
+ *
+ * NOTE(review): the helpers are defined outside this excerpt. Presumably
+ * cmovznz yields 0 when x24 is zero and 0xffffffff otherwise, so a zero
+ * input (no borrow) stays zero and any other input gets the constant
+ * added back. Confirm against the helper definitions.
+ *
+ */
+static void
+fiat_secp384r1_opp(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint32_t x1; /* x1..x24: limbs of 0 - arg1, each followed by the one-bit borrow produced with it */
+ fiat_secp384r1_uint1 x2;
+ uint32_t x3;
+ fiat_secp384r1_uint1 x4;
+ uint32_t x5;
+ fiat_secp384r1_uint1 x6;
+ uint32_t x7;
+ fiat_secp384r1_uint1 x8;
+ uint32_t x9;
+ fiat_secp384r1_uint1 x10;
+ uint32_t x11;
+ fiat_secp384r1_uint1 x12;
+ uint32_t x13;
+ fiat_secp384r1_uint1 x14;
+ uint32_t x15;
+ fiat_secp384r1_uint1 x16;
+ uint32_t x17;
+ fiat_secp384r1_uint1 x18;
+ uint32_t x19;
+ fiat_secp384r1_uint1 x20;
+ uint32_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint32_t x23;
+ fiat_secp384r1_uint1 x24;
+ uint32_t x25; /* mask selected from the final borrow x24 */
+ uint32_t x26; /* x26..x49: output limbs, each followed by the one-bit carry produced with it */
+ fiat_secp384r1_uint1 x27;
+ uint32_t x28;
+ fiat_secp384r1_uint1 x29;
+ uint32_t x30;
+ fiat_secp384r1_uint1 x31;
+ uint32_t x32;
+ fiat_secp384r1_uint1 x33;
+ uint32_t x34;
+ fiat_secp384r1_uint1 x35;
+ uint32_t x36;
+ fiat_secp384r1_uint1 x37;
+ uint32_t x38;
+ fiat_secp384r1_uint1 x39;
+ uint32_t x40;
+ fiat_secp384r1_uint1 x41;
+ uint32_t x42;
+ fiat_secp384r1_uint1 x43;
+ uint32_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint32_t x46;
+ fiat_secp384r1_uint1 x47;
+ uint32_t x48;
+ fiat_secp384r1_uint1 x49;
+ fiat_secp384r1_subborrowx_u32(&x1, &x2, 0x0, 0x0, (arg1[0])); /* step 1: limb-wise 0 - arg1, borrow-in 0 for limb 0 */
+ fiat_secp384r1_subborrowx_u32(&x3, &x4, x2, 0x0, (arg1[1]));
+ fiat_secp384r1_subborrowx_u32(&x5, &x6, x4, 0x0, (arg1[2]));
+ fiat_secp384r1_subborrowx_u32(&x7, &x8, x6, 0x0, (arg1[3]));
+ fiat_secp384r1_subborrowx_u32(&x9, &x10, x8, 0x0, (arg1[4]));
+ fiat_secp384r1_subborrowx_u32(&x11, &x12, x10, 0x0, (arg1[5]));
+ fiat_secp384r1_subborrowx_u32(&x13, &x14, x12, 0x0, (arg1[6]));
+ fiat_secp384r1_subborrowx_u32(&x15, &x16, x14, 0x0, (arg1[7]));
+ fiat_secp384r1_subborrowx_u32(&x17, &x18, x16, 0x0, (arg1[8]));
+ fiat_secp384r1_subborrowx_u32(&x19, &x20, x18, 0x0, (arg1[9]));
+ fiat_secp384r1_subborrowx_u32(&x21, &x22, x20, 0x0, (arg1[10]));
+ fiat_secp384r1_subborrowx_u32(&x23, &x24, x22, 0x0, (arg1[11])); /* x24 = borrow out of the top limb */
+ fiat_secp384r1_cmovznz_u32(&x25, x24, 0x0, UINT32_C(0xffffffff)); /* step 2: turn the borrow into a 0 / 0xffffffff mask */
+ fiat_secp384r1_addcarryx_u32(&x26, &x27, 0x0, x1, x25); /* step 3: add the masked constant, carry-in 0 for limb 0 */
+ fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x3, 0x0); /* constant limb is 0: only the carry is propagated */
+ fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x5, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x7, x25);
+ fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x9,
+ (x25 & UINT32_C(0xfffffffe))); /* constant limb 4 is 0xfffffffe, so the mask is ANDed down */
+ fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x11, x25);
+ fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, x13, x25);
+ fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, x15, x25);
+ fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, x17, x25);
+ fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, x19, x25);
+ fiat_secp384r1_addcarryx_u32(&x46, &x47, x45, x21, x25);
+ fiat_secp384r1_addcarryx_u32(&x48, &x49, x47, x23, x25); /* the final carry x49 is computed but never read */
+ out1[0] = x26; /* first store to out1; arg1 is no longer read from here on */
+ out1[1] = x28;
+ out1[2] = x30;
+ out1[3] = x32;
+ out1[4] = x34;
+ out1[5] = x36;
+ out1[6] = x38;
+ out1[7] = x40;
+ out1[8] = x42;
+ out1[9] = x44;
+ out1[10] = x46;
+ out1[11] = x48;
+}
+
+/*
+ * The function fiat_secp384r1_from_montgomery translates a field element out of the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * ((2^32)⁻¹ mod m)^12) mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_from_montgomery(
+ fiat_secp384r1_non_montgomery_domain_field_element out1,
+ const fiat_secp384r1_montgomery_domain_field_element arg1)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint32_t x24;
+ fiat_secp384r1_uint1 x25;
+ uint32_t x26;
+ fiat_secp384r1_uint1 x27;
+ uint32_t x28;
+ fiat_secp384r1_uint1 x29;
+ uint32_t x30;
+ fiat_secp384r1_uint1 x31;
+ uint32_t x32;
+ fiat_secp384r1_uint1 x33;
+ uint32_t x34;
+ fiat_secp384r1_uint1 x35;
+ uint32_t x36;
+ fiat_secp384r1_uint1 x37;
+ uint32_t x38;
+ fiat_secp384r1_uint1 x39;
+ uint32_t x40;
+ fiat_secp384r1_uint1 x41;
+ uint32_t x42;
+ uint32_t x43;
+ uint32_t x44;
+ uint32_t x45;
+ uint32_t x46;
+ uint32_t x47;
+ uint32_t x48;
+ uint32_t x49;
+ uint32_t x50;
+ uint32_t x51;
+ uint32_t x52;
+ uint32_t x53;
+ uint32_t x54;
+ uint32_t x55;
+ uint32_t x56;
+ uint32_t x57;
+ uint32_t x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ fiat_secp384r1_uint1 x63;
+ uint32_t x64;
+ fiat_secp384r1_uint1 x65;
+ uint32_t x66;
+ fiat_secp384r1_uint1 x67;
+ uint32_t x68;
+ fiat_secp384r1_uint1 x69;
+ uint32_t x70;
+ fiat_secp384r1_uint1 x71;
+ uint32_t x72;
+ fiat_secp384r1_uint1 x73;
+ uint32_t x74;
+ fiat_secp384r1_uint1 x75;
+ uint32_t x76;
+ fiat_secp384r1_uint1 x77;
+ uint32_t x78;
+ fiat_secp384r1_uint1 x79;
+ uint32_t x80;
+ fiat_secp384r1_uint1 x81;
+ uint32_t x82;
+ fiat_secp384r1_uint1 x83;
+ uint32_t x84;
+ fiat_secp384r1_uint1 x85;
+ uint32_t x86;
+ fiat_secp384r1_uint1 x87;
+ uint32_t x88;
+ fiat_secp384r1_uint1 x89;
+ uint32_t x90;
+ fiat_secp384r1_uint1 x91;
+ uint32_t x92;
+ fiat_secp384r1_uint1 x93;
+ uint32_t x94;
+ fiat_secp384r1_uint1 x95;
+ uint32_t x96;
+ fiat_secp384r1_uint1 x97;
+ uint32_t x98;
+ fiat_secp384r1_uint1 x99;
+ uint32_t x100;
+ fiat_secp384r1_uint1 x101;
+ uint32_t x102;
+ fiat_secp384r1_uint1 x103;
+ uint32_t x104;
+ fiat_secp384r1_uint1 x105;
+ uint32_t x106;
+ fiat_secp384r1_uint1 x107;
+ uint32_t x108;
+ fiat_secp384r1_uint1 x109;
+ uint32_t x110;
+ fiat_secp384r1_uint1 x111;
+ uint32_t x112;
+ fiat_secp384r1_uint1 x113;
+ uint32_t x114;
+ fiat_secp384r1_uint1 x115;
+ uint32_t x116;
+ fiat_secp384r1_uint1 x117;
+ uint32_t x118;
+ fiat_secp384r1_uint1 x119;
+ uint32_t x120;
+ fiat_secp384r1_uint1 x121;
+ uint32_t x122;
+ fiat_secp384r1_uint1 x123;
+ uint32_t x124;
+ fiat_secp384r1_uint1 x125;
+ uint32_t x126;
+ fiat_secp384r1_uint1 x127;
+ uint32_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint32_t x133;
+ uint32_t x134;
+ uint32_t x135;
+ uint32_t x136;
+ uint32_t x137;
+ uint32_t x138;
+ uint32_t x139;
+ uint32_t x140;
+ uint32_t x141;
+ uint32_t x142;
+ uint32_t x143;
+ uint32_t x144;
+ uint32_t x145;
+ uint32_t x146;
+ uint32_t x147;
+ uint32_t x148;
+ fiat_secp384r1_uint1 x149;
+ uint32_t x150;
+ fiat_secp384r1_uint1 x151;
+ uint32_t x152;
+ fiat_secp384r1_uint1 x153;
+ uint32_t x154;
+ fiat_secp384r1_uint1 x155;
+ uint32_t x156;
+ fiat_secp384r1_uint1 x157;
+ uint32_t x158;
+ fiat_secp384r1_uint1 x159;
+ uint32_t x160;
+ fiat_secp384r1_uint1 x161;
+ uint32_t x162;
+ fiat_secp384r1_uint1 x163;
+ uint32_t x164;
+ fiat_secp384r1_uint1 x165;
+ uint32_t x166;
+ fiat_secp384r1_uint1 x167;
+ uint32_t x168;
+ fiat_secp384r1_uint1 x169;
+ uint32_t x170;
+ fiat_secp384r1_uint1 x171;
+ uint32_t x172;
+ fiat_secp384r1_uint1 x173;
+ uint32_t x174;
+ fiat_secp384r1_uint1 x175;
+ uint32_t x176;
+ fiat_secp384r1_uint1 x177;
+ uint32_t x178;
+ fiat_secp384r1_uint1 x179;
+ uint32_t x180;
+ fiat_secp384r1_uint1 x181;
+ uint32_t x182;
+ fiat_secp384r1_uint1 x183;
+ uint32_t x184;
+ fiat_secp384r1_uint1 x185;
+ uint32_t x186;
+ fiat_secp384r1_uint1 x187;
+ uint32_t x188;
+ fiat_secp384r1_uint1 x189;
+ uint32_t x190;
+ fiat_secp384r1_uint1 x191;
+ uint32_t x192;
+ fiat_secp384r1_uint1 x193;
+ uint32_t x194;
+ fiat_secp384r1_uint1 x195;
+ uint32_t x196;
+ fiat_secp384r1_uint1 x197;
+ uint32_t x198;
+ fiat_secp384r1_uint1 x199;
+ uint32_t x200;
+ fiat_secp384r1_uint1 x201;
+ uint32_t x202;
+ fiat_secp384r1_uint1 x203;
+ uint32_t x204;
+ fiat_secp384r1_uint1 x205;
+ uint32_t x206;
+ fiat_secp384r1_uint1 x207;
+ uint32_t x208;
+ fiat_secp384r1_uint1 x209;
+ uint32_t x210;
+ fiat_secp384r1_uint1 x211;
+ uint32_t x212;
+ fiat_secp384r1_uint1 x213;
+ uint32_t x214;
+ uint32_t x215;
+ uint32_t x216;
+ uint32_t x217;
+ uint32_t x218;
+ uint32_t x219;
+ uint32_t x220;
+ uint32_t x221;
+ uint32_t x222;
+ uint32_t x223;
+ uint32_t x224;
+ uint32_t x225;
+ uint32_t x226;
+ uint32_t x227;
+ uint32_t x228;
+ uint32_t x229;
+ uint32_t x230;
+ uint32_t x231;
+ uint32_t x232;
+ uint32_t x233;
+ uint32_t x234;
+ fiat_secp384r1_uint1 x235;
+ uint32_t x236;
+ fiat_secp384r1_uint1 x237;
+ uint32_t x238;
+ fiat_secp384r1_uint1 x239;
+ uint32_t x240;
+ fiat_secp384r1_uint1 x241;
+ uint32_t x242;
+ fiat_secp384r1_uint1 x243;
+ uint32_t x244;
+ fiat_secp384r1_uint1 x245;
+ uint32_t x246;
+ fiat_secp384r1_uint1 x247;
+ uint32_t x248;
+ fiat_secp384r1_uint1 x249;
+ uint32_t x250;
+ fiat_secp384r1_uint1 x251;
+ uint32_t x252;
+ fiat_secp384r1_uint1 x253;
+ uint32_t x254;
+ fiat_secp384r1_uint1 x255;
+ uint32_t x256;
+ fiat_secp384r1_uint1 x257;
+ uint32_t x258;
+ fiat_secp384r1_uint1 x259;
+ uint32_t x260;
+ fiat_secp384r1_uint1 x261;
+ uint32_t x262;
+ fiat_secp384r1_uint1 x263;
+ uint32_t x264;
+ fiat_secp384r1_uint1 x265;
+ uint32_t x266;
+ fiat_secp384r1_uint1 x267;
+ uint32_t x268;
+ fiat_secp384r1_uint1 x269;
+ uint32_t x270;
+ fiat_secp384r1_uint1 x271;
+ uint32_t x272;
+ fiat_secp384r1_uint1 x273;
+ uint32_t x274;
+ fiat_secp384r1_uint1 x275;
+ uint32_t x276;
+ fiat_secp384r1_uint1 x277;
+ uint32_t x278;
+ fiat_secp384r1_uint1 x279;
+ uint32_t x280;
+ fiat_secp384r1_uint1 x281;
+ uint32_t x282;
+ fiat_secp384r1_uint1 x283;
+ uint32_t x284;
+ fiat_secp384r1_uint1 x285;
+ uint32_t x286;
+ fiat_secp384r1_uint1 x287;
+ uint32_t x288;
+ fiat_secp384r1_uint1 x289;
+ uint32_t x290;
+ fiat_secp384r1_uint1 x291;
+ uint32_t x292;
+ fiat_secp384r1_uint1 x293;
+ uint32_t x294;
+ fiat_secp384r1_uint1 x295;
+ uint32_t x296;
+ fiat_secp384r1_uint1 x297;
+ uint32_t x298;
+ fiat_secp384r1_uint1 x299;
+ uint32_t x300;
+ uint32_t x301;
+ uint32_t x302;
+ uint32_t x303;
+ uint32_t x304;
+ uint32_t x305;
+ uint32_t x306;
+ uint32_t x307;
+ uint32_t x308;
+ uint32_t x309;
+ uint32_t x310;
+ uint32_t x311;
+ uint32_t x312;
+ uint32_t x313;
+ uint32_t x314;
+ uint32_t x315;
+ uint32_t x316;
+ uint32_t x317;
+ uint32_t x318;
+ uint32_t x319;
+ uint32_t x320;
+ fiat_secp384r1_uint1 x321;
+ uint32_t x322;
+ fiat_secp384r1_uint1 x323;
+ uint32_t x324;
+ fiat_secp384r1_uint1 x325;
+ uint32_t x326;
+ fiat_secp384r1_uint1 x327;
+ uint32_t x328;
+ fiat_secp384r1_uint1 x329;
+ uint32_t x330;
+ fiat_secp384r1_uint1 x331;
+ uint32_t x332;
+ fiat_secp384r1_uint1 x333;
+ uint32_t x334;
+ fiat_secp384r1_uint1 x335;
+ uint32_t x336;
+ fiat_secp384r1_uint1 x337;
+ uint32_t x338;
+ fiat_secp384r1_uint1 x339;
+ uint32_t x340;
+ fiat_secp384r1_uint1 x341;
+ uint32_t x342;
+ fiat_secp384r1_uint1 x343;
+ uint32_t x344;
+ fiat_secp384r1_uint1 x345;
+ uint32_t x346;
+ fiat_secp384r1_uint1 x347;
+ uint32_t x348;
+ fiat_secp384r1_uint1 x349;
+ uint32_t x350;
+ fiat_secp384r1_uint1 x351;
+ uint32_t x352;
+ fiat_secp384r1_uint1 x353;
+ uint32_t x354;
+ fiat_secp384r1_uint1 x355;
+ uint32_t x356;
+ fiat_secp384r1_uint1 x357;
+ uint32_t x358;
+ fiat_secp384r1_uint1 x359;
+ uint32_t x360;
+ fiat_secp384r1_uint1 x361;
+ uint32_t x362;
+ fiat_secp384r1_uint1 x363;
+ uint32_t x364;
+ fiat_secp384r1_uint1 x365;
+ uint32_t x366;
+ fiat_secp384r1_uint1 x367;
+ uint32_t x368;
+ fiat_secp384r1_uint1 x369;
+ uint32_t x370;
+ fiat_secp384r1_uint1 x371;
+ uint32_t x372;
+ fiat_secp384r1_uint1 x373;
+ uint32_t x374;
+ fiat_secp384r1_uint1 x375;
+ uint32_t x376;
+ fiat_secp384r1_uint1 x377;
+ uint32_t x378;
+ fiat_secp384r1_uint1 x379;
+ uint32_t x380;
+ fiat_secp384r1_uint1 x381;
+ uint32_t x382;
+ fiat_secp384r1_uint1 x383;
+ uint32_t x384;
+ fiat_secp384r1_uint1 x385;
+ uint32_t x386;
+ uint32_t x387;
+ uint32_t x388;
+ uint32_t x389;
+ uint32_t x390;
+ uint32_t x391;
+ uint32_t x392;
+ uint32_t x393;
+ uint32_t x394;
+ uint32_t x395;
+ uint32_t x396;
+ uint32_t x397;
+ uint32_t x398;
+ uint32_t x399;
+ uint32_t x400;
+ uint32_t x401;
+ uint32_t x402;
+ uint32_t x403;
+ uint32_t x404;
+ uint32_t x405;
+ uint32_t x406;
+ fiat_secp384r1_uint1 x407;
+ uint32_t x408;
+ fiat_secp384r1_uint1 x409;
+ uint32_t x410;
+ fiat_secp384r1_uint1 x411;
+ uint32_t x412;
+ fiat_secp384r1_uint1 x413;
+ uint32_t x414;
+ fiat_secp384r1_uint1 x415;
+ uint32_t x416;
+ fiat_secp384r1_uint1 x417;
+ uint32_t x418;
+ fiat_secp384r1_uint1 x419;
+ uint32_t x420;
+ fiat_secp384r1_uint1 x421;
+ uint32_t x422;
+ fiat_secp384r1_uint1 x423;
+ uint32_t x424;
+ fiat_secp384r1_uint1 x425;
+ uint32_t x426;
+ fiat_secp384r1_uint1 x427;
+ uint32_t x428;
+ fiat_secp384r1_uint1 x429;
+ uint32_t x430;
+ fiat_secp384r1_uint1 x431;
+ uint32_t x432;
+ fiat_secp384r1_uint1 x433;
+ uint32_t x434;
+ fiat_secp384r1_uint1 x435;
+ uint32_t x436;
+ fiat_secp384r1_uint1 x437;
+ uint32_t x438;
+ fiat_secp384r1_uint1 x439;
+ uint32_t x440;
+ fiat_secp384r1_uint1 x441;
+ uint32_t x442;
+ fiat_secp384r1_uint1 x443;
+ uint32_t x444;
+ fiat_secp384r1_uint1 x445;
+ uint32_t x446;
+ fiat_secp384r1_uint1 x447;
+ uint32_t x448;
+ fiat_secp384r1_uint1 x449;
+ uint32_t x450;
+ fiat_secp384r1_uint1 x451;
+ uint32_t x452;
+ fiat_secp384r1_uint1 x453;
+ uint32_t x454;
+ fiat_secp384r1_uint1 x455;
+ uint32_t x456;
+ fiat_secp384r1_uint1 x457;
+ uint32_t x458;
+ fiat_secp384r1_uint1 x459;
+ uint32_t x460;
+ fiat_secp384r1_uint1 x461;
+ uint32_t x462;
+ fiat_secp384r1_uint1 x463;
+ uint32_t x464;
+ fiat_secp384r1_uint1 x465;
+ uint32_t x466;
+ fiat_secp384r1_uint1 x467;
+ uint32_t x468;
+ fiat_secp384r1_uint1 x469;
+ uint32_t x470;
+ fiat_secp384r1_uint1 x471;
+ uint32_t x472;
+ uint32_t x473;
+ uint32_t x474;
+ uint32_t x475;
+ uint32_t x476;
+ uint32_t x477;
+ uint32_t x478;
+ uint32_t x479;
+ uint32_t x480;
+ uint32_t x481;
+ uint32_t x482;
+ uint32_t x483;
+ uint32_t x484;
+ uint32_t x485;
+ uint32_t x486;
+ uint32_t x487;
+ uint32_t x488;
+ uint32_t x489;
+ uint32_t x490;
+ uint32_t x491;
+ uint32_t x492;
+ fiat_secp384r1_uint1 x493;
+ uint32_t x494;
+ fiat_secp384r1_uint1 x495;
+ uint32_t x496;
+ fiat_secp384r1_uint1 x497;
+ uint32_t x498;
+ fiat_secp384r1_uint1 x499;
+ uint32_t x500;
+ fiat_secp384r1_uint1 x501;
+ uint32_t x502;
+ fiat_secp384r1_uint1 x503;
+ uint32_t x504;
+ fiat_secp384r1_uint1 x505;
+ uint32_t x506;
+ fiat_secp384r1_uint1 x507;
+ uint32_t x508;
+ fiat_secp384r1_uint1 x509;
+ uint32_t x510;
+ fiat_secp384r1_uint1 x511;
+ uint32_t x512;
+ fiat_secp384r1_uint1 x513;
+ uint32_t x514;
+ fiat_secp384r1_uint1 x515;
+ uint32_t x516;
+ fiat_secp384r1_uint1 x517;
+ uint32_t x518;
+ fiat_secp384r1_uint1 x519;
+ uint32_t x520;
+ fiat_secp384r1_uint1 x521;
+ uint32_t x522;
+ fiat_secp384r1_uint1 x523;
+ uint32_t x524;
+ fiat_secp384r1_uint1 x525;
+ uint32_t x526;
+ fiat_secp384r1_uint1 x527;
+ uint32_t x528;
+ fiat_secp384r1_uint1 x529;
+ uint32_t x530;
+ fiat_secp384r1_uint1 x531;
+ uint32_t x532;
+ fiat_secp384r1_uint1 x533;
+ uint32_t x534;
+ fiat_secp384r1_uint1 x535;
+ uint32_t x536;
+ fiat_secp384r1_uint1 x537;
+ uint32_t x538;
+ fiat_secp384r1_uint1 x539;
+ uint32_t x540;
+ fiat_secp384r1_uint1 x541;
+ uint32_t x542;
+ fiat_secp384r1_uint1 x543;
+ uint32_t x544;
+ fiat_secp384r1_uint1 x545;
+ uint32_t x546;
+ fiat_secp384r1_uint1 x547;
+ uint32_t x548;
+ fiat_secp384r1_uint1 x549;
+ uint32_t x550;
+ fiat_secp384r1_uint1 x551;
+ uint32_t x552;
+ fiat_secp384r1_uint1 x553;
+ uint32_t x554;
+ fiat_secp384r1_uint1 x555;
+ uint32_t x556;
+ fiat_secp384r1_uint1 x557;
+ uint32_t x558;
+ uint32_t x559;
+ uint32_t x560;
+ uint32_t x561;
+ uint32_t x562;
+ uint32_t x563;
+ uint32_t x564;
+ uint32_t x565;
+ uint32_t x566;
+ uint32_t x567;
+ uint32_t x568;
+ uint32_t x569;
+ uint32_t x570;
+ uint32_t x571;
+ uint32_t x572;
+ uint32_t x573;
+ uint32_t x574;
+ uint32_t x575;
+ uint32_t x576;
+ uint32_t x577;
+ uint32_t x578;
+ fiat_secp384r1_uint1 x579;
+ uint32_t x580;
+ fiat_secp384r1_uint1 x581;
+ uint32_t x582;
+ fiat_secp384r1_uint1 x583;
+ uint32_t x584;
+ fiat_secp384r1_uint1 x585;
+ uint32_t x586;
+ fiat_secp384r1_uint1 x587;
+ uint32_t x588;
+ fiat_secp384r1_uint1 x589;
+ uint32_t x590;
+ fiat_secp384r1_uint1 x591;
+ uint32_t x592;
+ fiat_secp384r1_uint1 x593;
+ uint32_t x594;
+ fiat_secp384r1_uint1 x595;
+ uint32_t x596;
+ fiat_secp384r1_uint1 x597;
+ uint32_t x598;
+ fiat_secp384r1_uint1 x599;
+ uint32_t x600;
+ fiat_secp384r1_uint1 x601;
+ uint32_t x602;
+ fiat_secp384r1_uint1 x603;
+ uint32_t x604;
+ fiat_secp384r1_uint1 x605;
+ uint32_t x606;
+ fiat_secp384r1_uint1 x607;
+ uint32_t x608;
+ fiat_secp384r1_uint1 x609;
+ uint32_t x610;
+ fiat_secp384r1_uint1 x611;
+ uint32_t x612;
+ fiat_secp384r1_uint1 x613;
+ uint32_t x614;
+ fiat_secp384r1_uint1 x615;
+ uint32_t x616;
+ fiat_secp384r1_uint1 x617;
+ uint32_t x618;
+ fiat_secp384r1_uint1 x619;
+ uint32_t x620;
+ fiat_secp384r1_uint1 x621;
+ uint32_t x622;
+ fiat_secp384r1_uint1 x623;
+ uint32_t x624;
+ fiat_secp384r1_uint1 x625;
+ uint32_t x626;
+ fiat_secp384r1_uint1 x627;
+ uint32_t x628;
+ fiat_secp384r1_uint1 x629;
+ uint32_t x630;
+ fiat_secp384r1_uint1 x631;
+ uint32_t x632;
+ fiat_secp384r1_uint1 x633;
+ uint32_t x634;
+ fiat_secp384r1_uint1 x635;
+ uint32_t x636;
+ fiat_secp384r1_uint1 x637;
+ uint32_t x638;
+ fiat_secp384r1_uint1 x639;
+ uint32_t x640;
+ fiat_secp384r1_uint1 x641;
+ uint32_t x642;
+ fiat_secp384r1_uint1 x643;
+ uint32_t x644;
+ uint32_t x645;
+ uint32_t x646;
+ uint32_t x647;
+ uint32_t x648;
+ uint32_t x649;
+ uint32_t x650;
+ uint32_t x651;
+ uint32_t x652;
+ uint32_t x653;
+ uint32_t x654;
+ uint32_t x655;
+ uint32_t x656;
+ uint32_t x657;
+ uint32_t x658;
+ uint32_t x659;
+ uint32_t x660;
+ uint32_t x661;
+ uint32_t x662;
+ uint32_t x663;
+ uint32_t x664;
+ fiat_secp384r1_uint1 x665;
+ uint32_t x666;
+ fiat_secp384r1_uint1 x667;
+ uint32_t x668;
+ fiat_secp384r1_uint1 x669;
+ uint32_t x670;
+ fiat_secp384r1_uint1 x671;
+ uint32_t x672;
+ fiat_secp384r1_uint1 x673;
+ uint32_t x674;
+ fiat_secp384r1_uint1 x675;
+ uint32_t x676;
+ fiat_secp384r1_uint1 x677;
+ uint32_t x678;
+ fiat_secp384r1_uint1 x679;
+ uint32_t x680;
+ fiat_secp384r1_uint1 x681;
+ uint32_t x682;
+ fiat_secp384r1_uint1 x683;
+ uint32_t x684;
+ fiat_secp384r1_uint1 x685;
+ uint32_t x686;
+ fiat_secp384r1_uint1 x687;
+ uint32_t x688;
+ fiat_secp384r1_uint1 x689;
+ uint32_t x690;
+ fiat_secp384r1_uint1 x691;
+ uint32_t x692;
+ fiat_secp384r1_uint1 x693;
+ uint32_t x694;
+ fiat_secp384r1_uint1 x695;
+ uint32_t x696;
+ fiat_secp384r1_uint1 x697;
+ uint32_t x698;
+ fiat_secp384r1_uint1 x699;
+ uint32_t x700;
+ fiat_secp384r1_uint1 x701;
+ uint32_t x702;
+ fiat_secp384r1_uint1 x703;
+ uint32_t x704;
+ fiat_secp384r1_uint1 x705;
+ uint32_t x706;
+ fiat_secp384r1_uint1 x707;
+ uint32_t x708;
+ fiat_secp384r1_uint1 x709;
+ uint32_t x710;
+ fiat_secp384r1_uint1 x711;
+ uint32_t x712;
+ fiat_secp384r1_uint1 x713;
+ uint32_t x714;
+ fiat_secp384r1_uint1 x715;
+ uint32_t x716;
+ fiat_secp384r1_uint1 x717;
+ uint32_t x718;
+ fiat_secp384r1_uint1 x719;
+ uint32_t x720;
+ fiat_secp384r1_uint1 x721;
+ uint32_t x722;
+ fiat_secp384r1_uint1 x723;
+ uint32_t x724;
+ fiat_secp384r1_uint1 x725;
+ uint32_t x726;
+ fiat_secp384r1_uint1 x727;
+ uint32_t x728;
+ fiat_secp384r1_uint1 x729;
+ uint32_t x730;
+ uint32_t x731;
+ uint32_t x732;
+ uint32_t x733;
+ uint32_t x734;
+ uint32_t x735;
+ uint32_t x736;
+ uint32_t x737;
+ uint32_t x738;
+ uint32_t x739;
+ uint32_t x740;
+ uint32_t x741;
+ uint32_t x742;
+ uint32_t x743;
+ uint32_t x744;
+ uint32_t x745;
+ uint32_t x746;
+ uint32_t x747;
+ uint32_t x748;
+ uint32_t x749;
+ uint32_t x750;
+ fiat_secp384r1_uint1 x751;
+ uint32_t x752;
+ fiat_secp384r1_uint1 x753;
+ uint32_t x754;
+ fiat_secp384r1_uint1 x755;
+ uint32_t x756;
+ fiat_secp384r1_uint1 x757;
+ uint32_t x758;
+ fiat_secp384r1_uint1 x759;
+ uint32_t x760;
+ fiat_secp384r1_uint1 x761;
+ uint32_t x762;
+ fiat_secp384r1_uint1 x763;
+ uint32_t x764;
+ fiat_secp384r1_uint1 x765;
+ uint32_t x766;
+ fiat_secp384r1_uint1 x767;
+ uint32_t x768;
+ fiat_secp384r1_uint1 x769;
+ uint32_t x770;
+ fiat_secp384r1_uint1 x771;
+ uint32_t x772;
+ fiat_secp384r1_uint1 x773;
+ uint32_t x774;
+ fiat_secp384r1_uint1 x775;
+ uint32_t x776;
+ fiat_secp384r1_uint1 x777;
+ uint32_t x778;
+ fiat_secp384r1_uint1 x779;
+ uint32_t x780;
+ fiat_secp384r1_uint1 x781;
+ uint32_t x782;
+ fiat_secp384r1_uint1 x783;
+ uint32_t x784;
+ fiat_secp384r1_uint1 x785;
+ uint32_t x786;
+ fiat_secp384r1_uint1 x787;
+ uint32_t x788;
+ fiat_secp384r1_uint1 x789;
+ uint32_t x790;
+ fiat_secp384r1_uint1 x791;
+ uint32_t x792;
+ fiat_secp384r1_uint1 x793;
+ uint32_t x794;
+ fiat_secp384r1_uint1 x795;
+ uint32_t x796;
+ fiat_secp384r1_uint1 x797;
+ uint32_t x798;
+ fiat_secp384r1_uint1 x799;
+ uint32_t x800;
+ fiat_secp384r1_uint1 x801;
+ uint32_t x802;
+ fiat_secp384r1_uint1 x803;
+ uint32_t x804;
+ fiat_secp384r1_uint1 x805;
+ uint32_t x806;
+ fiat_secp384r1_uint1 x807;
+ uint32_t x808;
+ fiat_secp384r1_uint1 x809;
+ uint32_t x810;
+ fiat_secp384r1_uint1 x811;
+ uint32_t x812;
+ fiat_secp384r1_uint1 x813;
+ uint32_t x814;
+ fiat_secp384r1_uint1 x815;
+ uint32_t x816;
+ uint32_t x817;
+ uint32_t x818;
+ uint32_t x819;
+ uint32_t x820;
+ uint32_t x821;
+ uint32_t x822;
+ uint32_t x823;
+ uint32_t x824;
+ uint32_t x825;
+ uint32_t x826;
+ uint32_t x827;
+ uint32_t x828;
+ uint32_t x829;
+ uint32_t x830;
+ uint32_t x831;
+ uint32_t x832;
+ uint32_t x833;
+ uint32_t x834;
+ uint32_t x835;
+ uint32_t x836;
+ fiat_secp384r1_uint1 x837;
+ uint32_t x838;
+ fiat_secp384r1_uint1 x839;
+ uint32_t x840;
+ fiat_secp384r1_uint1 x841;
+ uint32_t x842;
+ fiat_secp384r1_uint1 x843;
+ uint32_t x844;
+ fiat_secp384r1_uint1 x845;
+ uint32_t x846;
+ fiat_secp384r1_uint1 x847;
+ uint32_t x848;
+ fiat_secp384r1_uint1 x849;
+ uint32_t x850;
+ fiat_secp384r1_uint1 x851;
+ uint32_t x852;
+ fiat_secp384r1_uint1 x853;
+ uint32_t x854;
+ fiat_secp384r1_uint1 x855;
+ uint32_t x856;
+ fiat_secp384r1_uint1 x857;
+ uint32_t x858;
+ fiat_secp384r1_uint1 x859;
+ uint32_t x860;
+ fiat_secp384r1_uint1 x861;
+ uint32_t x862;
+ fiat_secp384r1_uint1 x863;
+ uint32_t x864;
+ fiat_secp384r1_uint1 x865;
+ uint32_t x866;
+ fiat_secp384r1_uint1 x867;
+ uint32_t x868;
+ fiat_secp384r1_uint1 x869;
+ uint32_t x870;
+ fiat_secp384r1_uint1 x871;
+ uint32_t x872;
+ fiat_secp384r1_uint1 x873;
+ uint32_t x874;
+ fiat_secp384r1_uint1 x875;
+ uint32_t x876;
+ fiat_secp384r1_uint1 x877;
+ uint32_t x878;
+ fiat_secp384r1_uint1 x879;
+ uint32_t x880;
+ fiat_secp384r1_uint1 x881;
+ uint32_t x882;
+ fiat_secp384r1_uint1 x883;
+ uint32_t x884;
+ fiat_secp384r1_uint1 x885;
+ uint32_t x886;
+ fiat_secp384r1_uint1 x887;
+ uint32_t x888;
+ fiat_secp384r1_uint1 x889;
+ uint32_t x890;
+ fiat_secp384r1_uint1 x891;
+ uint32_t x892;
+ fiat_secp384r1_uint1 x893;
+ uint32_t x894;
+ fiat_secp384r1_uint1 x895;
+ uint32_t x896;
+ fiat_secp384r1_uint1 x897;
+ uint32_t x898;
+ fiat_secp384r1_uint1 x899;
+ uint32_t x900;
+ fiat_secp384r1_uint1 x901;
+ uint32_t x902;
+ uint32_t x903;
+ uint32_t x904;
+ uint32_t x905;
+ uint32_t x906;
+ uint32_t x907;
+ uint32_t x908;
+ uint32_t x909;
+ uint32_t x910;
+ uint32_t x911;
+ uint32_t x912;
+ uint32_t x913;
+ uint32_t x914;
+ uint32_t x915;
+ uint32_t x916;
+ uint32_t x917;
+ uint32_t x918;
+ uint32_t x919;
+ uint32_t x920;
+ uint32_t x921;
+ uint32_t x922;
+ fiat_secp384r1_uint1 x923;
+ uint32_t x924;
+ fiat_secp384r1_uint1 x925;
+ uint32_t x926;
+ fiat_secp384r1_uint1 x927;
+ uint32_t x928;
+ fiat_secp384r1_uint1 x929;
+ uint32_t x930;
+ fiat_secp384r1_uint1 x931;
+ uint32_t x932;
+ fiat_secp384r1_uint1 x933;
+ uint32_t x934;
+ fiat_secp384r1_uint1 x935;
+ uint32_t x936;
+ fiat_secp384r1_uint1 x937;
+ uint32_t x938;
+ fiat_secp384r1_uint1 x939;
+ uint32_t x940;
+ fiat_secp384r1_uint1 x941;
+ uint32_t x942;
+ fiat_secp384r1_uint1 x943;
+ uint32_t x944;
+ fiat_secp384r1_uint1 x945;
+ uint32_t x946;
+ fiat_secp384r1_uint1 x947;
+ uint32_t x948;
+ fiat_secp384r1_uint1 x949;
+ uint32_t x950;
+ fiat_secp384r1_uint1 x951;
+ uint32_t x952;
+ fiat_secp384r1_uint1 x953;
+ uint32_t x954;
+ fiat_secp384r1_uint1 x955;
+ uint32_t x956;
+ fiat_secp384r1_uint1 x957;
+ uint32_t x958;
+ fiat_secp384r1_uint1 x959;
+ uint32_t x960;
+ fiat_secp384r1_uint1 x961;
+ uint32_t x962;
+ fiat_secp384r1_uint1 x963;
+ uint32_t x964;
+ fiat_secp384r1_uint1 x965;
+ uint32_t x966;
+ fiat_secp384r1_uint1 x967;
+ uint32_t x968;
+ fiat_secp384r1_uint1 x969;
+ uint32_t x970;
+ fiat_secp384r1_uint1 x971;
+ uint32_t x972;
+ fiat_secp384r1_uint1 x973;
+ uint32_t x974;
+ fiat_secp384r1_uint1 x975;
+ uint32_t x976;
+ fiat_secp384r1_uint1 x977;
+ uint32_t x978;
+ fiat_secp384r1_uint1 x979;
+ uint32_t x980;
+ fiat_secp384r1_uint1 x981;
+ uint32_t x982;
+ fiat_secp384r1_uint1 x983;
+ uint32_t x984;
+ fiat_secp384r1_uint1 x985;
+ uint32_t x986;
+ fiat_secp384r1_uint1 x987;
+ uint32_t x988;
+ fiat_secp384r1_uint1 x989;
+ uint32_t x990;
+ uint32_t x991;
+ uint32_t x992;
+ uint32_t x993;
+ uint32_t x994;
+ uint32_t x995;
+ uint32_t x996;
+ uint32_t x997;
+ uint32_t x998;
+ uint32_t x999;
+ uint32_t x1000;
+ uint32_t x1001;
+ x1 = (arg1[0]);
+ fiat_secp384r1_mulx_u32(&x2, &x3, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x4, &x5, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x6, &x7, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x8, &x9, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x10, &x11, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x12, &x13, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x14, &x15, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x16, &x17, x1, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x18, &x19, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x20, &x21, x1, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x22, &x23, 0x0, x19, x16);
+ fiat_secp384r1_addcarryx_u32(&x24, &x25, x23, x17, x14);
+ fiat_secp384r1_addcarryx_u32(&x26, &x27, x25, x15, x12);
+ fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, x13, x10);
+ fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, x11, x8);
+ fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, x9, x6);
+ fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, x7, x4);
+ fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, x5, x2);
+ fiat_secp384r1_addcarryx_u32(&x38, &x39, 0x0, x1, x20);
+ fiat_secp384r1_addcarryx_u32(&x40, &x41, 0x0, (x39 + x21), (arg1[1]));
+ fiat_secp384r1_mulx_u32(&x42, &x43, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x44, &x45, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x46, &x47, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x48, &x49, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x50, &x51, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x52, &x53, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x54, &x55, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x56, &x57, x40, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x58, &x59, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x60, &x61, x40, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x62, &x63, 0x0, x59, x56);
+ fiat_secp384r1_addcarryx_u32(&x64, &x65, x63, x57, x54);
+ fiat_secp384r1_addcarryx_u32(&x66, &x67, x65, x55, x52);
+ fiat_secp384r1_addcarryx_u32(&x68, &x69, x67, x53, x50);
+ fiat_secp384r1_addcarryx_u32(&x70, &x71, x69, x51, x48);
+ fiat_secp384r1_addcarryx_u32(&x72, &x73, x71, x49, x46);
+ fiat_secp384r1_addcarryx_u32(&x74, &x75, x73, x47, x44);
+ fiat_secp384r1_addcarryx_u32(&x76, &x77, x75, x45, x42);
+ fiat_secp384r1_addcarryx_u32(&x78, &x79, 0x0, x40, x60);
+ fiat_secp384r1_addcarryx_u32(&x80, &x81, x79, x41, x61);
+ fiat_secp384r1_addcarryx_u32(&x82, &x83, x81, x18, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x84, &x85, x83, x22, x58);
+ fiat_secp384r1_addcarryx_u32(&x86, &x87, x85, x24, x62);
+ fiat_secp384r1_addcarryx_u32(&x88, &x89, x87, x26, x64);
+ fiat_secp384r1_addcarryx_u32(&x90, &x91, x89, x28, x66);
+ fiat_secp384r1_addcarryx_u32(&x92, &x93, x91, x30, x68);
+ fiat_secp384r1_addcarryx_u32(&x94, &x95, x93, x32, x70);
+ fiat_secp384r1_addcarryx_u32(&x96, &x97, x95, x34, x72);
+ fiat_secp384r1_addcarryx_u32(&x98, &x99, x97, x36, x74);
+ fiat_secp384r1_addcarryx_u32(&x100, &x101, x99, (x37 + x3), x76);
+ fiat_secp384r1_addcarryx_u32(&x102, &x103, x101, 0x0, (x77 + x43));
+ fiat_secp384r1_addcarryx_u32(&x104, &x105, 0x0, x80, (arg1[2]));
+ fiat_secp384r1_addcarryx_u32(&x106, &x107, x105, x82, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x108, &x109, x107, x84, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x110, &x111, x109, x86, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x112, &x113, x111, x88, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x114, &x115, x113, x90, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x116, &x117, x115, x92, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x118, &x119, x117, x94, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x120, &x121, x119, x96, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x122, &x123, x121, x98, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x124, &x125, x123, x100, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x126, &x127, x125, x102, 0x0);
+ fiat_secp384r1_mulx_u32(&x128, &x129, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x130, &x131, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x132, &x133, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x134, &x135, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x136, &x137, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x138, &x139, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x140, &x141, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x142, &x143, x104, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x144, &x145, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x146, &x147, x104, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x148, &x149, 0x0, x145, x142);
+ fiat_secp384r1_addcarryx_u32(&x150, &x151, x149, x143, x140);
+ fiat_secp384r1_addcarryx_u32(&x152, &x153, x151, x141, x138);
+ fiat_secp384r1_addcarryx_u32(&x154, &x155, x153, x139, x136);
+ fiat_secp384r1_addcarryx_u32(&x156, &x157, x155, x137, x134);
+ fiat_secp384r1_addcarryx_u32(&x158, &x159, x157, x135, x132);
+ fiat_secp384r1_addcarryx_u32(&x160, &x161, x159, x133, x130);
+ fiat_secp384r1_addcarryx_u32(&x162, &x163, x161, x131, x128);
+ fiat_secp384r1_addcarryx_u32(&x164, &x165, 0x0, x104, x146);
+ fiat_secp384r1_addcarryx_u32(&x166, &x167, x165, x106, x147);
+ fiat_secp384r1_addcarryx_u32(&x168, &x169, x167, x108, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x170, &x171, x169, x110, x144);
+ fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x112, x148);
+ fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x114, x150);
+ fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x116, x152);
+ fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x118, x154);
+ fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x120, x156);
+ fiat_secp384r1_addcarryx_u32(&x182, &x183, x181, x122, x158);
+ fiat_secp384r1_addcarryx_u32(&x184, &x185, x183, x124, x160);
+ fiat_secp384r1_addcarryx_u32(&x186, &x187, x185, x126, x162);
+ fiat_secp384r1_addcarryx_u32(&x188, &x189, x187, ((uint32_t)x127 + x103),
+ (x163 + x129));
+ fiat_secp384r1_addcarryx_u32(&x190, &x191, 0x0, x166, (arg1[3]));
+ fiat_secp384r1_addcarryx_u32(&x192, &x193, x191, x168, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x194, &x195, x193, x170, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x196, &x197, x195, x172, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x198, &x199, x197, x174, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x200, &x201, x199, x176, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x202, &x203, x201, x178, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x204, &x205, x203, x180, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x206, &x207, x205, x182, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x208, &x209, x207, x184, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x210, &x211, x209, x186, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x212, &x213, x211, x188, 0x0);
+ fiat_secp384r1_mulx_u32(&x214, &x215, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x216, &x217, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x218, &x219, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x220, &x221, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x222, &x223, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x224, &x225, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x226, &x227, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x228, &x229, x190, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x230, &x231, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x232, &x233, x190, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x234, &x235, 0x0, x231, x228);
+ fiat_secp384r1_addcarryx_u32(&x236, &x237, x235, x229, x226);
+ fiat_secp384r1_addcarryx_u32(&x238, &x239, x237, x227, x224);
+ fiat_secp384r1_addcarryx_u32(&x240, &x241, x239, x225, x222);
+ fiat_secp384r1_addcarryx_u32(&x242, &x243, x241, x223, x220);
+ fiat_secp384r1_addcarryx_u32(&x244, &x245, x243, x221, x218);
+ fiat_secp384r1_addcarryx_u32(&x246, &x247, x245, x219, x216);
+ fiat_secp384r1_addcarryx_u32(&x248, &x249, x247, x217, x214);
+ fiat_secp384r1_addcarryx_u32(&x250, &x251, 0x0, x190, x232);
+ fiat_secp384r1_addcarryx_u32(&x252, &x253, x251, x192, x233);
+ fiat_secp384r1_addcarryx_u32(&x254, &x255, x253, x194, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x256, &x257, x255, x196, x230);
+ fiat_secp384r1_addcarryx_u32(&x258, &x259, x257, x198, x234);
+ fiat_secp384r1_addcarryx_u32(&x260, &x261, x259, x200, x236);
+ fiat_secp384r1_addcarryx_u32(&x262, &x263, x261, x202, x238);
+ fiat_secp384r1_addcarryx_u32(&x264, &x265, x263, x204, x240);
+ fiat_secp384r1_addcarryx_u32(&x266, &x267, x265, x206, x242);
+ fiat_secp384r1_addcarryx_u32(&x268, &x269, x267, x208, x244);
+ fiat_secp384r1_addcarryx_u32(&x270, &x271, x269, x210, x246);
+ fiat_secp384r1_addcarryx_u32(&x272, &x273, x271, x212, x248);
+ fiat_secp384r1_addcarryx_u32(&x274, &x275, x273, ((uint32_t)x213 + x189),
+ (x249 + x215));
+ fiat_secp384r1_addcarryx_u32(&x276, &x277, 0x0, x252, (arg1[4]));
+ fiat_secp384r1_addcarryx_u32(&x278, &x279, x277, x254, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x280, &x281, x279, x256, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x282, &x283, x281, x258, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x284, &x285, x283, x260, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x286, &x287, x285, x262, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x288, &x289, x287, x264, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x290, &x291, x289, x266, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x292, &x293, x291, x268, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x294, &x295, x293, x270, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x296, &x297, x295, x272, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x298, &x299, x297, x274, 0x0);
+ fiat_secp384r1_mulx_u32(&x300, &x301, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x302, &x303, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x304, &x305, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x306, &x307, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x308, &x309, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x310, &x311, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x312, &x313, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x314, &x315, x276, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x316, &x317, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x318, &x319, x276, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x320, &x321, 0x0, x317, x314);
+ fiat_secp384r1_addcarryx_u32(&x322, &x323, x321, x315, x312);
+ fiat_secp384r1_addcarryx_u32(&x324, &x325, x323, x313, x310);
+ fiat_secp384r1_addcarryx_u32(&x326, &x327, x325, x311, x308);
+ fiat_secp384r1_addcarryx_u32(&x328, &x329, x327, x309, x306);
+ fiat_secp384r1_addcarryx_u32(&x330, &x331, x329, x307, x304);
+ fiat_secp384r1_addcarryx_u32(&x332, &x333, x331, x305, x302);
+ fiat_secp384r1_addcarryx_u32(&x334, &x335, x333, x303, x300);
+ fiat_secp384r1_addcarryx_u32(&x336, &x337, 0x0, x276, x318);
+ fiat_secp384r1_addcarryx_u32(&x338, &x339, x337, x278, x319);
+ fiat_secp384r1_addcarryx_u32(&x340, &x341, x339, x280, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x342, &x343, x341, x282, x316);
+ fiat_secp384r1_addcarryx_u32(&x344, &x345, x343, x284, x320);
+ fiat_secp384r1_addcarryx_u32(&x346, &x347, x345, x286, x322);
+ fiat_secp384r1_addcarryx_u32(&x348, &x349, x347, x288, x324);
+ fiat_secp384r1_addcarryx_u32(&x350, &x351, x349, x290, x326);
+ fiat_secp384r1_addcarryx_u32(&x352, &x353, x351, x292, x328);
+ fiat_secp384r1_addcarryx_u32(&x354, &x355, x353, x294, x330);
+ fiat_secp384r1_addcarryx_u32(&x356, &x357, x355, x296, x332);
+ fiat_secp384r1_addcarryx_u32(&x358, &x359, x357, x298, x334);
+ fiat_secp384r1_addcarryx_u32(&x360, &x361, x359, ((uint32_t)x299 + x275),
+ (x335 + x301));
+ fiat_secp384r1_addcarryx_u32(&x362, &x363, 0x0, x338, (arg1[5]));
+ fiat_secp384r1_addcarryx_u32(&x364, &x365, x363, x340, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x366, &x367, x365, x342, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x368, &x369, x367, x344, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x370, &x371, x369, x346, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x372, &x373, x371, x348, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x374, &x375, x373, x350, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x376, &x377, x375, x352, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x378, &x379, x377, x354, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x380, &x381, x379, x356, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x382, &x383, x381, x358, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x384, &x385, x383, x360, 0x0);
+ fiat_secp384r1_mulx_u32(&x386, &x387, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x388, &x389, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x390, &x391, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x392, &x393, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x394, &x395, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x396, &x397, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x398, &x399, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x400, &x401, x362, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x402, &x403, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x404, &x405, x362, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x406, &x407, 0x0, x403, x400);
+ fiat_secp384r1_addcarryx_u32(&x408, &x409, x407, x401, x398);
+ fiat_secp384r1_addcarryx_u32(&x410, &x411, x409, x399, x396);
+ fiat_secp384r1_addcarryx_u32(&x412, &x413, x411, x397, x394);
+ fiat_secp384r1_addcarryx_u32(&x414, &x415, x413, x395, x392);
+ fiat_secp384r1_addcarryx_u32(&x416, &x417, x415, x393, x390);
+ fiat_secp384r1_addcarryx_u32(&x418, &x419, x417, x391, x388);
+ fiat_secp384r1_addcarryx_u32(&x420, &x421, x419, x389, x386);
+ fiat_secp384r1_addcarryx_u32(&x422, &x423, 0x0, x362, x404);
+ fiat_secp384r1_addcarryx_u32(&x424, &x425, x423, x364, x405);
+ fiat_secp384r1_addcarryx_u32(&x426, &x427, x425, x366, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x428, &x429, x427, x368, x402);
+ fiat_secp384r1_addcarryx_u32(&x430, &x431, x429, x370, x406);
+ fiat_secp384r1_addcarryx_u32(&x432, &x433, x431, x372, x408);
+ fiat_secp384r1_addcarryx_u32(&x434, &x435, x433, x374, x410);
+ fiat_secp384r1_addcarryx_u32(&x436, &x437, x435, x376, x412);
+ fiat_secp384r1_addcarryx_u32(&x438, &x439, x437, x378, x414);
+ fiat_secp384r1_addcarryx_u32(&x440, &x441, x439, x380, x416);
+ fiat_secp384r1_addcarryx_u32(&x442, &x443, x441, x382, x418);
+ fiat_secp384r1_addcarryx_u32(&x444, &x445, x443, x384, x420);
+ fiat_secp384r1_addcarryx_u32(&x446, &x447, x445, ((uint32_t)x385 + x361),
+ (x421 + x387));
+ fiat_secp384r1_addcarryx_u32(&x448, &x449, 0x0, x424, (arg1[6]));
+ fiat_secp384r1_addcarryx_u32(&x450, &x451, x449, x426, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x452, &x453, x451, x428, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x454, &x455, x453, x430, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x456, &x457, x455, x432, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x458, &x459, x457, x434, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x460, &x461, x459, x436, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x462, &x463, x461, x438, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x464, &x465, x463, x440, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x466, &x467, x465, x442, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x468, &x469, x467, x444, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x470, &x471, x469, x446, 0x0);
+ fiat_secp384r1_mulx_u32(&x472, &x473, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x474, &x475, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x476, &x477, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x478, &x479, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x480, &x481, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x482, &x483, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x484, &x485, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x486, &x487, x448, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x488, &x489, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x490, &x491, x448, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x492, &x493, 0x0, x489, x486);
+ fiat_secp384r1_addcarryx_u32(&x494, &x495, x493, x487, x484);
+ fiat_secp384r1_addcarryx_u32(&x496, &x497, x495, x485, x482);
+ fiat_secp384r1_addcarryx_u32(&x498, &x499, x497, x483, x480);
+ fiat_secp384r1_addcarryx_u32(&x500, &x501, x499, x481, x478);
+ fiat_secp384r1_addcarryx_u32(&x502, &x503, x501, x479, x476);
+ fiat_secp384r1_addcarryx_u32(&x504, &x505, x503, x477, x474);
+ fiat_secp384r1_addcarryx_u32(&x506, &x507, x505, x475, x472);
+ fiat_secp384r1_addcarryx_u32(&x508, &x509, 0x0, x448, x490);
+ fiat_secp384r1_addcarryx_u32(&x510, &x511, x509, x450, x491);
+ fiat_secp384r1_addcarryx_u32(&x512, &x513, x511, x452, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x514, &x515, x513, x454, x488);
+ fiat_secp384r1_addcarryx_u32(&x516, &x517, x515, x456, x492);
+ fiat_secp384r1_addcarryx_u32(&x518, &x519, x517, x458, x494);
+ fiat_secp384r1_addcarryx_u32(&x520, &x521, x519, x460, x496);
+ fiat_secp384r1_addcarryx_u32(&x522, &x523, x521, x462, x498);
+ fiat_secp384r1_addcarryx_u32(&x524, &x525, x523, x464, x500);
+ fiat_secp384r1_addcarryx_u32(&x526, &x527, x525, x466, x502);
+ fiat_secp384r1_addcarryx_u32(&x528, &x529, x527, x468, x504);
+ fiat_secp384r1_addcarryx_u32(&x530, &x531, x529, x470, x506);
+ fiat_secp384r1_addcarryx_u32(&x532, &x533, x531, ((uint32_t)x471 + x447),
+ (x507 + x473));
+ fiat_secp384r1_addcarryx_u32(&x534, &x535, 0x0, x510, (arg1[7]));
+ fiat_secp384r1_addcarryx_u32(&x536, &x537, x535, x512, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x538, &x539, x537, x514, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x540, &x541, x539, x516, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x542, &x543, x541, x518, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x544, &x545, x543, x520, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x546, &x547, x545, x522, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x548, &x549, x547, x524, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x550, &x551, x549, x526, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x552, &x553, x551, x528, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x554, &x555, x553, x530, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x556, &x557, x555, x532, 0x0);
+ fiat_secp384r1_mulx_u32(&x558, &x559, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x560, &x561, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x562, &x563, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x564, &x565, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x566, &x567, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x568, &x569, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x570, &x571, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x572, &x573, x534, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x574, &x575, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x576, &x577, x534, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x578, &x579, 0x0, x575, x572);
+ fiat_secp384r1_addcarryx_u32(&x580, &x581, x579, x573, x570);
+ fiat_secp384r1_addcarryx_u32(&x582, &x583, x581, x571, x568);
+ fiat_secp384r1_addcarryx_u32(&x584, &x585, x583, x569, x566);
+ fiat_secp384r1_addcarryx_u32(&x586, &x587, x585, x567, x564);
+ fiat_secp384r1_addcarryx_u32(&x588, &x589, x587, x565, x562);
+ fiat_secp384r1_addcarryx_u32(&x590, &x591, x589, x563, x560);
+ fiat_secp384r1_addcarryx_u32(&x592, &x593, x591, x561, x558);
+ fiat_secp384r1_addcarryx_u32(&x594, &x595, 0x0, x534, x576);
+ fiat_secp384r1_addcarryx_u32(&x596, &x597, x595, x536, x577);
+ fiat_secp384r1_addcarryx_u32(&x598, &x599, x597, x538, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x600, &x601, x599, x540, x574);
+ fiat_secp384r1_addcarryx_u32(&x602, &x603, x601, x542, x578);
+ fiat_secp384r1_addcarryx_u32(&x604, &x605, x603, x544, x580);
+ fiat_secp384r1_addcarryx_u32(&x606, &x607, x605, x546, x582);
+ fiat_secp384r1_addcarryx_u32(&x608, &x609, x607, x548, x584);
+ fiat_secp384r1_addcarryx_u32(&x610, &x611, x609, x550, x586);
+ fiat_secp384r1_addcarryx_u32(&x612, &x613, x611, x552, x588);
+ fiat_secp384r1_addcarryx_u32(&x614, &x615, x613, x554, x590);
+ fiat_secp384r1_addcarryx_u32(&x616, &x617, x615, x556, x592);
+ fiat_secp384r1_addcarryx_u32(&x618, &x619, x617, ((uint32_t)x557 + x533),
+ (x593 + x559));
+ fiat_secp384r1_addcarryx_u32(&x620, &x621, 0x0, x596, (arg1[8]));
+ fiat_secp384r1_addcarryx_u32(&x622, &x623, x621, x598, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x624, &x625, x623, x600, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x626, &x627, x625, x602, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x628, &x629, x627, x604, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x630, &x631, x629, x606, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x632, &x633, x631, x608, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x634, &x635, x633, x610, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x636, &x637, x635, x612, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x638, &x639, x637, x614, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x640, &x641, x639, x616, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x642, &x643, x641, x618, 0x0);
+ fiat_secp384r1_mulx_u32(&x644, &x645, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x646, &x647, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x648, &x649, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x650, &x651, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x652, &x653, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x654, &x655, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x656, &x657, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x658, &x659, x620, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x660, &x661, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x662, &x663, x620, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x664, &x665, 0x0, x661, x658);
+ fiat_secp384r1_addcarryx_u32(&x666, &x667, x665, x659, x656);
+ fiat_secp384r1_addcarryx_u32(&x668, &x669, x667, x657, x654);
+ fiat_secp384r1_addcarryx_u32(&x670, &x671, x669, x655, x652);
+ fiat_secp384r1_addcarryx_u32(&x672, &x673, x671, x653, x650);
+ fiat_secp384r1_addcarryx_u32(&x674, &x675, x673, x651, x648);
+ fiat_secp384r1_addcarryx_u32(&x676, &x677, x675, x649, x646);
+ fiat_secp384r1_addcarryx_u32(&x678, &x679, x677, x647, x644);
+ fiat_secp384r1_addcarryx_u32(&x680, &x681, 0x0, x620, x662);
+ fiat_secp384r1_addcarryx_u32(&x682, &x683, x681, x622, x663);
+ fiat_secp384r1_addcarryx_u32(&x684, &x685, x683, x624, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x686, &x687, x685, x626, x660);
+ fiat_secp384r1_addcarryx_u32(&x688, &x689, x687, x628, x664);
+ fiat_secp384r1_addcarryx_u32(&x690, &x691, x689, x630, x666);
+ fiat_secp384r1_addcarryx_u32(&x692, &x693, x691, x632, x668);
+ fiat_secp384r1_addcarryx_u32(&x694, &x695, x693, x634, x670);
+ fiat_secp384r1_addcarryx_u32(&x696, &x697, x695, x636, x672);
+ fiat_secp384r1_addcarryx_u32(&x698, &x699, x697, x638, x674);
+ fiat_secp384r1_addcarryx_u32(&x700, &x701, x699, x640, x676);
+ fiat_secp384r1_addcarryx_u32(&x702, &x703, x701, x642, x678);
+ fiat_secp384r1_addcarryx_u32(&x704, &x705, x703, ((uint32_t)x643 + x619),
+ (x679 + x645));
+ fiat_secp384r1_addcarryx_u32(&x706, &x707, 0x0, x682, (arg1[9]));
+ fiat_secp384r1_addcarryx_u32(&x708, &x709, x707, x684, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x710, &x711, x709, x686, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x712, &x713, x711, x688, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x714, &x715, x713, x690, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x716, &x717, x715, x692, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x718, &x719, x717, x694, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x720, &x721, x719, x696, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x722, &x723, x721, x698, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x724, &x725, x723, x700, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x726, &x727, x725, x702, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x728, &x729, x727, x704, 0x0);
+ fiat_secp384r1_mulx_u32(&x730, &x731, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x732, &x733, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x734, &x735, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x736, &x737, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x738, &x739, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x740, &x741, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x742, &x743, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x744, &x745, x706, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x746, &x747, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x748, &x749, x706, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x750, &x751, 0x0, x747, x744);
+ fiat_secp384r1_addcarryx_u32(&x752, &x753, x751, x745, x742);
+ fiat_secp384r1_addcarryx_u32(&x754, &x755, x753, x743, x740);
+ fiat_secp384r1_addcarryx_u32(&x756, &x757, x755, x741, x738);
+ fiat_secp384r1_addcarryx_u32(&x758, &x759, x757, x739, x736);
+ fiat_secp384r1_addcarryx_u32(&x760, &x761, x759, x737, x734);
+ fiat_secp384r1_addcarryx_u32(&x762, &x763, x761, x735, x732);
+ fiat_secp384r1_addcarryx_u32(&x764, &x765, x763, x733, x730);
+ fiat_secp384r1_addcarryx_u32(&x766, &x767, 0x0, x706, x748);
+ fiat_secp384r1_addcarryx_u32(&x768, &x769, x767, x708, x749);
+ fiat_secp384r1_addcarryx_u32(&x770, &x771, x769, x710, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x772, &x773, x771, x712, x746);
+ fiat_secp384r1_addcarryx_u32(&x774, &x775, x773, x714, x750);
+ fiat_secp384r1_addcarryx_u32(&x776, &x777, x775, x716, x752);
+ fiat_secp384r1_addcarryx_u32(&x778, &x779, x777, x718, x754);
+ fiat_secp384r1_addcarryx_u32(&x780, &x781, x779, x720, x756);
+ fiat_secp384r1_addcarryx_u32(&x782, &x783, x781, x722, x758);
+ fiat_secp384r1_addcarryx_u32(&x784, &x785, x783, x724, x760);
+ fiat_secp384r1_addcarryx_u32(&x786, &x787, x785, x726, x762);
+ fiat_secp384r1_addcarryx_u32(&x788, &x789, x787, x728, x764);
+ fiat_secp384r1_addcarryx_u32(&x790, &x791, x789, ((uint32_t)x729 + x705),
+ (x765 + x731));
+ fiat_secp384r1_addcarryx_u32(&x792, &x793, 0x0, x768, (arg1[10]));
+ fiat_secp384r1_addcarryx_u32(&x794, &x795, x793, x770, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x796, &x797, x795, x772, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x798, &x799, x797, x774, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x800, &x801, x799, x776, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x802, &x803, x801, x778, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x804, &x805, x803, x780, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x806, &x807, x805, x782, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x808, &x809, x807, x784, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x810, &x811, x809, x786, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x812, &x813, x811, x788, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x814, &x815, x813, x790, 0x0);
+ fiat_secp384r1_mulx_u32(&x816, &x817, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x818, &x819, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x820, &x821, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x822, &x823, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x824, &x825, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x826, &x827, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x828, &x829, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x830, &x831, x792, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x832, &x833, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x834, &x835, x792, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x836, &x837, 0x0, x833, x830);
+ fiat_secp384r1_addcarryx_u32(&x838, &x839, x837, x831, x828);
+ fiat_secp384r1_addcarryx_u32(&x840, &x841, x839, x829, x826);
+ fiat_secp384r1_addcarryx_u32(&x842, &x843, x841, x827, x824);
+ fiat_secp384r1_addcarryx_u32(&x844, &x845, x843, x825, x822);
+ fiat_secp384r1_addcarryx_u32(&x846, &x847, x845, x823, x820);
+ fiat_secp384r1_addcarryx_u32(&x848, &x849, x847, x821, x818);
+ fiat_secp384r1_addcarryx_u32(&x850, &x851, x849, x819, x816);
+ fiat_secp384r1_addcarryx_u32(&x852, &x853, 0x0, x792, x834);
+ fiat_secp384r1_addcarryx_u32(&x854, &x855, x853, x794, x835);
+ fiat_secp384r1_addcarryx_u32(&x856, &x857, x855, x796, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x858, &x859, x857, x798, x832);
+ fiat_secp384r1_addcarryx_u32(&x860, &x861, x859, x800, x836);
+ fiat_secp384r1_addcarryx_u32(&x862, &x863, x861, x802, x838);
+ fiat_secp384r1_addcarryx_u32(&x864, &x865, x863, x804, x840);
+ fiat_secp384r1_addcarryx_u32(&x866, &x867, x865, x806, x842);
+ fiat_secp384r1_addcarryx_u32(&x868, &x869, x867, x808, x844);
+ fiat_secp384r1_addcarryx_u32(&x870, &x871, x869, x810, x846);
+ fiat_secp384r1_addcarryx_u32(&x872, &x873, x871, x812, x848);
+ fiat_secp384r1_addcarryx_u32(&x874, &x875, x873, x814, x850);
+ fiat_secp384r1_addcarryx_u32(&x876, &x877, x875, ((uint32_t)x815 + x791),
+ (x851 + x817));
+ fiat_secp384r1_addcarryx_u32(&x878, &x879, 0x0, x854, (arg1[11]));
+ fiat_secp384r1_addcarryx_u32(&x880, &x881, x879, x856, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x882, &x883, x881, x858, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x884, &x885, x883, x860, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x886, &x887, x885, x862, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x888, &x889, x887, x864, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x890, &x891, x889, x866, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x892, &x893, x891, x868, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x894, &x895, x893, x870, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x896, &x897, x895, x872, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x898, &x899, x897, x874, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x900, &x901, x899, x876, 0x0);
+ fiat_secp384r1_mulx_u32(&x902, &x903, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x904, &x905, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x906, &x907, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x908, &x909, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x910, &x911, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x912, &x913, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x914, &x915, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x916, &x917, x878, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x918, &x919, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x920, &x921, x878, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x922, &x923, 0x0, x919, x916);
+ fiat_secp384r1_addcarryx_u32(&x924, &x925, x923, x917, x914);
+ fiat_secp384r1_addcarryx_u32(&x926, &x927, x925, x915, x912);
+ fiat_secp384r1_addcarryx_u32(&x928, &x929, x927, x913, x910);
+ fiat_secp384r1_addcarryx_u32(&x930, &x931, x929, x911, x908);
+ fiat_secp384r1_addcarryx_u32(&x932, &x933, x931, x909, x906);
+ fiat_secp384r1_addcarryx_u32(&x934, &x935, x933, x907, x904);
+ fiat_secp384r1_addcarryx_u32(&x936, &x937, x935, x905, x902);
+ fiat_secp384r1_addcarryx_u32(&x938, &x939, 0x0, x878, x920);
+ fiat_secp384r1_addcarryx_u32(&x940, &x941, x939, x880, x921);
+ fiat_secp384r1_addcarryx_u32(&x942, &x943, x941, x882, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x944, &x945, x943, x884, x918);
+ fiat_secp384r1_addcarryx_u32(&x946, &x947, x945, x886, x922);
+ fiat_secp384r1_addcarryx_u32(&x948, &x949, x947, x888, x924);
+ fiat_secp384r1_addcarryx_u32(&x950, &x951, x949, x890, x926);
+ fiat_secp384r1_addcarryx_u32(&x952, &x953, x951, x892, x928);
+ fiat_secp384r1_addcarryx_u32(&x954, &x955, x953, x894, x930);
+ fiat_secp384r1_addcarryx_u32(&x956, &x957, x955, x896, x932);
+ fiat_secp384r1_addcarryx_u32(&x958, &x959, x957, x898, x934);
+ fiat_secp384r1_addcarryx_u32(&x960, &x961, x959, x900, x936);
+ fiat_secp384r1_addcarryx_u32(&x962, &x963, x961, ((uint32_t)x901 + x877),
+ (x937 + x903));
+ fiat_secp384r1_subborrowx_u32(&x964, &x965, 0x0, x940,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x966, &x967, x965, x942, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x968, &x969, x967, x944, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x970, &x971, x969, x946,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x972, &x973, x971, x948,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x974, &x975, x973, x950,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x976, &x977, x975, x952,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x978, &x979, x977, x954,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x980, &x981, x979, x956,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x982, &x983, x981, x958,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x984, &x985, x983, x960,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x986, &x987, x985, x962,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x988, &x989, x987, x963, 0x0);
+ fiat_secp384r1_cmovznz_u32(&x990, x989, x964, x940);
+ fiat_secp384r1_cmovznz_u32(&x991, x989, x966, x942);
+ fiat_secp384r1_cmovznz_u32(&x992, x989, x968, x944);
+ fiat_secp384r1_cmovznz_u32(&x993, x989, x970, x946);
+ fiat_secp384r1_cmovznz_u32(&x994, x989, x972, x948);
+ fiat_secp384r1_cmovznz_u32(&x995, x989, x974, x950);
+ fiat_secp384r1_cmovznz_u32(&x996, x989, x976, x952);
+ fiat_secp384r1_cmovznz_u32(&x997, x989, x978, x954);
+ fiat_secp384r1_cmovznz_u32(&x998, x989, x980, x956);
+ fiat_secp384r1_cmovznz_u32(&x999, x989, x982, x958);
+ fiat_secp384r1_cmovznz_u32(&x1000, x989, x984, x960);
+ fiat_secp384r1_cmovznz_u32(&x1001, x989, x986, x962);
+ out1[0] = x990;
+ out1[1] = x991;
+ out1[2] = x992;
+ out1[3] = x993;
+ out1[4] = x994;
+ out1[5] = x995;
+ out1[6] = x996;
+ out1[7] = x997;
+ out1[8] = x998;
+ out1[9] = x999;
+ out1[10] = x1000;
+ out1[11] = x1001;
+}
+
+/*
+ * The function fiat_secp384r1_to_montgomery translates a field element into the Montgomery domain.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * eval (from_montgomery out1) mod m = eval arg1 mod m
+ * 0 ≤ eval out1 < m
+ *
+ */
+static void
+fiat_secp384r1_to_montgomery(
+ fiat_secp384r1_montgomery_domain_field_element out1,
+ const fiat_secp384r1_non_montgomery_domain_field_element arg1)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ fiat_secp384r1_uint1 x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint32_t x40;
+ uint32_t x41;
+ uint32_t x42;
+ uint32_t x43;
+ fiat_secp384r1_uint1 x44;
+ uint32_t x45;
+ fiat_secp384r1_uint1 x46;
+ uint32_t x47;
+ fiat_secp384r1_uint1 x48;
+ uint32_t x49;
+ fiat_secp384r1_uint1 x50;
+ uint32_t x51;
+ fiat_secp384r1_uint1 x52;
+ uint32_t x53;
+ fiat_secp384r1_uint1 x54;
+ uint32_t x55;
+ fiat_secp384r1_uint1 x56;
+ uint32_t x57;
+ fiat_secp384r1_uint1 x58;
+ uint32_t x59;
+ fiat_secp384r1_uint1 x60;
+ uint32_t x61;
+ fiat_secp384r1_uint1 x62;
+ uint32_t x63;
+ fiat_secp384r1_uint1 x64;
+ uint32_t x65;
+ fiat_secp384r1_uint1 x66;
+ uint32_t x67;
+ fiat_secp384r1_uint1 x68;
+ uint32_t x69;
+ fiat_secp384r1_uint1 x70;
+ uint32_t x71;
+ fiat_secp384r1_uint1 x72;
+ uint32_t x73;
+ fiat_secp384r1_uint1 x74;
+ uint32_t x75;
+ fiat_secp384r1_uint1 x76;
+ uint32_t x77;
+ fiat_secp384r1_uint1 x78;
+ uint32_t x79;
+ fiat_secp384r1_uint1 x80;
+ uint32_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint32_t x83;
+ uint32_t x84;
+ uint32_t x85;
+ uint32_t x86;
+ uint32_t x87;
+ uint32_t x88;
+ uint32_t x89;
+ uint32_t x90;
+ uint32_t x91;
+ fiat_secp384r1_uint1 x92;
+ uint32_t x93;
+ fiat_secp384r1_uint1 x94;
+ uint32_t x95;
+ fiat_secp384r1_uint1 x96;
+ uint32_t x97;
+ fiat_secp384r1_uint1 x98;
+ uint32_t x99;
+ fiat_secp384r1_uint1 x100;
+ uint32_t x101;
+ fiat_secp384r1_uint1 x102;
+ uint32_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint32_t x105;
+ fiat_secp384r1_uint1 x106;
+ uint32_t x107;
+ fiat_secp384r1_uint1 x108;
+ uint32_t x109;
+ fiat_secp384r1_uint1 x110;
+ uint32_t x111;
+ fiat_secp384r1_uint1 x112;
+ uint32_t x113;
+ fiat_secp384r1_uint1 x114;
+ uint32_t x115;
+ fiat_secp384r1_uint1 x116;
+ uint32_t x117;
+ uint32_t x118;
+ uint32_t x119;
+ uint32_t x120;
+ uint32_t x121;
+ uint32_t x122;
+ uint32_t x123;
+ uint32_t x124;
+ uint32_t x125;
+ uint32_t x126;
+ uint32_t x127;
+ uint32_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint32_t x133;
+ uint32_t x134;
+ uint32_t x135;
+ uint32_t x136;
+ uint32_t x137;
+ fiat_secp384r1_uint1 x138;
+ uint32_t x139;
+ fiat_secp384r1_uint1 x140;
+ uint32_t x141;
+ fiat_secp384r1_uint1 x142;
+ uint32_t x143;
+ fiat_secp384r1_uint1 x144;
+ uint32_t x145;
+ fiat_secp384r1_uint1 x146;
+ uint32_t x147;
+ fiat_secp384r1_uint1 x148;
+ uint32_t x149;
+ fiat_secp384r1_uint1 x150;
+ uint32_t x151;
+ fiat_secp384r1_uint1 x152;
+ uint32_t x153;
+ fiat_secp384r1_uint1 x154;
+ uint32_t x155;
+ fiat_secp384r1_uint1 x156;
+ uint32_t x157;
+ fiat_secp384r1_uint1 x158;
+ uint32_t x159;
+ fiat_secp384r1_uint1 x160;
+ uint32_t x161;
+ fiat_secp384r1_uint1 x162;
+ uint32_t x163;
+ fiat_secp384r1_uint1 x164;
+ uint32_t x165;
+ fiat_secp384r1_uint1 x166;
+ uint32_t x167;
+ fiat_secp384r1_uint1 x168;
+ uint32_t x169;
+ fiat_secp384r1_uint1 x170;
+ uint32_t x171;
+ fiat_secp384r1_uint1 x172;
+ uint32_t x173;
+ fiat_secp384r1_uint1 x174;
+ uint32_t x175;
+ fiat_secp384r1_uint1 x176;
+ uint32_t x177;
+ fiat_secp384r1_uint1 x178;
+ uint32_t x179;
+ uint32_t x180;
+ uint32_t x181;
+ uint32_t x182;
+ uint32_t x183;
+ uint32_t x184;
+ uint32_t x185;
+ uint32_t x186;
+ uint32_t x187;
+ fiat_secp384r1_uint1 x188;
+ uint32_t x189;
+ fiat_secp384r1_uint1 x190;
+ uint32_t x191;
+ fiat_secp384r1_uint1 x192;
+ uint32_t x193;
+ fiat_secp384r1_uint1 x194;
+ uint32_t x195;
+ fiat_secp384r1_uint1 x196;
+ uint32_t x197;
+ fiat_secp384r1_uint1 x198;
+ uint32_t x199;
+ fiat_secp384r1_uint1 x200;
+ uint32_t x201;
+ fiat_secp384r1_uint1 x202;
+ uint32_t x203;
+ fiat_secp384r1_uint1 x204;
+ uint32_t x205;
+ fiat_secp384r1_uint1 x206;
+ uint32_t x207;
+ fiat_secp384r1_uint1 x208;
+ uint32_t x209;
+ fiat_secp384r1_uint1 x210;
+ uint32_t x211;
+ fiat_secp384r1_uint1 x212;
+ uint32_t x213;
+ uint32_t x214;
+ uint32_t x215;
+ uint32_t x216;
+ uint32_t x217;
+ uint32_t x218;
+ uint32_t x219;
+ uint32_t x220;
+ uint32_t x221;
+ uint32_t x222;
+ uint32_t x223;
+ uint32_t x224;
+ uint32_t x225;
+ uint32_t x226;
+ uint32_t x227;
+ uint32_t x228;
+ uint32_t x229;
+ uint32_t x230;
+ uint32_t x231;
+ uint32_t x232;
+ uint32_t x233;
+ fiat_secp384r1_uint1 x234;
+ uint32_t x235;
+ fiat_secp384r1_uint1 x236;
+ uint32_t x237;
+ fiat_secp384r1_uint1 x238;
+ uint32_t x239;
+ fiat_secp384r1_uint1 x240;
+ uint32_t x241;
+ fiat_secp384r1_uint1 x242;
+ uint32_t x243;
+ fiat_secp384r1_uint1 x244;
+ uint32_t x245;
+ fiat_secp384r1_uint1 x246;
+ uint32_t x247;
+ fiat_secp384r1_uint1 x248;
+ uint32_t x249;
+ fiat_secp384r1_uint1 x250;
+ uint32_t x251;
+ fiat_secp384r1_uint1 x252;
+ uint32_t x253;
+ fiat_secp384r1_uint1 x254;
+ uint32_t x255;
+ fiat_secp384r1_uint1 x256;
+ uint32_t x257;
+ fiat_secp384r1_uint1 x258;
+ uint32_t x259;
+ fiat_secp384r1_uint1 x260;
+ uint32_t x261;
+ fiat_secp384r1_uint1 x262;
+ uint32_t x263;
+ fiat_secp384r1_uint1 x264;
+ uint32_t x265;
+ fiat_secp384r1_uint1 x266;
+ uint32_t x267;
+ fiat_secp384r1_uint1 x268;
+ uint32_t x269;
+ fiat_secp384r1_uint1 x270;
+ uint32_t x271;
+ fiat_secp384r1_uint1 x272;
+ uint32_t x273;
+ fiat_secp384r1_uint1 x274;
+ uint32_t x275;
+ uint32_t x276;
+ uint32_t x277;
+ uint32_t x278;
+ uint32_t x279;
+ uint32_t x280;
+ uint32_t x281;
+ uint32_t x282;
+ uint32_t x283;
+ fiat_secp384r1_uint1 x284;
+ uint32_t x285;
+ fiat_secp384r1_uint1 x286;
+ uint32_t x287;
+ fiat_secp384r1_uint1 x288;
+ uint32_t x289;
+ fiat_secp384r1_uint1 x290;
+ uint32_t x291;
+ fiat_secp384r1_uint1 x292;
+ uint32_t x293;
+ fiat_secp384r1_uint1 x294;
+ uint32_t x295;
+ fiat_secp384r1_uint1 x296;
+ uint32_t x297;
+ fiat_secp384r1_uint1 x298;
+ uint32_t x299;
+ fiat_secp384r1_uint1 x300;
+ uint32_t x301;
+ fiat_secp384r1_uint1 x302;
+ uint32_t x303;
+ fiat_secp384r1_uint1 x304;
+ uint32_t x305;
+ fiat_secp384r1_uint1 x306;
+ uint32_t x307;
+ fiat_secp384r1_uint1 x308;
+ uint32_t x309;
+ uint32_t x310;
+ uint32_t x311;
+ uint32_t x312;
+ uint32_t x313;
+ uint32_t x314;
+ uint32_t x315;
+ uint32_t x316;
+ uint32_t x317;
+ uint32_t x318;
+ uint32_t x319;
+ uint32_t x320;
+ uint32_t x321;
+ uint32_t x322;
+ uint32_t x323;
+ uint32_t x324;
+ uint32_t x325;
+ uint32_t x326;
+ uint32_t x327;
+ uint32_t x328;
+ uint32_t x329;
+ fiat_secp384r1_uint1 x330;
+ uint32_t x331;
+ fiat_secp384r1_uint1 x332;
+ uint32_t x333;
+ fiat_secp384r1_uint1 x334;
+ uint32_t x335;
+ fiat_secp384r1_uint1 x336;
+ uint32_t x337;
+ fiat_secp384r1_uint1 x338;
+ uint32_t x339;
+ fiat_secp384r1_uint1 x340;
+ uint32_t x341;
+ fiat_secp384r1_uint1 x342;
+ uint32_t x343;
+ fiat_secp384r1_uint1 x344;
+ uint32_t x345;
+ fiat_secp384r1_uint1 x346;
+ uint32_t x347;
+ fiat_secp384r1_uint1 x348;
+ uint32_t x349;
+ fiat_secp384r1_uint1 x350;
+ uint32_t x351;
+ fiat_secp384r1_uint1 x352;
+ uint32_t x353;
+ fiat_secp384r1_uint1 x354;
+ uint32_t x355;
+ fiat_secp384r1_uint1 x356;
+ uint32_t x357;
+ fiat_secp384r1_uint1 x358;
+ uint32_t x359;
+ fiat_secp384r1_uint1 x360;
+ uint32_t x361;
+ fiat_secp384r1_uint1 x362;
+ uint32_t x363;
+ fiat_secp384r1_uint1 x364;
+ uint32_t x365;
+ fiat_secp384r1_uint1 x366;
+ uint32_t x367;
+ fiat_secp384r1_uint1 x368;
+ uint32_t x369;
+ fiat_secp384r1_uint1 x370;
+ uint32_t x371;
+ uint32_t x372;
+ uint32_t x373;
+ uint32_t x374;
+ uint32_t x375;
+ uint32_t x376;
+ uint32_t x377;
+ uint32_t x378;
+ uint32_t x379;
+ fiat_secp384r1_uint1 x380;
+ uint32_t x381;
+ fiat_secp384r1_uint1 x382;
+ uint32_t x383;
+ fiat_secp384r1_uint1 x384;
+ uint32_t x385;
+ fiat_secp384r1_uint1 x386;
+ uint32_t x387;
+ fiat_secp384r1_uint1 x388;
+ uint32_t x389;
+ fiat_secp384r1_uint1 x390;
+ uint32_t x391;
+ fiat_secp384r1_uint1 x392;
+ uint32_t x393;
+ fiat_secp384r1_uint1 x394;
+ uint32_t x395;
+ fiat_secp384r1_uint1 x396;
+ uint32_t x397;
+ fiat_secp384r1_uint1 x398;
+ uint32_t x399;
+ fiat_secp384r1_uint1 x400;
+ uint32_t x401;
+ fiat_secp384r1_uint1 x402;
+ uint32_t x403;
+ fiat_secp384r1_uint1 x404;
+ uint32_t x405;
+ uint32_t x406;
+ uint32_t x407;
+ uint32_t x408;
+ uint32_t x409;
+ uint32_t x410;
+ uint32_t x411;
+ uint32_t x412;
+ uint32_t x413;
+ uint32_t x414;
+ uint32_t x415;
+ uint32_t x416;
+ uint32_t x417;
+ uint32_t x418;
+ uint32_t x419;
+ uint32_t x420;
+ uint32_t x421;
+ uint32_t x422;
+ uint32_t x423;
+ uint32_t x424;
+ uint32_t x425;
+ fiat_secp384r1_uint1 x426;
+ uint32_t x427;
+ fiat_secp384r1_uint1 x428;
+ uint32_t x429;
+ fiat_secp384r1_uint1 x430;
+ uint32_t x431;
+ fiat_secp384r1_uint1 x432;
+ uint32_t x433;
+ fiat_secp384r1_uint1 x434;
+ uint32_t x435;
+ fiat_secp384r1_uint1 x436;
+ uint32_t x437;
+ fiat_secp384r1_uint1 x438;
+ uint32_t x439;
+ fiat_secp384r1_uint1 x440;
+ uint32_t x441;
+ fiat_secp384r1_uint1 x442;
+ uint32_t x443;
+ fiat_secp384r1_uint1 x444;
+ uint32_t x445;
+ fiat_secp384r1_uint1 x446;
+ uint32_t x447;
+ fiat_secp384r1_uint1 x448;
+ uint32_t x449;
+ fiat_secp384r1_uint1 x450;
+ uint32_t x451;
+ fiat_secp384r1_uint1 x452;
+ uint32_t x453;
+ fiat_secp384r1_uint1 x454;
+ uint32_t x455;
+ fiat_secp384r1_uint1 x456;
+ uint32_t x457;
+ fiat_secp384r1_uint1 x458;
+ uint32_t x459;
+ fiat_secp384r1_uint1 x460;
+ uint32_t x461;
+ fiat_secp384r1_uint1 x462;
+ uint32_t x463;
+ fiat_secp384r1_uint1 x464;
+ uint32_t x465;
+ fiat_secp384r1_uint1 x466;
+ uint32_t x467;
+ uint32_t x468;
+ uint32_t x469;
+ uint32_t x470;
+ uint32_t x471;
+ uint32_t x472;
+ uint32_t x473;
+ uint32_t x474;
+ uint32_t x475;
+ fiat_secp384r1_uint1 x476;
+ uint32_t x477;
+ fiat_secp384r1_uint1 x478;
+ uint32_t x479;
+ fiat_secp384r1_uint1 x480;
+ uint32_t x481;
+ fiat_secp384r1_uint1 x482;
+ uint32_t x483;
+ fiat_secp384r1_uint1 x484;
+ uint32_t x485;
+ fiat_secp384r1_uint1 x486;
+ uint32_t x487;
+ fiat_secp384r1_uint1 x488;
+ uint32_t x489;
+ fiat_secp384r1_uint1 x490;
+ uint32_t x491;
+ fiat_secp384r1_uint1 x492;
+ uint32_t x493;
+ fiat_secp384r1_uint1 x494;
+ uint32_t x495;
+ fiat_secp384r1_uint1 x496;
+ uint32_t x497;
+ fiat_secp384r1_uint1 x498;
+ uint32_t x499;
+ fiat_secp384r1_uint1 x500;
+ uint32_t x501;
+ uint32_t x502;
+ uint32_t x503;
+ uint32_t x504;
+ uint32_t x505;
+ uint32_t x506;
+ uint32_t x507;
+ uint32_t x508;
+ uint32_t x509;
+ uint32_t x510;
+ uint32_t x511;
+ uint32_t x512;
+ uint32_t x513;
+ uint32_t x514;
+ uint32_t x515;
+ uint32_t x516;
+ uint32_t x517;
+ uint32_t x518;
+ uint32_t x519;
+ uint32_t x520;
+ uint32_t x521;
+ fiat_secp384r1_uint1 x522;
+ uint32_t x523;
+ fiat_secp384r1_uint1 x524;
+ uint32_t x525;
+ fiat_secp384r1_uint1 x526;
+ uint32_t x527;
+ fiat_secp384r1_uint1 x528;
+ uint32_t x529;
+ fiat_secp384r1_uint1 x530;
+ uint32_t x531;
+ fiat_secp384r1_uint1 x532;
+ uint32_t x533;
+ fiat_secp384r1_uint1 x534;
+ uint32_t x535;
+ fiat_secp384r1_uint1 x536;
+ uint32_t x537;
+ fiat_secp384r1_uint1 x538;
+ uint32_t x539;
+ fiat_secp384r1_uint1 x540;
+ uint32_t x541;
+ fiat_secp384r1_uint1 x542;
+ uint32_t x543;
+ fiat_secp384r1_uint1 x544;
+ uint32_t x545;
+ fiat_secp384r1_uint1 x546;
+ uint32_t x547;
+ fiat_secp384r1_uint1 x548;
+ uint32_t x549;
+ fiat_secp384r1_uint1 x550;
+ uint32_t x551;
+ fiat_secp384r1_uint1 x552;
+ uint32_t x553;
+ fiat_secp384r1_uint1 x554;
+ uint32_t x555;
+ fiat_secp384r1_uint1 x556;
+ uint32_t x557;
+ fiat_secp384r1_uint1 x558;
+ uint32_t x559;
+ fiat_secp384r1_uint1 x560;
+ uint32_t x561;
+ fiat_secp384r1_uint1 x562;
+ uint32_t x563;
+ uint32_t x564;
+ uint32_t x565;
+ uint32_t x566;
+ uint32_t x567;
+ uint32_t x568;
+ uint32_t x569;
+ uint32_t x570;
+ uint32_t x571;
+ fiat_secp384r1_uint1 x572;
+ uint32_t x573;
+ fiat_secp384r1_uint1 x574;
+ uint32_t x575;
+ fiat_secp384r1_uint1 x576;
+ uint32_t x577;
+ fiat_secp384r1_uint1 x578;
+ uint32_t x579;
+ fiat_secp384r1_uint1 x580;
+ uint32_t x581;
+ fiat_secp384r1_uint1 x582;
+ uint32_t x583;
+ fiat_secp384r1_uint1 x584;
+ uint32_t x585;
+ fiat_secp384r1_uint1 x586;
+ uint32_t x587;
+ fiat_secp384r1_uint1 x588;
+ uint32_t x589;
+ fiat_secp384r1_uint1 x590;
+ uint32_t x591;
+ fiat_secp384r1_uint1 x592;
+ uint32_t x593;
+ fiat_secp384r1_uint1 x594;
+ uint32_t x595;
+ fiat_secp384r1_uint1 x596;
+ uint32_t x597;
+ uint32_t x598;
+ uint32_t x599;
+ uint32_t x600;
+ uint32_t x601;
+ uint32_t x602;
+ uint32_t x603;
+ uint32_t x604;
+ uint32_t x605;
+ uint32_t x606;
+ uint32_t x607;
+ uint32_t x608;
+ uint32_t x609;
+ uint32_t x610;
+ uint32_t x611;
+ uint32_t x612;
+ uint32_t x613;
+ uint32_t x614;
+ uint32_t x615;
+ uint32_t x616;
+ uint32_t x617;
+ fiat_secp384r1_uint1 x618;
+ uint32_t x619;
+ fiat_secp384r1_uint1 x620;
+ uint32_t x621;
+ fiat_secp384r1_uint1 x622;
+ uint32_t x623;
+ fiat_secp384r1_uint1 x624;
+ uint32_t x625;
+ fiat_secp384r1_uint1 x626;
+ uint32_t x627;
+ fiat_secp384r1_uint1 x628;
+ uint32_t x629;
+ fiat_secp384r1_uint1 x630;
+ uint32_t x631;
+ fiat_secp384r1_uint1 x632;
+ uint32_t x633;
+ fiat_secp384r1_uint1 x634;
+ uint32_t x635;
+ fiat_secp384r1_uint1 x636;
+ uint32_t x637;
+ fiat_secp384r1_uint1 x638;
+ uint32_t x639;
+ fiat_secp384r1_uint1 x640;
+ uint32_t x641;
+ fiat_secp384r1_uint1 x642;
+ uint32_t x643;
+ fiat_secp384r1_uint1 x644;
+ uint32_t x645;
+ fiat_secp384r1_uint1 x646;
+ uint32_t x647;
+ fiat_secp384r1_uint1 x648;
+ uint32_t x649;
+ fiat_secp384r1_uint1 x650;
+ uint32_t x651;
+ fiat_secp384r1_uint1 x652;
+ uint32_t x653;
+ fiat_secp384r1_uint1 x654;
+ uint32_t x655;
+ fiat_secp384r1_uint1 x656;
+ uint32_t x657;
+ fiat_secp384r1_uint1 x658;
+ uint32_t x659;
+ uint32_t x660;
+ uint32_t x661;
+ uint32_t x662;
+ uint32_t x663;
+ uint32_t x664;
+ uint32_t x665;
+ uint32_t x666;
+ uint32_t x667;
+ fiat_secp384r1_uint1 x668;
+ uint32_t x669;
+ fiat_secp384r1_uint1 x670;
+ uint32_t x671;
+ fiat_secp384r1_uint1 x672;
+ uint32_t x673;
+ fiat_secp384r1_uint1 x674;
+ uint32_t x675;
+ fiat_secp384r1_uint1 x676;
+ uint32_t x677;
+ fiat_secp384r1_uint1 x678;
+ uint32_t x679;
+ fiat_secp384r1_uint1 x680;
+ uint32_t x681;
+ fiat_secp384r1_uint1 x682;
+ uint32_t x683;
+ fiat_secp384r1_uint1 x684;
+ uint32_t x685;
+ fiat_secp384r1_uint1 x686;
+ uint32_t x687;
+ fiat_secp384r1_uint1 x688;
+ uint32_t x689;
+ fiat_secp384r1_uint1 x690;
+ uint32_t x691;
+ fiat_secp384r1_uint1 x692;
+ uint32_t x693;
+ uint32_t x694;
+ uint32_t x695;
+ uint32_t x696;
+ uint32_t x697;
+ uint32_t x698;
+ uint32_t x699;
+ uint32_t x700;
+ uint32_t x701;
+ uint32_t x702;
+ uint32_t x703;
+ uint32_t x704;
+ uint32_t x705;
+ uint32_t x706;
+ uint32_t x707;
+ uint32_t x708;
+ uint32_t x709;
+ uint32_t x710;
+ uint32_t x711;
+ uint32_t x712;
+ uint32_t x713;
+ fiat_secp384r1_uint1 x714;
+ uint32_t x715;
+ fiat_secp384r1_uint1 x716;
+ uint32_t x717;
+ fiat_secp384r1_uint1 x718;
+ uint32_t x719;
+ fiat_secp384r1_uint1 x720;
+ uint32_t x721;
+ fiat_secp384r1_uint1 x722;
+ uint32_t x723;
+ fiat_secp384r1_uint1 x724;
+ uint32_t x725;
+ fiat_secp384r1_uint1 x726;
+ uint32_t x727;
+ fiat_secp384r1_uint1 x728;
+ uint32_t x729;
+ fiat_secp384r1_uint1 x730;
+ uint32_t x731;
+ fiat_secp384r1_uint1 x732;
+ uint32_t x733;
+ fiat_secp384r1_uint1 x734;
+ uint32_t x735;
+ fiat_secp384r1_uint1 x736;
+ uint32_t x737;
+ fiat_secp384r1_uint1 x738;
+ uint32_t x739;
+ fiat_secp384r1_uint1 x740;
+ uint32_t x741;
+ fiat_secp384r1_uint1 x742;
+ uint32_t x743;
+ fiat_secp384r1_uint1 x744;
+ uint32_t x745;
+ fiat_secp384r1_uint1 x746;
+ uint32_t x747;
+ fiat_secp384r1_uint1 x748;
+ uint32_t x749;
+ fiat_secp384r1_uint1 x750;
+ uint32_t x751;
+ fiat_secp384r1_uint1 x752;
+ uint32_t x753;
+ fiat_secp384r1_uint1 x754;
+ uint32_t x755;
+ uint32_t x756;
+ uint32_t x757;
+ uint32_t x758;
+ uint32_t x759;
+ uint32_t x760;
+ uint32_t x761;
+ uint32_t x762;
+ uint32_t x763;
+ fiat_secp384r1_uint1 x764;
+ uint32_t x765;
+ fiat_secp384r1_uint1 x766;
+ uint32_t x767;
+ fiat_secp384r1_uint1 x768;
+ uint32_t x769;
+ fiat_secp384r1_uint1 x770;
+ uint32_t x771;
+ fiat_secp384r1_uint1 x772;
+ uint32_t x773;
+ fiat_secp384r1_uint1 x774;
+ uint32_t x775;
+ fiat_secp384r1_uint1 x776;
+ uint32_t x777;
+ fiat_secp384r1_uint1 x778;
+ uint32_t x779;
+ fiat_secp384r1_uint1 x780;
+ uint32_t x781;
+ fiat_secp384r1_uint1 x782;
+ uint32_t x783;
+ fiat_secp384r1_uint1 x784;
+ uint32_t x785;
+ fiat_secp384r1_uint1 x786;
+ uint32_t x787;
+ fiat_secp384r1_uint1 x788;
+ uint32_t x789;
+ uint32_t x790;
+ uint32_t x791;
+ uint32_t x792;
+ uint32_t x793;
+ uint32_t x794;
+ uint32_t x795;
+ uint32_t x796;
+ uint32_t x797;
+ uint32_t x798;
+ uint32_t x799;
+ uint32_t x800;
+ uint32_t x801;
+ uint32_t x802;
+ uint32_t x803;
+ uint32_t x804;
+ uint32_t x805;
+ uint32_t x806;
+ uint32_t x807;
+ uint32_t x808;
+ uint32_t x809;
+ fiat_secp384r1_uint1 x810;
+ uint32_t x811;
+ fiat_secp384r1_uint1 x812;
+ uint32_t x813;
+ fiat_secp384r1_uint1 x814;
+ uint32_t x815;
+ fiat_secp384r1_uint1 x816;
+ uint32_t x817;
+ fiat_secp384r1_uint1 x818;
+ uint32_t x819;
+ fiat_secp384r1_uint1 x820;
+ uint32_t x821;
+ fiat_secp384r1_uint1 x822;
+ uint32_t x823;
+ fiat_secp384r1_uint1 x824;
+ uint32_t x825;
+ fiat_secp384r1_uint1 x826;
+ uint32_t x827;
+ fiat_secp384r1_uint1 x828;
+ uint32_t x829;
+ fiat_secp384r1_uint1 x830;
+ uint32_t x831;
+ fiat_secp384r1_uint1 x832;
+ uint32_t x833;
+ fiat_secp384r1_uint1 x834;
+ uint32_t x835;
+ fiat_secp384r1_uint1 x836;
+ uint32_t x837;
+ fiat_secp384r1_uint1 x838;
+ uint32_t x839;
+ fiat_secp384r1_uint1 x840;
+ uint32_t x841;
+ fiat_secp384r1_uint1 x842;
+ uint32_t x843;
+ fiat_secp384r1_uint1 x844;
+ uint32_t x845;
+ fiat_secp384r1_uint1 x846;
+ uint32_t x847;
+ fiat_secp384r1_uint1 x848;
+ uint32_t x849;
+ fiat_secp384r1_uint1 x850;
+ uint32_t x851;
+ uint32_t x852;
+ uint32_t x853;
+ uint32_t x854;
+ uint32_t x855;
+ uint32_t x856;
+ uint32_t x857;
+ uint32_t x858;
+ uint32_t x859;
+ fiat_secp384r1_uint1 x860;
+ uint32_t x861;
+ fiat_secp384r1_uint1 x862;
+ uint32_t x863;
+ fiat_secp384r1_uint1 x864;
+ uint32_t x865;
+ fiat_secp384r1_uint1 x866;
+ uint32_t x867;
+ fiat_secp384r1_uint1 x868;
+ uint32_t x869;
+ fiat_secp384r1_uint1 x870;
+ uint32_t x871;
+ fiat_secp384r1_uint1 x872;
+ uint32_t x873;
+ fiat_secp384r1_uint1 x874;
+ uint32_t x875;
+ fiat_secp384r1_uint1 x876;
+ uint32_t x877;
+ fiat_secp384r1_uint1 x878;
+ uint32_t x879;
+ fiat_secp384r1_uint1 x880;
+ uint32_t x881;
+ fiat_secp384r1_uint1 x882;
+ uint32_t x883;
+ fiat_secp384r1_uint1 x884;
+ uint32_t x885;
+ uint32_t x886;
+ uint32_t x887;
+ uint32_t x888;
+ uint32_t x889;
+ uint32_t x890;
+ uint32_t x891;
+ uint32_t x892;
+ uint32_t x893;
+ uint32_t x894;
+ uint32_t x895;
+ uint32_t x896;
+ uint32_t x897;
+ uint32_t x898;
+ uint32_t x899;
+ uint32_t x900;
+ uint32_t x901;
+ uint32_t x902;
+ uint32_t x903;
+ uint32_t x904;
+ uint32_t x905;
+ fiat_secp384r1_uint1 x906;
+ uint32_t x907;
+ fiat_secp384r1_uint1 x908;
+ uint32_t x909;
+ fiat_secp384r1_uint1 x910;
+ uint32_t x911;
+ fiat_secp384r1_uint1 x912;
+ uint32_t x913;
+ fiat_secp384r1_uint1 x914;
+ uint32_t x915;
+ fiat_secp384r1_uint1 x916;
+ uint32_t x917;
+ fiat_secp384r1_uint1 x918;
+ uint32_t x919;
+ fiat_secp384r1_uint1 x920;
+ uint32_t x921;
+ fiat_secp384r1_uint1 x922;
+ uint32_t x923;
+ fiat_secp384r1_uint1 x924;
+ uint32_t x925;
+ fiat_secp384r1_uint1 x926;
+ uint32_t x927;
+ fiat_secp384r1_uint1 x928;
+ uint32_t x929;
+ fiat_secp384r1_uint1 x930;
+ uint32_t x931;
+ fiat_secp384r1_uint1 x932;
+ uint32_t x933;
+ fiat_secp384r1_uint1 x934;
+ uint32_t x935;
+ fiat_secp384r1_uint1 x936;
+ uint32_t x937;
+ fiat_secp384r1_uint1 x938;
+ uint32_t x939;
+ fiat_secp384r1_uint1 x940;
+ uint32_t x941;
+ fiat_secp384r1_uint1 x942;
+ uint32_t x943;
+ fiat_secp384r1_uint1 x944;
+ uint32_t x945;
+ fiat_secp384r1_uint1 x946;
+ uint32_t x947;
+ uint32_t x948;
+ uint32_t x949;
+ uint32_t x950;
+ uint32_t x951;
+ uint32_t x952;
+ uint32_t x953;
+ uint32_t x954;
+ uint32_t x955;
+ fiat_secp384r1_uint1 x956;
+ uint32_t x957;
+ fiat_secp384r1_uint1 x958;
+ uint32_t x959;
+ fiat_secp384r1_uint1 x960;
+ uint32_t x961;
+ fiat_secp384r1_uint1 x962;
+ uint32_t x963;
+ fiat_secp384r1_uint1 x964;
+ uint32_t x965;
+ fiat_secp384r1_uint1 x966;
+ uint32_t x967;
+ fiat_secp384r1_uint1 x968;
+ uint32_t x969;
+ fiat_secp384r1_uint1 x970;
+ uint32_t x971;
+ fiat_secp384r1_uint1 x972;
+ uint32_t x973;
+ fiat_secp384r1_uint1 x974;
+ uint32_t x975;
+ fiat_secp384r1_uint1 x976;
+ uint32_t x977;
+ fiat_secp384r1_uint1 x978;
+ uint32_t x979;
+ fiat_secp384r1_uint1 x980;
+ uint32_t x981;
+ uint32_t x982;
+ uint32_t x983;
+ uint32_t x984;
+ uint32_t x985;
+ uint32_t x986;
+ uint32_t x987;
+ uint32_t x988;
+ uint32_t x989;
+ uint32_t x990;
+ uint32_t x991;
+ uint32_t x992;
+ uint32_t x993;
+ uint32_t x994;
+ uint32_t x995;
+ uint32_t x996;
+ uint32_t x997;
+ uint32_t x998;
+ uint32_t x999;
+ uint32_t x1000;
+ uint32_t x1001;
+ fiat_secp384r1_uint1 x1002;
+ uint32_t x1003;
+ fiat_secp384r1_uint1 x1004;
+ uint32_t x1005;
+ fiat_secp384r1_uint1 x1006;
+ uint32_t x1007;
+ fiat_secp384r1_uint1 x1008;
+ uint32_t x1009;
+ fiat_secp384r1_uint1 x1010;
+ uint32_t x1011;
+ fiat_secp384r1_uint1 x1012;
+ uint32_t x1013;
+ fiat_secp384r1_uint1 x1014;
+ uint32_t x1015;
+ fiat_secp384r1_uint1 x1016;
+ uint32_t x1017;
+ fiat_secp384r1_uint1 x1018;
+ uint32_t x1019;
+ fiat_secp384r1_uint1 x1020;
+ uint32_t x1021;
+ fiat_secp384r1_uint1 x1022;
+ uint32_t x1023;
+ fiat_secp384r1_uint1 x1024;
+ uint32_t x1025;
+ fiat_secp384r1_uint1 x1026;
+ uint32_t x1027;
+ fiat_secp384r1_uint1 x1028;
+ uint32_t x1029;
+ fiat_secp384r1_uint1 x1030;
+ uint32_t x1031;
+ fiat_secp384r1_uint1 x1032;
+ uint32_t x1033;
+ fiat_secp384r1_uint1 x1034;
+ uint32_t x1035;
+ fiat_secp384r1_uint1 x1036;
+ uint32_t x1037;
+ fiat_secp384r1_uint1 x1038;
+ uint32_t x1039;
+ fiat_secp384r1_uint1 x1040;
+ uint32_t x1041;
+ fiat_secp384r1_uint1 x1042;
+ uint32_t x1043;
+ uint32_t x1044;
+ uint32_t x1045;
+ uint32_t x1046;
+ uint32_t x1047;
+ uint32_t x1048;
+ uint32_t x1049;
+ uint32_t x1050;
+ uint32_t x1051;
+ fiat_secp384r1_uint1 x1052;
+ uint32_t x1053;
+ fiat_secp384r1_uint1 x1054;
+ uint32_t x1055;
+ fiat_secp384r1_uint1 x1056;
+ uint32_t x1057;
+ fiat_secp384r1_uint1 x1058;
+ uint32_t x1059;
+ fiat_secp384r1_uint1 x1060;
+ uint32_t x1061;
+ fiat_secp384r1_uint1 x1062;
+ uint32_t x1063;
+ fiat_secp384r1_uint1 x1064;
+ uint32_t x1065;
+ fiat_secp384r1_uint1 x1066;
+ uint32_t x1067;
+ fiat_secp384r1_uint1 x1068;
+ uint32_t x1069;
+ fiat_secp384r1_uint1 x1070;
+ uint32_t x1071;
+ fiat_secp384r1_uint1 x1072;
+ uint32_t x1073;
+ fiat_secp384r1_uint1 x1074;
+ uint32_t x1075;
+ fiat_secp384r1_uint1 x1076;
+ uint32_t x1077;
+ uint32_t x1078;
+ uint32_t x1079;
+ uint32_t x1080;
+ uint32_t x1081;
+ uint32_t x1082;
+ uint32_t x1083;
+ uint32_t x1084;
+ uint32_t x1085;
+ uint32_t x1086;
+ uint32_t x1087;
+ uint32_t x1088;
+ uint32_t x1089;
+ uint32_t x1090;
+ uint32_t x1091;
+ uint32_t x1092;
+ uint32_t x1093;
+ uint32_t x1094;
+ uint32_t x1095;
+ uint32_t x1096;
+ uint32_t x1097;
+ fiat_secp384r1_uint1 x1098;
+ uint32_t x1099;
+ fiat_secp384r1_uint1 x1100;
+ uint32_t x1101;
+ fiat_secp384r1_uint1 x1102;
+ uint32_t x1103;
+ fiat_secp384r1_uint1 x1104;
+ uint32_t x1105;
+ fiat_secp384r1_uint1 x1106;
+ uint32_t x1107;
+ fiat_secp384r1_uint1 x1108;
+ uint32_t x1109;
+ fiat_secp384r1_uint1 x1110;
+ uint32_t x1111;
+ fiat_secp384r1_uint1 x1112;
+ uint32_t x1113;
+ fiat_secp384r1_uint1 x1114;
+ uint32_t x1115;
+ fiat_secp384r1_uint1 x1116;
+ uint32_t x1117;
+ fiat_secp384r1_uint1 x1118;
+ uint32_t x1119;
+ fiat_secp384r1_uint1 x1120;
+ uint32_t x1121;
+ fiat_secp384r1_uint1 x1122;
+ uint32_t x1123;
+ fiat_secp384r1_uint1 x1124;
+ uint32_t x1125;
+ fiat_secp384r1_uint1 x1126;
+ uint32_t x1127;
+ fiat_secp384r1_uint1 x1128;
+ uint32_t x1129;
+ fiat_secp384r1_uint1 x1130;
+ uint32_t x1131;
+ fiat_secp384r1_uint1 x1132;
+ uint32_t x1133;
+ fiat_secp384r1_uint1 x1134;
+ uint32_t x1135;
+ fiat_secp384r1_uint1 x1136;
+ uint32_t x1137;
+ fiat_secp384r1_uint1 x1138;
+ uint32_t x1139;
+ fiat_secp384r1_uint1 x1140;
+ uint32_t x1141;
+ fiat_secp384r1_uint1 x1142;
+ uint32_t x1143;
+ fiat_secp384r1_uint1 x1144;
+ uint32_t x1145;
+ fiat_secp384r1_uint1 x1146;
+ uint32_t x1147;
+ fiat_secp384r1_uint1 x1148;
+ uint32_t x1149;
+ fiat_secp384r1_uint1 x1150;
+ uint32_t x1151;
+ fiat_secp384r1_uint1 x1152;
+ uint32_t x1153;
+ fiat_secp384r1_uint1 x1154;
+ uint32_t x1155;
+ fiat_secp384r1_uint1 x1156;
+ uint32_t x1157;
+ fiat_secp384r1_uint1 x1158;
+ uint32_t x1159;
+ fiat_secp384r1_uint1 x1160;
+ uint32_t x1161;
+ fiat_secp384r1_uint1 x1162;
+ uint32_t x1163;
+ fiat_secp384r1_uint1 x1164;
+ uint32_t x1165;
+ uint32_t x1166;
+ uint32_t x1167;
+ uint32_t x1168;
+ uint32_t x1169;
+ uint32_t x1170;
+ uint32_t x1171;
+ uint32_t x1172;
+ uint32_t x1173;
+ uint32_t x1174;
+ uint32_t x1175;
+ uint32_t x1176;
+ x1 = (arg1[1]);
+ x2 = (arg1[2]);
+ x3 = (arg1[3]);
+ x4 = (arg1[4]);
+ x5 = (arg1[5]);
+ x6 = (arg1[6]);
+ x7 = (arg1[7]);
+ x8 = (arg1[8]);
+ x9 = (arg1[9]);
+ x10 = (arg1[10]);
+ x11 = (arg1[11]);
+ x12 = (arg1[0]);
+ fiat_secp384r1_mulx_u32(&x13, &x14, x12, 0x2);
+ fiat_secp384r1_mulx_u32(&x15, &x16, x12, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x17, &x18, x12, 0x2);
+ fiat_secp384r1_mulx_u32(&x19, &x20, x12, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x21, &x22, 0x0, (fiat_secp384r1_uint1)x14,
+ x12);
+ fiat_secp384r1_mulx_u32(&x23, &x24, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x25, &x26, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x27, &x28, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x29, &x30, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x31, &x32, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x33, &x34, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x35, &x36, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x37, &x38, x12, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x39, &x40, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x41, &x42, x12, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x43, &x44, 0x0, x40, x37);
+ fiat_secp384r1_addcarryx_u32(&x45, &x46, x44, x38, x35);
+ fiat_secp384r1_addcarryx_u32(&x47, &x48, x46, x36, x33);
+ fiat_secp384r1_addcarryx_u32(&x49, &x50, x48, x34, x31);
+ fiat_secp384r1_addcarryx_u32(&x51, &x52, x50, x32, x29);
+ fiat_secp384r1_addcarryx_u32(&x53, &x54, x52, x30, x27);
+ fiat_secp384r1_addcarryx_u32(&x55, &x56, x54, x28, x25);
+ fiat_secp384r1_addcarryx_u32(&x57, &x58, x56, x26, x23);
+ fiat_secp384r1_addcarryx_u32(&x59, &x60, 0x0, x12, x41);
+ fiat_secp384r1_addcarryx_u32(&x61, &x62, x60, x19, x42);
+ fiat_secp384r1_addcarryx_u32(&x63, &x64, 0x0, x17, x39);
+ fiat_secp384r1_addcarryx_u32(&x65, &x66, x64, (fiat_secp384r1_uint1)x18,
+ x43);
+ fiat_secp384r1_addcarryx_u32(&x67, &x68, x66, x15, x45);
+ fiat_secp384r1_addcarryx_u32(&x69, &x70, x68, x16, x47);
+ fiat_secp384r1_addcarryx_u32(&x71, &x72, x70, x13, x49);
+ fiat_secp384r1_addcarryx_u32(&x73, &x74, x72, x21, x51);
+ fiat_secp384r1_addcarryx_u32(&x75, &x76, x74, x22, x53);
+ fiat_secp384r1_addcarryx_u32(&x77, &x78, x76, 0x0, x55);
+ fiat_secp384r1_addcarryx_u32(&x79, &x80, x78, 0x0, x57);
+ fiat_secp384r1_addcarryx_u32(&x81, &x82, x80, 0x0, (x58 + x24));
+ fiat_secp384r1_mulx_u32(&x83, &x84, x1, 0x2);
+ fiat_secp384r1_mulx_u32(&x85, &x86, x1, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x87, &x88, x1, 0x2);
+ fiat_secp384r1_mulx_u32(&x89, &x90, x1, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x91, &x92, 0x0, (fiat_secp384r1_uint1)x84,
+ x1);
+ fiat_secp384r1_addcarryx_u32(&x93, &x94, 0x0, x61, x1);
+ fiat_secp384r1_addcarryx_u32(&x95, &x96, x94, (x62 + x20), x89);
+ fiat_secp384r1_addcarryx_u32(&x97, &x98, x96, x63, x90);
+ fiat_secp384r1_addcarryx_u32(&x99, &x100, x98, x65, x87);
+ fiat_secp384r1_addcarryx_u32(&x101, &x102, x100, x67,
+ (fiat_secp384r1_uint1)x88);
+ fiat_secp384r1_addcarryx_u32(&x103, &x104, x102, x69, x85);
+ fiat_secp384r1_addcarryx_u32(&x105, &x106, x104, x71, x86);
+ fiat_secp384r1_addcarryx_u32(&x107, &x108, x106, x73, x83);
+ fiat_secp384r1_addcarryx_u32(&x109, &x110, x108, x75, x91);
+ fiat_secp384r1_addcarryx_u32(&x111, &x112, x110, x77, x92);
+ fiat_secp384r1_addcarryx_u32(&x113, &x114, x112, x79, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x115, &x116, x114, x81, 0x0);
+ fiat_secp384r1_mulx_u32(&x117, &x118, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x119, &x120, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x121, &x122, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x123, &x124, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x125, &x126, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x127, &x128, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x129, &x130, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x131, &x132, x93, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x133, &x134, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x135, &x136, x93, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x137, &x138, 0x0, x134, x131);
+ fiat_secp384r1_addcarryx_u32(&x139, &x140, x138, x132, x129);
+ fiat_secp384r1_addcarryx_u32(&x141, &x142, x140, x130, x127);
+ fiat_secp384r1_addcarryx_u32(&x143, &x144, x142, x128, x125);
+ fiat_secp384r1_addcarryx_u32(&x145, &x146, x144, x126, x123);
+ fiat_secp384r1_addcarryx_u32(&x147, &x148, x146, x124, x121);
+ fiat_secp384r1_addcarryx_u32(&x149, &x150, x148, x122, x119);
+ fiat_secp384r1_addcarryx_u32(&x151, &x152, x150, x120, x117);
+ fiat_secp384r1_addcarryx_u32(&x153, &x154, 0x0, x93, x135);
+ fiat_secp384r1_addcarryx_u32(&x155, &x156, x154, x95, x136);
+ fiat_secp384r1_addcarryx_u32(&x157, &x158, x156, x97, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x159, &x160, x158, x99, x133);
+ fiat_secp384r1_addcarryx_u32(&x161, &x162, x160, x101, x137);
+ fiat_secp384r1_addcarryx_u32(&x163, &x164, x162, x103, x139);
+ fiat_secp384r1_addcarryx_u32(&x165, &x166, x164, x105, x141);
+ fiat_secp384r1_addcarryx_u32(&x167, &x168, x166, x107, x143);
+ fiat_secp384r1_addcarryx_u32(&x169, &x170, x168, x109, x145);
+ fiat_secp384r1_addcarryx_u32(&x171, &x172, x170, x111, x147);
+ fiat_secp384r1_addcarryx_u32(&x173, &x174, x172, x113, x149);
+ fiat_secp384r1_addcarryx_u32(&x175, &x176, x174, x115, x151);
+ fiat_secp384r1_addcarryx_u32(&x177, &x178, x176, ((uint32_t)x116 + x82),
+ (x152 + x118));
+ fiat_secp384r1_mulx_u32(&x179, &x180, x2, 0x2);
+ fiat_secp384r1_mulx_u32(&x181, &x182, x2, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x183, &x184, x2, 0x2);
+ fiat_secp384r1_mulx_u32(&x185, &x186, x2, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x187, &x188, 0x0, (fiat_secp384r1_uint1)x180,
+ x2);
+ fiat_secp384r1_addcarryx_u32(&x189, &x190, 0x0, x155, x2);
+ fiat_secp384r1_addcarryx_u32(&x191, &x192, x190, x157, x185);
+ fiat_secp384r1_addcarryx_u32(&x193, &x194, x192, x159, x186);
+ fiat_secp384r1_addcarryx_u32(&x195, &x196, x194, x161, x183);
+ fiat_secp384r1_addcarryx_u32(&x197, &x198, x196, x163,
+ (fiat_secp384r1_uint1)x184);
+ fiat_secp384r1_addcarryx_u32(&x199, &x200, x198, x165, x181);
+ fiat_secp384r1_addcarryx_u32(&x201, &x202, x200, x167, x182);
+ fiat_secp384r1_addcarryx_u32(&x203, &x204, x202, x169, x179);
+ fiat_secp384r1_addcarryx_u32(&x205, &x206, x204, x171, x187);
+ fiat_secp384r1_addcarryx_u32(&x207, &x208, x206, x173, x188);
+ fiat_secp384r1_addcarryx_u32(&x209, &x210, x208, x175, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x211, &x212, x210, x177, 0x0);
+ fiat_secp384r1_mulx_u32(&x213, &x214, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x215, &x216, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x217, &x218, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x219, &x220, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x221, &x222, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x223, &x224, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x225, &x226, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x227, &x228, x189, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x229, &x230, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x231, &x232, x189, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x233, &x234, 0x0, x230, x227);
+ fiat_secp384r1_addcarryx_u32(&x235, &x236, x234, x228, x225);
+ fiat_secp384r1_addcarryx_u32(&x237, &x238, x236, x226, x223);
+ fiat_secp384r1_addcarryx_u32(&x239, &x240, x238, x224, x221);
+ fiat_secp384r1_addcarryx_u32(&x241, &x242, x240, x222, x219);
+ fiat_secp384r1_addcarryx_u32(&x243, &x244, x242, x220, x217);
+ fiat_secp384r1_addcarryx_u32(&x245, &x246, x244, x218, x215);
+ fiat_secp384r1_addcarryx_u32(&x247, &x248, x246, x216, x213);
+ fiat_secp384r1_addcarryx_u32(&x249, &x250, 0x0, x189, x231);
+ fiat_secp384r1_addcarryx_u32(&x251, &x252, x250, x191, x232);
+ fiat_secp384r1_addcarryx_u32(&x253, &x254, x252, x193, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x255, &x256, x254, x195, x229);
+ fiat_secp384r1_addcarryx_u32(&x257, &x258, x256, x197, x233);
+ fiat_secp384r1_addcarryx_u32(&x259, &x260, x258, x199, x235);
+ fiat_secp384r1_addcarryx_u32(&x261, &x262, x260, x201, x237);
+ fiat_secp384r1_addcarryx_u32(&x263, &x264, x262, x203, x239);
+ fiat_secp384r1_addcarryx_u32(&x265, &x266, x264, x205, x241);
+ fiat_secp384r1_addcarryx_u32(&x267, &x268, x266, x207, x243);
+ fiat_secp384r1_addcarryx_u32(&x269, &x270, x268, x209, x245);
+ fiat_secp384r1_addcarryx_u32(&x271, &x272, x270, x211, x247);
+ fiat_secp384r1_addcarryx_u32(&x273, &x274, x272, ((uint32_t)x212 + x178),
+ (x248 + x214));
+ fiat_secp384r1_mulx_u32(&x275, &x276, x3, 0x2);
+ fiat_secp384r1_mulx_u32(&x277, &x278, x3, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x279, &x280, x3, 0x2);
+ fiat_secp384r1_mulx_u32(&x281, &x282, x3, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x283, &x284, 0x0, (fiat_secp384r1_uint1)x276,
+ x3);
+ fiat_secp384r1_addcarryx_u32(&x285, &x286, 0x0, x251, x3);
+ fiat_secp384r1_addcarryx_u32(&x287, &x288, x286, x253, x281);
+ fiat_secp384r1_addcarryx_u32(&x289, &x290, x288, x255, x282);
+ fiat_secp384r1_addcarryx_u32(&x291, &x292, x290, x257, x279);
+ fiat_secp384r1_addcarryx_u32(&x293, &x294, x292, x259,
+ (fiat_secp384r1_uint1)x280);
+ fiat_secp384r1_addcarryx_u32(&x295, &x296, x294, x261, x277);
+ fiat_secp384r1_addcarryx_u32(&x297, &x298, x296, x263, x278);
+ fiat_secp384r1_addcarryx_u32(&x299, &x300, x298, x265, x275);
+ fiat_secp384r1_addcarryx_u32(&x301, &x302, x300, x267, x283);
+ fiat_secp384r1_addcarryx_u32(&x303, &x304, x302, x269, x284);
+ fiat_secp384r1_addcarryx_u32(&x305, &x306, x304, x271, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x307, &x308, x306, x273, 0x0);
+ fiat_secp384r1_mulx_u32(&x309, &x310, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x311, &x312, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x313, &x314, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x315, &x316, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x317, &x318, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x319, &x320, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x321, &x322, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x323, &x324, x285, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x325, &x326, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x327, &x328, x285, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x329, &x330, 0x0, x326, x323);
+ fiat_secp384r1_addcarryx_u32(&x331, &x332, x330, x324, x321);
+ fiat_secp384r1_addcarryx_u32(&x333, &x334, x332, x322, x319);
+ fiat_secp384r1_addcarryx_u32(&x335, &x336, x334, x320, x317);
+ fiat_secp384r1_addcarryx_u32(&x337, &x338, x336, x318, x315);
+ fiat_secp384r1_addcarryx_u32(&x339, &x340, x338, x316, x313);
+ fiat_secp384r1_addcarryx_u32(&x341, &x342, x340, x314, x311);
+ fiat_secp384r1_addcarryx_u32(&x343, &x344, x342, x312, x309);
+ fiat_secp384r1_addcarryx_u32(&x345, &x346, 0x0, x285, x327);
+ fiat_secp384r1_addcarryx_u32(&x347, &x348, x346, x287, x328);
+ fiat_secp384r1_addcarryx_u32(&x349, &x350, x348, x289, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x351, &x352, x350, x291, x325);
+ fiat_secp384r1_addcarryx_u32(&x353, &x354, x352, x293, x329);
+ fiat_secp384r1_addcarryx_u32(&x355, &x356, x354, x295, x331);
+ fiat_secp384r1_addcarryx_u32(&x357, &x358, x356, x297, x333);
+ fiat_secp384r1_addcarryx_u32(&x359, &x360, x358, x299, x335);
+ fiat_secp384r1_addcarryx_u32(&x361, &x362, x360, x301, x337);
+ fiat_secp384r1_addcarryx_u32(&x363, &x364, x362, x303, x339);
+ fiat_secp384r1_addcarryx_u32(&x365, &x366, x364, x305, x341);
+ fiat_secp384r1_addcarryx_u32(&x367, &x368, x366, x307, x343);
+ fiat_secp384r1_addcarryx_u32(&x369, &x370, x368, ((uint32_t)x308 + x274),
+ (x344 + x310));
+ fiat_secp384r1_mulx_u32(&x371, &x372, x4, 0x2);
+ fiat_secp384r1_mulx_u32(&x373, &x374, x4, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x375, &x376, x4, 0x2);
+ fiat_secp384r1_mulx_u32(&x377, &x378, x4, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x379, &x380, 0x0, (fiat_secp384r1_uint1)x372,
+ x4);
+ fiat_secp384r1_addcarryx_u32(&x381, &x382, 0x0, x347, x4);
+ fiat_secp384r1_addcarryx_u32(&x383, &x384, x382, x349, x377);
+ fiat_secp384r1_addcarryx_u32(&x385, &x386, x384, x351, x378);
+ fiat_secp384r1_addcarryx_u32(&x387, &x388, x386, x353, x375);
+ fiat_secp384r1_addcarryx_u32(&x389, &x390, x388, x355,
+ (fiat_secp384r1_uint1)x376);
+ fiat_secp384r1_addcarryx_u32(&x391, &x392, x390, x357, x373);
+ fiat_secp384r1_addcarryx_u32(&x393, &x394, x392, x359, x374);
+ fiat_secp384r1_addcarryx_u32(&x395, &x396, x394, x361, x371);
+ fiat_secp384r1_addcarryx_u32(&x397, &x398, x396, x363, x379);
+ fiat_secp384r1_addcarryx_u32(&x399, &x400, x398, x365, x380);
+ fiat_secp384r1_addcarryx_u32(&x401, &x402, x400, x367, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x403, &x404, x402, x369, 0x0);
+ fiat_secp384r1_mulx_u32(&x405, &x406, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x407, &x408, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x409, &x410, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x411, &x412, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x413, &x414, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x415, &x416, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x417, &x418, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x419, &x420, x381, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x421, &x422, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x423, &x424, x381, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x425, &x426, 0x0, x422, x419);
+ fiat_secp384r1_addcarryx_u32(&x427, &x428, x426, x420, x417);
+ fiat_secp384r1_addcarryx_u32(&x429, &x430, x428, x418, x415);
+ fiat_secp384r1_addcarryx_u32(&x431, &x432, x430, x416, x413);
+ fiat_secp384r1_addcarryx_u32(&x433, &x434, x432, x414, x411);
+ fiat_secp384r1_addcarryx_u32(&x435, &x436, x434, x412, x409);
+ fiat_secp384r1_addcarryx_u32(&x437, &x438, x436, x410, x407);
+ fiat_secp384r1_addcarryx_u32(&x439, &x440, x438, x408, x405);
+ fiat_secp384r1_addcarryx_u32(&x441, &x442, 0x0, x381, x423);
+ fiat_secp384r1_addcarryx_u32(&x443, &x444, x442, x383, x424);
+ fiat_secp384r1_addcarryx_u32(&x445, &x446, x444, x385, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x447, &x448, x446, x387, x421);
+ fiat_secp384r1_addcarryx_u32(&x449, &x450, x448, x389, x425);
+ fiat_secp384r1_addcarryx_u32(&x451, &x452, x450, x391, x427);
+ fiat_secp384r1_addcarryx_u32(&x453, &x454, x452, x393, x429);
+ fiat_secp384r1_addcarryx_u32(&x455, &x456, x454, x395, x431);
+ fiat_secp384r1_addcarryx_u32(&x457, &x458, x456, x397, x433);
+ fiat_secp384r1_addcarryx_u32(&x459, &x460, x458, x399, x435);
+ fiat_secp384r1_addcarryx_u32(&x461, &x462, x460, x401, x437);
+ fiat_secp384r1_addcarryx_u32(&x463, &x464, x462, x403, x439);
+ fiat_secp384r1_addcarryx_u32(&x465, &x466, x464, ((uint32_t)x404 + x370),
+ (x440 + x406));
+ fiat_secp384r1_mulx_u32(&x467, &x468, x5, 0x2);
+ fiat_secp384r1_mulx_u32(&x469, &x470, x5, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x471, &x472, x5, 0x2);
+ fiat_secp384r1_mulx_u32(&x473, &x474, x5, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x475, &x476, 0x0, (fiat_secp384r1_uint1)x468,
+ x5);
+ fiat_secp384r1_addcarryx_u32(&x477, &x478, 0x0, x443, x5);
+ fiat_secp384r1_addcarryx_u32(&x479, &x480, x478, x445, x473);
+ fiat_secp384r1_addcarryx_u32(&x481, &x482, x480, x447, x474);
+ fiat_secp384r1_addcarryx_u32(&x483, &x484, x482, x449, x471);
+ fiat_secp384r1_addcarryx_u32(&x485, &x486, x484, x451,
+ (fiat_secp384r1_uint1)x472);
+ fiat_secp384r1_addcarryx_u32(&x487, &x488, x486, x453, x469);
+ fiat_secp384r1_addcarryx_u32(&x489, &x490, x488, x455, x470);
+ fiat_secp384r1_addcarryx_u32(&x491, &x492, x490, x457, x467);
+ fiat_secp384r1_addcarryx_u32(&x493, &x494, x492, x459, x475);
+ fiat_secp384r1_addcarryx_u32(&x495, &x496, x494, x461, x476);
+ fiat_secp384r1_addcarryx_u32(&x497, &x498, x496, x463, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x499, &x500, x498, x465, 0x0);
+ fiat_secp384r1_mulx_u32(&x501, &x502, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x503, &x504, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x505, &x506, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x507, &x508, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x509, &x510, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x511, &x512, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x513, &x514, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x515, &x516, x477, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x517, &x518, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x519, &x520, x477, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x521, &x522, 0x0, x518, x515);
+ fiat_secp384r1_addcarryx_u32(&x523, &x524, x522, x516, x513);
+ fiat_secp384r1_addcarryx_u32(&x525, &x526, x524, x514, x511);
+ fiat_secp384r1_addcarryx_u32(&x527, &x528, x526, x512, x509);
+ fiat_secp384r1_addcarryx_u32(&x529, &x530, x528, x510, x507);
+ fiat_secp384r1_addcarryx_u32(&x531, &x532, x530, x508, x505);
+ fiat_secp384r1_addcarryx_u32(&x533, &x534, x532, x506, x503);
+ fiat_secp384r1_addcarryx_u32(&x535, &x536, x534, x504, x501);
+ fiat_secp384r1_addcarryx_u32(&x537, &x538, 0x0, x477, x519);
+ fiat_secp384r1_addcarryx_u32(&x539, &x540, x538, x479, x520);
+ fiat_secp384r1_addcarryx_u32(&x541, &x542, x540, x481, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x543, &x544, x542, x483, x517);
+ fiat_secp384r1_addcarryx_u32(&x545, &x546, x544, x485, x521);
+ fiat_secp384r1_addcarryx_u32(&x547, &x548, x546, x487, x523);
+ fiat_secp384r1_addcarryx_u32(&x549, &x550, x548, x489, x525);
+ fiat_secp384r1_addcarryx_u32(&x551, &x552, x550, x491, x527);
+ fiat_secp384r1_addcarryx_u32(&x553, &x554, x552, x493, x529);
+ fiat_secp384r1_addcarryx_u32(&x555, &x556, x554, x495, x531);
+ fiat_secp384r1_addcarryx_u32(&x557, &x558, x556, x497, x533);
+ fiat_secp384r1_addcarryx_u32(&x559, &x560, x558, x499, x535);
+ fiat_secp384r1_addcarryx_u32(&x561, &x562, x560, ((uint32_t)x500 + x466),
+ (x536 + x502));
+ fiat_secp384r1_mulx_u32(&x563, &x564, x6, 0x2);
+ fiat_secp384r1_mulx_u32(&x565, &x566, x6, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x567, &x568, x6, 0x2);
+ fiat_secp384r1_mulx_u32(&x569, &x570, x6, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x571, &x572, 0x0, (fiat_secp384r1_uint1)x564,
+ x6);
+ fiat_secp384r1_addcarryx_u32(&x573, &x574, 0x0, x539, x6);
+ fiat_secp384r1_addcarryx_u32(&x575, &x576, x574, x541, x569);
+ fiat_secp384r1_addcarryx_u32(&x577, &x578, x576, x543, x570);
+ fiat_secp384r1_addcarryx_u32(&x579, &x580, x578, x545, x567);
+ fiat_secp384r1_addcarryx_u32(&x581, &x582, x580, x547,
+ (fiat_secp384r1_uint1)x568);
+ fiat_secp384r1_addcarryx_u32(&x583, &x584, x582, x549, x565);
+ fiat_secp384r1_addcarryx_u32(&x585, &x586, x584, x551, x566);
+ fiat_secp384r1_addcarryx_u32(&x587, &x588, x586, x553, x563);
+ fiat_secp384r1_addcarryx_u32(&x589, &x590, x588, x555, x571);
+ fiat_secp384r1_addcarryx_u32(&x591, &x592, x590, x557, x572);
+ fiat_secp384r1_addcarryx_u32(&x593, &x594, x592, x559, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x595, &x596, x594, x561, 0x0);
+ fiat_secp384r1_mulx_u32(&x597, &x598, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x599, &x600, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x601, &x602, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x603, &x604, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x605, &x606, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x607, &x608, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x609, &x610, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x611, &x612, x573, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x613, &x614, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x615, &x616, x573, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x617, &x618, 0x0, x614, x611);
+ fiat_secp384r1_addcarryx_u32(&x619, &x620, x618, x612, x609);
+ fiat_secp384r1_addcarryx_u32(&x621, &x622, x620, x610, x607);
+ fiat_secp384r1_addcarryx_u32(&x623, &x624, x622, x608, x605);
+ fiat_secp384r1_addcarryx_u32(&x625, &x626, x624, x606, x603);
+ fiat_secp384r1_addcarryx_u32(&x627, &x628, x626, x604, x601);
+ fiat_secp384r1_addcarryx_u32(&x629, &x630, x628, x602, x599);
+ fiat_secp384r1_addcarryx_u32(&x631, &x632, x630, x600, x597);
+ fiat_secp384r1_addcarryx_u32(&x633, &x634, 0x0, x573, x615);
+ fiat_secp384r1_addcarryx_u32(&x635, &x636, x634, x575, x616);
+ fiat_secp384r1_addcarryx_u32(&x637, &x638, x636, x577, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x639, &x640, x638, x579, x613);
+ fiat_secp384r1_addcarryx_u32(&x641, &x642, x640, x581, x617);
+ fiat_secp384r1_addcarryx_u32(&x643, &x644, x642, x583, x619);
+ fiat_secp384r1_addcarryx_u32(&x645, &x646, x644, x585, x621);
+ fiat_secp384r1_addcarryx_u32(&x647, &x648, x646, x587, x623);
+ fiat_secp384r1_addcarryx_u32(&x649, &x650, x648, x589, x625);
+ fiat_secp384r1_addcarryx_u32(&x651, &x652, x650, x591, x627);
+ fiat_secp384r1_addcarryx_u32(&x653, &x654, x652, x593, x629);
+ fiat_secp384r1_addcarryx_u32(&x655, &x656, x654, x595, x631);
+ fiat_secp384r1_addcarryx_u32(&x657, &x658, x656, ((uint32_t)x596 + x562),
+ (x632 + x598));
+ fiat_secp384r1_mulx_u32(&x659, &x660, x7, 0x2);
+ fiat_secp384r1_mulx_u32(&x661, &x662, x7, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x663, &x664, x7, 0x2);
+ fiat_secp384r1_mulx_u32(&x665, &x666, x7, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x667, &x668, 0x0, (fiat_secp384r1_uint1)x660,
+ x7);
+ fiat_secp384r1_addcarryx_u32(&x669, &x670, 0x0, x635, x7);
+ fiat_secp384r1_addcarryx_u32(&x671, &x672, x670, x637, x665);
+ fiat_secp384r1_addcarryx_u32(&x673, &x674, x672, x639, x666);
+ fiat_secp384r1_addcarryx_u32(&x675, &x676, x674, x641, x663);
+ fiat_secp384r1_addcarryx_u32(&x677, &x678, x676, x643,
+ (fiat_secp384r1_uint1)x664);
+ fiat_secp384r1_addcarryx_u32(&x679, &x680, x678, x645, x661);
+ fiat_secp384r1_addcarryx_u32(&x681, &x682, x680, x647, x662);
+ fiat_secp384r1_addcarryx_u32(&x683, &x684, x682, x649, x659);
+ fiat_secp384r1_addcarryx_u32(&x685, &x686, x684, x651, x667);
+ fiat_secp384r1_addcarryx_u32(&x687, &x688, x686, x653, x668);
+ fiat_secp384r1_addcarryx_u32(&x689, &x690, x688, x655, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x691, &x692, x690, x657, 0x0);
+ fiat_secp384r1_mulx_u32(&x693, &x694, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x695, &x696, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x697, &x698, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x699, &x700, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x701, &x702, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x703, &x704, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x705, &x706, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x707, &x708, x669, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x709, &x710, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x711, &x712, x669, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x713, &x714, 0x0, x710, x707);
+ fiat_secp384r1_addcarryx_u32(&x715, &x716, x714, x708, x705);
+ fiat_secp384r1_addcarryx_u32(&x717, &x718, x716, x706, x703);
+ fiat_secp384r1_addcarryx_u32(&x719, &x720, x718, x704, x701);
+ fiat_secp384r1_addcarryx_u32(&x721, &x722, x720, x702, x699);
+ fiat_secp384r1_addcarryx_u32(&x723, &x724, x722, x700, x697);
+ fiat_secp384r1_addcarryx_u32(&x725, &x726, x724, x698, x695);
+ fiat_secp384r1_addcarryx_u32(&x727, &x728, x726, x696, x693);
+ fiat_secp384r1_addcarryx_u32(&x729, &x730, 0x0, x669, x711);
+ fiat_secp384r1_addcarryx_u32(&x731, &x732, x730, x671, x712);
+ fiat_secp384r1_addcarryx_u32(&x733, &x734, x732, x673, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x735, &x736, x734, x675, x709);
+ fiat_secp384r1_addcarryx_u32(&x737, &x738, x736, x677, x713);
+ fiat_secp384r1_addcarryx_u32(&x739, &x740, x738, x679, x715);
+ fiat_secp384r1_addcarryx_u32(&x741, &x742, x740, x681, x717);
+ fiat_secp384r1_addcarryx_u32(&x743, &x744, x742, x683, x719);
+ fiat_secp384r1_addcarryx_u32(&x745, &x746, x744, x685, x721);
+ fiat_secp384r1_addcarryx_u32(&x747, &x748, x746, x687, x723);
+ fiat_secp384r1_addcarryx_u32(&x749, &x750, x748, x689, x725);
+ fiat_secp384r1_addcarryx_u32(&x751, &x752, x750, x691, x727);
+ fiat_secp384r1_addcarryx_u32(&x753, &x754, x752, ((uint32_t)x692 + x658),
+ (x728 + x694));
+ fiat_secp384r1_mulx_u32(&x755, &x756, x8, 0x2);
+ fiat_secp384r1_mulx_u32(&x757, &x758, x8, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x759, &x760, x8, 0x2);
+ fiat_secp384r1_mulx_u32(&x761, &x762, x8, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x763, &x764, 0x0, (fiat_secp384r1_uint1)x756,
+ x8);
+ fiat_secp384r1_addcarryx_u32(&x765, &x766, 0x0, x731, x8);
+ fiat_secp384r1_addcarryx_u32(&x767, &x768, x766, x733, x761);
+ fiat_secp384r1_addcarryx_u32(&x769, &x770, x768, x735, x762);
+ fiat_secp384r1_addcarryx_u32(&x771, &x772, x770, x737, x759);
+ fiat_secp384r1_addcarryx_u32(&x773, &x774, x772, x739,
+ (fiat_secp384r1_uint1)x760);
+ fiat_secp384r1_addcarryx_u32(&x775, &x776, x774, x741, x757);
+ fiat_secp384r1_addcarryx_u32(&x777, &x778, x776, x743, x758);
+ fiat_secp384r1_addcarryx_u32(&x779, &x780, x778, x745, x755);
+ fiat_secp384r1_addcarryx_u32(&x781, &x782, x780, x747, x763);
+ fiat_secp384r1_addcarryx_u32(&x783, &x784, x782, x749, x764);
+ fiat_secp384r1_addcarryx_u32(&x785, &x786, x784, x751, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x787, &x788, x786, x753, 0x0);
+ fiat_secp384r1_mulx_u32(&x789, &x790, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x791, &x792, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x793, &x794, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x795, &x796, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x797, &x798, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x799, &x800, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x801, &x802, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x803, &x804, x765, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x805, &x806, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x807, &x808, x765, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x809, &x810, 0x0, x806, x803);
+ fiat_secp384r1_addcarryx_u32(&x811, &x812, x810, x804, x801);
+ fiat_secp384r1_addcarryx_u32(&x813, &x814, x812, x802, x799);
+ fiat_secp384r1_addcarryx_u32(&x815, &x816, x814, x800, x797);
+ fiat_secp384r1_addcarryx_u32(&x817, &x818, x816, x798, x795);
+ fiat_secp384r1_addcarryx_u32(&x819, &x820, x818, x796, x793);
+ fiat_secp384r1_addcarryx_u32(&x821, &x822, x820, x794, x791);
+ fiat_secp384r1_addcarryx_u32(&x823, &x824, x822, x792, x789);
+ fiat_secp384r1_addcarryx_u32(&x825, &x826, 0x0, x765, x807);
+ fiat_secp384r1_addcarryx_u32(&x827, &x828, x826, x767, x808);
+ fiat_secp384r1_addcarryx_u32(&x829, &x830, x828, x769, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x831, &x832, x830, x771, x805);
+ fiat_secp384r1_addcarryx_u32(&x833, &x834, x832, x773, x809);
+ fiat_secp384r1_addcarryx_u32(&x835, &x836, x834, x775, x811);
+ fiat_secp384r1_addcarryx_u32(&x837, &x838, x836, x777, x813);
+ fiat_secp384r1_addcarryx_u32(&x839, &x840, x838, x779, x815);
+ fiat_secp384r1_addcarryx_u32(&x841, &x842, x840, x781, x817);
+ fiat_secp384r1_addcarryx_u32(&x843, &x844, x842, x783, x819);
+ fiat_secp384r1_addcarryx_u32(&x845, &x846, x844, x785, x821);
+ fiat_secp384r1_addcarryx_u32(&x847, &x848, x846, x787, x823);
+ fiat_secp384r1_addcarryx_u32(&x849, &x850, x848, ((uint32_t)x788 + x754),
+ (x824 + x790));
+ fiat_secp384r1_mulx_u32(&x851, &x852, x9, 0x2);
+ fiat_secp384r1_mulx_u32(&x853, &x854, x9, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x855, &x856, x9, 0x2);
+ fiat_secp384r1_mulx_u32(&x857, &x858, x9, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x859, &x860, 0x0, (fiat_secp384r1_uint1)x852,
+ x9);
+ fiat_secp384r1_addcarryx_u32(&x861, &x862, 0x0, x827, x9);
+ fiat_secp384r1_addcarryx_u32(&x863, &x864, x862, x829, x857);
+ fiat_secp384r1_addcarryx_u32(&x865, &x866, x864, x831, x858);
+ fiat_secp384r1_addcarryx_u32(&x867, &x868, x866, x833, x855);
+ fiat_secp384r1_addcarryx_u32(&x869, &x870, x868, x835,
+ (fiat_secp384r1_uint1)x856);
+ fiat_secp384r1_addcarryx_u32(&x871, &x872, x870, x837, x853);
+ fiat_secp384r1_addcarryx_u32(&x873, &x874, x872, x839, x854);
+ fiat_secp384r1_addcarryx_u32(&x875, &x876, x874, x841, x851);
+ fiat_secp384r1_addcarryx_u32(&x877, &x878, x876, x843, x859);
+ fiat_secp384r1_addcarryx_u32(&x879, &x880, x878, x845, x860);
+ fiat_secp384r1_addcarryx_u32(&x881, &x882, x880, x847, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x883, &x884, x882, x849, 0x0);
+ fiat_secp384r1_mulx_u32(&x885, &x886, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x887, &x888, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x889, &x890, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x891, &x892, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x893, &x894, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x895, &x896, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x897, &x898, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x899, &x900, x861, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x901, &x902, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x903, &x904, x861, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x905, &x906, 0x0, x902, x899);
+ fiat_secp384r1_addcarryx_u32(&x907, &x908, x906, x900, x897);
+ fiat_secp384r1_addcarryx_u32(&x909, &x910, x908, x898, x895);
+ fiat_secp384r1_addcarryx_u32(&x911, &x912, x910, x896, x893);
+ fiat_secp384r1_addcarryx_u32(&x913, &x914, x912, x894, x891);
+ fiat_secp384r1_addcarryx_u32(&x915, &x916, x914, x892, x889);
+ fiat_secp384r1_addcarryx_u32(&x917, &x918, x916, x890, x887);
+ fiat_secp384r1_addcarryx_u32(&x919, &x920, x918, x888, x885);
+ fiat_secp384r1_addcarryx_u32(&x921, &x922, 0x0, x861, x903);
+ fiat_secp384r1_addcarryx_u32(&x923, &x924, x922, x863, x904);
+ fiat_secp384r1_addcarryx_u32(&x925, &x926, x924, x865, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x927, &x928, x926, x867, x901);
+ fiat_secp384r1_addcarryx_u32(&x929, &x930, x928, x869, x905);
+ fiat_secp384r1_addcarryx_u32(&x931, &x932, x930, x871, x907);
+ fiat_secp384r1_addcarryx_u32(&x933, &x934, x932, x873, x909);
+ fiat_secp384r1_addcarryx_u32(&x935, &x936, x934, x875, x911);
+ fiat_secp384r1_addcarryx_u32(&x937, &x938, x936, x877, x913);
+ fiat_secp384r1_addcarryx_u32(&x939, &x940, x938, x879, x915);
+ fiat_secp384r1_addcarryx_u32(&x941, &x942, x940, x881, x917);
+ fiat_secp384r1_addcarryx_u32(&x943, &x944, x942, x883, x919);
+ fiat_secp384r1_addcarryx_u32(&x945, &x946, x944, ((uint32_t)x884 + x850),
+ (x920 + x886));
+ fiat_secp384r1_mulx_u32(&x947, &x948, x10, 0x2);
+ fiat_secp384r1_mulx_u32(&x949, &x950, x10, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x951, &x952, x10, 0x2);
+ fiat_secp384r1_mulx_u32(&x953, &x954, x10, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x955, &x956, 0x0, (fiat_secp384r1_uint1)x948,
+ x10);
+ fiat_secp384r1_addcarryx_u32(&x957, &x958, 0x0, x923, x10);
+ fiat_secp384r1_addcarryx_u32(&x959, &x960, x958, x925, x953);
+ fiat_secp384r1_addcarryx_u32(&x961, &x962, x960, x927, x954);
+ fiat_secp384r1_addcarryx_u32(&x963, &x964, x962, x929, x951);
+ fiat_secp384r1_addcarryx_u32(&x965, &x966, x964, x931,
+ (fiat_secp384r1_uint1)x952);
+ fiat_secp384r1_addcarryx_u32(&x967, &x968, x966, x933, x949);
+ fiat_secp384r1_addcarryx_u32(&x969, &x970, x968, x935, x950);
+ fiat_secp384r1_addcarryx_u32(&x971, &x972, x970, x937, x947);
+ fiat_secp384r1_addcarryx_u32(&x973, &x974, x972, x939, x955);
+ fiat_secp384r1_addcarryx_u32(&x975, &x976, x974, x941, x956);
+ fiat_secp384r1_addcarryx_u32(&x977, &x978, x976, x943, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x979, &x980, x978, x945, 0x0);
+ fiat_secp384r1_mulx_u32(&x981, &x982, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x983, &x984, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x985, &x986, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x987, &x988, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x989, &x990, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x991, &x992, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x993, &x994, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x995, &x996, x957, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x997, &x998, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x999, &x1000, x957, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1001, &x1002, 0x0, x998, x995);
+ fiat_secp384r1_addcarryx_u32(&x1003, &x1004, x1002, x996, x993);
+ fiat_secp384r1_addcarryx_u32(&x1005, &x1006, x1004, x994, x991);
+ fiat_secp384r1_addcarryx_u32(&x1007, &x1008, x1006, x992, x989);
+ fiat_secp384r1_addcarryx_u32(&x1009, &x1010, x1008, x990, x987);
+ fiat_secp384r1_addcarryx_u32(&x1011, &x1012, x1010, x988, x985);
+ fiat_secp384r1_addcarryx_u32(&x1013, &x1014, x1012, x986, x983);
+ fiat_secp384r1_addcarryx_u32(&x1015, &x1016, x1014, x984, x981);
+ fiat_secp384r1_addcarryx_u32(&x1017, &x1018, 0x0, x957, x999);
+ fiat_secp384r1_addcarryx_u32(&x1019, &x1020, x1018, x959, x1000);
+ fiat_secp384r1_addcarryx_u32(&x1021, &x1022, x1020, x961, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1023, &x1024, x1022, x963, x997);
+ fiat_secp384r1_addcarryx_u32(&x1025, &x1026, x1024, x965, x1001);
+ fiat_secp384r1_addcarryx_u32(&x1027, &x1028, x1026, x967, x1003);
+ fiat_secp384r1_addcarryx_u32(&x1029, &x1030, x1028, x969, x1005);
+ fiat_secp384r1_addcarryx_u32(&x1031, &x1032, x1030, x971, x1007);
+ fiat_secp384r1_addcarryx_u32(&x1033, &x1034, x1032, x973, x1009);
+ fiat_secp384r1_addcarryx_u32(&x1035, &x1036, x1034, x975, x1011);
+ fiat_secp384r1_addcarryx_u32(&x1037, &x1038, x1036, x977, x1013);
+ fiat_secp384r1_addcarryx_u32(&x1039, &x1040, x1038, x979, x1015);
+ fiat_secp384r1_addcarryx_u32(&x1041, &x1042, x1040, ((uint32_t)x980 + x946),
+ (x1016 + x982));
+ fiat_secp384r1_mulx_u32(&x1043, &x1044, x11, 0x2);
+ fiat_secp384r1_mulx_u32(&x1045, &x1046, x11, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1047, &x1048, x11, 0x2);
+ fiat_secp384r1_mulx_u32(&x1049, &x1050, x11, UINT32_C(0xfffffffe));
+ fiat_secp384r1_addcarryx_u32(&x1051, &x1052, 0x0,
+ (fiat_secp384r1_uint1)x1044, x11);
+ fiat_secp384r1_addcarryx_u32(&x1053, &x1054, 0x0, x1019, x11);
+ fiat_secp384r1_addcarryx_u32(&x1055, &x1056, x1054, x1021, x1049);
+ fiat_secp384r1_addcarryx_u32(&x1057, &x1058, x1056, x1023, x1050);
+ fiat_secp384r1_addcarryx_u32(&x1059, &x1060, x1058, x1025, x1047);
+ fiat_secp384r1_addcarryx_u32(&x1061, &x1062, x1060, x1027,
+ (fiat_secp384r1_uint1)x1048);
+ fiat_secp384r1_addcarryx_u32(&x1063, &x1064, x1062, x1029, x1045);
+ fiat_secp384r1_addcarryx_u32(&x1065, &x1066, x1064, x1031, x1046);
+ fiat_secp384r1_addcarryx_u32(&x1067, &x1068, x1066, x1033, x1043);
+ fiat_secp384r1_addcarryx_u32(&x1069, &x1070, x1068, x1035, x1051);
+ fiat_secp384r1_addcarryx_u32(&x1071, &x1072, x1070, x1037, x1052);
+ fiat_secp384r1_addcarryx_u32(&x1073, &x1074, x1072, x1039, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1075, &x1076, x1074, x1041, 0x0);
+ fiat_secp384r1_mulx_u32(&x1077, &x1078, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1079, &x1080, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1081, &x1082, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1083, &x1084, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1085, &x1086, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1087, &x1088, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1089, &x1090, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1091, &x1092, x1053, UINT32_C(0xfffffffe));
+ fiat_secp384r1_mulx_u32(&x1093, &x1094, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_mulx_u32(&x1095, &x1096, x1053, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x1097, &x1098, 0x0, x1094, x1091);
+ fiat_secp384r1_addcarryx_u32(&x1099, &x1100, x1098, x1092, x1089);
+ fiat_secp384r1_addcarryx_u32(&x1101, &x1102, x1100, x1090, x1087);
+ fiat_secp384r1_addcarryx_u32(&x1103, &x1104, x1102, x1088, x1085);
+ fiat_secp384r1_addcarryx_u32(&x1105, &x1106, x1104, x1086, x1083);
+ fiat_secp384r1_addcarryx_u32(&x1107, &x1108, x1106, x1084, x1081);
+ fiat_secp384r1_addcarryx_u32(&x1109, &x1110, x1108, x1082, x1079);
+ fiat_secp384r1_addcarryx_u32(&x1111, &x1112, x1110, x1080, x1077);
+ fiat_secp384r1_addcarryx_u32(&x1113, &x1114, 0x0, x1053, x1095);
+ fiat_secp384r1_addcarryx_u32(&x1115, &x1116, x1114, x1055, x1096);
+ fiat_secp384r1_addcarryx_u32(&x1117, &x1118, x1116, x1057, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x1119, &x1120, x1118, x1059, x1093);
+ fiat_secp384r1_addcarryx_u32(&x1121, &x1122, x1120, x1061, x1097);
+ fiat_secp384r1_addcarryx_u32(&x1123, &x1124, x1122, x1063, x1099);
+ fiat_secp384r1_addcarryx_u32(&x1125, &x1126, x1124, x1065, x1101);
+ fiat_secp384r1_addcarryx_u32(&x1127, &x1128, x1126, x1067, x1103);
+ fiat_secp384r1_addcarryx_u32(&x1129, &x1130, x1128, x1069, x1105);
+ fiat_secp384r1_addcarryx_u32(&x1131, &x1132, x1130, x1071, x1107);
+ fiat_secp384r1_addcarryx_u32(&x1133, &x1134, x1132, x1073, x1109);
+ fiat_secp384r1_addcarryx_u32(&x1135, &x1136, x1134, x1075, x1111);
+ fiat_secp384r1_addcarryx_u32(&x1137, &x1138, x1136,
+ ((uint32_t)x1076 + x1042), (x1112 + x1078));
+ fiat_secp384r1_subborrowx_u32(&x1139, &x1140, 0x0, x1115,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1141, &x1142, x1140, x1117, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1143, &x1144, x1142, x1119, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x1145, &x1146, x1144, x1121,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1147, &x1148, x1146, x1123,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x1149, &x1150, x1148, x1125,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1151, &x1152, x1150, x1127,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1153, &x1154, x1152, x1129,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1155, &x1156, x1154, x1131,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1157, &x1158, x1156, x1133,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1159, &x1160, x1158, x1135,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1161, &x1162, x1160, x1137,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x1163, &x1164, x1162, x1138, 0x0);
+ fiat_secp384r1_cmovznz_u32(&x1165, x1164, x1139, x1115);
+ fiat_secp384r1_cmovznz_u32(&x1166, x1164, x1141, x1117);
+ fiat_secp384r1_cmovznz_u32(&x1167, x1164, x1143, x1119);
+ fiat_secp384r1_cmovznz_u32(&x1168, x1164, x1145, x1121);
+ fiat_secp384r1_cmovznz_u32(&x1169, x1164, x1147, x1123);
+ fiat_secp384r1_cmovznz_u32(&x1170, x1164, x1149, x1125);
+ fiat_secp384r1_cmovznz_u32(&x1171, x1164, x1151, x1127);
+ fiat_secp384r1_cmovznz_u32(&x1172, x1164, x1153, x1129);
+ fiat_secp384r1_cmovznz_u32(&x1173, x1164, x1155, x1131);
+ fiat_secp384r1_cmovznz_u32(&x1174, x1164, x1157, x1133);
+ fiat_secp384r1_cmovznz_u32(&x1175, x1164, x1159, x1135);
+ fiat_secp384r1_cmovznz_u32(&x1176, x1164, x1161, x1137);
+ out1[0] = x1165;
+ out1[1] = x1166;
+ out1[2] = x1167;
+ out1[3] = x1168;
+ out1[4] = x1169;
+ out1[5] = x1170;
+ out1[6] = x1171;
+ out1[7] = x1172;
+ out1[8] = x1173;
+ out1[9] = x1174;
+ out1[10] = x1175;
+ out1[11] = x1176;
+}
+
+/*
+ * The function fiat_secp384r1_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ *
+ * Implementation notes:
+ * The body is a single bitwise OR over all twelve 32-bit limbs of arg1,
+ * stored through out1. It contains no branches and no early exit.
+ * The value written is the OR itself, not a normalised 0/1 flag, so callers
+ * should only compare it against zero.
+ */
+static void
+fiat_secp384r1_nonzero(uint32_t *out1, const uint32_t arg1[12])
+{
+ uint32_t x1; /* accumulates the OR of every limb */
+ x1 = ((arg1[0]) |
+ ((arg1[1]) |
+ ((arg1[2]) |
+ ((arg1[3]) |
+ ((arg1[4]) |
+ ((arg1[5]) |
+ ((arg1[6]) |
+ ((arg1[7]) |
+ ((arg1[8]) | ((arg1[9]) | ((arg1[10]) | (arg1[11])))))))))))); /* any set bit in any limb survives into x1 */
+ *out1 = x1; /* zero exactly when all twelve limbs are zero */
+}
+
+/*
+ * The function fiat_secp384r1_selectznz is a multi-limb conditional select.
+ *
+ * Postconditions:
+ * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ *
+ * Implementation notes:
+ * Limb i of the result comes from one call
+ * fiat_secp384r1_cmovznz_u32(&x, arg1, arg2[i], arg3[i]) with the same
+ * selector arg1 for every limb. All twelve selected limbs are first held in
+ * locals x1..x12 and only afterwards copied into out1[0..11].
+ * NOTE(review): fiat_secp384r1_cmovznz_u32 is defined outside this excerpt;
+ * from the postcondition above it is presumed to yield its third argument
+ * when the selector is 0 and its fourth otherwise - confirm at its definition.
+ */
+static void
+fiat_secp384r1_selectznz(uint32_t out1[12],
+ fiat_secp384r1_uint1 arg1,
+ const uint32_t arg2[12],
+ const uint32_t arg3[12])
+{
+ uint32_t x1; /* x1..x12: selected limbs 0..11 */
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ fiat_secp384r1_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0])); /* same selector is reused for every limb */
+ fiat_secp384r1_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+ fiat_secp384r1_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+ fiat_secp384r1_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+ fiat_secp384r1_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+ fiat_secp384r1_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+ fiat_secp384r1_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
+ fiat_secp384r1_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
+ fiat_secp384r1_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
+ fiat_secp384r1_cmovznz_u32(&x10, arg1, (arg2[9]), (arg3[9]));
+ fiat_secp384r1_cmovznz_u32(&x11, arg1, (arg2[10]), (arg3[10]));
+ fiat_secp384r1_cmovznz_u32(&x12, arg1, (arg2[11]), (arg3[11]));
+ out1[0] = x1; /* stores happen only after all inputs have been read */
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+ out1[6] = x7;
+ out1[7] = x8;
+ out1[8] = x9;
+ out1[9] = x10;
+ out1[10] = x11;
+ out1[11] = x12;
+}
+
+/*
+ * The function fiat_secp384r1_to_bytes serializes a field element NOT in the Montgomery domain to bytes in little-endian order.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg1 < m
+ * Postconditions:
+ * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..47]
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
+ */
+static void
+fiat_secp384r1_to_bytes(uint8_t out1[48], const uint32_t arg1[12])
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint8_t x13;
+ uint32_t x14;
+ uint8_t x15;
+ uint32_t x16;
+ uint8_t x17;
+ uint8_t x18;
+ uint8_t x19;
+ uint32_t x20;
+ uint8_t x21;
+ uint32_t x22;
+ uint8_t x23;
+ uint8_t x24;
+ uint8_t x25;
+ uint32_t x26;
+ uint8_t x27;
+ uint32_t x28;
+ uint8_t x29;
+ uint8_t x30;
+ uint8_t x31;
+ uint32_t x32;
+ uint8_t x33;
+ uint32_t x34;
+ uint8_t x35;
+ uint8_t x36;
+ uint8_t x37;
+ uint32_t x38;
+ uint8_t x39;
+ uint32_t x40;
+ uint8_t x41;
+ uint8_t x42;
+ uint8_t x43;
+ uint32_t x44;
+ uint8_t x45;
+ uint32_t x46;
+ uint8_t x47;
+ uint8_t x48;
+ uint8_t x49;
+ uint32_t x50;
+ uint8_t x51;
+ uint32_t x52;
+ uint8_t x53;
+ uint8_t x54;
+ uint8_t x55;
+ uint32_t x56;
+ uint8_t x57;
+ uint32_t x58;
+ uint8_t x59;
+ uint8_t x60;
+ uint8_t x61;
+ uint32_t x62;
+ uint8_t x63;
+ uint32_t x64;
+ uint8_t x65;
+ uint8_t x66;
+ uint8_t x67;
+ uint32_t x68;
+ uint8_t x69;
+ uint32_t x70;
+ uint8_t x71;
+ uint8_t x72;
+ uint8_t x73;
+ uint32_t x74;
+ uint8_t x75;
+ uint32_t x76;
+ uint8_t x77;
+ uint8_t x78;
+ uint8_t x79;
+ uint32_t x80;
+ uint8_t x81;
+ uint32_t x82;
+ uint8_t x83;
+ uint8_t x84;
+ x1 = (arg1[11]);
+ x2 = (arg1[10]);
+ x3 = (arg1[9]);
+ x4 = (arg1[8]);
+ x5 = (arg1[7]);
+ x6 = (arg1[6]);
+ x7 = (arg1[5]);
+ x8 = (arg1[4]);
+ x9 = (arg1[3]);
+ x10 = (arg1[2]);
+ x11 = (arg1[1]);
+ x12 = (arg1[0]);
+ x13 = (uint8_t)(x12 & UINT8_C(0xff));
+ x14 = (x12 >> 8);
+ x15 = (uint8_t)(x14 & UINT8_C(0xff));
+ x16 = (x14 >> 8);
+ x17 = (uint8_t)(x16 & UINT8_C(0xff));
+ x18 = (uint8_t)(x16 >> 8);
+ x19 = (uint8_t)(x11 & UINT8_C(0xff));
+ x20 = (x11 >> 8);
+ x21 = (uint8_t)(x20 & UINT8_C(0xff));
+ x22 = (x20 >> 8);
+ x23 = (uint8_t)(x22 & UINT8_C(0xff));
+ x24 = (uint8_t)(x22 >> 8);
+ x25 = (uint8_t)(x10 & UINT8_C(0xff));
+ x26 = (x10 >> 8);
+ x27 = (uint8_t)(x26 & UINT8_C(0xff));
+ x28 = (x26 >> 8);
+ x29 = (uint8_t)(x28 & UINT8_C(0xff));
+ x30 = (uint8_t)(x28 >> 8);
+ x31 = (uint8_t)(x9 & UINT8_C(0xff));
+ x32 = (x9 >> 8);
+ x33 = (uint8_t)(x32 & UINT8_C(0xff));
+ x34 = (x32 >> 8);
+ x35 = (uint8_t)(x34 & UINT8_C(0xff));
+ x36 = (uint8_t)(x34 >> 8);
+ x37 = (uint8_t)(x8 & UINT8_C(0xff));
+ x38 = (x8 >> 8);
+ x39 = (uint8_t)(x38 & UINT8_C(0xff));
+ x40 = (x38 >> 8);
+ x41 = (uint8_t)(x40 & UINT8_C(0xff));
+ x42 = (uint8_t)(x40 >> 8);
+ x43 = (uint8_t)(x7 & UINT8_C(0xff));
+ x44 = (x7 >> 8);
+ x45 = (uint8_t)(x44 & UINT8_C(0xff));
+ x46 = (x44 >> 8);
+ x47 = (uint8_t)(x46 & UINT8_C(0xff));
+ x48 = (uint8_t)(x46 >> 8);
+ x49 = (uint8_t)(x6 & UINT8_C(0xff));
+ x50 = (x6 >> 8);
+ x51 = (uint8_t)(x50 & UINT8_C(0xff));
+ x52 = (x50 >> 8);
+ x53 = (uint8_t)(x52 & UINT8_C(0xff));
+ x54 = (uint8_t)(x52 >> 8);
+ x55 = (uint8_t)(x5 & UINT8_C(0xff));
+ x56 = (x5 >> 8);
+ x57 = (uint8_t)(x56 & UINT8_C(0xff));
+ x58 = (x56 >> 8);
+ x59 = (uint8_t)(x58 & UINT8_C(0xff));
+ x60 = (uint8_t)(x58 >> 8);
+ x61 = (uint8_t)(x4 & UINT8_C(0xff));
+ x62 = (x4 >> 8);
+ x63 = (uint8_t)(x62 & UINT8_C(0xff));
+ x64 = (x62 >> 8);
+ x65 = (uint8_t)(x64 & UINT8_C(0xff));
+ x66 = (uint8_t)(x64 >> 8);
+ x67 = (uint8_t)(x3 & UINT8_C(0xff));
+ x68 = (x3 >> 8);
+ x69 = (uint8_t)(x68 & UINT8_C(0xff));
+ x70 = (x68 >> 8);
+ x71 = (uint8_t)(x70 & UINT8_C(0xff));
+ x72 = (uint8_t)(x70 >> 8);
+ x73 = (uint8_t)(x2 & UINT8_C(0xff));
+ x74 = (x2 >> 8);
+ x75 = (uint8_t)(x74 & UINT8_C(0xff));
+ x76 = (x74 >> 8);
+ x77 = (uint8_t)(x76 & UINT8_C(0xff));
+ x78 = (uint8_t)(x76 >> 8);
+ x79 = (uint8_t)(x1 & UINT8_C(0xff));
+ x80 = (x1 >> 8);
+ x81 = (uint8_t)(x80 & UINT8_C(0xff));
+ x82 = (x80 >> 8);
+ x83 = (uint8_t)(x82 & UINT8_C(0xff));
+ x84 = (uint8_t)(x82 >> 8);
+ out1[0] = x13;
+ out1[1] = x15;
+ out1[2] = x17;
+ out1[3] = x18;
+ out1[4] = x19;
+ out1[5] = x21;
+ out1[6] = x23;
+ out1[7] = x24;
+ out1[8] = x25;
+ out1[9] = x27;
+ out1[10] = x29;
+ out1[11] = x30;
+ out1[12] = x31;
+ out1[13] = x33;
+ out1[14] = x35;
+ out1[15] = x36;
+ out1[16] = x37;
+ out1[17] = x39;
+ out1[18] = x41;
+ out1[19] = x42;
+ out1[20] = x43;
+ out1[21] = x45;
+ out1[22] = x47;
+ out1[23] = x48;
+ out1[24] = x49;
+ out1[25] = x51;
+ out1[26] = x53;
+ out1[27] = x54;
+ out1[28] = x55;
+ out1[29] = x57;
+ out1[30] = x59;
+ out1[31] = x60;
+ out1[32] = x61;
+ out1[33] = x63;
+ out1[34] = x65;
+ out1[35] = x66;
+ out1[36] = x67;
+ out1[37] = x69;
+ out1[38] = x71;
+ out1[39] = x72;
+ out1[40] = x73;
+ out1[41] = x75;
+ out1[42] = x77;
+ out1[43] = x78;
+ out1[44] = x79;
+ out1[45] = x81;
+ out1[46] = x83;
+ out1[47] = x84;
+}
+
+/*
+ * The function fiat_secp384r1_from_bytes deserializes a field element NOT in the Montgomery domain from bytes in little-endian order.
+ *
+ * Preconditions:
+ * 0 ≤ bytes_eval arg1 < m
+ * Postconditions:
+ * eval out1 mod m = bytes_eval arg1 mod m
+ * 0 ≤ eval out1 < m
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static void
+fiat_secp384r1_from_bytes(uint32_t out1[12],
+ const uint8_t arg1[48])
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint8_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint8_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint8_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint8_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint8_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint8_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint8_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint8_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint8_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint8_t x40;
+ uint32_t x41;
+ uint32_t x42;
+ uint32_t x43;
+ uint8_t x44;
+ uint32_t x45;
+ uint32_t x46;
+ uint32_t x47;
+ uint8_t x48;
+ uint32_t x49;
+ uint32_t x50;
+ uint32_t x51;
+ uint32_t x52;
+ uint32_t x53;
+ uint32_t x54;
+ uint32_t x55;
+ uint32_t x56;
+ uint32_t x57;
+ uint32_t x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ uint32_t x63;
+ uint32_t x64;
+ uint32_t x65;
+ uint32_t x66;
+ uint32_t x67;
+ uint32_t x68;
+ uint32_t x69;
+ uint32_t x70;
+ uint32_t x71;
+ uint32_t x72;
+ uint32_t x73;
+ uint32_t x74;
+ uint32_t x75;
+ uint32_t x76;
+ uint32_t x77;
+ uint32_t x78;
+ uint32_t x79;
+ uint32_t x80;
+ uint32_t x81;
+ uint32_t x82;
+ uint32_t x83;
+ uint32_t x84;
+ x1 = ((uint32_t)(arg1[47]) << 24);
+ x2 = ((uint32_t)(arg1[46]) << 16);
+ x3 = ((uint32_t)(arg1[45]) << 8);
+ x4 = (arg1[44]);
+ x5 = ((uint32_t)(arg1[43]) << 24);
+ x6 = ((uint32_t)(arg1[42]) << 16);
+ x7 = ((uint32_t)(arg1[41]) << 8);
+ x8 = (arg1[40]);
+ x9 = ((uint32_t)(arg1[39]) << 24);
+ x10 = ((uint32_t)(arg1[38]) << 16);
+ x11 = ((uint32_t)(arg1[37]) << 8);
+ x12 = (arg1[36]);
+ x13 = ((uint32_t)(arg1[35]) << 24);
+ x14 = ((uint32_t)(arg1[34]) << 16);
+ x15 = ((uint32_t)(arg1[33]) << 8);
+ x16 = (arg1[32]);
+ x17 = ((uint32_t)(arg1[31]) << 24);
+ x18 = ((uint32_t)(arg1[30]) << 16);
+ x19 = ((uint32_t)(arg1[29]) << 8);
+ x20 = (arg1[28]);
+ x21 = ((uint32_t)(arg1[27]) << 24);
+ x22 = ((uint32_t)(arg1[26]) << 16);
+ x23 = ((uint32_t)(arg1[25]) << 8);
+ x24 = (arg1[24]);
+ x25 = ((uint32_t)(arg1[23]) << 24);
+ x26 = ((uint32_t)(arg1[22]) << 16);
+ x27 = ((uint32_t)(arg1[21]) << 8);
+ x28 = (arg1[20]);
+ x29 = ((uint32_t)(arg1[19]) << 24);
+ x30 = ((uint32_t)(arg1[18]) << 16);
+ x31 = ((uint32_t)(arg1[17]) << 8);
+ x32 = (arg1[16]);
+ x33 = ((uint32_t)(arg1[15]) << 24);
+ x34 = ((uint32_t)(arg1[14]) << 16);
+ x35 = ((uint32_t)(arg1[13]) << 8);
+ x36 = (arg1[12]);
+ x37 = ((uint32_t)(arg1[11]) << 24);
+ x38 = ((uint32_t)(arg1[10]) << 16);
+ x39 = ((uint32_t)(arg1[9]) << 8);
+ x40 = (arg1[8]);
+ x41 = ((uint32_t)(arg1[7]) << 24);
+ x42 = ((uint32_t)(arg1[6]) << 16);
+ x43 = ((uint32_t)(arg1[5]) << 8);
+ x44 = (arg1[4]);
+ x45 = ((uint32_t)(arg1[3]) << 24);
+ x46 = ((uint32_t)(arg1[2]) << 16);
+ x47 = ((uint32_t)(arg1[1]) << 8);
+ x48 = (arg1[0]);
+ x49 = (x47 + (uint32_t)x48);
+ x50 = (x46 + x49);
+ x51 = (x45 + x50);
+ x52 = (x43 + (uint32_t)x44);
+ x53 = (x42 + x52);
+ x54 = (x41 + x53);
+ x55 = (x39 + (uint32_t)x40);
+ x56 = (x38 + x55);
+ x57 = (x37 + x56);
+ x58 = (x35 + (uint32_t)x36);
+ x59 = (x34 + x58);
+ x60 = (x33 + x59);
+ x61 = (x31 + (uint32_t)x32);
+ x62 = (x30 + x61);
+ x63 = (x29 + x62);
+ x64 = (x27 + (uint32_t)x28);
+ x65 = (x26 + x64);
+ x66 = (x25 + x65);
+ x67 = (x23 + (uint32_t)x24);
+ x68 = (x22 + x67);
+ x69 = (x21 + x68);
+ x70 = (x19 + (uint32_t)x20);
+ x71 = (x18 + x70);
+ x72 = (x17 + x71);
+ x73 = (x15 + (uint32_t)x16);
+ x74 = (x14 + x73);
+ x75 = (x13 + x74);
+ x76 = (x11 + (uint32_t)x12);
+ x77 = (x10 + x76);
+ x78 = (x9 + x77);
+ x79 = (x7 + (uint32_t)x8);
+ x80 = (x6 + x79);
+ x81 = (x5 + x80);
+ x82 = (x3 + (uint32_t)x4);
+ x83 = (x2 + x82);
+ x84 = (x1 + x83);
+ out1[0] = x51;
+ out1[1] = x54;
+ out1[2] = x57;
+ out1[3] = x60;
+ out1[4] = x63;
+ out1[5] = x66;
+ out1[6] = x69;
+ out1[7] = x72;
+ out1[8] = x75;
+ out1[9] = x78;
+ out1[10] = x81;
+ out1[11] = x84;
+}
+
+/*
+ * The function fiat_secp384r1_divstep computes a divstep.
+ *
+ * Preconditions:
+ * 0 ≤ eval arg4 < m
+ * 0 ≤ eval arg5 < m
+ * Postconditions:
+ * out1 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then 1 - arg1 else 1 + arg1)
+ * twos_complement_eval out2 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then twos_complement_eval arg3 else twos_complement_eval arg2)
+ * twos_complement_eval out3 = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then ⌊(twos_complement_eval arg3 - twos_complement_eval arg2) / 2⌋ else ⌊(twos_complement_eval arg3 + (twos_complement_eval arg3 mod 2) * twos_complement_eval arg2) / 2⌋)
+ * eval (from_montgomery out4) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (2 * eval (from_montgomery arg5)) mod m else (2 * eval (from_montgomery arg4)) mod m)
+ * eval (from_montgomery out5) mod m = (if 0 < arg1 ∧ (twos_complement_eval arg3) is odd then (eval (from_montgomery arg4) - eval (from_montgomery arg4)) mod m else (eval (from_montgomery arg5) + (twos_complement_eval arg3 mod 2) * eval (from_montgomery arg4)) mod m)
+ * 0 ≤ eval out5 < m
+ * 0 ≤ eval out5 < m
+ * 0 ≤ eval out2 < m
+ * 0 ≤ eval out3 < m
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0xffffffff]
+ * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * arg4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * arg5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ * out2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * out3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * out4: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * out5: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static void
+fiat_secp384r1_divstep(
+ uint32_t *out1, uint32_t out2[13], uint32_t out3[13], uint32_t out4[12],
+ uint32_t out5[12], uint32_t arg1, const uint32_t arg2[13],
+ const uint32_t arg3[13], const uint32_t arg4[12], const uint32_t arg5[12])
+{
+ uint32_t x1;
+ fiat_secp384r1_uint1 x2;
+ fiat_secp384r1_uint1 x3;
+ uint32_t x4;
+ fiat_secp384r1_uint1 x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ fiat_secp384r1_uint1 x21;
+ uint32_t x22;
+ fiat_secp384r1_uint1 x23;
+ uint32_t x24;
+ fiat_secp384r1_uint1 x25;
+ uint32_t x26;
+ fiat_secp384r1_uint1 x27;
+ uint32_t x28;
+ fiat_secp384r1_uint1 x29;
+ uint32_t x30;
+ fiat_secp384r1_uint1 x31;
+ uint32_t x32;
+ fiat_secp384r1_uint1 x33;
+ uint32_t x34;
+ fiat_secp384r1_uint1 x35;
+ uint32_t x36;
+ fiat_secp384r1_uint1 x37;
+ uint32_t x38;
+ fiat_secp384r1_uint1 x39;
+ uint32_t x40;
+ fiat_secp384r1_uint1 x41;
+ uint32_t x42;
+ fiat_secp384r1_uint1 x43;
+ uint32_t x44;
+ fiat_secp384r1_uint1 x45;
+ uint32_t x46;
+ uint32_t x47;
+ uint32_t x48;
+ uint32_t x49;
+ uint32_t x50;
+ uint32_t x51;
+ uint32_t x52;
+ uint32_t x53;
+ uint32_t x54;
+ uint32_t x55;
+ uint32_t x56;
+ uint32_t x57;
+ uint32_t x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ uint32_t x63;
+ uint32_t x64;
+ uint32_t x65;
+ uint32_t x66;
+ uint32_t x67;
+ uint32_t x68;
+ uint32_t x69;
+ uint32_t x70;
+ uint32_t x71;
+ fiat_secp384r1_uint1 x72;
+ uint32_t x73;
+ fiat_secp384r1_uint1 x74;
+ uint32_t x75;
+ fiat_secp384r1_uint1 x76;
+ uint32_t x77;
+ fiat_secp384r1_uint1 x78;
+ uint32_t x79;
+ fiat_secp384r1_uint1 x80;
+ uint32_t x81;
+ fiat_secp384r1_uint1 x82;
+ uint32_t x83;
+ fiat_secp384r1_uint1 x84;
+ uint32_t x85;
+ fiat_secp384r1_uint1 x86;
+ uint32_t x87;
+ fiat_secp384r1_uint1 x88;
+ uint32_t x89;
+ fiat_secp384r1_uint1 x90;
+ uint32_t x91;
+ fiat_secp384r1_uint1 x92;
+ uint32_t x93;
+ fiat_secp384r1_uint1 x94;
+ uint32_t x95;
+ fiat_secp384r1_uint1 x96;
+ uint32_t x97;
+ fiat_secp384r1_uint1 x98;
+ uint32_t x99;
+ fiat_secp384r1_uint1 x100;
+ uint32_t x101;
+ fiat_secp384r1_uint1 x102;
+ uint32_t x103;
+ fiat_secp384r1_uint1 x104;
+ uint32_t x105;
+ fiat_secp384r1_uint1 x106;
+ uint32_t x107;
+ fiat_secp384r1_uint1 x108;
+ uint32_t x109;
+ fiat_secp384r1_uint1 x110;
+ uint32_t x111;
+ fiat_secp384r1_uint1 x112;
+ uint32_t x113;
+ fiat_secp384r1_uint1 x114;
+ uint32_t x115;
+ fiat_secp384r1_uint1 x116;
+ uint32_t x117;
+ fiat_secp384r1_uint1 x118;
+ uint32_t x119;
+ fiat_secp384r1_uint1 x120;
+ uint32_t x121;
+ uint32_t x122;
+ uint32_t x123;
+ uint32_t x124;
+ uint32_t x125;
+ uint32_t x126;
+ uint32_t x127;
+ uint32_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint32_t x133;
+ fiat_secp384r1_uint1 x134;
+ uint32_t x135;
+ fiat_secp384r1_uint1 x136;
+ uint32_t x137;
+ fiat_secp384r1_uint1 x138;
+ uint32_t x139;
+ fiat_secp384r1_uint1 x140;
+ uint32_t x141;
+ fiat_secp384r1_uint1 x142;
+ uint32_t x143;
+ fiat_secp384r1_uint1 x144;
+ uint32_t x145;
+ fiat_secp384r1_uint1 x146;
+ uint32_t x147;
+ fiat_secp384r1_uint1 x148;
+ uint32_t x149;
+ fiat_secp384r1_uint1 x150;
+ uint32_t x151;
+ fiat_secp384r1_uint1 x152;
+ uint32_t x153;
+ fiat_secp384r1_uint1 x154;
+ uint32_t x155;
+ fiat_secp384r1_uint1 x156;
+ uint32_t x157;
+ uint32_t x158;
+ fiat_secp384r1_uint1 x159;
+ uint32_t x160;
+ fiat_secp384r1_uint1 x161;
+ uint32_t x162;
+ fiat_secp384r1_uint1 x163;
+ uint32_t x164;
+ fiat_secp384r1_uint1 x165;
+ uint32_t x166;
+ fiat_secp384r1_uint1 x167;
+ uint32_t x168;
+ fiat_secp384r1_uint1 x169;
+ uint32_t x170;
+ fiat_secp384r1_uint1 x171;
+ uint32_t x172;
+ fiat_secp384r1_uint1 x173;
+ uint32_t x174;
+ fiat_secp384r1_uint1 x175;
+ uint32_t x176;
+ fiat_secp384r1_uint1 x177;
+ uint32_t x178;
+ fiat_secp384r1_uint1 x179;
+ uint32_t x180;
+ fiat_secp384r1_uint1 x181;
+ uint32_t x182;
+ uint32_t x183;
+ uint32_t x184;
+ uint32_t x185;
+ uint32_t x186;
+ uint32_t x187;
+ uint32_t x188;
+ uint32_t x189;
+ uint32_t x190;
+ uint32_t x191;
+ uint32_t x192;
+ uint32_t x193;
+ fiat_secp384r1_uint1 x194;
+ uint32_t x195;
+ uint32_t x196;
+ uint32_t x197;
+ uint32_t x198;
+ uint32_t x199;
+ uint32_t x200;
+ uint32_t x201;
+ uint32_t x202;
+ uint32_t x203;
+ uint32_t x204;
+ uint32_t x205;
+ uint32_t x206;
+ uint32_t x207;
+ uint32_t x208;
+ fiat_secp384r1_uint1 x209;
+ uint32_t x210;
+ fiat_secp384r1_uint1 x211;
+ uint32_t x212;
+ fiat_secp384r1_uint1 x213;
+ uint32_t x214;
+ fiat_secp384r1_uint1 x215;
+ uint32_t x216;
+ fiat_secp384r1_uint1 x217;
+ uint32_t x218;
+ fiat_secp384r1_uint1 x219;
+ uint32_t x220;
+ fiat_secp384r1_uint1 x221;
+ uint32_t x222;
+ fiat_secp384r1_uint1 x223;
+ uint32_t x224;
+ fiat_secp384r1_uint1 x225;
+ uint32_t x226;
+ fiat_secp384r1_uint1 x227;
+ uint32_t x228;
+ fiat_secp384r1_uint1 x229;
+ uint32_t x230;
+ fiat_secp384r1_uint1 x231;
+ uint32_t x232;
+ fiat_secp384r1_uint1 x233;
+ uint32_t x234;
+ uint32_t x235;
+ uint32_t x236;
+ uint32_t x237;
+ uint32_t x238;
+ uint32_t x239;
+ uint32_t x240;
+ uint32_t x241;
+ uint32_t x242;
+ uint32_t x243;
+ uint32_t x244;
+ uint32_t x245;
+ uint32_t x246;
+ fiat_secp384r1_uint1 x247;
+ uint32_t x248;
+ fiat_secp384r1_uint1 x249;
+ uint32_t x250;
+ fiat_secp384r1_uint1 x251;
+ uint32_t x252;
+ fiat_secp384r1_uint1 x253;
+ uint32_t x254;
+ fiat_secp384r1_uint1 x255;
+ uint32_t x256;
+ fiat_secp384r1_uint1 x257;
+ uint32_t x258;
+ fiat_secp384r1_uint1 x259;
+ uint32_t x260;
+ fiat_secp384r1_uint1 x261;
+ uint32_t x262;
+ fiat_secp384r1_uint1 x263;
+ uint32_t x264;
+ fiat_secp384r1_uint1 x265;
+ uint32_t x266;
+ fiat_secp384r1_uint1 x267;
+ uint32_t x268;
+ fiat_secp384r1_uint1 x269;
+ uint32_t x270;
+ fiat_secp384r1_uint1 x271;
+ uint32_t x272;
+ fiat_secp384r1_uint1 x273;
+ uint32_t x274;
+ fiat_secp384r1_uint1 x275;
+ uint32_t x276;
+ fiat_secp384r1_uint1 x277;
+ uint32_t x278;
+ fiat_secp384r1_uint1 x279;
+ uint32_t x280;
+ fiat_secp384r1_uint1 x281;
+ uint32_t x282;
+ fiat_secp384r1_uint1 x283;
+ uint32_t x284;
+ fiat_secp384r1_uint1 x285;
+ uint32_t x286;
+ fiat_secp384r1_uint1 x287;
+ uint32_t x288;
+ fiat_secp384r1_uint1 x289;
+ uint32_t x290;
+ fiat_secp384r1_uint1 x291;
+ uint32_t x292;
+ fiat_secp384r1_uint1 x293;
+ uint32_t x294;
+ fiat_secp384r1_uint1 x295;
+ uint32_t x296;
+ fiat_secp384r1_uint1 x297;
+ uint32_t x298;
+ uint32_t x299;
+ uint32_t x300;
+ uint32_t x301;
+ uint32_t x302;
+ uint32_t x303;
+ uint32_t x304;
+ uint32_t x305;
+ uint32_t x306;
+ uint32_t x307;
+ uint32_t x308;
+ uint32_t x309;
+ uint32_t x310;
+ uint32_t x311;
+ uint32_t x312;
+ uint32_t x313;
+ uint32_t x314;
+ uint32_t x315;
+ uint32_t x316;
+ uint32_t x317;
+ uint32_t x318;
+ uint32_t x319;
+ uint32_t x320;
+ uint32_t x321;
+ uint32_t x322;
+ uint32_t x323;
+ uint32_t x324;
+ uint32_t x325;
+ uint32_t x326;
+ uint32_t x327;
+ uint32_t x328;
+ uint32_t x329;
+ uint32_t x330;
+ uint32_t x331;
+ uint32_t x332;
+ uint32_t x333;
+ uint32_t x334;
+ fiat_secp384r1_addcarryx_u32(&x1, &x2, 0x0, (~arg1), 0x1);
+ x3 = (fiat_secp384r1_uint1)((fiat_secp384r1_uint1)(x1 >> 31) &
+ (fiat_secp384r1_uint1)((arg3[0]) & 0x1));
+ fiat_secp384r1_addcarryx_u32(&x4, &x5, 0x0, (~arg1), 0x1);
+ fiat_secp384r1_cmovznz_u32(&x6, x3, arg1, x4);
+ fiat_secp384r1_cmovznz_u32(&x7, x3, (arg2[0]), (arg3[0]));
+ fiat_secp384r1_cmovznz_u32(&x8, x3, (arg2[1]), (arg3[1]));
+ fiat_secp384r1_cmovznz_u32(&x9, x3, (arg2[2]), (arg3[2]));
+ fiat_secp384r1_cmovznz_u32(&x10, x3, (arg2[3]), (arg3[3]));
+ fiat_secp384r1_cmovznz_u32(&x11, x3, (arg2[4]), (arg3[4]));
+ fiat_secp384r1_cmovznz_u32(&x12, x3, (arg2[5]), (arg3[5]));
+ fiat_secp384r1_cmovznz_u32(&x13, x3, (arg2[6]), (arg3[6]));
+ fiat_secp384r1_cmovznz_u32(&x14, x3, (arg2[7]), (arg3[7]));
+ fiat_secp384r1_cmovznz_u32(&x15, x3, (arg2[8]), (arg3[8]));
+ fiat_secp384r1_cmovznz_u32(&x16, x3, (arg2[9]), (arg3[9]));
+ fiat_secp384r1_cmovznz_u32(&x17, x3, (arg2[10]), (arg3[10]));
+ fiat_secp384r1_cmovznz_u32(&x18, x3, (arg2[11]), (arg3[11]));
+ fiat_secp384r1_cmovznz_u32(&x19, x3, (arg2[12]), (arg3[12]));
+ fiat_secp384r1_addcarryx_u32(&x20, &x21, 0x0, 0x1, (~(arg2[0])));
+ fiat_secp384r1_addcarryx_u32(&x22, &x23, x21, 0x0, (~(arg2[1])));
+ fiat_secp384r1_addcarryx_u32(&x24, &x25, x23, 0x0, (~(arg2[2])));
+ fiat_secp384r1_addcarryx_u32(&x26, &x27, x25, 0x0, (~(arg2[3])));
+ fiat_secp384r1_addcarryx_u32(&x28, &x29, x27, 0x0, (~(arg2[4])));
+ fiat_secp384r1_addcarryx_u32(&x30, &x31, x29, 0x0, (~(arg2[5])));
+ fiat_secp384r1_addcarryx_u32(&x32, &x33, x31, 0x0, (~(arg2[6])));
+ fiat_secp384r1_addcarryx_u32(&x34, &x35, x33, 0x0, (~(arg2[7])));
+ fiat_secp384r1_addcarryx_u32(&x36, &x37, x35, 0x0, (~(arg2[8])));
+ fiat_secp384r1_addcarryx_u32(&x38, &x39, x37, 0x0, (~(arg2[9])));
+ fiat_secp384r1_addcarryx_u32(&x40, &x41, x39, 0x0, (~(arg2[10])));
+ fiat_secp384r1_addcarryx_u32(&x42, &x43, x41, 0x0, (~(arg2[11])));
+ fiat_secp384r1_addcarryx_u32(&x44, &x45, x43, 0x0, (~(arg2[12])));
+ fiat_secp384r1_cmovznz_u32(&x46, x3, (arg3[0]), x20);
+ fiat_secp384r1_cmovznz_u32(&x47, x3, (arg3[1]), x22);
+ fiat_secp384r1_cmovznz_u32(&x48, x3, (arg3[2]), x24);
+ fiat_secp384r1_cmovznz_u32(&x49, x3, (arg3[3]), x26);
+ fiat_secp384r1_cmovznz_u32(&x50, x3, (arg3[4]), x28);
+ fiat_secp384r1_cmovznz_u32(&x51, x3, (arg3[5]), x30);
+ fiat_secp384r1_cmovznz_u32(&x52, x3, (arg3[6]), x32);
+ fiat_secp384r1_cmovznz_u32(&x53, x3, (arg3[7]), x34);
+ fiat_secp384r1_cmovznz_u32(&x54, x3, (arg3[8]), x36);
+ fiat_secp384r1_cmovznz_u32(&x55, x3, (arg3[9]), x38);
+ fiat_secp384r1_cmovznz_u32(&x56, x3, (arg3[10]), x40);
+ fiat_secp384r1_cmovznz_u32(&x57, x3, (arg3[11]), x42);
+ fiat_secp384r1_cmovznz_u32(&x58, x3, (arg3[12]), x44);
+ fiat_secp384r1_cmovznz_u32(&x59, x3, (arg4[0]), (arg5[0]));
+ fiat_secp384r1_cmovznz_u32(&x60, x3, (arg4[1]), (arg5[1]));
+ fiat_secp384r1_cmovznz_u32(&x61, x3, (arg4[2]), (arg5[2]));
+ fiat_secp384r1_cmovznz_u32(&x62, x3, (arg4[3]), (arg5[3]));
+ fiat_secp384r1_cmovznz_u32(&x63, x3, (arg4[4]), (arg5[4]));
+ fiat_secp384r1_cmovznz_u32(&x64, x3, (arg4[5]), (arg5[5]));
+ fiat_secp384r1_cmovznz_u32(&x65, x3, (arg4[6]), (arg5[6]));
+ fiat_secp384r1_cmovznz_u32(&x66, x3, (arg4[7]), (arg5[7]));
+ fiat_secp384r1_cmovznz_u32(&x67, x3, (arg4[8]), (arg5[8]));
+ fiat_secp384r1_cmovznz_u32(&x68, x3, (arg4[9]), (arg5[9]));
+ fiat_secp384r1_cmovznz_u32(&x69, x3, (arg4[10]), (arg5[10]));
+ fiat_secp384r1_cmovznz_u32(&x70, x3, (arg4[11]), (arg5[11]));
+ fiat_secp384r1_addcarryx_u32(&x71, &x72, 0x0, x59, x59);
+ fiat_secp384r1_addcarryx_u32(&x73, &x74, x72, x60, x60);
+ fiat_secp384r1_addcarryx_u32(&x75, &x76, x74, x61, x61);
+ fiat_secp384r1_addcarryx_u32(&x77, &x78, x76, x62, x62);
+ fiat_secp384r1_addcarryx_u32(&x79, &x80, x78, x63, x63);
+ fiat_secp384r1_addcarryx_u32(&x81, &x82, x80, x64, x64);
+ fiat_secp384r1_addcarryx_u32(&x83, &x84, x82, x65, x65);
+ fiat_secp384r1_addcarryx_u32(&x85, &x86, x84, x66, x66);
+ fiat_secp384r1_addcarryx_u32(&x87, &x88, x86, x67, x67);
+ fiat_secp384r1_addcarryx_u32(&x89, &x90, x88, x68, x68);
+ fiat_secp384r1_addcarryx_u32(&x91, &x92, x90, x69, x69);
+ fiat_secp384r1_addcarryx_u32(&x93, &x94, x92, x70, x70);
+ fiat_secp384r1_subborrowx_u32(&x95, &x96, 0x0, x71, UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x97, &x98, x96, x73, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x99, &x100, x98, x75, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x101, &x102, x100, x77,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x103, &x104, x102, x79,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x105, &x106, x104, x81,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x107, &x108, x106, x83,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x109, &x110, x108, x85,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x111, &x112, x110, x87,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x113, &x114, x112, x89,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x115, &x116, x114, x91,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x117, &x118, x116, x93,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x119, &x120, x118, x94, 0x0);
+ x121 = (arg4[11]);
+ x122 = (arg4[10]);
+ x123 = (arg4[9]);
+ x124 = (arg4[8]);
+ x125 = (arg4[7]);
+ x126 = (arg4[6]);
+ x127 = (arg4[5]);
+ x128 = (arg4[4]);
+ x129 = (arg4[3]);
+ x130 = (arg4[2]);
+ x131 = (arg4[1]);
+ x132 = (arg4[0]);
+ fiat_secp384r1_subborrowx_u32(&x133, &x134, 0x0, 0x0, x132);
+ fiat_secp384r1_subborrowx_u32(&x135, &x136, x134, 0x0, x131);
+ fiat_secp384r1_subborrowx_u32(&x137, &x138, x136, 0x0, x130);
+ fiat_secp384r1_subborrowx_u32(&x139, &x140, x138, 0x0, x129);
+ fiat_secp384r1_subborrowx_u32(&x141, &x142, x140, 0x0, x128);
+ fiat_secp384r1_subborrowx_u32(&x143, &x144, x142, 0x0, x127);
+ fiat_secp384r1_subborrowx_u32(&x145, &x146, x144, 0x0, x126);
+ fiat_secp384r1_subborrowx_u32(&x147, &x148, x146, 0x0, x125);
+ fiat_secp384r1_subborrowx_u32(&x149, &x150, x148, 0x0, x124);
+ fiat_secp384r1_subborrowx_u32(&x151, &x152, x150, 0x0, x123);
+ fiat_secp384r1_subborrowx_u32(&x153, &x154, x152, 0x0, x122);
+ fiat_secp384r1_subborrowx_u32(&x155, &x156, x154, 0x0, x121);
+ fiat_secp384r1_cmovznz_u32(&x157, x156, 0x0, UINT32_C(0xffffffff));
+ fiat_secp384r1_addcarryx_u32(&x158, &x159, 0x0, x133, x157);
+ fiat_secp384r1_addcarryx_u32(&x160, &x161, x159, x135, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x162, &x163, x161, x137, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x164, &x165, x163, x139, x157);
+ fiat_secp384r1_addcarryx_u32(&x166, &x167, x165, x141,
+ (x157 & UINT32_C(0xfffffffe)));
+ fiat_secp384r1_addcarryx_u32(&x168, &x169, x167, x143, x157);
+ fiat_secp384r1_addcarryx_u32(&x170, &x171, x169, x145, x157);
+ fiat_secp384r1_addcarryx_u32(&x172, &x173, x171, x147, x157);
+ fiat_secp384r1_addcarryx_u32(&x174, &x175, x173, x149, x157);
+ fiat_secp384r1_addcarryx_u32(&x176, &x177, x175, x151, x157);
+ fiat_secp384r1_addcarryx_u32(&x178, &x179, x177, x153, x157);
+ fiat_secp384r1_addcarryx_u32(&x180, &x181, x179, x155, x157);
+ fiat_secp384r1_cmovznz_u32(&x182, x3, (arg5[0]), x158);
+ fiat_secp384r1_cmovznz_u32(&x183, x3, (arg5[1]), x160);
+ fiat_secp384r1_cmovznz_u32(&x184, x3, (arg5[2]), x162);
+ fiat_secp384r1_cmovznz_u32(&x185, x3, (arg5[3]), x164);
+ fiat_secp384r1_cmovznz_u32(&x186, x3, (arg5[4]), x166);
+ fiat_secp384r1_cmovznz_u32(&x187, x3, (arg5[5]), x168);
+ fiat_secp384r1_cmovznz_u32(&x188, x3, (arg5[6]), x170);
+ fiat_secp384r1_cmovznz_u32(&x189, x3, (arg5[7]), x172);
+ fiat_secp384r1_cmovznz_u32(&x190, x3, (arg5[8]), x174);
+ fiat_secp384r1_cmovznz_u32(&x191, x3, (arg5[9]), x176);
+ fiat_secp384r1_cmovznz_u32(&x192, x3, (arg5[10]), x178);
+ fiat_secp384r1_cmovznz_u32(&x193, x3, (arg5[11]), x180);
+ x194 = (fiat_secp384r1_uint1)(x46 & 0x1);
+ fiat_secp384r1_cmovznz_u32(&x195, x194, 0x0, x7);
+ fiat_secp384r1_cmovznz_u32(&x196, x194, 0x0, x8);
+ fiat_secp384r1_cmovznz_u32(&x197, x194, 0x0, x9);
+ fiat_secp384r1_cmovznz_u32(&x198, x194, 0x0, x10);
+ fiat_secp384r1_cmovznz_u32(&x199, x194, 0x0, x11);
+ fiat_secp384r1_cmovznz_u32(&x200, x194, 0x0, x12);
+ fiat_secp384r1_cmovznz_u32(&x201, x194, 0x0, x13);
+ fiat_secp384r1_cmovznz_u32(&x202, x194, 0x0, x14);
+ fiat_secp384r1_cmovznz_u32(&x203, x194, 0x0, x15);
+ fiat_secp384r1_cmovznz_u32(&x204, x194, 0x0, x16);
+ fiat_secp384r1_cmovznz_u32(&x205, x194, 0x0, x17);
+ fiat_secp384r1_cmovznz_u32(&x206, x194, 0x0, x18);
+ fiat_secp384r1_cmovznz_u32(&x207, x194, 0x0, x19);
+ fiat_secp384r1_addcarryx_u32(&x208, &x209, 0x0, x46, x195);
+ fiat_secp384r1_addcarryx_u32(&x210, &x211, x209, x47, x196);
+ fiat_secp384r1_addcarryx_u32(&x212, &x213, x211, x48, x197);
+ fiat_secp384r1_addcarryx_u32(&x214, &x215, x213, x49, x198);
+ fiat_secp384r1_addcarryx_u32(&x216, &x217, x215, x50, x199);
+ fiat_secp384r1_addcarryx_u32(&x218, &x219, x217, x51, x200);
+ fiat_secp384r1_addcarryx_u32(&x220, &x221, x219, x52, x201);
+ fiat_secp384r1_addcarryx_u32(&x222, &x223, x221, x53, x202);
+ fiat_secp384r1_addcarryx_u32(&x224, &x225, x223, x54, x203);
+ fiat_secp384r1_addcarryx_u32(&x226, &x227, x225, x55, x204);
+ fiat_secp384r1_addcarryx_u32(&x228, &x229, x227, x56, x205);
+ fiat_secp384r1_addcarryx_u32(&x230, &x231, x229, x57, x206);
+ fiat_secp384r1_addcarryx_u32(&x232, &x233, x231, x58, x207);
+ fiat_secp384r1_cmovznz_u32(&x234, x194, 0x0, x59);
+ fiat_secp384r1_cmovznz_u32(&x235, x194, 0x0, x60);
+ fiat_secp384r1_cmovznz_u32(&x236, x194, 0x0, x61);
+ fiat_secp384r1_cmovznz_u32(&x237, x194, 0x0, x62);
+ fiat_secp384r1_cmovznz_u32(&x238, x194, 0x0, x63);
+ fiat_secp384r1_cmovznz_u32(&x239, x194, 0x0, x64);
+ fiat_secp384r1_cmovznz_u32(&x240, x194, 0x0, x65);
+ fiat_secp384r1_cmovznz_u32(&x241, x194, 0x0, x66);
+ fiat_secp384r1_cmovznz_u32(&x242, x194, 0x0, x67);
+ fiat_secp384r1_cmovznz_u32(&x243, x194, 0x0, x68);
+ fiat_secp384r1_cmovznz_u32(&x244, x194, 0x0, x69);
+ fiat_secp384r1_cmovznz_u32(&x245, x194, 0x0, x70);
+ fiat_secp384r1_addcarryx_u32(&x246, &x247, 0x0, x182, x234);
+ fiat_secp384r1_addcarryx_u32(&x248, &x249, x247, x183, x235);
+ fiat_secp384r1_addcarryx_u32(&x250, &x251, x249, x184, x236);
+ fiat_secp384r1_addcarryx_u32(&x252, &x253, x251, x185, x237);
+ fiat_secp384r1_addcarryx_u32(&x254, &x255, x253, x186, x238);
+ fiat_secp384r1_addcarryx_u32(&x256, &x257, x255, x187, x239);
+ fiat_secp384r1_addcarryx_u32(&x258, &x259, x257, x188, x240);
+ fiat_secp384r1_addcarryx_u32(&x260, &x261, x259, x189, x241);
+ fiat_secp384r1_addcarryx_u32(&x262, &x263, x261, x190, x242);
+ fiat_secp384r1_addcarryx_u32(&x264, &x265, x263, x191, x243);
+ fiat_secp384r1_addcarryx_u32(&x266, &x267, x265, x192, x244);
+ fiat_secp384r1_addcarryx_u32(&x268, &x269, x267, x193, x245);
+ fiat_secp384r1_subborrowx_u32(&x270, &x271, 0x0, x246,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x272, &x273, x271, x248, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x274, &x275, x273, x250, 0x0);
+ fiat_secp384r1_subborrowx_u32(&x276, &x277, x275, x252,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x278, &x279, x277, x254,
+ UINT32_C(0xfffffffe));
+ fiat_secp384r1_subborrowx_u32(&x280, &x281, x279, x256,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x282, &x283, x281, x258,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x284, &x285, x283, x260,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x286, &x287, x285, x262,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x288, &x289, x287, x264,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x290, &x291, x289, x266,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x292, &x293, x291, x268,
+ UINT32_C(0xffffffff));
+ fiat_secp384r1_subborrowx_u32(&x294, &x295, x293, x269, 0x0);
+ fiat_secp384r1_addcarryx_u32(&x296, &x297, 0x0, x6, 0x1);
+ x298 = ((x208 >> 1) | ((x210 << 31) & UINT32_C(0xffffffff)));
+ x299 = ((x210 >> 1) | ((x212 << 31) & UINT32_C(0xffffffff)));
+ x300 = ((x212 >> 1) | ((x214 << 31) & UINT32_C(0xffffffff)));
+ x301 = ((x214 >> 1) | ((x216 << 31) & UINT32_C(0xffffffff)));
+ x302 = ((x216 >> 1) | ((x218 << 31) & UINT32_C(0xffffffff)));
+ x303 = ((x218 >> 1) | ((x220 << 31) & UINT32_C(0xffffffff)));
+ x304 = ((x220 >> 1) | ((x222 << 31) & UINT32_C(0xffffffff)));
+ x305 = ((x222 >> 1) | ((x224 << 31) & UINT32_C(0xffffffff)));
+ x306 = ((x224 >> 1) | ((x226 << 31) & UINT32_C(0xffffffff)));
+ x307 = ((x226 >> 1) | ((x228 << 31) & UINT32_C(0xffffffff)));
+ x308 = ((x228 >> 1) | ((x230 << 31) & UINT32_C(0xffffffff)));
+ x309 = ((x230 >> 1) | ((x232 << 31) & UINT32_C(0xffffffff)));
+ x310 = ((x232 & UINT32_C(0x80000000)) | (x232 >> 1));
+ fiat_secp384r1_cmovznz_u32(&x311, x120, x95, x71);
+ fiat_secp384r1_cmovznz_u32(&x312, x120, x97, x73);
+ fiat_secp384r1_cmovznz_u32(&x313, x120, x99, x75);
+ fiat_secp384r1_cmovznz_u32(&x314, x120, x101, x77);
+ fiat_secp384r1_cmovznz_u32(&x315, x120, x103, x79);
+ fiat_secp384r1_cmovznz_u32(&x316, x120, x105, x81);
+ fiat_secp384r1_cmovznz_u32(&x317, x120, x107, x83);
+ fiat_secp384r1_cmovznz_u32(&x318, x120, x109, x85);
+ fiat_secp384r1_cmovznz_u32(&x319, x120, x111, x87);
+ fiat_secp384r1_cmovznz_u32(&x320, x120, x113, x89);
+ fiat_secp384r1_cmovznz_u32(&x321, x120, x115, x91);
+ fiat_secp384r1_cmovznz_u32(&x322, x120, x117, x93);
+ fiat_secp384r1_cmovznz_u32(&x323, x295, x270, x246);
+ fiat_secp384r1_cmovznz_u32(&x324, x295, x272, x248);
+ fiat_secp384r1_cmovznz_u32(&x325, x295, x274, x250);
+ fiat_secp384r1_cmovznz_u32(&x326, x295, x276, x252);
+ fiat_secp384r1_cmovznz_u32(&x327, x295, x278, x254);
+ fiat_secp384r1_cmovznz_u32(&x328, x295, x280, x256);
+ fiat_secp384r1_cmovznz_u32(&x329, x295, x282, x258);
+ fiat_secp384r1_cmovznz_u32(&x330, x295, x284, x260);
+ fiat_secp384r1_cmovznz_u32(&x331, x295, x286, x262);
+ fiat_secp384r1_cmovznz_u32(&x332, x295, x288, x264);
+ fiat_secp384r1_cmovznz_u32(&x333, x295, x290, x266);
+ fiat_secp384r1_cmovznz_u32(&x334, x295, x292, x268);
+ *out1 = x296;
+ out2[0] = x7;
+ out2[1] = x8;
+ out2[2] = x9;
+ out2[3] = x10;
+ out2[4] = x11;
+ out2[5] = x12;
+ out2[6] = x13;
+ out2[7] = x14;
+ out2[8] = x15;
+ out2[9] = x16;
+ out2[10] = x17;
+ out2[11] = x18;
+ out2[12] = x19;
+ out3[0] = x298;
+ out3[1] = x299;
+ out3[2] = x300;
+ out3[3] = x301;
+ out3[4] = x302;
+ out3[5] = x303;
+ out3[6] = x304;
+ out3[7] = x305;
+ out3[8] = x306;
+ out3[9] = x307;
+ out3[10] = x308;
+ out3[11] = x309;
+ out3[12] = x310;
+ out4[0] = x311;
+ out4[1] = x312;
+ out4[2] = x313;
+ out4[3] = x314;
+ out4[4] = x315;
+ out4[5] = x316;
+ out4[6] = x317;
+ out4[7] = x318;
+ out4[8] = x319;
+ out4[9] = x320;
+ out4[10] = x321;
+ out4[11] = x322;
+ out5[0] = x323;
+ out5[1] = x324;
+ out5[2] = x325;
+ out5[3] = x326;
+ out5[4] = x327;
+ out5[5] = x328;
+ out5[6] = x329;
+ out5[7] = x330;
+ out5[8] = x331;
+ out5[9] = x332;
+ out5[10] = x333;
+ out5[11] = x334;
+}
+
+/* END verbatim fiat code */
+
+/* curve-related constants */
+/* const_one: twelve 32-bit limbs, least-significant limb first, encoding
+ * 2^128 + 2^96 - 2^32 + 1. That value equals 2^384 mod p for
+ * p = 2^384 - 2^128 - 2^96 + 2^32 - 1 (the limb values of const_psat).
+ * NOTE(review): presumably the Montgomery representation of 1 with
+ * R = 2^384 -- confirm against the field arithmetic used in this file. */
+static const limb_t const_one[12] = {
+ UINT32_C(0x00000001), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF),
+ UINT32_C(0x00000000), UINT32_C(0x00000001), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000)
+};
+/* const_b: twelve 32-bit limbs of a field constant.
+ * NOTE(review): by name this is presumably the curve coefficient b of
+ * y^2 = x^3 + a*x + b, stored least-significant limb first in the same
+ * (likely Montgomery) representation as const_one -- TODO confirm; the
+ * encoding cannot be checked from this table alone. */
+static const limb_t const_b[12] = {
+ UINT32_C(0x9D412DCC), UINT32_C(0x08118871), UINT32_C(0x7A4C32EC),
+ UINT32_C(0xF729ADD8), UINT32_C(0x1920022E), UINT32_C(0x77F2209B),
+ UINT32_C(0x94938AE2), UINT32_C(0xE3374BEE), UINT32_C(0x1F022094),
+ UINT32_C(0xB62B21F4), UINT32_C(0x604FBFF9), UINT32_C(0xCD08114B)
+};
+/* const_divstep: twelve 32-bit limbs of a precomputed field constant.
+ * NOTE(review): by name this is presumably the scaling factor applied to
+ * the output of the divstep-based inversion loop (see the fiat divstep
+ * routine above) -- its use is not visible here, so confirm at the call
+ * site before relying on this description. */
+static const limb_t const_divstep[12] = {
+ UINT32_C(0x00005000), UINT32_C(0xFFFFC800), UINT32_C(0xFFFF83FF),
+ UINT32_C(0xFFFFB3FF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFF7FF),
+ UINT32_C(0xFFFFEFFF), UINT32_C(0xFFFFEBFF), UINT32_C(0xFFFFF3FF),
+ UINT32_C(0x00000BFF), UINT32_C(0x00003000), UINT32_C(0x00005000)
+};
+/* const_psat: twelve 32-bit limbs, least-significant limb first, encoding
+ * p = 2^384 - 2^128 - 2^96 + 2^32 - 1. These are the same per-limb values
+ * that the fiat code above subtracts in its borrow chains before selecting
+ * the reduced or unreduced result.
+ * NOTE(review): "psat" presumably means p in saturated (full 32-bit limb)
+ * form -- confirm. */
+static const limb_t const_psat[12] = {
+ UINT32_C(0xFFFFFFFF), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFE), UINT32_C(0xFFFFFFFF),
+ UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF),
+ UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFF)
+};
+
+/* LUT for scalar multiplication by comb interleaving */
+static const pt_aff_t lut_cmb[21][16] = {
+ {
+ { { UINT32_C(0x49C0B528), UINT32_C(0x3DD07566), UINT32_C(0xA0D6CE38),
+ UINT32_C(0x20E378E2), UINT32_C(0x541B4D6E), UINT32_C(0x879C3AFC),
+ UINT32_C(0x59A30EFF), UINT32_C(0x64548684), UINT32_C(0x614EDE2B),
+ UINT32_C(0x812FF723), UINT32_C(0x299E1513), UINT32_C(0x4D3AADC2) },
+ { UINT32_C(0x4B03A4FE), UINT32_C(0x23043DAD), UINT32_C(0x7BB4A9AC),
+ UINT32_C(0xA1BFA8BF), UINT32_C(0x2E83B050), UINT32_C(0x8BADE756),
+ UINT32_C(0x68F4FFD9), UINT32_C(0xC6C35219), UINT32_C(0x3969A840),
+ UINT32_C(0xDD800226), UINT32_C(0x5A15C5E9), UINT32_C(0x2B78ABC2) } },
+ { { UINT32_C(0xC1DC4073), UINT32_C(0x05E4DBE6), UINT32_C(0xF04F779C),
+ UINT32_C(0xC54EA9FF), UINT32_C(0xA170CCF0), UINT32_C(0x6B2034E9),
+ UINT32_C(0xD51C6C3E), UINT32_C(0x3A48D732), UINT32_C(0x263AA470),
+ UINT32_C(0xE36F7E2D), UINT32_C(0xE7C1C3AC), UINT32_C(0xD283FE68) },
+ { UINT32_C(0xC04EE157), UINT32_C(0x7E284821), UINT32_C(0x7AE0E36D),
+ UINT32_C(0x92D789A7), UINT32_C(0x4EF67446), UINT32_C(0x132663C0),
+ UINT32_C(0xD2E1D0B4), UINT32_C(0x68012D5A), UINT32_C(0x5102B339),
+ UINT32_C(0xF6DB68B1), UINT32_C(0x983292AF), UINT32_C(0x465465FC) } },
+ { { UINT32_C(0x68F1F0DF), UINT32_C(0xBB595EBA), UINT32_C(0xCC873466),
+ UINT32_C(0xC185C0CB), UINT32_C(0x293C703B), UINT32_C(0x7F1EB1B5),
+ UINT32_C(0xAACC05E6), UINT32_C(0x60DB2CF5), UINT32_C(0xE2E8E4C6),
+ UINT32_C(0xC676B987), UINT32_C(0x1D178FFB), UINT32_C(0xE1BB26B1) },
+ { UINT32_C(0x7073FA21), UINT32_C(0x2B694BA0), UINT32_C(0x72F34566),
+ UINT32_C(0x22C16E2E), UINT32_C(0x01C35B99), UINT32_C(0x80B61B31),
+ UINT32_C(0x982C0411), UINT32_C(0x4B237FAF), UINT32_C(0x24DE236D),
+ UINT32_C(0xE6C59440), UINT32_C(0xE209E4A3), UINT32_C(0x4DB1C9D6) } },
+ { { UINT32_C(0x7D69222B), UINT32_C(0xDF13B9D1), UINT32_C(0x874774B1),
+ UINT32_C(0x4CE6415F), UINT32_C(0x211FAA95), UINT32_C(0x731EDCF8),
+ UINT32_C(0x659753ED), UINT32_C(0x5F4215D1), UINT32_C(0x9DB2DF55),
+ UINT32_C(0xF893DB58), UINT32_C(0x1C89025B), UINT32_C(0x932C9F81) },
+ { UINT32_C(0x7706A61E), UINT32_C(0x0996B220), UINT32_C(0xA8641C79),
+ UINT32_C(0x135349D5), UINT32_C(0x50130844), UINT32_C(0x65AAD76F),
+ UINT32_C(0x01FFF780), UINT32_C(0x0FF37C04), UINT32_C(0x693B0706),
+ UINT32_C(0xF57F238E), UINT32_C(0xAF6C9B3E), UINT32_C(0xD90A16B6) } },
+ { { UINT32_C(0x2353B92F), UINT32_C(0x2F5D200E), UINT32_C(0x3FD7E4F9),
+ UINT32_C(0xE35D8729), UINT32_C(0xA96D745D), UINT32_C(0x26094833),
+ UINT32_C(0x3CBFFF3F), UINT32_C(0xDC351DC1), UINT32_C(0xDAD54D6A),
+ UINT32_C(0x26D464C6), UINT32_C(0x53636C6A), UINT32_C(0x5CAB1D1D) },
+ { UINT32_C(0xB18EC0B0), UINT32_C(0xF2813072), UINT32_C(0xD742AA2F),
+ UINT32_C(0x3777E270), UINT32_C(0x033CA7C2), UINT32_C(0x27F061C7),
+ UINT32_C(0x68EAD0D8), UINT32_C(0xA6ECACCC), UINT32_C(0xEE69A754),
+ UINT32_C(0x7D9429F4), UINT32_C(0x31E8F5C6), UINT32_C(0xE7706334) } },
+ { { UINT32_C(0xB68B8C7D), UINT32_C(0xC7708B19), UINT32_C(0x44377ABA),
+ UINT32_C(0x4532077C), UINT32_C(0x6CDAD64F), UINT32_C(0x0DCC6770),
+ UINT32_C(0x147B6602), UINT32_C(0x01B8BF56), UINT32_C(0xF0561D79),
+ UINT32_C(0xF8D89885), UINT32_C(0x7BA9C437), UINT32_C(0x9C19E9FC) },
+ { UINT32_C(0xBDC4BA25), UINT32_C(0x764EB146), UINT32_C(0xAC144B83),
+ UINT32_C(0x604FE46B), UINT32_C(0x8A77E780), UINT32_C(0x3CE81329),
+ UINT32_C(0xFE9E682E), UINT32_C(0x2E070F36), UINT32_C(0x3A53287A),
+ UINT32_C(0x41821D0C), UINT32_C(0x3533F918), UINT32_C(0x9AA62F9F) } },
+ { { UINT32_C(0x75CCBDFB), UINT32_C(0x9B7AEB7E), UINT32_C(0xF6749A95),
+ UINT32_C(0xB25E28C5), UINT32_C(0x33B7D4AE), UINT32_C(0x8A7A8E46),
+ UINT32_C(0xD9C1BD56), UINT32_C(0xDB5203A8), UINT32_C(0xED22DF97),
+ UINT32_C(0xD2657265), UINT32_C(0x8CF23C94), UINT32_C(0xB51C56E1) },
+ { UINT32_C(0x6C3D812D), UINT32_C(0xF4D39459), UINT32_C(0x87CAE0C2),
+ UINT32_C(0xD8E88F1A), UINT32_C(0xCF4D0FE3), UINT32_C(0x789A2A48),
+ UINT32_C(0xFEC38D60), UINT32_C(0xB7FEAC2D), UINT32_C(0x3B490EC3),
+ UINT32_C(0x81FDBD1C), UINT32_C(0xCC6979E1), UINT32_C(0x4617ADB7) } },
+ { { UINT32_C(0x4709F4A9), UINT32_C(0x446AD888), UINT32_C(0xEC3DABD8),
+ UINT32_C(0x2B7210E2), UINT32_C(0x50E07B34), UINT32_C(0x83CCF195),
+ UINT32_C(0x789B3075), UINT32_C(0x59500917), UINT32_C(0xEB085993),
+ UINT32_C(0x0FC01FD4), UINT32_C(0x4903026B), UINT32_C(0xFB62D26F) },
+ { UINT32_C(0x6FE989BB), UINT32_C(0x2309CC9D), UINT32_C(0x144BD586),
+ UINT32_C(0x61609CBD), UINT32_C(0xDE06610C), UINT32_C(0x4B23D3A0),
+ UINT32_C(0xD898F470), UINT32_C(0xDDDC2866), UINT32_C(0x400C5797),
+ UINT32_C(0x8733FC41), UINT32_C(0xD0BC2716), UINT32_C(0x5A68C6FE) } },
+ { { UINT32_C(0x4B4A3CD0), UINT32_C(0x8903E130), UINT32_C(0x8FF1F43E),
+ UINT32_C(0x3EA4EA4C), UINT32_C(0xF655A10D), UINT32_C(0xE6FC3F2A),
+ UINT32_C(0x524FFEFC), UINT32_C(0x7BE3737D), UINT32_C(0x5330455E),
+ UINT32_C(0x9F692855), UINT32_C(0xE475CE70), UINT32_C(0x524F166E) },
+ { UINT32_C(0x6C12F055), UINT32_C(0x3FCC69CD), UINT32_C(0xD5B9C0DA),
+ UINT32_C(0x4E23B6FF), UINT32_C(0x336BF183), UINT32_C(0x49CE6993),
+ UINT32_C(0x4A54504A), UINT32_C(0xF87D6D85), UINT32_C(0xB3C2677A),
+ UINT32_C(0x25EB5DF1), UINT32_C(0x55B164C9), UINT32_C(0xAC37986F) } },
+ { { UINT32_C(0xBAA84C08), UINT32_C(0x82A2ED4A), UINT32_C(0x41A8C912),
+ UINT32_C(0x22C4CC5F), UINT32_C(0x154AAD5E), UINT32_C(0xCA109C3B),
+ UINT32_C(0xFC38538E), UINT32_C(0x23891298), UINT32_C(0x539802AE),
+ UINT32_C(0xB3B6639C), UINT32_C(0x0390D706), UINT32_C(0xFA0F1F45) },
+ { UINT32_C(0xB0DC21D0), UINT32_C(0x46B78E5D), UINT32_C(0xC3DA2EAC),
+ UINT32_C(0xA8C72D3C), UINT32_C(0x6FF2F643), UINT32_C(0x9170B378),
+ UINT32_C(0xB67F30C3), UINT32_C(0x3F5A799B), UINT32_C(0x8264B672),
+ UINT32_C(0x15D1DC77), UINT32_C(0xE9577764), UINT32_C(0xA1D47B23) } },
+ { { UINT32_C(0x0422CE2F), UINT32_C(0x08265E51), UINT32_C(0xDD2F9E21),
+ UINT32_C(0x88E0D496), UINT32_C(0x6177F75D), UINT32_C(0x30128AA0),
+ UINT32_C(0xBD9EBE69), UINT32_C(0x2E59AB62), UINT32_C(0x5DF0E537),
+ UINT32_C(0x1B1A0F6C), UINT32_C(0xDAC012B5), UINT32_C(0xAB16C626) },
+ { UINT32_C(0x008C5DE7), UINT32_C(0x8014214B), UINT32_C(0x38F17BEA),
+ UINT32_C(0xAA740A9E), UINT32_C(0x8A149098), UINT32_C(0x262EBB49),
+ UINT32_C(0x8527CD59), UINT32_C(0xB454111E), UINT32_C(0xACEA5817),
+ UINT32_C(0x266AD15A), UINT32_C(0x1353CCBA), UINT32_C(0x21824F41) } },
+ { { UINT32_C(0x12E3683B), UINT32_C(0xD1B4E74D), UINT32_C(0x569B8EF6),
+ UINT32_C(0x990ED20B), UINT32_C(0x429C0A18), UINT32_C(0xB9D3DD25),
+ UINT32_C(0x2A351783), UINT32_C(0x1C75B8AB), UINT32_C(0x905432F0),
+ UINT32_C(0x61E4CA2B), UINT32_C(0xEEA8F224), UINT32_C(0x80826A69) },
+ { UINT32_C(0xEC52ABAD), UINT32_C(0x7FC33A6B), UINT32_C(0xA65E4813),
+ UINT32_C(0x0BCCA3F0), UINT32_C(0xA527CEBE), UINT32_C(0x7AD8A132),
+ UINT32_C(0xEAF22C7E), UINT32_C(0xF0138950), UINT32_C(0x566718C1),
+ UINT32_C(0x282D2437), UINT32_C(0xE2212559), UINT32_C(0x9DFCCB0D) } },
+ { { UINT32_C(0x58CE3B83), UINT32_C(0x1E937227), UINT32_C(0x3CB3FB36),
+ UINT32_C(0xBB280DFA), UINT32_C(0xE2BE174A), UINT32_C(0x57D0F3D2),
+ UINT32_C(0x208ABE1E), UINT32_C(0x9BD51B99), UINT32_C(0xDE248024),
+ UINT32_C(0x3809AB50), UINT32_C(0xA5BB7331), UINT32_C(0xC29C6E2C) },
+ { UINT32_C(0x61124F05), UINT32_C(0x9944FD2E), UINT32_C(0x9009E391),
+ UINT32_C(0x83CCBC4E), UINT32_C(0x9424A3CC), UINT32_C(0x01628F05),
+ UINT32_C(0xEA8E4344), UINT32_C(0xD6A2F51D), UINT32_C(0x4CEBC96E),
+ UINT32_C(0xDA3E1A3D), UINT32_C(0xE97809DC), UINT32_C(0x1FE6FB42) } },
+ { { UINT32_C(0x467D66E4), UINT32_C(0xA04482D2), UINT32_C(0x4D78291D),
+ UINT32_C(0xCF191293), UINT32_C(0x482396F9), UINT32_C(0x8E0D4168),
+ UINT32_C(0xD18F14D0), UINT32_C(0x7228E2D5), UINT32_C(0x9C6A58FE),
+ UINT32_C(0x2F7E8D50), UINT32_C(0x373E5AEC), UINT32_C(0xE8CA780E) },
+ { UINT32_C(0x1B68E9F8), UINT32_C(0x42AAD1D6), UINT32_C(0x69E2F8F4),
+ UINT32_C(0x58A6D7F5), UINT32_C(0x31DA1BEA), UINT32_C(0xD779ADFE),
+ UINT32_C(0x38C85A85), UINT32_C(0x7D265406), UINT32_C(0xD44D3CDF),
+ UINT32_C(0x67E67195), UINT32_C(0xC5134ED7), UINT32_C(0x17820A0B) } },
+ { { UINT32_C(0xD3021470), UINT32_C(0x019D6AC5), UINT32_C(0x780443D6),
+ UINT32_C(0x25846B66), UINT32_C(0x55C97647), UINT32_C(0xCE3C15ED),
+ UINT32_C(0x0E3FEB0F), UINT32_C(0x3DC22D49), UINT32_C(0xA7DF26E4),
+ UINT32_C(0x2065B7CB), UINT32_C(0x187CEA1F), UINT32_C(0xC8B00AE8) },
+ { UINT32_C(0x865DDED3), UINT32_C(0x1A5284A0), UINT32_C(0x20C83DE2),
+ UINT32_C(0x293C1649), UINT32_C(0xCCE851B3), UINT32_C(0xAB178D26),
+ UINT32_C(0x404505FB), UINT32_C(0x8E6DB10B), UINT32_C(0x90C82033),
+ UINT32_C(0xF6F57E71), UINT32_C(0x5977F16C), UINT32_C(0x1D2A1C01) } },
+ { { UINT32_C(0x7C8906A4), UINT32_C(0xA39C8931), UINT32_C(0x9E821EE6),
+ UINT32_C(0xB6E7ECDD), UINT32_C(0xF0DF4FE6), UINT32_C(0x2ECF8340),
+ UINT32_C(0x53C14965), UINT32_C(0xD42F7DC9), UINT32_C(0xE3BA8285),
+ UINT32_C(0x1AFB51A3), UINT32_C(0x0A3305D1), UINT32_C(0x6C07C404) },
+ { UINT32_C(0x127FC1DA), UINT32_C(0xDAB83288), UINT32_C(0x374C4B08),
+ UINT32_C(0xBC0A699B), UINT32_C(0x42EB20DD), UINT32_C(0x402A9BAB),
+ UINT32_C(0x045A7A1C), UINT32_C(0xD7DD464F), UINT32_C(0x36BEECC4),
+ UINT32_C(0x5B3D0D6D), UINT32_C(0x6398A19D), UINT32_C(0x475A3E75) } },
+ },
+ {
+ { { UINT32_C(0x72876AE8), UINT32_C(0x31BDB483), UINT32_C(0x961ED1BF),
+ UINT32_C(0xE3325D98), UINT32_C(0x9B6FC64D), UINT32_C(0x18C04246),
+ UINT32_C(0x15786B8C), UINT32_C(0x0DCC15FA), UINT32_C(0x8E63DA4A),
+ UINT32_C(0x81ACDB06), UINT32_C(0xDADA70FB), UINT32_C(0xD3A4B643) },
+ { UINT32_C(0xDEA424EB), UINT32_C(0x46361AFE), UINT32_C(0x89B92970),
+ UINT32_C(0xDC2D2CAE), UINT32_C(0x615694E6), UINT32_C(0xF389B61B),
+ UINT32_C(0x872951D2), UINT32_C(0x7036DEF1), UINT32_C(0xD93BADC7),
+ UINT32_C(0x40FD3BDA), UINT32_C(0x380A68D3), UINT32_C(0x45AB6321) } },
+ { { UINT32_C(0x81A2703A), UINT32_C(0x23C1F744), UINT32_C(0xB9859136),
+ UINT32_C(0x1A5D075C), UINT32_C(0x5AFD1BFD), UINT32_C(0xA4F82C9D),
+ UINT32_C(0xF89D76FE), UINT32_C(0xA3D1E9A4), UINT32_C(0x75702F80),
+ UINT32_C(0x964F7050), UINT32_C(0xF56C089D), UINT32_C(0x182BF349) },
+ { UINT32_C(0xBE0DA6E1), UINT32_C(0xE205FA8F), UINT32_C(0x0A40F8F3),
+ UINT32_C(0x32905EB9), UINT32_C(0x356D4395), UINT32_C(0x331A1004),
+ UINT32_C(0xFDBBDFDE), UINT32_C(0x58B78901), UINT32_C(0x9BA00E71),
+ UINT32_C(0xA52A1597), UINT32_C(0x55497A30), UINT32_C(0xE0092E1F) } },
+ { { UINT32_C(0x70EE8F39), UINT32_C(0x5562A856), UINT32_C(0x64E52A9C),
+ UINT32_C(0x86B0C117), UINT32_C(0x09C75B8C), UINT32_C(0xC19F3174),
+ UINT32_C(0x24923F80), UINT32_C(0x21C7CC31), UINT32_C(0x8F5B291E),
+ UINT32_C(0xE63FE47F), UINT32_C(0x0DC08B05), UINT32_C(0x3D6D3C05) },
+ { UINT32_C(0xEE0C39A1), UINT32_C(0x58AE455E), UINT32_C(0x0AD97942),
+ UINT32_C(0x78BEA431), UINT32_C(0x3EE3989C), UINT32_C(0x42C7C97F),
+ UINT32_C(0xF38759AE), UINT32_C(0xC1B03AF5), UINT32_C(0xBCF46899),
+ UINT32_C(0x1A673C75), UINT32_C(0x8D508C7D), UINT32_C(0x4831B7D3) } },
+ { { UINT32_C(0xC552E354), UINT32_C(0x76512D1B), UINT32_C(0x273020FD),
+ UINT32_C(0x2B7EB6DF), UINT32_C(0x025A5F25), UINT32_C(0xD1C73AA8),
+ UINT32_C(0x5CBD2A40), UINT32_C(0x2ABA1929), UINT32_C(0xC88D61C6),
+ UINT32_C(0xB53CADC3), UINT32_C(0x098290F3), UINT32_C(0x7E66A95E) },
+ { UINT32_C(0xAF4C5073), UINT32_C(0x72800ECB), UINT32_C(0x9DC63FAF),
+ UINT32_C(0x81F2725E), UINT32_C(0x282BA9D1), UINT32_C(0x14BF92A7),
+ UINT32_C(0xBD5F1BB2), UINT32_C(0x90629672), UINT32_C(0xA97C6C96),
+ UINT32_C(0x362F68EB), UINT32_C(0x7EA9D601), UINT32_C(0xB1D3BB8B) } },
+ { { UINT32_C(0xA9C94429), UINT32_C(0x73878F7F), UINT32_C(0x456CA6D8),
+ UINT32_C(0xB35C3BC8), UINT32_C(0xF721923A), UINT32_C(0xD96F0B3C),
+ UINT32_C(0xE6D44FA1), UINT32_C(0x28D8F06C), UINT32_C(0xD5CD671A),
+ UINT32_C(0x94EFDCDC), UINT32_C(0x3F97D481), UINT32_C(0x0299AB93) },
+ { UINT32_C(0x2FD1D324), UINT32_C(0xB7CED6EA), UINT32_C(0x7E932EC2),
+ UINT32_C(0xBD683208), UINT32_C(0xCB755A6E), UINT32_C(0x24ED31FB),
+ UINT32_C(0xE48781D2), UINT32_C(0xA636098E), UINT32_C(0xF0A4F297),
+ UINT32_C(0x8687C63C), UINT32_C(0x07478526), UINT32_C(0xBB523440) } },
+ { { UINT32_C(0x34124B56), UINT32_C(0x2E5F7419), UINT32_C(0x4B3F02CA),
+ UINT32_C(0x1F223AE1), UINT32_C(0xE8336C7E), UINT32_C(0x6345B427),
+ UINT32_C(0xF5D0E3D0), UINT32_C(0x92123E16), UINT32_C(0x45E79F3A),
+ UINT32_C(0xDAF0D14D), UINT32_C(0x6F3BD0C6), UINT32_C(0x6ACA6765) },
+ { UINT32_C(0x403813F4), UINT32_C(0xF6169FAB), UINT32_C(0x334A4C59),
+ UINT32_C(0x31DC39C0), UINT32_C(0xD589866D), UINT32_C(0x74C46753),
+ UINT32_C(0x984C6A5D), UINT32_C(0x5741511D), UINT32_C(0x97FED2D3),
+ UINT32_C(0xF2631287), UINT32_C(0x11614886), UINT32_C(0x5687CA1B) } },
+ { { UINT32_C(0x33836D4B), UINT32_C(0x076D902A), UINT32_C(0x24AFB557),
+ UINT32_C(0xEC6C5C43), UINT32_C(0xA0516A0F), UINT32_C(0xA0FE2D1C),
+ UINT32_C(0x00D22ECC), UINT32_C(0x6FB8D737), UINT32_C(0xDAF1D7B3),
+ UINT32_C(0xF1DE9077), UINT32_C(0xD4C0C1EB), UINT32_C(0xE4695F77) },
+ { UINT32_C(0xB4375573), UINT32_C(0x5F0FD8A8), UINT32_C(0x5E50944F),
+ UINT32_C(0x76238359), UINT32_C(0x635CD76F), UINT32_C(0x65EA2F28),
+ UINT32_C(0x25FDE7B0), UINT32_C(0x08547769), UINT32_C(0x51944304),
+ UINT32_C(0xB2345A2E), UINT32_C(0xA16C980D), UINT32_C(0x86EFA2F7) } },
+ { { UINT32_C(0xBF4D1D63), UINT32_C(0x4CCBE2D0), UINT32_C(0x397366D5),
+ UINT32_C(0x32E33401), UINT32_C(0x71BDA2CE), UINT32_C(0xC83AFDDE),
+ UINT32_C(0x478ED9E6), UINT32_C(0x8DACE2AC), UINT32_C(0x763FDD9E),
+ UINT32_C(0x3AC6A559), UINT32_C(0xB398558F), UINT32_C(0x0FFDB04C) },
+ { UINT32_C(0xAFB9D6B8), UINT32_C(0x6C1B99B2), UINT32_C(0x27F815DD),
+ UINT32_C(0x572BA39C), UINT32_C(0x0DBCF842), UINT32_C(0x9DE73EE7),
+ UINT32_C(0x29267B88), UINT32_C(0x2A3ED589), UINT32_C(0x15EBBBB3),
+ UINT32_C(0xD46A7FD3), UINT32_C(0xE29400C7), UINT32_C(0xD1D01863) } },
+ { { UINT32_C(0xE1F89EC5), UINT32_C(0x8FB101D1), UINT32_C(0xF8508042),
+ UINT32_C(0xB87A1F53), UINT32_C(0x0ED7BEEF), UINT32_C(0x28C8DB24),
+ UINT32_C(0xACE8660A), UINT32_C(0x3940F845), UINT32_C(0xC6D453FD),
+ UINT32_C(0x4EACB619), UINT32_C(0x2BAD6160), UINT32_C(0x2E044C98) },
+ { UINT32_C(0x80B16C02), UINT32_C(0x87928548), UINT32_C(0xC0A9EB64),
+ UINT32_C(0xF0D4BEB3), UINT32_C(0xC183C195), UINT32_C(0xD785B4AF),
+ UINT32_C(0x5E6C46EA), UINT32_C(0x23AAB0E6), UINT32_C(0xA930FECA),
+ UINT32_C(0x30F7E104), UINT32_C(0xD55C10FB), UINT32_C(0x6A1A7B8B) } },
+ { { UINT32_C(0xDBFED1AA), UINT32_C(0xDA74EAEB), UINT32_C(0xDF0B025C),
+ UINT32_C(0xC8A59223), UINT32_C(0xD5B627F7), UINT32_C(0x7EF7DC85),
+ UINT32_C(0x197D7624), UINT32_C(0x02A13AE1), UINT32_C(0x2F785A9B),
+ UINT32_C(0x119E9BE1), UINT32_C(0x00D6B219), UINT32_C(0xC0B7572F) },
+ { UINT32_C(0x6D4CAF30), UINT32_C(0x9B1E5126), UINT32_C(0x0A840BD1),
+ UINT32_C(0xA16A5117), UINT32_C(0x0E9CCF43), UINT32_C(0x5BE17B91),
+ UINT32_C(0x69CF2C9C), UINT32_C(0x5BDBEDDD), UINT32_C(0x4CF4F289),
+ UINT32_C(0x9FFBFBCF), UINT32_C(0x6C355CE9), UINT32_C(0xE1A62183) } },
+ { { UINT32_C(0xA7B2FCCF), UINT32_C(0x056199D9), UINT32_C(0xCE1D784E),
+ UINT32_C(0x51F2E7B6), UINT32_C(0x339E2FF0), UINT32_C(0xA1D09C47),
+ UINT32_C(0xB836D0A9), UINT32_C(0xC8E64890), UINT32_C(0xC0D07EBE),
+ UINT32_C(0x2F781DCB), UINT32_C(0x3ACF934C), UINT32_C(0x5CF3C2AD) },
+ { UINT32_C(0xA17E26AE), UINT32_C(0xE55DB190), UINT32_C(0x91245513),
+ UINT32_C(0xC9C61E1F), UINT32_C(0x61998C15), UINT32_C(0x83D7E6CF),
+ UINT32_C(0xE41D38E3), UINT32_C(0x4DB33C85), UINT32_C(0xC2FEE43D),
+ UINT32_C(0x74D5F91D), UINT32_C(0x36BBC826), UINT32_C(0x7EBBDB45) } },
+ { { UINT32_C(0xCB655A9D), UINT32_C(0xE20EC7E9), UINT32_C(0x5C47D421),
+ UINT32_C(0x4977EB92), UINT32_C(0x3B9D72FA), UINT32_C(0xA237E12C),
+ UINT32_C(0xCBF7B145), UINT32_C(0xCAAEDBC1), UINT32_C(0x3B77AAA3),
+ UINT32_C(0x5200F5B2), UINT32_C(0xBDBE5380), UINT32_C(0x32EDED55) },
+ { UINT32_C(0xE7C9B80A), UINT32_C(0x74E38A40), UINT32_C(0xAB6DE911),
+ UINT32_C(0x3A3F0CF8), UINT32_C(0xAD16AAF0), UINT32_C(0x56DCDD7A),
+ UINT32_C(0x8E861D5E), UINT32_C(0x3D292449), UINT32_C(0x985733E2),
+ UINT32_C(0xD6C61878), UINT32_C(0x6AA6CD5B), UINT32_C(0x2401FE7D) } },
+ { { UINT32_C(0xB42E3686), UINT32_C(0xABB3DC75), UINT32_C(0xB4C57E61),
+ UINT32_C(0xAE712419), UINT32_C(0xB21B009B), UINT32_C(0x2C565F72),
+ UINT32_C(0x710C3699), UINT32_C(0xA5F1DA2E), UINT32_C(0xA5EBA59A),
+ UINT32_C(0x771099A0), UINT32_C(0xC10017A0), UINT32_C(0x4DA88F4A) },
+ { UINT32_C(0x1927B56D), UINT32_C(0x987FFFD3), UINT32_C(0xC4E33478),
+ UINT32_C(0xB98CB8EC), UINT32_C(0xC2248166), UINT32_C(0xB224A971),
+ UINT32_C(0xDE1DC794), UINT32_C(0x5470F554), UINT32_C(0xE31FF983),
+ UINT32_C(0xD747CC24), UINT32_C(0xB5B22DAE), UINT32_C(0xB91745E9) } },
+ { { UINT32_C(0x72F34420), UINT32_C(0x6CCBFED0), UINT32_C(0xA53039D2),
+ UINT32_C(0x95045E4D), UINT32_C(0x5A793944), UINT32_C(0x3B6C1154),
+ UINT32_C(0xDDB6B799), UINT32_C(0xAA114145), UINT32_C(0x252B7637),
+ UINT32_C(0xABC15CA4), UINT32_C(0xA5744634), UINT32_C(0x5745A35B) },
+ { UINT32_C(0xDA596FC0), UINT32_C(0x05DC6BDE), UINT32_C(0xA8020881),
+ UINT32_C(0xCD52C18C), UINT32_C(0xD296BAD0), UINT32_C(0x03FA9F47),
+ UINT32_C(0x7268E139), UINT32_C(0xD8E2C129), UINT32_C(0x9EC450B0),
+ UINT32_C(0x58C1A98D), UINT32_C(0xDE48B20D), UINT32_C(0x909638DA) } },
+ { { UINT32_C(0x9B7F8311), UINT32_C(0x7AFC30D4), UINT32_C(0x42368EA3),
+ UINT32_C(0x82A00422), UINT32_C(0x6F5F9865), UINT32_C(0xBFF95198),
+ UINT32_C(0xFC0A070F), UINT32_C(0x9B24F612), UINT32_C(0x620F489D),
+ UINT32_C(0x22C06CF2), UINT32_C(0x780F7DBB), UINT32_C(0x3C7ED052) },
+ { UINT32_C(0x34DAFE9B), UINT32_C(0xDB87AB18), UINT32_C(0x9C4BBCA1),
+ UINT32_C(0x20C03B40), UINT32_C(0x59A42341), UINT32_C(0x5D718CF0),
+ UINT32_C(0x69E84538), UINT32_C(0x98631706), UINT32_C(0xD27D64E1),
+ UINT32_C(0x5557192B), UINT32_C(0xDA822766), UINT32_C(0x08B4EC52) } },
+ { { UINT32_C(0xD66C1A59), UINT32_C(0xB2D986F6), UINT32_C(0x78E0E423),
+ UINT32_C(0x927DEB16), UINT32_C(0x49C3DEDC), UINT32_C(0x9E673CDE),
+ UINT32_C(0xF7ECB6CF), UINT32_C(0xFA362D84), UINT32_C(0x1BA17340),
+ UINT32_C(0x078E5F40), UINT32_C(0x1F4E489C), UINT32_C(0x934CA5D1) },
+ { UINT32_C(0x64EEF493), UINT32_C(0xC03C0731), UINT32_C(0xD7931A7E),
+ UINT32_C(0x631A353B), UINT32_C(0x65DD74F1), UINT32_C(0x8E7CC3BB),
+ UINT32_C(0x702676A5), UINT32_C(0xD55864C5), UINT32_C(0x439F04BD),
+ UINT32_C(0x6D306AC4), UINT32_C(0x2BAFED57), UINT32_C(0x58544F67) } },
+ },
+ {
+ { { UINT32_C(0xEC074AEA), UINT32_C(0xB083BA6A), UINT32_C(0x7F0B505B),
+ UINT32_C(0x46FAC5EF), UINT32_C(0xFC82DC03), UINT32_C(0x95367A21),
+ UINT32_C(0x9D3679D8), UINT32_C(0x227BE26A), UINT32_C(0x7E9724C0),
+ UINT32_C(0xC70F6D6C), UINT32_C(0xF9EBEC0F), UINT32_C(0xCD68C757) },
+ { UINT32_C(0x8FF321B2), UINT32_C(0x29DDE03E), UINT32_C(0x031939DC),
+ UINT32_C(0xF84AD7BB), UINT32_C(0x0F602F4B), UINT32_C(0xDAF590C9),
+ UINT32_C(0x49722BC4), UINT32_C(0x17C52888), UINT32_C(0x089B22B6),
+ UINT32_C(0xA8DF99F0), UINT32_C(0xE59B9B90), UINT32_C(0xC21BC5D4) } },
+ { { UINT32_C(0x8A31973F), UINT32_C(0x4936C6A0), UINT32_C(0x83B8C205),
+ UINT32_C(0x54D442FA), UINT32_C(0x5714F2C6), UINT32_C(0x03AEE8B4),
+ UINT32_C(0x3F5AC25A), UINT32_C(0x139BD692), UINT32_C(0xB5B33794),
+ UINT32_C(0x6A2E42BA), UINT32_C(0x3FF7BBA9), UINT32_C(0x50FA1164) },
+ { UINT32_C(0xF7E2C099), UINT32_C(0xB61D8643), UINT32_C(0xBD5C6637),
+ UINT32_C(0x2366C993), UINT32_C(0x72EB77FA), UINT32_C(0x62110E14),
+ UINT32_C(0x3B99C635), UINT32_C(0x3D5B96F1), UINT32_C(0xF674C9F2),
+ UINT32_C(0x956ECF64), UINT32_C(0xEF2BA250), UINT32_C(0xC56F7E51) } },
+ { { UINT32_C(0xFF602C1B), UINT32_C(0x246FFCB6), UINT32_C(0x6E1258E0),
+ UINT32_C(0x1E1A1D74), UINT32_C(0x250E6676), UINT32_C(0xB4B43AE2),
+ UINT32_C(0x924CE5FA), UINT32_C(0x95C1B5F0), UINT32_C(0xEBD8C776),
+ UINT32_C(0x2555795B), UINT32_C(0xACD9D9D0), UINT32_C(0x4C1E03DC) },
+ { UINT32_C(0x9CE90C61), UINT32_C(0xE1D74AA6), UINT32_C(0xA9C4B9F9),
+ UINT32_C(0xA88C0769), UINT32_C(0x95AF56DE), UINT32_C(0xDF74DF27),
+ UINT32_C(0xB331B6F4), UINT32_C(0x24B10C5F), UINT32_C(0x6559E137),
+ UINT32_C(0xB0A6DF9A), UINT32_C(0xC06637F2), UINT32_C(0x6ACC1B8F) } },
+ { { UINT32_C(0x34B4E381), UINT32_C(0xBD8C0868), UINT32_C(0x30DFF271),
+ UINT32_C(0x278CACC7), UINT32_C(0x02459389), UINT32_C(0x87ED12DE),
+ UINT32_C(0xDEF840B6), UINT32_C(0x3F7D98FF), UINT32_C(0x5F0B56E1),
+ UINT32_C(0x71EEE0CB), UINT32_C(0xD8D9BE87), UINT32_C(0x462B5C9B) },
+ { UINT32_C(0x98094C0F), UINT32_C(0xE6B50B5A), UINT32_C(0x508C67CE),
+ UINT32_C(0x26F3B274), UINT32_C(0x7CB1F992), UINT32_C(0x418B1BD1),
+ UINT32_C(0x4FF11827), UINT32_C(0x607818ED), UINT32_C(0x9B042C63),
+ UINT32_C(0xE630D93A), UINT32_C(0x8C779AE3), UINT32_C(0x38B9EFF3) } },
+ { { UINT32_C(0x729C5431), UINT32_C(0xE8767D36), UINT32_C(0xBB94642C),
+ UINT32_C(0xA8BD07C0), UINT32_C(0x58F2E5B2), UINT32_C(0x0C11FC8E),
+ UINT32_C(0x547533FE), UINT32_C(0xD8912D48), UINT32_C(0x230D91FB),
+ UINT32_C(0xAAE14F5E), UINT32_C(0x676DFBA0), UINT32_C(0xC122051A) },
+ { UINT32_C(0x5EA93078), UINT32_C(0x9ED4501F), UINT32_C(0xBD4BEE0A),
+ UINT32_C(0x2758515C), UINT32_C(0x94D21F52), UINT32_C(0x97733C6C),
+ UINT32_C(0x4AD306A2), UINT32_C(0x139BCD6D), UINT32_C(0x298123CC),
+ UINT32_C(0x0AAECBDC), UINT32_C(0x1CB7C7C9), UINT32_C(0x102B8A31) } },
+ { { UINT32_C(0xFAF46675), UINT32_C(0x22A28E59), UINT32_C(0x10A31E7D),
+ UINT32_C(0x10757308), UINT32_C(0x2B4C2F4F), UINT32_C(0xC7EEAC84),
+ UINT32_C(0xB5EF5184), UINT32_C(0xBA370148), UINT32_C(0x8732E055),
+ UINT32_C(0x4A5A2866), UINT32_C(0xB887C36F), UINT32_C(0x14B8DCDC) },
+ { UINT32_C(0x433F093D), UINT32_C(0xDBA8C85C), UINT32_C(0x1C9A201C),
+ UINT32_C(0x73DF549D), UINT32_C(0x70F927D8), UINT32_C(0x69AA0D7B),
+ UINT32_C(0xD7D2493A), UINT32_C(0xFA3A8685), UINT32_C(0x0A7F4013),
+ UINT32_C(0x6F48A255), UINT32_C(0xDD393067), UINT32_C(0xD20C8BF9) } },
+ { { UINT32_C(0x81625E78), UINT32_C(0x4EC874EA), UINT32_C(0x3FBE9267),
+ UINT32_C(0x8B8D8B5A), UINT32_C(0x9421EC2F), UINT32_C(0xA3D9D164),
+ UINT32_C(0x880EA295), UINT32_C(0x490E92D9), UINT32_C(0xD8F3B6DA),
+ UINT32_C(0x745D1EDC), UINT32_C(0x8F18BA03), UINT32_C(0x0116628B) },
+ { UINT32_C(0x834EADCE), UINT32_C(0x0FF6BCE0), UINT32_C(0x000827F7),
+ UINT32_C(0x464697F2), UINT32_C(0x498D724E), UINT32_C(0x08DCCF84),
+ UINT32_C(0x1E88304C), UINT32_C(0x7896D365), UINT32_C(0x135E3622),
+ UINT32_C(0xE63EBCCE), UINT32_C(0xDC007521), UINT32_C(0xFB942E8E) } },
+ { { UINT32_C(0xA3688621), UINT32_C(0xBB155A66), UINT32_C(0xF91B52A3),
+ UINT32_C(0xED2FD7CD), UINT32_C(0xEA20CB88), UINT32_C(0x52798F5D),
+ UINT32_C(0x373F7DD8), UINT32_C(0x069CE105), UINT32_C(0x8CA78F6B),
+ UINT32_C(0xF9392EC7), UINT32_C(0x6B335169), UINT32_C(0xB3013E25) },
+ { UINT32_C(0x6B11715C), UINT32_C(0x1D92F800), UINT32_C(0xFF9DC464),
+ UINT32_C(0xADD4050E), UINT32_C(0x8465B84A), UINT32_C(0x2AC22659),
+ UINT32_C(0x465B2BD6), UINT32_C(0x2729D646), UINT32_C(0xE4EFF9DD),
+ UINT32_C(0x6202344A), UINT32_C(0xCD9B90B9), UINT32_C(0x51F3198F) } },
+ { { UINT32_C(0xE5F0AE1D), UINT32_C(0x17CE54EF), UINT32_C(0xB09852AF),
+ UINT32_C(0x984E8204), UINT32_C(0xC4B27A71), UINT32_C(0x3365B37A),
+ UINT32_C(0xA00E0A9C), UINT32_C(0x720E3152), UINT32_C(0x925BD606),
+ UINT32_C(0x3692F70D), UINT32_C(0x7BC7E9AB), UINT32_C(0xBE6E699D) },
+ { UINT32_C(0x4C89A3C0), UINT32_C(0xD75C041F), UINT32_C(0x8DC100C0),
+ UINT32_C(0x8B9F592D), UINT32_C(0xAD228F71), UINT32_C(0x30750F3A),
+ UINT32_C(0xE8B17A11), UINT32_C(0x1B9ECF84), UINT32_C(0x0FBFA8A2),
+ UINT32_C(0xDF202562), UINT32_C(0xAA1B6D67), UINT32_C(0x45C811FC) } },
+ { { UINT32_C(0x1A5151F8), UINT32_C(0xEC5B84B7), UINT32_C(0x550AB2D2),
+ UINT32_C(0x118E59E8), UINT32_C(0x049BD735), UINT32_C(0x2CCDEDA4),
+ UINT32_C(0x9CD62F0F), UINT32_C(0xC99CBA71), UINT32_C(0x62C9E4F8),
+ UINT32_C(0x69B8040A), UINT32_C(0x110B8283), UINT32_C(0x16F1A31A) },
+ { UINT32_C(0x98E908A3), UINT32_C(0x53F63802), UINT32_C(0xD862F9DE),
+ UINT32_C(0x308CB6EF), UINT32_C(0xA521A95A), UINT32_C(0xE185DAD8),
+ UINT32_C(0x097F75CA), UINT32_C(0x4D8FE9A4), UINT32_C(0x1CA07D53),
+ UINT32_C(0xD1ECCEC7), UINT32_C(0x0DB07E83), UINT32_C(0x13DFA1DC) } },
+ { { UINT32_C(0x0F591A76), UINT32_C(0xDDAF9DC6), UINT32_C(0x1685F412),
+ UINT32_C(0xE1A6D7CC), UINT32_C(0x002B6E8D), UINT32_C(0x153DE557),
+ UINT32_C(0xC6DA37D9), UINT32_C(0x730C38BC), UINT32_C(0x0914B597),
+ UINT32_C(0xAE180622), UINT32_C(0xDD8C3A0A), UINT32_C(0x84F98103) },
+ { UINT32_C(0x8DA205B0), UINT32_C(0x369C5398), UINT32_C(0x3888A720),
+ UINT32_C(0xA3D95B81), UINT32_C(0xE10E2806), UINT32_C(0x1F3F8BBF),
+ UINT32_C(0x4530D1F3), UINT32_C(0x48663DF5), UINT32_C(0x3E377713),
+ UINT32_C(0x320523B4), UINT32_C(0xC7894814), UINT32_C(0xE8B1A575) } },
+ { { UINT32_C(0x2EE8EA07), UINT32_C(0x33066871), UINT32_C(0x60DA199D),
+ UINT32_C(0xC6FB4EC5), UINT32_C(0xF4370A05), UINT32_C(0x33231860),
+ UINT32_C(0xC6DE4E26), UINT32_C(0x7ABECE72), UINT32_C(0xEBDECE7A),
+ UINT32_C(0xDE8D4BD8), UINT32_C(0x1CBE93C7), UINT32_C(0xC90EE657) },
+ { UINT32_C(0x85AC2509), UINT32_C(0x0246751B), UINT32_C(0x30380245),
+ UINT32_C(0xD0EF142C), UINT32_C(0x7C76E39C), UINT32_C(0x086DF9C4),
+ UINT32_C(0xB789FB56), UINT32_C(0x68F1304F), UINT32_C(0xA5E4BD56),
+ UINT32_C(0x23E4CB98), UINT32_C(0x64663DCA), UINT32_C(0x69A4C63C) } },
+ { { UINT32_C(0x7CB34E63), UINT32_C(0x6C72B6AF), UINT32_C(0x6DFC23FE),
+ UINT32_C(0x073C40CD), UINT32_C(0xC936693A), UINT32_C(0xBDEEE7A1),
+ UINT32_C(0x6EFAD378), UINT32_C(0xBC858E80), UINT32_C(0xF5BE55D4),
+ UINT32_C(0xEAD719FF), UINT32_C(0x04552F5F), UINT32_C(0xC8C3238F) },
+ { UINT32_C(0x928D5784), UINT32_C(0x0952C068), UINT32_C(0x94C58F2B),
+ UINT32_C(0x89DFDF22), UINT32_C(0x67502C50), UINT32_C(0x332DEDF3),
+ UINT32_C(0xAC0BE258), UINT32_C(0x3ED2FA3A), UINT32_C(0x7C5C8244),
+ UINT32_C(0xAEDC9B8A), UINT32_C(0xDC0EA34F), UINT32_C(0x43A761B9) } },
+ { { UINT32_C(0xCC5E21A5), UINT32_C(0x8FD683A2), UINT32_C(0xFBA2BB68),
+ UINT32_C(0x5F444C6E), UINT32_C(0xAF05586D), UINT32_C(0x709ACD0E),
+ UINT32_C(0xDE8FB348), UINT32_C(0x8EFA54D2), UINT32_C(0x34CFE29E),
+ UINT32_C(0x35276B71), UINT32_C(0x941EAC8C), UINT32_C(0x77A06FCD) },
+ { UINT32_C(0x928322DD), UINT32_C(0x5815792D), UINT32_C(0x67F7CB59),
+ UINT32_C(0x82FF356B), UINT32_C(0x304980F4), UINT32_C(0x71E40A78),
+ UINT32_C(0x3667D021), UINT32_C(0xC8645C27), UINT32_C(0xAEBAE28F),
+ UINT32_C(0xE785741C), UINT32_C(0x53ECAC37), UINT32_C(0xB2C1BC75) } },
+ { { UINT32_C(0x1D0A74DB), UINT32_C(0x633EB24F), UINT32_C(0xFA752512),
+ UINT32_C(0xF1F55E56), UINT32_C(0x8EFE11DE), UINT32_C(0x75FECA68),
+ UINT32_C(0xE6BF19EC), UINT32_C(0xC80FD91C), UINT32_C(0x2A14C908),
+ UINT32_C(0xAD0BAFEC), UINT32_C(0xADE4031F), UINT32_C(0x4E1C4ACA) },
+ { UINT32_C(0x1EB1549A), UINT32_C(0x463A815B), UINT32_C(0x668F1298),
+ UINT32_C(0x5AD4253C), UINT32_C(0x38A37151), UINT32_C(0x5CB38662),
+ UINT32_C(0xAFF16B96), UINT32_C(0x34BB1CCF), UINT32_C(0xEE731AB0),
+ UINT32_C(0xDCA93B13), UINT32_C(0x9BE01A0B), UINT32_C(0x9F3CE5CC) } },
+ { { UINT32_C(0xA110D331), UINT32_C(0x75DB5723), UINT32_C(0x7123D89F),
+ UINT32_C(0x67C66F6A), UINT32_C(0x4009D570), UINT32_C(0x27ABBD4B),
+ UINT32_C(0xC73451BC), UINT32_C(0xACDA6F84), UINT32_C(0x05575ACF),
+ UINT32_C(0xE4B9A239), UINT32_C(0xAB2D3D6C), UINT32_C(0x3C2DB7EF) },
+ { UINT32_C(0x29115145), UINT32_C(0x01CCDD08), UINT32_C(0x57B5814A),
+ UINT32_C(0x9E0602FE), UINT32_C(0x87862838), UINT32_C(0x679B35C2),
+ UINT32_C(0x38AD598D), UINT32_C(0x0277DC4C), UINT32_C(0x6D896DD4),
+ UINT32_C(0xEF80A213), UINT32_C(0xE7B9047B), UINT32_C(0xC8812213) } },
+ },
+ {
+ { { UINT32_C(0xEDC9CE62), UINT32_C(0xAC6DBDF6), UINT32_C(0x0F9C006E),
+ UINT32_C(0xA58F5B44), UINT32_C(0xDC28E1B0), UINT32_C(0x16694DE3),
+ UINT32_C(0xA6647711), UINT32_C(0x2D039CF2), UINT32_C(0xC5B08B4B),
+ UINT32_C(0xA13BBE6F), UINT32_C(0x10EBD8CE), UINT32_C(0xE44DA930) },
+ { UINT32_C(0x19649A16), UINT32_C(0xCD472087), UINT32_C(0x683E5DF1),
+ UINT32_C(0xE18F4E44), UINT32_C(0x929BFA28), UINT32_C(0xB3F66303),
+ UINT32_C(0x818249BF), UINT32_C(0x7C378E43), UINT32_C(0x847F7CD9),
+ UINT32_C(0x76068C80), UINT32_C(0x987EBA16), UINT32_C(0xEE3DB6D1) } },
+ { { UINT32_C(0xC42A2F52), UINT32_C(0xCBBD8576), UINT32_C(0x9D2B06BB),
+ UINT32_C(0x9ACC6F70), UINT32_C(0x2E6B72A4), UINT32_C(0xE5CB5620),
+ UINT32_C(0x7C024443), UINT32_C(0x5738EA0E), UINT32_C(0xB55368F3),
+ UINT32_C(0x8ED06170), UINT32_C(0x1AEED44F), UINT32_C(0xE54C99BB) },
+ { UINT32_C(0xE2E0D8B2), UINT32_C(0x3D90A6B2), UINT32_C(0xCF7B2856),
+ UINT32_C(0x21718977), UINT32_C(0xC5612AEC), UINT32_C(0x089093DC),
+ UINT32_C(0x99C1BACC), UINT32_C(0xC272EF6F), UINT32_C(0xDC43EAAD),
+ UINT32_C(0x47DB3B43), UINT32_C(0x0832D891), UINT32_C(0x730F30E4) } },
+ { { UINT32_C(0x0C7FECDB), UINT32_C(0x9FFE5563), UINT32_C(0xF88101E5),
+ UINT32_C(0x55CC67B6), UINT32_C(0xCBEFA3C7), UINT32_C(0x3039F981),
+ UINT32_C(0x667BFD64), UINT32_C(0x2AB06883), UINT32_C(0x4340E3DF),
+ UINT32_C(0x9007A257), UINT32_C(0x5A3A49CA), UINT32_C(0x1AC3F3FA) },
+ { UINT32_C(0xC97E20FD), UINT32_C(0x9C7BE629), UINT32_C(0xA3DAE003),
+ UINT32_C(0xF61823D3), UINT32_C(0xE7380DBA), UINT32_C(0xFFE7FF39),
+ UINT32_C(0x9FACC3B8), UINT32_C(0x620BB9B5), UINT32_C(0x31AE422C),
+ UINT32_C(0x2DDCB8CD), UINT32_C(0xD12C3C43), UINT32_C(0x1DE3BCFA) } },
+ { { UINT32_C(0xD6E0F9A9), UINT32_C(0x8C074946), UINT32_C(0x51C3B05B),
+ UINT32_C(0x662FA995), UINT32_C(0x04BB2048), UINT32_C(0x6CDAE969),
+ UINT32_C(0xD6DC8B60), UINT32_C(0x6DEC9594), UINT32_C(0x54438BBC),
+ UINT32_C(0x8D265869), UINT32_C(0x1B0E95A5), UINT32_C(0x88E983E3) },
+ { UINT32_C(0x60CBF838), UINT32_C(0x8189F114), UINT32_C(0x771DC46B),
+ UINT32_C(0x77190697), UINT32_C(0x27F8EC1A), UINT32_C(0x775775A2),
+ UINT32_C(0x607E3739), UINT32_C(0x7A125240), UINT32_C(0x4F793E4E),
+ UINT32_C(0xAFAE84E7), UINT32_C(0x5BF5BAF4), UINT32_C(0x44FA17F3) } },
+ { { UINT32_C(0xD03AC439), UINT32_C(0xA21E69A5), UINT32_C(0x88AA8094),
+ UINT32_C(0x2069C5FC), UINT32_C(0x8C08F206), UINT32_C(0xB041EEA7),
+ UINT32_C(0x3D65B8ED), UINT32_C(0x55B9D461), UINT32_C(0xD392C7C4),
+ UINT32_C(0x951EA25C), UINT32_C(0x9D166232), UINT32_C(0x4B9A1CEC) },
+ { UINT32_C(0xFCF931A4), UINT32_C(0xC184FCD8), UINT32_C(0x063AD374),
+ UINT32_C(0xBA59AD44), UINT32_C(0x1AA9796F), UINT32_C(0x1868AD2A),
+ UINT32_C(0xDFF29832), UINT32_C(0x38A34018), UINT32_C(0x03DF8070),
+ UINT32_C(0x01FC8801), UINT32_C(0x48DD334A), UINT32_C(0x1282CCE0) } },
+ { { UINT32_C(0x26D8503C), UINT32_C(0x76AA9557), UINT32_C(0x6BC3E3D0),
+ UINT32_C(0xBE962B63), UINT32_C(0x97DE8841), UINT32_C(0xF5CA93E5),
+ UINT32_C(0xAF3F2C16), UINT32_C(0x1561B05E), UINT32_C(0xD34BFF98),
+ UINT32_C(0x34BE00AA), UINT32_C(0xD23D2925), UINT32_C(0xEA21E6E9) },
+ { UINT32_C(0x394C3AFB), UINT32_C(0x55713230), UINT32_C(0xD6C8BECA),
+ UINT32_C(0xEAF0529B), UINT32_C(0x202B9A11), UINT32_C(0xFF38A743),
+ UINT32_C(0x6D3A398B), UINT32_C(0xA13E39FC), UINT32_C(0x86E2615A),
+ UINT32_C(0x8CBD644B), UINT32_C(0x191057EC), UINT32_C(0x92063988) } },
+ { { UINT32_C(0x13F89146), UINT32_C(0x787835CE), UINT32_C(0x69446C3F),
+ UINT32_C(0x7FCD42CC), UINT32_C(0x840E679D), UINT32_C(0x0DA2AA98),
+ UINT32_C(0x18779A1B), UINT32_C(0x44F20523), UINT32_C(0xEFBF5935),
+ UINT32_C(0xE3A3B34F), UINT32_C(0xB9947B70), UINT32_C(0xA5D2CFD0) },
+ { UINT32_C(0x27F4E16F), UINT32_C(0xAE2AF4EF), UINT32_C(0xB9D21322),
+ UINT32_C(0xA7FA70D2), UINT32_C(0xB3FD566B), UINT32_C(0x68084919),
+ UINT32_C(0xD7AAD6AB), UINT32_C(0xF04D71C8), UINT32_C(0x10BC4260),
+ UINT32_C(0xDBEA21E4), UINT32_C(0x8D949B42), UINT32_C(0xAA7DC665) } },
+ { { UINT32_C(0x6CCB8213), UINT32_C(0xD8E958A0), UINT32_C(0x91900B54),
+ UINT32_C(0x118D9DB9), UINT32_C(0x85E8CED6), UINT32_C(0x09BB9D49),
+ UINT32_C(0x24019281), UINT32_C(0x410E9FB5), UINT32_C(0x6D74C86E),
+ UINT32_C(0x3B31B4E1), UINT32_C(0x020BB77D), UINT32_C(0x52BC0252) },
+ { UINT32_C(0x27092CE4), UINT32_C(0x5616A26F), UINT32_C(0xA08F65CD),
+ UINT32_C(0x67774DBC), UINT32_C(0xC08BD569), UINT32_C(0x560AD494),
+ UINT32_C(0xAD498783), UINT32_C(0xBE26DA36), UINT32_C(0x7F019C91),
+ UINT32_C(0x0276C8AB), UINT32_C(0x5248266E), UINT32_C(0x09843ADA) } },
+ { { UINT32_C(0x7D963CF2), UINT32_C(0xA0AE88A7), UINT32_C(0xD0E84920),
+ UINT32_C(0x91EF8986), UINT32_C(0xF8C58104), UINT32_C(0xC7EFE344),
+ UINT32_C(0xECA20773), UINT32_C(0x0A25D9FD), UINT32_C(0x00D8F1D5),
+ UINT32_C(0x9D989FAA), UINT32_C(0xC8B06264), UINT32_C(0x4204C8CE) },
+ { UINT32_C(0xBE1A2796), UINT32_C(0x717C12E0), UINT32_C(0xC190C728),
+ UINT32_C(0x1FA4BA8C), UINT32_C(0x8C8A59BA), UINT32_C(0xA245CA8D),
+ UINT32_C(0x7672B935), UINT32_C(0xE3C37475), UINT32_C(0x2E4D6375),
+ UINT32_C(0x083D5E40), UINT32_C(0x5455E16E), UINT32_C(0x0B8D5AB3) } },
+ { { UINT32_C(0xEED765D4), UINT32_C(0x1DB17DBF), UINT32_C(0xA5DDB965),
+ UINT32_C(0xBBC9B1BE), UINT32_C(0xDFC12ABC), UINT32_C(0x1948F76D),
+ UINT32_C(0x134EF489), UINT32_C(0x2C2714E5), UINT32_C(0x741C600F),
+ UINT32_C(0x60CE2EE8), UINT32_C(0xF80E6E63), UINT32_C(0x32396F22) },
+ { UINT32_C(0x22537F59), UINT32_C(0x421DAC75), UINT32_C(0x49475DF5),
+ UINT32_C(0x58FB73C6), UINT32_C(0x6F18F1C7), UINT32_C(0x0ABF2885),
+ UINT32_C(0x9A398D16), UINT32_C(0x36474468), UINT32_C(0xBF673B87),
+ UINT32_C(0x87A661A7), UINT32_C(0x73819E17), UINT32_C(0x3E80698F) } },
+ { { UINT32_C(0x53784CC4), UINT32_C(0xDFE49793), UINT32_C(0x486D508F),
+ UINT32_C(0x4280EAB0), UINT32_C(0xE534F5A4), UINT32_C(0x119593FF),
+ UINT32_C(0x9F63242F), UINT32_C(0x98AEFADD), UINT32_C(0xC4829CAE),
+ UINT32_C(0x9AE6A24A), UINT32_C(0x58E8BA80), UINT32_C(0xF2373CA5) },
+ { UINT32_C(0x51765FB3), UINT32_C(0x4017AF7E), UINT32_C(0xAF4AEC4B),
+ UINT32_C(0xD1E40F7C), UINT32_C(0x0898E3BC), UINT32_C(0x87372C7A),
+ UINT32_C(0x85452CA9), UINT32_C(0x688982B2), UINT32_C(0xB1E50BCA),
+ UINT32_C(0x71E0B4BF), UINT32_C(0xF70E714A), UINT32_C(0x21FD2DBF) } },
+ { { UINT32_C(0xFB78DDAC), UINT32_C(0xEE6E8820), UINT32_C(0x063892CD),
+ UINT32_C(0x0BAED29C), UINT32_C(0x28C0588D), UINT32_C(0x5F33049C),
+ UINT32_C(0x18DBC432), UINT32_C(0x90C2515E), UINT32_C(0x3B4CB0BD),
+ UINT32_C(0xB8A1B143), UINT32_C(0x68103043), UINT32_C(0x0AB5C0C9) },
+ { UINT32_C(0x4005EC40), UINT32_C(0xF3788FA0), UINT32_C(0x039EE115),
+ UINT32_C(0x82571C99), UINT32_C(0x93260BED), UINT32_C(0xEE8FCED5),
+ UINT32_C(0x10836D18), UINT32_C(0x5A9BAF79), UINT32_C(0xC46AA4F6),
+ UINT32_C(0x7C258B09), UINT32_C(0x37F53D31), UINT32_C(0x46ECC5E8) } },
+ { { UINT32_C(0xBFE0DD98), UINT32_C(0xFA32C0DC), UINT32_C(0x962B1066),
+ UINT32_C(0x66EFAFC4), UINT32_C(0x64BDF5EB), UINT32_C(0xBA81D33E),
+ UINT32_C(0xFC7FC512), UINT32_C(0x36C28536), UINT32_C(0xE0B4FA97),
+ UINT32_C(0x0C95176B), UINT32_C(0x3B9BC64A), UINT32_C(0x47DDE29B) },
+ { UINT32_C(0x5C173B36), UINT32_C(0x08D986FD), UINT32_C(0x6CF3F28C),
+ UINT32_C(0x46D84B52), UINT32_C(0xF026BDB9), UINT32_C(0x6F6ED6C3),
+ UINT32_C(0x68206DC5), UINT32_C(0xAC90668B), UINT32_C(0xECBE4E70),
+ UINT32_C(0xE8ED5D98), UINT32_C(0xDC1A6974), UINT32_C(0xCFFF61DD) } },
+ { { UINT32_C(0x77B1A5C1), UINT32_C(0xFF5C3A29), UINT32_C(0x0DDF995D),
+ UINT32_C(0x10C27E4A), UINT32_C(0xE23363E3), UINT32_C(0xCB745F77),
+ UINT32_C(0x32F399A3), UINT32_C(0xD765DF6F), UINT32_C(0x8A99E109),
+ UINT32_C(0xF0CA0C2F), UINT32_C(0x1E025CA0), UINT32_C(0xC3A6BFB7) },
+ { UINT32_C(0x4F9D9FA5), UINT32_C(0x830B2C0A), UINT32_C(0xBD1A84E5),
+ UINT32_C(0xAE914CAC), UINT32_C(0xA4FEBCC1), UINT32_C(0x30B35ED8),
+ UINT32_C(0x84CFBF2E), UINT32_C(0xCB902B46), UINT32_C(0x25FC6375),
+ UINT32_C(0x0BD47628), UINT32_C(0x85509D04), UINT32_C(0xA858A53C) } },
+ { { UINT32_C(0x552E0A3F), UINT32_C(0x8B995D0C), UINT32_C(0x17BE9FF7),
+ UINT32_C(0xEDBD4E94), UINT32_C(0x95085178), UINT32_C(0x3432E839),
+ UINT32_C(0x80C256F5), UINT32_C(0x0FE5C181), UINT32_C(0xEBF9597C),
+ UINT32_C(0x05A64EA8), UINT32_C(0x3F80371F), UINT32_C(0x6ED44BB1) },
+ { UINT32_C(0xFE4C12EE), UINT32_C(0x6A29A05E), UINT32_C(0xE0BB83B3),
+ UINT32_C(0x3E436A43), UINT32_C(0x74D72921), UINT32_C(0x38365D9A),
+ UINT32_C(0xC38E1ED7), UINT32_C(0x3F5EE823), UINT32_C(0xE8FA063F),
+ UINT32_C(0x09A53213), UINT32_C(0xB435E713), UINT32_C(0x1E7FE47A) } },
+ { { UINT32_C(0xFDDD17F3), UINT32_C(0xE4D9BC94), UINT32_C(0xC1016C20),
+ UINT32_C(0xC74B8FED), UINT32_C(0xB49C060E), UINT32_C(0x095DE39B),
+ UINT32_C(0x8AC0DF00), UINT32_C(0xDBCC6795), UINT32_C(0x1C34F4DF),
+ UINT32_C(0x4CF6BAEB), UINT32_C(0xE8390170), UINT32_C(0x72C55C21) },
+ { UINT32_C(0xF6C48E79), UINT32_C(0x4F17BFD2), UINT32_C(0x017A80BA),
+ UINT32_C(0x18BF4DA0), UINT32_C(0xBCF4B138), UINT32_C(0xCF51D829),
+ UINT32_C(0xF48F8B0D), UINT32_C(0x598AEE5F), UINT32_C(0x20F10809),
+ UINT32_C(0x83FAEE56), UINT32_C(0x779F0850), UINT32_C(0x4615D4DC) } },
+ },
+ {
+ { { UINT32_C(0x5852B59B), UINT32_C(0x22313DEE), UINT32_C(0xB6A0B37F),
+ UINT32_C(0x6F56C8E8), UINT32_C(0xA76EC380), UINT32_C(0x43D6EEAE),
+ UINT32_C(0x0275AD36), UINT32_C(0xA1655136), UINT32_C(0xDF095BDA),
+ UINT32_C(0xE5C1B65A), UINT32_C(0x367C44B0), UINT32_C(0xBD1FFA8D) },
+ { UINT32_C(0x6B48AF2B), UINT32_C(0xE2B419C2), UINT32_C(0x3DA194C8),
+ UINT32_C(0x57BBBD97), UINT32_C(0xA2BAFF05), UINT32_C(0xB5FBE51F),
+ UINT32_C(0x6269B5D0), UINT32_C(0xA0594D70), UINT32_C(0x23E8D667),
+ UINT32_C(0x0B07B705), UINT32_C(0x63E016E7), UINT32_C(0xAE1976B5) } },
+ { { UINT32_C(0xFBECAAAE), UINT32_C(0x2FDE4893), UINT32_C(0x30332229),
+ UINT32_C(0x444346DE), UINT32_C(0x09456ED5), UINT32_C(0x157B8A5B),
+ UINT32_C(0x25797C6C), UINT32_C(0x73606A79), UINT32_C(0x33C14C06),
+ UINT32_C(0xA9D0F47C), UINT32_C(0xFAF971CA), UINT32_C(0x7BC8962C) },
+ { UINT32_C(0x65909DFD), UINT32_C(0x6E763C51), UINT32_C(0x14A9BF42),
+ UINT32_C(0x1BBBE41B), UINT32_C(0xC49E9EFC), UINT32_C(0xD95B7ECB),
+ UINT32_C(0xB38F2B59), UINT32_C(0x0C317927), UINT32_C(0xB3C397DB),
+ UINT32_C(0x97912B53), UINT32_C(0x45C7ABC7), UINT32_C(0xCB3879AA) } },
+ { { UINT32_C(0x24359B81), UINT32_C(0xCD81BDCF), UINT32_C(0xDB4C321C),
+ UINT32_C(0x6FD326E2), UINT32_C(0xF8EBE39C), UINT32_C(0x4CB0228B),
+ UINT32_C(0xB2CDD852), UINT32_C(0x496A9DCE), UINT32_C(0xD0E9B3AF),
+ UINT32_C(0x0F115A1A), UINT32_C(0xD8EEEF8A), UINT32_C(0xAA08BF36) },
+ { UINT32_C(0x06E5E739), UINT32_C(0x5232A515), UINT32_C(0x8407A551),
+ UINT32_C(0x21FAE9D5), UINT32_C(0x8994B4E8), UINT32_C(0x289D18B0),
+ UINT32_C(0x09097A52), UINT32_C(0xB4E346A8), UINT32_C(0x324621D0),
+ UINT32_C(0xC641510F), UINT32_C(0x95A41AB8), UINT32_C(0xC567FD4A) } },
+ { { UINT32_C(0xD57C8DE9), UINT32_C(0x261578C7), UINT32_C(0x3836C5C8),
+ UINT32_C(0xB9BC491F), UINT32_C(0x14C8038F), UINT32_C(0x993266B4),
+ UINT32_C(0xFAA7CC39), UINT32_C(0xBACAD755), UINT32_C(0xD69B7E27),
+ UINT32_C(0x418C4DEF), UINT32_C(0xAE751533), UINT32_C(0x53FDC5CD) },
+ { UINT32_C(0xC3EEA63A), UINT32_C(0x6F3BD329), UINT32_C(0xE53DD29E),
+ UINT32_C(0xA7A22091), UINT32_C(0xDC4C54EC), UINT32_C(0xB7164F73),
+ UINT32_C(0x44D3D74E), UINT32_C(0xCA66290D), UINT32_C(0x4C9EA511),
+ UINT32_C(0xF77C6242), UINT32_C(0x1F714C49), UINT32_C(0x34337F55) } },
+ { { UINT32_C(0xA64B6C4B), UINT32_C(0x5ED2B216), UINT32_C(0x3AAE640D),
+ UINT32_C(0x1C38794F), UINT32_C(0x8905794F), UINT32_C(0x30BBAEE0),
+ UINT32_C(0xC8699CFB), UINT32_C(0x0D9EE41E), UINT32_C(0xCF7B7C29),
+ UINT32_C(0xAF38DAF2), UINT32_C(0x43E53513), UINT32_C(0x0D6A05CA) },
+ { UINT32_C(0x2606AB56), UINT32_C(0xBE96C644), UINT32_C(0xE9EB9734),
+ UINT32_C(0x13E7A072), UINT32_C(0x5FF50CD7), UINT32_C(0xF9669445),
+ UINT32_C(0x47DA6F1D), UINT32_C(0x68EF26B5), UINT32_C(0x23687CB7),
+ UINT32_C(0xF0028738), UINT32_C(0x6217C1CE), UINT32_C(0x5ED9C876) } },
+ { { UINT32_C(0x0A3A9691), UINT32_C(0x423BA513), UINT32_C(0xB3179296),
+ UINT32_C(0xF421B1E7), UINT32_C(0x1A871E1B), UINT32_C(0x6B51BCDB),
+ UINT32_C(0x464E4300), UINT32_C(0x6E3BB5B5), UINT32_C(0xFC6C54CC),
+ UINT32_C(0x24171E2E), UINT32_C(0xD3E58DC2), UINT32_C(0xA9DFA947) },
+ { UINT32_C(0x9DE9CFA7), UINT32_C(0x175B3309), UINT32_C(0x2D1015DA),
+ UINT32_C(0x707B2529), UINT32_C(0x993EA65A), UINT32_C(0xCBB95F17),
+ UINT32_C(0x0447450D), UINT32_C(0x93515063), UINT32_C(0x1B2753C9),
+ UINT32_C(0x0F47B205), UINT32_C(0xE7D427CF), UINT32_C(0x4A0BAB14) } },
+ { { UINT32_C(0xB5AA7CA1), UINT32_C(0xA39DEF39), UINT32_C(0xC47C33DF),
+ UINT32_C(0x591CB173), UINT32_C(0x6BBAB872), UINT32_C(0xA09DAC79),
+ UINT32_C(0x7208BA2F), UINT32_C(0x3EF9D7CF), UINT32_C(0x7A0A34FC),
+ UINT32_C(0x3CC18931), UINT32_C(0xBCC3380F), UINT32_C(0xAE31C62B) },
+ { UINT32_C(0x0287C0B4), UINT32_C(0xD72A6794), UINT32_C(0x68E334F1),
+ UINT32_C(0x3373382C), UINT32_C(0xBD20C6A6), UINT32_C(0xD0310CA8),
+ UINT32_C(0x42C033FD), UINT32_C(0xA2734B87), UINT32_C(0x8DCE4509),
+ UINT32_C(0xA5D390F1), UINT32_C(0x3E1AFCB5), UINT32_C(0xFC84E74B) } },
+ { { UINT32_C(0xF2CD8A9C), UINT32_C(0xB028334D), UINT32_C(0x570F76F6),
+ UINT32_C(0xB8719291), UINT32_C(0x01065A2D), UINT32_C(0x662A386E),
+ UINT32_C(0x53D940AE), UINT32_C(0xDF1634CB), UINT32_C(0x8F5B41F9),
+ UINT32_C(0x625A7B83), UINT32_C(0xEE6AA1B4), UINT32_C(0xA033E4FE) },
+ { UINT32_C(0x1E42BABB), UINT32_C(0x51E9D463), UINT32_C(0x0D388468),
+ UINT32_C(0x660BC2E4), UINT32_C(0xFCBB114A), UINT32_C(0x3F702189),
+ UINT32_C(0xB414CA78), UINT32_C(0x6B46FE35), UINT32_C(0x4A57316B),
+ UINT32_C(0x328F6CF2), UINT32_C(0x381AD156), UINT32_C(0x917423B5) } },
+ { { UINT32_C(0x5373A607), UINT32_C(0xAC19306E), UINT32_C(0x191D0969),
+ UINT32_C(0x471DF8E3), UINT32_C(0xB9720D83), UINT32_C(0x380ADE35),
+ UINT32_C(0x48F1FD5C), UINT32_C(0x7423FDF5), UINT32_C(0x49CABC95),
+ UINT32_C(0x8B090C9F), UINT32_C(0xC9842F2F), UINT32_C(0xB768E8CD) },
+ { UINT32_C(0xE56162D6), UINT32_C(0x399F456D), UINT32_C(0x4F326791),
+ UINT32_C(0xBB6BA240), UINT32_C(0x342590BE), UINT32_C(0x8F4FBA3B),
+ UINT32_C(0x3DFB6B3E), UINT32_C(0x053986B9), UINT32_C(0x190C7425),
+ UINT32_C(0xBB6739F1), UINT32_C(0x32F7E95F), UINT32_C(0x32D4A553) } },
+ { { UINT32_C(0x0DDBFB21), UINT32_C(0x0205A0EC), UINT32_C(0x33AC3407),
+ UINT32_C(0x3010327D), UINT32_C(0x3348999B), UINT32_C(0xCF2F4DB3),
+ UINT32_C(0x1551604A), UINT32_C(0x660DB9F4), UINT32_C(0x5D38D335),
+ UINT32_C(0xC346C69A), UINT32_C(0x38882479), UINT32_C(0x64AAB3D3) },
+ { UINT32_C(0x6AE44403), UINT32_C(0xA096B5E7), UINT32_C(0x645F76CD),
+ UINT32_C(0x6B4C9571), UINT32_C(0x4711120F), UINT32_C(0x72E1CD5F),
+ UINT32_C(0xF27CC3E1), UINT32_C(0x93EC42AC), UINT32_C(0xA72ABB12),
+ UINT32_C(0x2D18D004), UINT32_C(0xC9841A04), UINT32_C(0x232E9568) } },
+ { { UINT32_C(0x3CC7F908), UINT32_C(0xFF01DB22), UINT32_C(0xD13CDD3B),
+ UINT32_C(0x9F214F8F), UINT32_C(0xE0B014B5), UINT32_C(0x38DADBB7),
+ UINT32_C(0x94245C95), UINT32_C(0x2C548CCC), UINT32_C(0x809AFCE3),
+ UINT32_C(0x714BE331), UINT32_C(0x9BFE957E), UINT32_C(0xBCC64410) },
+ { UINT32_C(0x5B957F80), UINT32_C(0xC21C2D21), UINT32_C(0xBB8A4C42),
+ UINT32_C(0xBA2D4FDC), UINT32_C(0x74817CEC), UINT32_C(0xFA6CD4AF),
+ UINT32_C(0xC528EAD6), UINT32_C(0x9E7FB523), UINT32_C(0x7714B10E),
+ UINT32_C(0xAED781FF), UINT32_C(0x94F04455), UINT32_C(0xB52BB592) } },
+ { { UINT32_C(0x868CC68B), UINT32_C(0xA578BD69), UINT32_C(0x603F2C08),
+ UINT32_C(0xA40FDC8D), UINT32_C(0x2D81B042), UINT32_C(0x53D79BD1),
+ UINT32_C(0xA7587EAB), UINT32_C(0x1B136AF3), UINT32_C(0x868A16DB),
+ UINT32_C(0x1ED4F939), UINT32_C(0xD0B98273), UINT32_C(0x775A61FB) },
+ { UINT32_C(0xE56BEF8C), UINT32_C(0xBA5C12A6), UINT32_C(0xDDDC8595),
+ UINT32_C(0xF926CE52), UINT32_C(0x586FE1F8), UINT32_C(0xA13F5C8F),
+ UINT32_C(0x060DBB54), UINT32_C(0xEAC9F7F2), UINT32_C(0x51AF4342),
+ UINT32_C(0x70C0AC3A), UINT32_C(0x79CDA450), UINT32_C(0xC16E303C) } },
+ { { UINT32_C(0x8113F4EA), UINT32_C(0xD0DADD6C), UINT32_C(0x07BDF09F),
+ UINT32_C(0xF14E3922), UINT32_C(0xAA7D877C), UINT32_C(0x3FE5E9C2),
+ UINT32_C(0x48779264), UINT32_C(0x9EA95C19), UINT32_C(0x4FCB8344),
+ UINT32_C(0xE93F65A7), UINT32_C(0x76D925A4), UINT32_C(0x9F40837E) },
+ { UINT32_C(0x8271FFC7), UINT32_C(0x0EA6DA3F), UINT32_C(0xCC8F9B19),
+ UINT32_C(0x557FA529), UINT32_C(0x78E6DDFD), UINT32_C(0x2613DBF1),
+ UINT32_C(0x36B1E954), UINT32_C(0x7A7523B8), UINT32_C(0x406A87FB),
+ UINT32_C(0x20EB3168), UINT32_C(0x03ABA56A), UINT32_C(0x64C21C14) } },
+ { { UINT32_C(0xC032DD5F), UINT32_C(0xE86C9C2D), UINT32_C(0x86F16A21),
+ UINT32_C(0x158CEB8E), UINT32_C(0x68326AF1), UINT32_C(0x0279FF53),
+ UINT32_C(0x59F12BA5), UINT32_C(0x1FFE2E2B), UINT32_C(0x86826D45),
+ UINT32_C(0xD75A46DB), UINT32_C(0x1E33E6AC), UINT32_C(0xE19B4841) },
+ { UINT32_C(0x0E52991C), UINT32_C(0x5F0CC524), UINT32_C(0x8B116286),
+ UINT32_C(0x645871F9), UINT32_C(0xFCAEC5D3), UINT32_C(0xAB3B4B1E),
+ UINT32_C(0x51D0F698), UINT32_C(0x994C8DF0), UINT32_C(0xE5D13040),
+ UINT32_C(0x06F890AF), UINT32_C(0x5F96C7C2), UINT32_C(0x72D9DC23) } },
+ { { UINT32_C(0xE7886A80), UINT32_C(0x7C018DEE), UINT32_C(0x8786E4A3),
+ UINT32_C(0xFA209330), UINT32_C(0xA4415CA1), UINT32_C(0xCEC8E2A3),
+ UINT32_C(0xCC83CC60), UINT32_C(0x5C736FC1), UINT32_C(0xF00C259F),
+ UINT32_C(0xFEF9788C), UINT32_C(0xDD29A6AD), UINT32_C(0xED5C01CB) },
+ { UINT32_C(0x3E20825B), UINT32_C(0x87834A03), UINT32_C(0x123F9358),
+ UINT32_C(0x13B1239D), UINT32_C(0xFBC286C1), UINT32_C(0x7E8869D0),
+ UINT32_C(0x24CE8609), UINT32_C(0xC4AB5AA3), UINT32_C(0xB6349208),
+ UINT32_C(0x38716BEE), UINT32_C(0xB322AE21), UINT32_C(0x0BDF4F99) } },
+ { { UINT32_C(0x53E3494B), UINT32_C(0x6B97A2BF), UINT32_C(0x70F7A13E),
+ UINT32_C(0xA8AA05C5), UINT32_C(0xF1305B51), UINT32_C(0x209709C2),
+ UINT32_C(0xDAB76F2C), UINT32_C(0x57B31888), UINT32_C(0xAA2A406A),
+ UINT32_C(0x75B2ECD7), UINT32_C(0xA35374A4), UINT32_C(0x88801A00) },
+ { UINT32_C(0x45C0471B), UINT32_C(0xE1458D1C), UINT32_C(0x322C1AB0),
+ UINT32_C(0x5760E306), UINT32_C(0xAD6AB0A6), UINT32_C(0x789A0AF1),
+ UINT32_C(0xF458B9CE), UINT32_C(0x74398DE1), UINT32_C(0x32E0C65F),
+ UINT32_C(0x1652FF9F), UINT32_C(0xFFFB3A52), UINT32_C(0xFAF1F9D5) } },
+ },
+ {
+ { { UINT32_C(0xD1D1B007), UINT32_C(0xA05C751C), UINT32_C(0x0213E478),
+ UINT32_C(0x016C213B), UINT32_C(0xF4C98FEE), UINT32_C(0x9C56E26C),
+ UINT32_C(0xE7B3A7C7), UINT32_C(0x6084F8B9), UINT32_C(0xDECC1646),
+ UINT32_C(0xA0B042F6), UINT32_C(0xFBF3A0BC), UINT32_C(0x4A6F3C1A) },
+ { UINT32_C(0x51C9F909), UINT32_C(0x94524C2C), UINT32_C(0x3A6D3748),
+ UINT32_C(0xF3B3AD40), UINT32_C(0x7CE1F9F5), UINT32_C(0x18792D6E),
+ UINT32_C(0xFC0C34FA), UINT32_C(0x8EBC2FD7), UINT32_C(0x780A1693),
+ UINT32_C(0x032A9F41), UINT32_C(0x56A60019), UINT32_C(0x34F9801E) } },
+ { { UINT32_C(0xF0DB3751), UINT32_C(0xB398290C), UINT32_C(0xBA42C976),
+ UINT32_C(0x01170580), UINT32_C(0x56560B89), UINT32_C(0x3E71AA29),
+ UINT32_C(0x50E6647B), UINT32_C(0x80817AAC), UINT32_C(0xA0BE42DA),
+ UINT32_C(0x35C833AD), UINT32_C(0xF1BABA4E), UINT32_C(0xFA3C6148) },
+ { UINT32_C(0xCD8F6253), UINT32_C(0xC57BE645), UINT32_C(0xC657AD0D),
+ UINT32_C(0x77CEE46B), UINT32_C(0x0DEFD908), UINT32_C(0x83007731),
+ UINT32_C(0x899CBA56), UINT32_C(0x92FE9BCE), UINT32_C(0xBCEFFB5A),
+ UINT32_C(0x48450EC4), UINT32_C(0xF2F5F4BF), UINT32_C(0xE615148D) } },
+ { { UINT32_C(0x90B86166), UINT32_C(0xF55EDABB), UINT32_C(0x075430A2),
+ UINT32_C(0x27F7D784), UINT32_C(0x9BF17161), UINT32_C(0xF53E822B),
+ UINT32_C(0xAFE808DC), UINT32_C(0x4A5B3B93), UINT32_C(0xD7272F55),
+ UINT32_C(0x590BBBDE), UINT32_C(0xEAEA79A1), UINT32_C(0x233D63FA) },
+ { UINT32_C(0xFE1EBA07), UINT32_C(0xD7042BEA), UINT32_C(0x10750D7E),
+ UINT32_C(0xD2B9AEA0), UINT32_C(0x31078AA5), UINT32_C(0xD8D1E690),
+ UINT32_C(0x7E37BC8B), UINT32_C(0x9E837F18), UINT32_C(0x85008975),
+ UINT32_C(0x9558FF4F), UINT32_C(0x421FE867), UINT32_C(0x93EDB837) } },
+ { { UINT32_C(0x83D55B5A), UINT32_C(0xAA6489DF), UINT32_C(0x86BF27F7),
+ UINT32_C(0xEA092E49), UINT32_C(0x5FA2EFEC), UINT32_C(0x4D8943A9),
+ UINT32_C(0x720E1A8C), UINT32_C(0xC9BAAE53), UINT32_C(0x95A4F8A3),
+ UINT32_C(0xC055444B), UINT32_C(0xA7C1206B), UINT32_C(0x93BD01E8) },
+ { UINT32_C(0x714A27DF), UINT32_C(0xD97765B6), UINT32_C(0x193F1B16),
+ UINT32_C(0xD622D954), UINT32_C(0xF1503B15), UINT32_C(0x115CC35A),
+ UINT32_C(0xA9FA21F8), UINT32_C(0x1DD5359F), UINT32_C(0x6DFED1F1),
+ UINT32_C(0x197C3299), UINT32_C(0xF77F2679), UINT32_C(0xDEE8B7C9) } },
+ { { UINT32_C(0x394FD855), UINT32_C(0x5405179F), UINT32_C(0x49FDFB33),
+ UINT32_C(0xC9D6E244), UINT32_C(0xBD903393), UINT32_C(0x70EBCAB4),
+ UINT32_C(0xA2C56780), UINT32_C(0x0D3A3899), UINT32_C(0x683D1A0A),
+ UINT32_C(0x012C7256), UINT32_C(0x80A48F3B), UINT32_C(0xC688FC88) },
+ { UINT32_C(0x6F7DF527), UINT32_C(0x18095754), UINT32_C(0x71315D16),
+ UINT32_C(0x9E339B4B), UINT32_C(0xA956BB12), UINT32_C(0x90560C28),
+ UINT32_C(0xD42EEE8D), UINT32_C(0x2BECEA60), UINT32_C(0x50632653),
+ UINT32_C(0x82AEB9A7), UINT32_C(0xDFA5CD6A), UINT32_C(0xED34353E) } },
+ { { UINT32_C(0x91AECCE4), UINT32_C(0x82154D2C), UINT32_C(0x5041887F),
+ UINT32_C(0x312C6070), UINT32_C(0xFB9FBD71), UINT32_C(0xECF589F3),
+ UINT32_C(0xB524BDE4), UINT32_C(0x67660A7D), UINT32_C(0x724ACF23),
+ UINT32_C(0xE99B029D), UINT32_C(0x6D1CD891), UINT32_C(0xDF06E4AF) },
+ { UINT32_C(0x80EE304D), UINT32_C(0x07806CB5), UINT32_C(0x7443A8F8),
+ UINT32_C(0x0C70BB9F), UINT32_C(0x08B0830A), UINT32_C(0x01EC3414),
+ UINT32_C(0x5A81510B), UINT32_C(0xFD7B63C3), UINT32_C(0x453B5F93),
+ UINT32_C(0xE90A0A39), UINT32_C(0x9BC71725), UINT32_C(0xAB700F8F) } },
+ { { UINT32_C(0xB9F00793), UINT32_C(0x9401AEC2), UINT32_C(0xB997F0BF),
+ UINT32_C(0x064EC4F4), UINT32_C(0x849240C8), UINT32_C(0xDC0CC1FD),
+ UINT32_C(0xB6E92D72), UINT32_C(0x39A75F37), UINT32_C(0x0224A4AB),
+ UINT32_C(0xAA43CA5D), UINT32_C(0x54614C47), UINT32_C(0x9C4D6325) },
+ { UINT32_C(0xC6709DA3), UINT32_C(0x1767366F), UINT32_C(0x23479232),
+ UINT32_C(0xA6B482D1), UINT32_C(0x84D63E85), UINT32_C(0x54DC6DDC),
+ UINT32_C(0xC99D3B9E), UINT32_C(0x0ACCB5AD), UINT32_C(0xE8AA3ABF),
+ UINT32_C(0x211716BB), UINT32_C(0x69EC6406), UINT32_C(0xD0FE25AD) } },
+ { { UINT32_C(0xDF85C705), UINT32_C(0x0D5C1769), UINT32_C(0xA409DCD1),
+ UINT32_C(0x7086C93D), UINT32_C(0x0E8D75D8), UINT32_C(0x9710839D),
+ UINT32_C(0xEBDD4177), UINT32_C(0x17B7DB75), UINT32_C(0xF649A809),
+ UINT32_C(0xAF69EB58), UINT32_C(0x8A84E220), UINT32_C(0x6EF19EA2) },
+ { UINT32_C(0x65C278B2), UINT32_C(0x36EB5C66), UINT32_C(0x81EA9D65),
+ UINT32_C(0xD2A15128), UINT32_C(0x769300AD), UINT32_C(0x4FCBA840),
+ UINT32_C(0xC8E536E5), UINT32_C(0xC2052CCD), UINT32_C(0xAC263B8F),
+ UINT32_C(0x9CAEE014), UINT32_C(0xF9239663), UINT32_C(0x56F7ED7A) } },
+ { { UINT32_C(0xAC9E09E1), UINT32_C(0xF6FA251F), UINT32_C(0x955A2853),
+ UINT32_C(0xA3775605), UINT32_C(0xF2A4BD78), UINT32_C(0x977B8D21),
+ UINT32_C(0x3E096410), UINT32_C(0xF68AA7FF), UINT32_C(0x65F88419),
+ UINT32_C(0x01AB0552), UINT32_C(0xBB93F64E), UINT32_C(0xC4C8D77E) },
+ { UINT32_C(0x3451FE64), UINT32_C(0x71825111), UINT32_C(0x46F9BAF0),
+ UINT32_C(0xFA0F905B), UINT32_C(0xCA49EF1A), UINT32_C(0x79BE3BF3),
+ UINT32_C(0x6CB02071), UINT32_C(0x831109B2), UINT32_C(0xC4DDBFE5),
+ UINT32_C(0x765F935F), UINT32_C(0x80E5A3BA), UINT32_C(0x6F99CD14) } },
+ { { UINT32_C(0x234F91FF), UINT32_C(0xD2E8DA04), UINT32_C(0x813867AA),
+ UINT32_C(0x4DED4D6D), UINT32_C(0xE0A0D945), UINT32_C(0x3B50175D),
+ UINT32_C(0x4EB78137), UINT32_C(0x55AC7406), UINT32_C(0xE1D47730),
+ UINT32_C(0xE9FA7F6E), UINT32_C(0x5CBF2176), UINT32_C(0x2C171531) },
+ { UINT32_C(0x2BE7A47D), UINT32_C(0xA521788F), UINT32_C(0x3FCF1AB3),
+ UINT32_C(0x95B15A27), UINT32_C(0xF28A946A), UINT32_C(0xAADA6401),
+ UINT32_C(0x8B4E898B), UINT32_C(0x628B2EF4), UINT32_C(0x6D6592CC),
+ UINT32_C(0x0E6F4629), UINT32_C(0xA723CADD), UINT32_C(0x997C7094) } },
+ { { UINT32_C(0x6AFE80C6), UINT32_C(0x878BCE11), UINT32_C(0x007BBA38),
+ UINT32_C(0xA89ABC9D), UINT32_C(0xA7CC267F), UINT32_C(0xB0C1F87B),
+ UINT32_C(0x5104FF04), UINT32_C(0x86D33B9D), UINT32_C(0x2EF1BA42),
+ UINT32_C(0xB0504B1B), UINT32_C(0xB2827E88), UINT32_C(0x21693048) },
+ { UINT32_C(0x79CFCD14), UINT32_C(0x11F1CCD5), UINT32_C(0x94AD227E),
+ UINT32_C(0x59C09FFA), UINT32_C(0x3EA91ACF), UINT32_C(0x95A4ADCB),
+ UINT32_C(0xB4370BAA), UINT32_C(0x1346238B), UINT32_C(0x3E1367B0),
+ UINT32_C(0xB099D202), UINT32_C(0x90F23CEA), UINT32_C(0xCF5BBDE6) } },
+ { { UINT32_C(0xBCB3BE5E), UINT32_C(0x453299BB), UINT32_C(0x38E9FF97),
+ UINT32_C(0x123C588E), UINT32_C(0xF6A2E521), UINT32_C(0x8C115DD9),
+ UINT32_C(0xFF7D4B98), UINT32_C(0x6E333C11), UINT32_C(0xDA73E736),
+ UINT32_C(0x9DD061E5), UINT32_C(0x5CA53056), UINT32_C(0xC6AB7B3A) },
+ { UINT32_C(0x5B30A76B), UINT32_C(0xF1EF3EE3), UINT32_C(0x961BA11F),
+ UINT32_C(0xADD6B44A), UINT32_C(0x2CA6E030), UINT32_C(0x7BB00B75),
+ UINT32_C(0x2FE270AD), UINT32_C(0x270272E8), UINT32_C(0x241A9239),
+ UINT32_C(0x23BC6F4F), UINT32_C(0x0BB94A94), UINT32_C(0x88581E13) } },
+ { { UINT32_C(0x24EEF67F), UINT32_C(0xBD225A69), UINT32_C(0x0412CEB7),
+ UINT32_C(0x7CFD9614), UINT32_C(0x99AC298E), UINT32_C(0xF6DE1679),
+ UINT32_C(0xED6C3571), UINT32_C(0xB20FD895), UINT32_C(0x61836C56),
+ UINT32_C(0x03C73B78), UINT32_C(0xABA6CB34), UINT32_C(0xEE3C3A16) },
+ { UINT32_C(0x4138408A), UINT32_C(0x9E8C5667), UINT32_C(0x2DD6EBDF),
+ UINT32_C(0xEC25FCB1), UINT32_C(0xDBBDF6E3), UINT32_C(0xC54C33FD),
+ UINT32_C(0x4A3C9DD4), UINT32_C(0x93E0913B), UINT32_C(0x35EDEED4),
+ UINT32_C(0x66D7D135), UINT32_C(0x453FB66E), UINT32_C(0xD29A36C4) } },
+ { { UINT32_C(0x9F1943AF), UINT32_C(0x7F192F03), UINT32_C(0x4E0B5FB0),
+ UINT32_C(0x6488163F), UINT32_C(0x53599226), UINT32_C(0x66A45C69),
+ UINT32_C(0x9AD15A73), UINT32_C(0x924E2E43), UINT32_C(0x42A99D76),
+ UINT32_C(0x8B553DB7), UINT32_C(0x0451F521), UINT32_C(0x4BC6B53B) },
+ { UINT32_C(0x101F8AD6), UINT32_C(0xC029B5EF), UINT32_C(0xC507EED9),
+ UINT32_C(0x6A4DA71C), UINT32_C(0x30BB22F3), UINT32_C(0x3ADFAEC0),
+ UINT32_C(0xB514F85B), UINT32_C(0x81BCAF7A), UINT32_C(0x5A7E60D3),
+ UINT32_C(0x2E1E6EFF), UINT32_C(0xAE39D42F), UINT32_C(0x5270ABC0) } },
+ { { UINT32_C(0x3901F0F8), UINT32_C(0x86D56DEB), UINT32_C(0xEED5F650),
+ UINT32_C(0x1D0BC792), UINT32_C(0xCA1114A3), UINT32_C(0x1A2DDFD8),
+ UINT32_C(0xF1DD316D), UINT32_C(0x94ABF4B1), UINT32_C(0x3D9F18EF),
+ UINT32_C(0xF72179E4), UINT32_C(0x9AA2CABF), UINT32_C(0x52A0921E) },
+ { UINT32_C(0xA7452883), UINT32_C(0xECDA9E27), UINT32_C(0xAFD771B4),
+ UINT32_C(0x7E90850A), UINT32_C(0x9CC0465C), UINT32_C(0xD40F87EA),
+ UINT32_C(0x865CDA36), UINT32_C(0x8CFCB60A), UINT32_C(0x7C650942),
+ UINT32_C(0x3DBEC2CC), UINT32_C(0xE718CA9D), UINT32_C(0x071A4EE7) } },
+ { { UINT32_C(0x276AC5F3), UINT32_C(0x73C0E4FF), UINT32_C(0xBDB97EA1),
+ UINT32_C(0xE7BA5A6A), UINT32_C(0xC5808398), UINT32_C(0x638CA54E),
+ UINT32_C(0x413855E5), UINT32_C(0x8258DC82), UINT32_C(0x57F07614),
+ UINT32_C(0x35DDD2E9), UINT32_C(0x1DC13BF9), UINT32_C(0xF98DD692) },
+ { UINT32_C(0xF16DCD84), UINT32_C(0x3A4C0088), UINT32_C(0x833D83F9),
+ UINT32_C(0xF192EADD), UINT32_C(0xA6D61D29), UINT32_C(0x3C26C931),
+ UINT32_C(0xDE0AD7A1), UINT32_C(0x589FDD52), UINT32_C(0x0442D37F),
+ UINT32_C(0x7CD83DD2), UINT32_C(0x403ECBFC), UINT32_C(0x1E47E777) } },
+ },
+ {
+ { { UINT32_C(0x70D4D7BC), UINT32_C(0x2AF8ED81), UINT32_C(0xB632435C),
+ UINT32_C(0xABC3E15F), UINT32_C(0x78219356), UINT32_C(0x4C0E726F),
+ UINT32_C(0xB87254C4), UINT32_C(0x8C1962A1), UINT32_C(0xC9E7691A),
+ UINT32_C(0x30796A71), UINT32_C(0xA75A12EE), UINT32_C(0xD453EF19) },
+ { UINT32_C(0x13AE4964), UINT32_C(0x535F42C2), UINT32_C(0x0DA9586A),
+ UINT32_C(0x86831C3C), UINT32_C(0xE39A7A58), UINT32_C(0xB7F1EF35),
+ UINT32_C(0xD459B91A), UINT32_C(0xA2789AE2), UINT32_C(0x02FD429D),
+ UINT32_C(0xEADBCA7F), UINT32_C(0x65290F57), UINT32_C(0x94F215D4) } },
+ { { UINT32_C(0x1CFB79AC), UINT32_C(0x37ED2BE5), UINT32_C(0xE7AF84C3),
+ UINT32_C(0x801946F3), UINT32_C(0xE77C2F00), UINT32_C(0xB061AD8A),
+ UINT32_C(0x44DE16A8), UINT32_C(0xE87E1A9A), UINT32_C(0x7EE490FF),
+ UINT32_C(0xDF4F57C8), UINT32_C(0x005993ED), UINT32_C(0x4E793B49) },
+ { UINT32_C(0xBCCB593F), UINT32_C(0xE1036387), UINT32_C(0x95E09B80),
+ UINT32_C(0xF1749411), UINT32_C(0x5AB42F91), UINT32_C(0x59CB20D1),
+ UINT32_C(0xAC0FF033), UINT32_C(0xA738A18D), UINT32_C(0x2AC1E7F4),
+ UINT32_C(0xDA501A2E), UINT32_C(0x84D8A6E0), UINT32_C(0x1B67EDA0) } },
+ { { UINT32_C(0x1080E90B), UINT32_C(0x1D27EFCE), UINT32_C(0x3FD01DC6),
+ UINT32_C(0xA2815246), UINT32_C(0xCAA26D18), UINT32_C(0x99A3FB83),
+ UINT32_C(0xB82BABBE), UINT32_C(0xD27E6133), UINT32_C(0xD783DD60),
+ UINT32_C(0x61030DFD), UINT32_C(0x73C78CB8), UINT32_C(0x295A2913) },
+ { UINT32_C(0x68BE6A92), UINT32_C(0x8707A2CF), UINT32_C(0xEEB3474A),
+ UINT32_C(0xC9C2FB98), UINT32_C(0xA2B176B8), UINT32_C(0x7C3FD412),
+ UINT32_C(0xC7202101), UINT32_C(0xD5B52E2F), UINT32_C(0xF0A6D536),
+ UINT32_C(0x24A63030), UINT32_C(0x04648EC0), UINT32_C(0x05842DE3) } },
+ { { UINT32_C(0x30577AC9), UINT32_C(0x67477CDC), UINT32_C(0x244F92A8),
+ UINT32_C(0x51DD9775), UINT32_C(0x917EEC66), UINT32_C(0x31FD60B9),
+ UINT32_C(0xD66C5C1D), UINT32_C(0xACD95BD4), UINT32_C(0xBF9508BA),
+ UINT32_C(0x2E0551F3), UINT32_C(0x688CB243), UINT32_C(0x121168E1) },
+ { UINT32_C(0x4540D230), UINT32_C(0x8C039740), UINT32_C(0x009ECDF9),
+ UINT32_C(0xC4ED3CF6), UINT32_C(0x44DB62AF), UINT32_C(0x191825E1),
+ UINT32_C(0xC4A030DA), UINT32_C(0x3EE8ACAB), UINT32_C(0x94081504),
+ UINT32_C(0x8AB154A8), UINT32_C(0x486C9CD0), UINT32_C(0x1FE09E4B) } },
+ { { UINT32_C(0xD113450B), UINT32_C(0x512F82F9), UINT32_C(0x2DBC9197),
+ UINT32_C(0x5878C901), UINT32_C(0xE13F355B), UINT32_C(0xDB87412B),
+ UINT32_C(0x935B8A5E), UINT32_C(0x0A0A4A9B), UINT32_C(0xF25A5351),
+ UINT32_C(0x818587BD), UINT32_C(0x31E3D9C7), UINT32_C(0xE8079310) },
+ { UINT32_C(0x611BC1B1), UINT32_C(0x8B1D47C7), UINT32_C(0x72A823F2),
+ UINT32_C(0x51722B58), UINT32_C(0x53B36B3E), UINT32_C(0x6F97EE8A),
+ UINT32_C(0x946DD453), UINT32_C(0x6E085AAC), UINT32_C(0xE65E6533),
+ UINT32_C(0x2EC5057D), UINT32_C(0x4BB18801), UINT32_C(0xF82D9D71) } },
+ { { UINT32_C(0x8BA5AA8E), UINT32_C(0xAD81FA93), UINT32_C(0x8F7AA69E),
+ UINT32_C(0x723E628E), UINT32_C(0xEF35937C), UINT32_C(0x0BA7C2DE),
+ UINT32_C(0x6DECFB40), UINT32_C(0x83A43EC5), UINT32_C(0xE60C4F2D),
+ UINT32_C(0xF520F849), UINT32_C(0x457E3B5E), UINT32_C(0x8260E8AE) },
+ { UINT32_C(0xBF1D9ED7), UINT32_C(0x7CE874F0), UINT32_C(0x7F1A5466),
+ UINT32_C(0x5FDE3553), UINT32_C(0x0C162DBB), UINT32_C(0x5A63777C),
+ UINT32_C(0xDAD87289), UINT32_C(0x0FD04F8C), UINT32_C(0x640761D5),
+ UINT32_C(0xCA2D9E0E), UINT32_C(0x38501ADB), UINT32_C(0x4615CFF8) } },
+ { { UINT32_C(0x110B4A25), UINT32_C(0x9422789B), UINT32_C(0x70AD8CC1),
+ UINT32_C(0x5C26779F), UINT32_C(0xEC4F1E14), UINT32_C(0x4EE6A748),
+ UINT32_C(0x5C7AB5E0), UINT32_C(0xFB584A0D), UINT32_C(0xFB21EE66),
+ UINT32_C(0xED1DCB0B), UINT32_C(0x11C6863C), UINT32_C(0xDBED1F00) },
+ { UINT32_C(0xB1B1D187), UINT32_C(0xD2969269), UINT32_C(0xAFE964E6),
+ UINT32_C(0xF7D0C3F2), UINT32_C(0x12BB865E), UINT32_C(0xE05EE93F),
+ UINT32_C(0xED79118E), UINT32_C(0x1AFB7BEE), UINT32_C(0x0F0FE453),
+ UINT32_C(0x220AF138), UINT32_C(0x52782AB9), UINT32_C(0x1463AA1A) } },
+ { { UINT32_C(0xD7DBE5F9), UINT32_C(0x7C139D56), UINT32_C(0x0B83685B),
+ UINT32_C(0xFC16E611), UINT32_C(0x9018463C), UINT32_C(0xFA723C02),
+ UINT32_C(0x840BF5D7), UINT32_C(0xC472458C), UINT32_C(0x0AF07591),
+ UINT32_C(0x4D809359), UINT32_C(0x3308DFD9), UINT32_C(0x418D8830) },
+ { UINT32_C(0x0C365AE3), UINT32_C(0x9B381E04), UINT32_C(0xF8190FD1),
+ UINT32_C(0x3780BF33), UINT32_C(0xDD03E854), UINT32_C(0x45397418),
+ UINT32_C(0x4E51E491), UINT32_C(0xA95D030F), UINT32_C(0xE3286CEA),
+ UINT32_C(0x87C8C686), UINT32_C(0x900B5F83), UINT32_C(0x01C773BF) } },
+ { { UINT32_C(0x78673B02), UINT32_C(0xDABE3475), UINT32_C(0xF6E7395E),
+ UINT32_C(0x4F0F25CE), UINT32_C(0xD181AD45), UINT32_C(0x3117ABB9),
+ UINT32_C(0xAA13DE0B), UINT32_C(0x4B559F88), UINT32_C(0xEA7C9745),
+ UINT32_C(0xFD8EFE78), UINT32_C(0x5DD21682), UINT32_C(0x08060047) },
+ { UINT32_C(0xD4C86FFC), UINT32_C(0xC0F5DE4B), UINT32_C(0xF21AB6A2),
+ UINT32_C(0x4BB14B1E), UINT32_C(0xF50C1D12), UINT32_C(0xACB53A6C),
+ UINT32_C(0x5CC9162E), UINT32_C(0x46AAC450), UINT32_C(0x2DE240B6),
+ UINT32_C(0x049C51E0), UINT32_C(0xE383C3B0), UINT32_C(0xBB2DC016) } },
+ { { UINT32_C(0x8E438C92), UINT32_C(0xA3C56AD2), UINT32_C(0xB2CEAF1A),
+ UINT32_C(0x7C43F98F), UINT32_C(0xE2150778), UINT32_C(0x397C44F7),
+ UINT32_C(0x71A24131), UINT32_C(0x48D17AB7), UINT32_C(0x1E2ACDA9),
+ UINT32_C(0xCC513863), UINT32_C(0xF0C9BAC9), UINT32_C(0x2C76A55E) },
+ { UINT32_C(0x7EA4BB7B), UINT32_C(0x4D74CDCE), UINT32_C(0xB1B3C2BA),
+ UINT32_C(0x834BD5BF), UINT32_C(0xCCC310A4), UINT32_C(0x46E2911E),
+ UINT32_C(0x0FC1BF13), UINT32_C(0xD3DE84AA), UINT32_C(0x80A03AD3),
+ UINT32_C(0x27F2892F), UINT32_C(0x3BD2F08B), UINT32_C(0x85B47620) } },
+ { { UINT32_C(0x567AF533), UINT32_C(0xAB1CB818), UINT32_C(0xBAC2705A),
+ UINT32_C(0x273B4537), UINT32_C(0x22C84AB6), UINT32_C(0x133066C4),
+ UINT32_C(0x4830BFC1), UINT32_C(0xC3590DE6), UINT32_C(0x5E4742D0),
+ UINT32_C(0xEA297869), UINT32_C(0x4F3164C0), UINT32_C(0xF6D8C694) },
+ { UINT32_C(0xC1249588), UINT32_C(0x09E85F3D), UINT32_C(0x4EC64DF7),
+ UINT32_C(0x6C2BB05D), UINT32_C(0x8B78000F), UINT32_C(0xD267115E),
+ UINT32_C(0xC7E4A316), UINT32_C(0x07C5D7AE), UINT32_C(0x4619E5BD),
+ UINT32_C(0xCB1187BA), UINT32_C(0xA43F7EEE), UINT32_C(0x57B1D4EF) } },
+ { { UINT32_C(0xC8176A96), UINT32_C(0x3618891F), UINT32_C(0xE5808B97),
+ UINT32_C(0x62C4B084), UINT32_C(0x4DD95D6E), UINT32_C(0xDE558546),
+ UINT32_C(0x730B2EA4), UINT32_C(0x27A8133E), UINT32_C(0x6AF318A0),
+ UINT32_C(0xE07CEEC3), UINT32_C(0xCE24FD2C), UINT32_C(0x0ACC1286) },
+ { UINT32_C(0xDD4D307C), UINT32_C(0x8A48FE4A), UINT32_C(0x18CDE0DA),
+ UINT32_C(0x71A9BA9C), UINT32_C(0xD5D79747), UINT32_C(0x655E2B66),
+ UINT32_C(0xA79AEDC7), UINT32_C(0x409FE856), UINT32_C(0xD287E5CF),
+ UINT32_C(0xC5A9F244), UINT32_C(0x4E82EC39), UINT32_C(0xCCE10384) } },
+ { { UINT32_C(0xF25D364C), UINT32_C(0x00675BA7), UINT32_C(0x68D36BDF),
+ UINT32_C(0x7A7F1629), UINT32_C(0xA9E23F29), UINT32_C(0x35EC468A),
+ UINT32_C(0x2D926E6C), UINT32_C(0xF797AC50), UINT32_C(0x4B4F4376),
+ UINT32_C(0x639BA453), UINT32_C(0x51FF9519), UINT32_C(0xD71B430F) },
+ { UINT32_C(0x2CF5635C), UINT32_C(0xB8C439EC), UINT32_C(0x81980393),
+ UINT32_C(0x0CE4C8D1), UINT32_C(0x64123B15), UINT32_C(0x4C5362A9),
+ UINT32_C(0xFFDCF096), UINT32_C(0x6E0421E0), UINT32_C(0x10D1F914),
+ UINT32_C(0x624A855F), UINT32_C(0x614DCD29), UINT32_C(0x7D8F3AB7) } },
+ { { UINT32_C(0xB3493CE0), UINT32_C(0xD9219ADA), UINT32_C(0x52F09AE5),
+ UINT32_C(0x971B243A), UINT32_C(0xE24E3674), UINT32_C(0xC16C9BF8),
+ UINT32_C(0xCE68C7CD), UINT32_C(0x026D408D), UINT32_C(0x358209E3),
+ UINT32_C(0xF9B33DD9), UINT32_C(0xF3B2A206), UINT32_C(0x02D0595D) },
+ { UINT32_C(0x60D15640), UINT32_C(0xBF994271), UINT32_C(0x15B5466A),
+ UINT32_C(0x6DA7A04E), UINT32_C(0x1CADB50D), UINT32_C(0x03AA4ED8),
+ UINT32_C(0x129A4253), UINT32_C(0x1548F029), UINT32_C(0xB842865A),
+ UINT32_C(0x41741F7E), UINT32_C(0xA3F88C98), UINT32_C(0x859FE0A4) } },
+ { { UINT32_C(0x05FD7553), UINT32_C(0x80DE085A), UINT32_C(0xB897566B),
+ UINT32_C(0x4A4AB91E), UINT32_C(0x2F1C173F), UINT32_C(0x33BCD475),
+ UINT32_C(0xC100C013), UINT32_C(0x4E238896), UINT32_C(0xD614B34B),
+ UINT32_C(0x1C88500D), UINT32_C(0xC3BA9E23), UINT32_C(0x0401C5F6) },
+ { UINT32_C(0xD0AF0DE5), UINT32_C(0x8E8003C4), UINT32_C(0x9D0DCBB9),
+ UINT32_C(0x19B1DFB5), UINT32_C(0xEBEF7AB6), UINT32_C(0x4A3640A9),
+ UINT32_C(0x959B15F6), UINT32_C(0xEDAFD65B), UINT32_C(0x7FB95821),
+ UINT32_C(0x8092EF7F), UINT32_C(0xCE2E45D1), UINT32_C(0xAB8DD52E) } },
+ { { UINT32_C(0xB9CFE6BF), UINT32_C(0xD1F2D6B8), UINT32_C(0x00073F6F),
+ UINT32_C(0x6358810B), UINT32_C(0xD712106E), UINT32_C(0x5FCE5993),
+ UINT32_C(0x1C024C91), UINT32_C(0x5EE6B271), UINT32_C(0x453DB663),
+ UINT32_C(0xD0248FF5), UINT32_C(0xADB835E8), UINT32_C(0xD6D81CB2) },
+ { UINT32_C(0xFDFCB4C7), UINT32_C(0x8696CFEC), UINT32_C(0x53BC9045),
+ UINT32_C(0x696B7FCB), UINT32_C(0xDDA56981), UINT32_C(0xAB4D3807),
+ UINT32_C(0x1E4B943B), UINT32_C(0x2F998052), UINT32_C(0x166B7F18),
+ UINT32_C(0x8AA76ADB), UINT32_C(0x52A2D7ED), UINT32_C(0x63934301) } },
+ },
+ {
+ { { UINT32_C(0xA368EFF6), UINT32_C(0xBBCCCE39), UINT32_C(0x8CEB5C43),
+ UINT32_C(0xD8CAABDF), UINT32_C(0xD2252FDA), UINT32_C(0x9EAE35A5),
+ UINT32_C(0x54E7DD49), UINT32_C(0xA8F4F209), UINT32_C(0x295100FD),
+ UINT32_C(0xA56D72A6), UINT32_C(0x56767727), UINT32_C(0x20FC1FE8) },
+ { UINT32_C(0x0BBAA5AB), UINT32_C(0xBF60B248), UINT32_C(0x313911F2),
+ UINT32_C(0xA4F3CE5A), UINT32_C(0xB93DAB9C), UINT32_C(0xC2A67AD4),
+ UINT32_C(0x22D71F39), UINT32_C(0x18CD0ED0), UINT32_C(0x5F304DB2),
+ UINT32_C(0x04380C42), UINT32_C(0x6729C821), UINT32_C(0x26420CBB) } },
+ { { UINT32_C(0xBDFBCAE8), UINT32_C(0x26BD07D6), UINT32_C(0xDF01A80A),
+ UINT32_C(0x10B5173F), UINT32_C(0x6798B96C), UINT32_C(0xD831C546),
+ UINT32_C(0x1D3F3859), UINT32_C(0x1D6B4108), UINT32_C(0x991B9EC7),
+ UINT32_C(0x501D38EC), UINT32_C(0xD78431A9), UINT32_C(0x26319283) },
+ { UINT32_C(0x118B343C), UINT32_C(0x8B85BAF7), UINT32_C(0x58DEF7D0),
+ UINT32_C(0x4696CDDD), UINT32_C(0x7ACDCF58), UINT32_C(0xEFC7C110),
+ UINT32_C(0x848D5842), UINT32_C(0xD9AF415C), UINT32_C(0x0AC7FDAC),
+ UINT32_C(0x6B5A06BC), UINT32_C(0xA344319B), UINT32_C(0x7D623E0D) } },
+ { { UINT32_C(0x0C9D3547), UINT32_C(0x4C0D7806), UINT32_C(0xCF2AED47),
+ UINT32_C(0x993F048D), UINT32_C(0xE4B57E22), UINT32_C(0x5217C453),
+ UINT32_C(0xF4172B28), UINT32_C(0xB4669E35), UINT32_C(0x49F999F8),
+ UINT32_C(0x509A3CD0), UINT32_C(0x87C69D41), UINT32_C(0xD19F8632) },
+ { UINT32_C(0x4C8FDED0), UINT32_C(0xE14D01E8), UINT32_C(0xEAFD9E1C),
+ UINT32_C(0x342880FD), UINT32_C(0x70DC2BF0), UINT32_C(0x0E17BFF2),
+ UINT32_C(0xC0186400), UINT32_C(0x46560B7B), UINT32_C(0x49A4DD34),
+ UINT32_C(0xE28C7B9C), UINT32_C(0x0F325D06), UINT32_C(0x18211916) } },
+ { { UINT32_C(0xD7E02E18), UINT32_C(0x46D70888), UINT32_C(0xD9F11FD9),
+ UINT32_C(0x7C806954), UINT32_C(0x4FBEA271), UINT32_C(0xE4948FCA),
+ UINT32_C(0xBD80A9DF), UINT32_C(0x7D6C7765), UINT32_C(0xF3871C71),
+ UINT32_C(0x1B470EA6), UINT32_C(0x8330A570), UINT32_C(0xD62DE244) },
+ { UINT32_C(0xC659C3A7), UINT32_C(0xDAECDDC1), UINT32_C(0x077F7AFC),
+ UINT32_C(0x8621E513), UINT32_C(0xCAEEEF13), UINT32_C(0x56C7CD84),
+ UINT32_C(0xC685A356), UINT32_C(0xC60C910F), UINT32_C(0x9DD93DDC),
+ UINT32_C(0xE68BC5C5), UINT32_C(0xFEB64895), UINT32_C(0xD904E89F) } },
+ { { UINT32_C(0x8BA7917A), UINT32_C(0x75D874FB), UINT32_C(0xFD043BD4),
+ UINT32_C(0x18FA7F53), UINT32_C(0x1FC3979E), UINT32_C(0x212A0AD7),
+ UINT32_C(0x5D6EAC0E), UINT32_C(0x5703A7D9), UINT32_C(0x017DEAD5),
+ UINT32_C(0x222F7188), UINT32_C(0x0F6C1817), UINT32_C(0x1EC687B7) },
+ { UINT32_C(0x238BACB6), UINT32_C(0x23412FC3), UINT32_C(0x54CED154),
+ UINT32_C(0xB85D70E9), UINT32_C(0xBDA674D0), UINT32_C(0xD4E06722),
+ UINT32_C(0x36F5A0C2), UINT32_C(0x3EA5F178), UINT32_C(0xF5C6D2CA),
+ UINT32_C(0x7E7D79CF), UINT32_C(0x3DBB3C73), UINT32_C(0x1FFF9464) } },
+ { { UINT32_C(0xF163E4A8), UINT32_C(0x916E19D0), UINT32_C(0x1489DF17),
+ UINT32_C(0x1E6740E7), UINT32_C(0x339F3A47), UINT32_C(0x1EAF9723),
+ UINT32_C(0x124B8DAD), UINT32_C(0x22F0ED1A), UINT32_C(0x49C3DD04),
+ UINT32_C(0x39C9166C), UINT32_C(0xCE1E9ACC), UINT32_C(0x628E7FD4) },
+ { UINT32_C(0x40031676), UINT32_C(0x124DDF27), UINT32_C(0x1EDDB9BE),
+ UINT32_C(0x00256939), UINT32_C(0xD360B0DA), UINT32_C(0xD39E25E7),
+ UINT32_C(0x4AA6C4C9), UINT32_C(0x6E3015A8), UINT32_C(0x623EDA09),
+ UINT32_C(0xC6A2F643), UINT32_C(0x50AA99FB), UINT32_C(0xBEFF2D12) } },
+ { { UINT32_C(0x93EE8089), UINT32_C(0x1FEEF7CE), UINT32_C(0x252DD7BD),
+ UINT32_C(0xC6B180BC), UINT32_C(0x1788F051), UINT32_C(0xA16FB20B),
+ UINT32_C(0xE046ED39), UINT32_C(0xD86FD392), UINT32_C(0x9378CE1D),
+ UINT32_C(0xDA0A3611), UINT32_C(0xA5F7A61D), UINT32_C(0x121EF3E7) },
+ { UINT32_C(0x92D13CAE), UINT32_C(0x94D22061), UINT32_C(0x77C72E08),
+ UINT32_C(0x5076046A), UINT32_C(0x7D2308B9), UINT32_C(0xF18BC233),
+ UINT32_C(0x17F977B1), UINT32_C(0x004DB3C5), UINT32_C(0x0471C11D),
+ UINT32_C(0xD05AE399), UINT32_C(0x85CD1726), UINT32_C(0x86A2A557) } },
+ { { UINT32_C(0x72107804), UINT32_C(0xB8D9B286), UINT32_C(0x3303B79B),
+ UINT32_C(0xB5A7C413), UINT32_C(0x5FA37DED), UINT32_C(0x927EEF78),
+ UINT32_C(0xAD67DABA), UINT32_C(0xA1C5CF1E), UINT32_C(0x7360E7C7),
+ UINT32_C(0xAA5E3FB2), UINT32_C(0x0A0C0993), UINT32_C(0x8354E61A) },
+ { UINT32_C(0x7F5458CC), UINT32_C(0x2EC73AF9), UINT32_C(0x48474325),
+ UINT32_C(0xDE4CB488), UINT32_C(0x7209BC69), UINT32_C(0x2DD134C7),
+ UINT32_C(0x451A2ABE), UINT32_C(0xB70C5567), UINT32_C(0x8E293018),
+ UINT32_C(0x2CD1B200), UINT32_C(0xD33C0D72), UINT32_C(0x15F8DA7A) } },
+ { { UINT32_C(0xA8790657), UINT32_C(0x5DC386D0), UINT32_C(0xBC4D88BB),
+ UINT32_C(0xA4FDF676), UINT32_C(0x48BC6C49), UINT32_C(0x1B21F38F),
+ UINT32_C(0x543A7003), UINT32_C(0xCDCC7FAA), UINT32_C(0x8C9CF72C),
+ UINT32_C(0xEA97E7AA), UINT32_C(0x50D938A8), UINT32_C(0xA6B883F4) },
+ { UINT32_C(0xA3A10F27), UINT32_C(0x51936F3A), UINT32_C(0xDECC76BF),
+ UINT32_C(0x0170785F), UINT32_C(0x908C578A), UINT32_C(0x7539ECE1),
+ UINT32_C(0x0F3E8C25), UINT32_C(0x5D9C8A8E), UINT32_C(0x9E4717A7),
+ UINT32_C(0x8681B43B), UINT32_C(0xA9D83E39), UINT32_C(0x94F42507) } },
+ { { UINT32_C(0xA55ADDE7), UINT32_C(0xBBE11CA8), UINT32_C(0x3BC0896B),
+ UINT32_C(0x39E6F5CF), UINT32_C(0x1D2D8D94), UINT32_C(0x1447314E),
+ UINT32_C(0x5B012F8A), UINT32_C(0x45B48125), UINT32_C(0x08AD5283),
+ UINT32_C(0x41AD23FA), UINT32_C(0x41D13774), UINT32_C(0x837243E2) },
+ { UINT32_C(0xBADCAA46), UINT32_C(0x1FC0BD9D), UINT32_C(0x26E84CAE),
+ UINT32_C(0x8DF164ED), UINT32_C(0x41017176), UINT32_C(0x8FF70EC0),
+ UINT32_C(0x5C848BA7), UINT32_C(0x23AD4BCE), UINT32_C(0x97A19CBB),
+ UINT32_C(0x89246FDE), UINT32_C(0x78397991), UINT32_C(0xA5EF987B) } },
+ { { UINT32_C(0x4757964D), UINT32_C(0x111AF1B7), UINT32_C(0xDDBBF258),
+ UINT32_C(0x1D25D351), UINT32_C(0x7D2B06D6), UINT32_C(0x4161E776),
+ UINT32_C(0x1CAC0C5B), UINT32_C(0x6EFD2691), UINT32_C(0x211BFAEB),
+ UINT32_C(0x633B95DB), UINT32_C(0xE2BDF701), UINT32_C(0x9BEDFA5A) },
+ { UINT32_C(0x73E099C8), UINT32_C(0xADAC2B0B), UINT32_C(0xBFB16BFF),
+ UINT32_C(0x436F0023), UINT32_C(0x30F55854), UINT32_C(0xB91B1002),
+ UINT32_C(0xF4C6C8B7), UINT32_C(0xAF6A2097), UINT32_C(0x3AD7B3D9),
+ UINT32_C(0x3FF65CED), UINT32_C(0x330E56DF), UINT32_C(0x6FA2626F) } },
+ { { UINT32_C(0xFFCCFD07), UINT32_C(0x3D28BF2D), UINT32_C(0xD989603B),
+ UINT32_C(0x0514F6FF), UINT32_C(0x5514787A), UINT32_C(0xB9519629),
+ UINT32_C(0xC3DB4E9C), UINT32_C(0xA1848121), UINT32_C(0x2A3D4595),
+ UINT32_C(0x47FE2E39), UINT32_C(0x11B73ED4), UINT32_C(0x506F5D82) },
+ { UINT32_C(0xA600D8BB), UINT32_C(0xA2257AE7), UINT32_C(0x0F9F122C),
+ UINT32_C(0xD659DBD1), UINT32_C(0x64DF160F), UINT32_C(0xDB0FDC67),
+ UINT32_C(0x7CB19690), UINT32_C(0xFF379339), UINT32_C(0x98E72EC1),
+ UINT32_C(0xDF4366B8), UINT32_C(0xDF437EB8), UINT32_C(0x97E72BEC) } },
+ { { UINT32_C(0x1C81E5D9), UINT32_C(0x81DCEA27), UINT32_C(0x6717FC49),
+ UINT32_C(0x7E1B6CDA), UINT32_C(0x11EAE80D), UINT32_C(0xAA36B3B5),
+ UINT32_C(0x3CD7CBB3), UINT32_C(0x1306687C), UINT32_C(0xC4E89064),
+ UINT32_C(0xED670235), UINT32_C(0x58A94760), UINT32_C(0x9D3B0009) },
+ { UINT32_C(0xE6A6333C), UINT32_C(0x5A64E158), UINT32_C(0x49453203),
+ UINT32_C(0x1A8B4A36), UINT32_C(0x1F77CC21), UINT32_C(0xF1CAD724),
+ UINT32_C(0x70518EF7), UINT32_C(0x693EBB4B), UINT32_C(0x0F39C91A),
+ UINT32_C(0xFB47BD81), UINT32_C(0xFA4BC64B), UINT32_C(0xCFE63DA2) } },
+ { { UINT32_C(0xEAA66108), UINT32_C(0x82C1C684), UINT32_C(0x4CFE79FC),
+ UINT32_C(0xE3226218), UINT32_C(0x849C720E), UINT32_C(0x3F28B72B),
+ UINT32_C(0x8FEE1CA8), UINT32_C(0x137FB355), UINT32_C(0xE4F90C4E),
+ UINT32_C(0x4D18A9CD), UINT32_C(0xCC3E46FA), UINT32_C(0xC0344227) },
+ { UINT32_C(0x79CDA392), UINT32_C(0x4FD5C08E), UINT32_C(0x8ADC87B5),
+ UINT32_C(0x65DB20DB), UINT32_C(0x916C1B84), UINT32_C(0x86F95D5B),
+ UINT32_C(0x17BB2B7C), UINT32_C(0x7EDA3871), UINT32_C(0x669A533B),
+ UINT32_C(0x18CCF7E7), UINT32_C(0xECAD0E06), UINT32_C(0x5E92421C) } },
+ { { UINT32_C(0x4174B08B), UINT32_C(0x26063E12), UINT32_C(0x70DE8E4D),
+ UINT32_C(0xE621D9BE), UINT32_C(0x5ECDF350), UINT32_C(0xAEA0FD0F),
+ UINT32_C(0x9C20E5C9), UINT32_C(0x0D9F69E4), UINT32_C(0x0BBE2918),
+ UINT32_C(0xD3DADEB9), UINT32_C(0x58AA2F71), UINT32_C(0xD7B9B5DB) },
+ { UINT32_C(0x3364CAF8), UINT32_C(0x7A971DD7), UINT32_C(0xC25D4BE4),
+ UINT32_C(0x702616A3), UINT32_C(0xA9E30071), UINT32_C(0xA30F0FA1),
+ UINT32_C(0x5573BC69), UINT32_C(0x98AB2438), UINT32_C(0x6FEC2E22),
+ UINT32_C(0xCBC63CDF), UINT32_C(0xCC901B9B), UINT32_C(0x965F90ED) } },
+ { { UINT32_C(0x71E15BB3), UINT32_C(0xD53B592D), UINT32_C(0x8820E0D0),
+ UINT32_C(0x1F03C0E9), UINT32_C(0x3CCCB726), UINT32_C(0xCE93947D),
+ UINT32_C(0x1D547590), UINT32_C(0x2790FEE0), UINT32_C(0xC59CDD7A),
+ UINT32_C(0x4401D847), UINT32_C(0xA926DD9D), UINT32_C(0x72D69120) },
+ { UINT32_C(0x4229F289), UINT32_C(0x38B8F21D), UINT32_C(0x7FE978AF),
+ UINT32_C(0x9F412E40), UINT32_C(0xCDB59AF1), UINT32_C(0xAE07901B),
+ UINT32_C(0xD1D4715E), UINT32_C(0x1E6BE5EB), UINT32_C(0x18C96BEF),
+ UINT32_C(0x3715BD8B), UINT32_C(0xE11B3798), UINT32_C(0x4B71F6E6) } },
+ },
+ {
+ { { UINT32_C(0xF0CE2DF4), UINT32_C(0x11A8FDE5), UINT32_C(0xFA8D26DF),
+ UINT32_C(0xBC70CA3E), UINT32_C(0xC74DFE82), UINT32_C(0x6818C275),
+ UINT32_C(0x38373A50), UINT32_C(0x2B0294AC), UINT32_C(0xE8E5F88F),
+ UINT32_C(0x584C4061), UINT32_C(0x7342383A), UINT32_C(0x1C05C1CA) },
+ { UINT32_C(0x911430EC), UINT32_C(0x263895B3), UINT32_C(0xA5171453),
+ UINT32_C(0xEF9B0032), UINT32_C(0x84DA7F0C), UINT32_C(0x144359DA),
+ UINT32_C(0x924A09F2), UINT32_C(0x76E3095A), UINT32_C(0xD69AD835),
+ UINT32_C(0x612986E3), UINT32_C(0x392122AF), UINT32_C(0x70E03ADA) } },
+ { { UINT32_C(0x67AAD17B), UINT32_C(0xFEB707EE), UINT32_C(0x83042995),
+ UINT32_C(0xBB21B287), UINT32_C(0x9A0D32BA), UINT32_C(0x26DE1645),
+ UINT32_C(0x1FFB9266), UINT32_C(0x9A2FF38A), UINT32_C(0x8F578B4A),
+ UINT32_C(0x4E5AD96D), UINT32_C(0x883E7443), UINT32_C(0x26CC0655) },
+ { UINT32_C(0x2EE9367A), UINT32_C(0x1D8EECAB), UINT32_C(0x881DE2F8),
+ UINT32_C(0x42B84337), UINT32_C(0xD758AE41), UINT32_C(0xE49B2FAE),
+ UINT32_C(0x4A85D867), UINT32_C(0x6A9A2290), UINT32_C(0xE68CBA86),
+ UINT32_C(0x2FB89DCE), UINT32_C(0x7F09A982), UINT32_C(0xBC252635) } },
+ { { UINT32_C(0x8C61AAAC), UINT32_C(0xADC79436), UINT32_C(0x5E926563),
+ UINT32_C(0x24C7FD13), UINT32_C(0x0406C129), UINT32_C(0xEF9FAAA4),
+ UINT32_C(0x8B658D3C), UINT32_C(0xF4E6388C), UINT32_C(0x1E435BAF),
+ UINT32_C(0x7262BEB4), UINT32_C(0xFDAEAC99), UINT32_C(0x3BF622CC) },
+ { UINT32_C(0x4E1AEDDC), UINT32_C(0xD359F7D8), UINT32_C(0xD78C17B7),
+ UINT32_C(0x05DC4F8C), UINT32_C(0x29498BA5), UINT32_C(0xB18CF032),
+ UINT32_C(0x85BF35AD), UINT32_C(0xC67388CA), UINT32_C(0x62AA4BC8),
+ UINT32_C(0x8A7A6AA2), UINT32_C(0x72F4627A), UINT32_C(0x0B8F458E) } },
+ { { UINT32_C(0xC68E4488), UINT32_C(0x3FB812EE), UINT32_C(0x60EF7281),
+ UINT32_C(0x53C5EAA4), UINT32_C(0x8FBEFBE4), UINT32_C(0xE5724183),
+ UINT32_C(0xA4B24A05), UINT32_C(0x2B7D49F4), UINT32_C(0x710C0A43),
+ UINT32_C(0x23B138D0), UINT32_C(0xA85EC1DB), UINT32_C(0x16A5B4C1) },
+ { UINT32_C(0x305FEB02), UINT32_C(0x7CC1F3D7), UINT32_C(0x5B6C1B54),
+ UINT32_C(0x52F7947D), UINT32_C(0x8F56981C), UINT32_C(0x1BDA2312),
+ UINT32_C(0xB4080A01), UINT32_C(0x68663EAE), UINT32_C(0x9F999B7F),
+ UINT32_C(0x8DD7BA7E), UINT32_C(0xB686580C), UINT32_C(0xD8768D19) } },
+ { { UINT32_C(0x7AFDDA94), UINT32_C(0xBCD0E0AD), UINT32_C(0x34A30687),
+ UINT32_C(0x95A0DBBE), UINT32_C(0x8C5E2665), UINT32_C(0xBBE3C3DF),
+ UINT32_C(0xEBF2BC16), UINT32_C(0x742BECD8), UINT32_C(0x3FA163A6),
+ UINT32_C(0x300CEB48), UINT32_C(0x4663354B), UINT32_C(0x0C5D02EE) },
+ { UINT32_C(0xB5E606A4), UINT32_C(0xE4FB9AD6), UINT32_C(0xCF49FF95),
+ UINT32_C(0x93F507B8), UINT32_C(0x585C193B), UINT32_C(0x9406A90C),
+ UINT32_C(0x4ECF9517), UINT32_C(0xAD1440C1), UINT32_C(0x9CEA53F1),
+ UINT32_C(0x184CB475), UINT32_C(0x8EF11302), UINT32_C(0x6855C474) } },
+ { { UINT32_C(0xEDCAFA52), UINT32_C(0x00ECB523), UINT32_C(0x086F69D3),
+ UINT32_C(0x0DA0AE0E), UINT32_C(0xC242F347), UINT32_C(0xC384DE15),
+ UINT32_C(0x848C12B7), UINT32_C(0xFB050E6E), UINT32_C(0x64E015CE),
+ UINT32_C(0x22F67654), UINT32_C(0x7CA122F2), UINT32_C(0xCBDC2A48) },
+ { UINT32_C(0x445FB02C), UINT32_C(0xA940D973), UINT32_C(0x3767D89D),
+ UINT32_C(0x00F31E78), UINT32_C(0x613DABDD), UINT32_C(0x2B65A237),
+ UINT32_C(0xC875AE09), UINT32_C(0x2BE0AB05), UINT32_C(0xBA204F8E),
+ UINT32_C(0xB22E54FD), UINT32_C(0x0F7687B9), UINT32_C(0x65E2029D) } },
+ { { UINT32_C(0x1855A71C), UINT32_C(0xFFD82538), UINT32_C(0x438BD8D8),
+ UINT32_C(0x26A330B3), UINT32_C(0xF9D8C5F9), UINT32_C(0x89628311),
+ UINT32_C(0x953738A0), UINT32_C(0x8D5FB9CF), UINT32_C(0xEDFCD4E5),
+ UINT32_C(0xCB7159C9), UINT32_C(0x2064C7C2), UINT32_C(0xD64E5230) },
+ { UINT32_C(0x689F3CFE), UINT32_C(0xF858ED80), UINT32_C(0x56128B67),
+ UINT32_C(0x4830E309), UINT32_C(0xE0E90688), UINT32_C(0x2E1692DA),
+ UINT32_C(0xCA9CC232), UINT32_C(0xAB818913), UINT32_C(0xA5D229A6),
+ UINT32_C(0xE2E30C23), UINT32_C(0x0E740E23), UINT32_C(0xA544E8B1) } },
+ { { UINT32_C(0xDC61E6CC), UINT32_C(0x1C15E569), UINT32_C(0x58FC7800),
+ UINT32_C(0x8FD72967), UINT32_C(0x37A9DFC5), UINT32_C(0xE61E7DB7),
+ UINT32_C(0x5AFD7822), UINT32_C(0x3F34A9C6), UINT32_C(0x19E80773),
+ UINT32_C(0x0A112742), UINT32_C(0x4760FC58), UINT32_C(0xA353460C) },
+ { UINT32_C(0xB3124C71), UINT32_C(0x2FB7DEEB), UINT32_C(0x2D4009CC),
+ UINT32_C(0x48463627), UINT32_C(0xC3A10370), UINT32_C(0x399D1933),
+ UINT32_C(0x54388DBD), UINT32_C(0x7EB19450), UINT32_C(0x7C2A006A),
+ UINT32_C(0x8ECCE639), UINT32_C(0x55C932A0), UINT32_C(0x3D565DAF) } },
+ { { UINT32_C(0xD9ADAE53), UINT32_C(0xCEF57A9F), UINT32_C(0xF83FD8CD),
+ UINT32_C(0xE2EB27D7), UINT32_C(0x9BBD2DDE), UINT32_C(0x4AC8F719),
+ UINT32_C(0xE91ABFB7), UINT32_C(0x604283AA), UINT32_C(0x34799F87),
+ UINT32_C(0xB6A4E115), UINT32_C(0xE4C2A8F3), UINT32_C(0x2B253224) },
+ { UINT32_C(0xC8782294), UINT32_C(0xC34F8B92), UINT32_C(0xFCC2CB6B),
+ UINT32_C(0xC74D697D), UINT32_C(0xC2C84C46), UINT32_C(0xD990411B),
+ UINT32_C(0x31EA4955), UINT32_C(0x2807B5C6), UINT32_C(0xB9EB27F5),
+ UINT32_C(0x14AE2B93), UINT32_C(0x6163EDFA), UINT32_C(0xF0AE96A7) } },
+ { { UINT32_C(0x42DB7180), UINT32_C(0xA7BDCBB4), UINT32_C(0xEDCA752F),
+ UINT32_C(0xC9FAA41F), UINT32_C(0xE820F401), UINT32_C(0x147F91B4),
+ UINT32_C(0xF5F2645F), UINT32_C(0x1E6CEF86), UINT32_C(0x31FE711D),
+ UINT32_C(0xB4AB4D7F), UINT32_C(0x743EF882), UINT32_C(0xCE68FB3C) },
+ { UINT32_C(0x3EF2FCFF), UINT32_C(0xB9D7D682), UINT32_C(0x020DCAFD),
+ UINT32_C(0xF6893811), UINT32_C(0xBF81E760), UINT32_C(0x30D9A50C),
+ UINT32_C(0xB9B87228), UINT32_C(0x7F247D06), UINT32_C(0x5F40CFC0),
+ UINT32_C(0x143D4FEC), UINT32_C(0x329B2A88), UINT32_C(0x21D78D73) } },
+ { { UINT32_C(0xED3F2055), UINT32_C(0x06B3FF8A), UINT32_C(0x522BE214),
+ UINT32_C(0x50482C77), UINT32_C(0xDDF54620), UINT32_C(0x8DF69CD8),
+ UINT32_C(0xF78A1165), UINT32_C(0x6D1DB204), UINT32_C(0x9AFE6BF2),
+ UINT32_C(0x459AE4A2), UINT32_C(0x24AC871E), UINT32_C(0xC23A9FFD) },
+ { UINT32_C(0x89E85D81), UINT32_C(0xB7FD22E3), UINT32_C(0x122E9978),
+ UINT32_C(0x297F1F6B), UINT32_C(0x144BE1CE), UINT32_C(0xAB283D66),
+ UINT32_C(0xC00C614E), UINT32_C(0xC1F90AC2), UINT32_C(0x3224CD09),
+ UINT32_C(0x5465576E), UINT32_C(0x441B6059), UINT32_C(0x8E8D910D) } },
+ { { UINT32_C(0xAAA228BC), UINT32_C(0xF73A060A), UINT32_C(0x56EFF87D),
+ UINT32_C(0xCF1B0783), UINT32_C(0xA54C9133), UINT32_C(0x11EF17C0),
+ UINT32_C(0x76A4DAA5), UINT32_C(0x9E476B15), UINT32_C(0x8018FB92),
+ UINT32_C(0x5624FEAC), UINT32_C(0xCFEEC1B9), UINT32_C(0x9826A0FC) },
+ { UINT32_C(0x2DFE2046), UINT32_C(0xB732F7FE), UINT32_C(0x3B40DA6A),
+ UINT32_C(0x9260BD9F), UINT32_C(0x4F231773), UINT32_C(0xCC9F908F),
+ UINT32_C(0xDAFC0D55), UINT32_C(0x4827FEB9), UINT32_C(0x538ACE95),
+ UINT32_C(0x07D32E85), UINT32_C(0xB8EDAF37), UINT32_C(0xAD9F897C) } },
+ { { UINT32_C(0xE3415498), UINT32_C(0x2F75B82F), UINT32_C(0xF1015F30),
+ UINT32_C(0xF99CAC5F), UINT32_C(0x7D7F25DE), UINT32_C(0x76640824),
+ UINT32_C(0xEE74C047), UINT32_C(0x714BC9CD), UINT32_C(0x07448879),
+ UINT32_C(0x70F847BF), UINT32_C(0x072165C0), UINT32_C(0xA14481DE) },
+ { UINT32_C(0xDB1140A8), UINT32_C(0x9BFA59E3), UINT32_C(0xFCD13502),
+ UINT32_C(0x7B9C7FF0), UINT32_C(0x68459ABF), UINT32_C(0xF4D7538E),
+ UINT32_C(0xC8FC6AD2), UINT32_C(0xED93A791), UINT32_C(0xB51BD9B2),
+ UINT32_C(0xA8BBE2A8), UINT32_C(0x9FB34008), UINT32_C(0x084B5A27) } },
+ { { UINT32_C(0xEB138C84), UINT32_C(0xB3BB9545), UINT32_C(0x3FC88BFD),
+ UINT32_C(0x59C3489C), UINT32_C(0x85F53EC7), UINT32_C(0x3A97FF63),
+ UINT32_C(0x0AA69C3D), UINT32_C(0x40FDF5A6), UINT32_C(0x53D19668),
+ UINT32_C(0x0E8CCEC7), UINT32_C(0x33FAA661), UINT32_C(0x0AA72EF9) },
+ { UINT32_C(0x9B1E684B), UINT32_C(0xF5C5A6CF), UINT32_C(0x31A22EA1),
+ UINT32_C(0x630F9371), UINT32_C(0xAC60F7EA), UINT32_C(0x06B2AAC2),
+ UINT32_C(0x5BC37D80), UINT32_C(0xB181CAE2), UINT32_C(0x247B13EA),
+ UINT32_C(0x4601A929), UINT32_C(0x5F739797), UINT32_C(0x8A71C386) } },
+ { { UINT32_C(0xAB134786), UINT32_C(0x545387B3), UINT32_C(0x1599B64A),
+ UINT32_C(0x3179BB06), UINT32_C(0x07593574), UINT32_C(0xB0A61986),
+ UINT32_C(0x63FA7C3B), UINT32_C(0xC7E39B21), UINT32_C(0x91585D13),
+ UINT32_C(0xA1173F86), UINT32_C(0xCB9525CD), UINT32_C(0x09D5CC8E) },
+ { UINT32_C(0x8F3A3451), UINT32_C(0xAAD44FFD), UINT32_C(0x25820CC5),
+ UINT32_C(0x702B04F2), UINT32_C(0x1CB66C17), UINT32_C(0xE90CAC49),
+ UINT32_C(0xEE161DC4), UINT32_C(0x40F6B547), UINT32_C(0x1BA4AC4E),
+ UINT32_C(0xC08BB8B4), UINT32_C(0xAE5A6BC1), UINT32_C(0x7DC064FB) } },
+ { { UINT32_C(0x9D76DDC7), UINT32_C(0x90A5E871), UINT32_C(0xEDFC8E2E),
+ UINT32_C(0x39DC8FAE), UINT32_C(0x5B079C62), UINT32_C(0x98467A23),
+ UINT32_C(0x05450C98), UINT32_C(0xE25E3785), UINT32_C(0x96140083),
+ UINT32_C(0x2FE23A4D), UINT32_C(0xE9900312), UINT32_C(0x65CE3B9A) },
+ { UINT32_C(0x6B72B5D9), UINT32_C(0x1D87D088), UINT32_C(0xFD9AFC82),
+ UINT32_C(0x72F53220), UINT32_C(0x9E1F71FA), UINT32_C(0xC63C7C15),
+ UINT32_C(0x8D449637), UINT32_C(0x90DF26EA), UINT32_C(0xC1C2B215),
+ UINT32_C(0x97089F40), UINT32_C(0x42317FAA), UINT32_C(0x83AF2664) } },
+ },
+ {
+ { { UINT32_C(0x8D688E31), UINT32_C(0xFA2DB51A), UINT32_C(0xA09C88D4),
+ UINT32_C(0x225B696C), UINT32_C(0x6059171F), UINT32_C(0x9F88AF1D),
+ UINT32_C(0x782A0993), UINT32_C(0x1C5FEA5E), UINT32_C(0x4EC710D3),
+ UINT32_C(0xE0FB1588), UINT32_C(0xD32CE365), UINT32_C(0xFAF372E5) },
+ { UINT32_C(0x26506F45), UINT32_C(0xD9F896AB), UINT32_C(0x8373C724),
+ UINT32_C(0x8D350338), UINT32_C(0xCA6E7342), UINT32_C(0x1B76992D),
+ UINT32_C(0x6FD0C08B), UINT32_C(0x76338FCA), UINT32_C(0xA00F5C23),
+ UINT32_C(0xC3EA4C65), UINT32_C(0xB316B35B), UINT32_C(0xDFAB29B3) } },
+ { { UINT32_C(0x483AEBF9), UINT32_C(0x84E5541F), UINT32_C(0x49165772),
+ UINT32_C(0x8ADFF7DC), UINT32_C(0x9BEAAD3C), UINT32_C(0xE0A43AD6),
+ UINT32_C(0xF51C2714), UINT32_C(0x97DD1820), UINT32_C(0x57EA5B0C),
+ UINT32_C(0xAC2B4CB4), UINT32_C(0xD11767CA), UINT32_C(0x87DBD011) },
+ { UINT32_C(0xBFC7957A), UINT32_C(0x18CCF36C), UINT32_C(0x1BC79227),
+ UINT32_C(0xD4A08841), UINT32_C(0xD8D292A8), UINT32_C(0x9811CE43),
+ UINT32_C(0xD58C4EE7), UINT32_C(0x72C5FC68), UINT32_C(0xD35C65A7),
+ UINT32_C(0x5BC0F0BE), UINT32_C(0xCBBF9669), UINT32_C(0x0B446DBC) } },
+ { { UINT32_C(0x9CEE9BCE), UINT32_C(0x7EBA3DA6), UINT32_C(0xD5377750),
+ UINT32_C(0x3E2C1248), UINT32_C(0x2B93D8B2), UINT32_C(0x8C917D98),
+ UINT32_C(0x7CAD1F75), UINT32_C(0xCA8FC6AC), UINT32_C(0xA0FF150A),
+ UINT32_C(0x5F581F19), UINT32_C(0xE08327FA), UINT32_C(0x872CC14A) },
+ { UINT32_C(0xE9333188), UINT32_C(0xC774F187), UINT32_C(0x497AF7E8),
+ UINT32_C(0x528ED4AC), UINT32_C(0x8AD72B10), UINT32_C(0xCE036E9B),
+ UINT32_C(0x917986CF), UINT32_C(0x463F9EBB), UINT32_C(0x1325CF9B),
+ UINT32_C(0xBE516328), UINT32_C(0xDD7E5FEA), UINT32_C(0xD28D5C50) } },
+ { { UINT32_C(0xDD58BBE3), UINT32_C(0x714C1D1B), UINT32_C(0x039AFD0F),
+ UINT32_C(0x85BA01AE), UINT32_C(0x6951AC80), UINT32_C(0x7F23EA3A),
+ UINT32_C(0xAC00C837), UINT32_C(0x5C599290), UINT32_C(0xBF24CC1B),
+ UINT32_C(0xF6EFA2B3), UINT32_C(0x1E84462B), UINT32_C(0x393D8E42) },
+ { UINT32_C(0xF8B89453), UINT32_C(0x9BDA627D), UINT32_C(0xB23E0D1B),
+ UINT32_C(0xE66FFF2E), UINT32_C(0xC3B94EC2), UINT32_C(0xD1EE7089),
+ UINT32_C(0x3031699A), UINT32_C(0xF75DBA6E), UINT32_C(0x242B2453),
+ UINT32_C(0x8FF75F79), UINT32_C(0x289BFED4), UINT32_C(0xE721EDEB) } },
+ { { UINT32_C(0xC1390FA8), UINT32_C(0x083215A1), UINT32_C(0x6DCE8CE0),
+ UINT32_C(0x901D686A), UINT32_C(0x837073FF), UINT32_C(0x4AB1BA62),
+ UINT32_C(0x34BEABA5), UINT32_C(0x10C287AA), UINT32_C(0x46985239),
+ UINT32_C(0xB4931AF4), UINT32_C(0xB053C4DC), UINT32_C(0x07639899) },
+ { UINT32_C(0xE721EECD), UINT32_C(0x29E7F44D), UINT32_C(0x57B3FF48),
+ UINT32_C(0x65817182), UINT32_C(0x5054E2E0), UINT32_C(0x198542E2),
+ UINT32_C(0x84616DE8), UINT32_C(0x923C9E15), UINT32_C(0xAD465BB9),
+ UINT32_C(0x2A9C15E1), UINT32_C(0x16319245), UINT32_C(0xD8D4EFC7) } },
+ { { UINT32_C(0x9961A674), UINT32_C(0x72DC7943), UINT32_C(0xA0E13668),
+ UINT32_C(0x839A0A52), UINT32_C(0x334945EA), UINT32_C(0xD7A53FA9),
+ UINT32_C(0xE7AA25DB), UINT32_C(0xDB21DB77), UINT32_C(0x66E96DA3),
+ UINT32_C(0xB6675A7D), UINT32_C(0xE66F33C0), UINT32_C(0x2C31C406) },
+ { UINT32_C(0x6EC7B9CB), UINT32_C(0x45020B62), UINT32_C(0x0391F267),
+ UINT32_C(0xFF46E9CD), UINT32_C(0x0FA2F221), UINT32_C(0x7DABD744),
+ UINT32_C(0x9D4A2A3E), UINT32_C(0x9A32364B), UINT32_C(0x52D2E47A),
+ UINT32_C(0xF0F84AE8), UINT32_C(0x888F488A), UINT32_C(0xD0B872BB) } },
+ { { UINT32_C(0xC9790EEF), UINT32_C(0x531E4CEF), UINT32_C(0x2B8D1A58),
+ UINT32_C(0xF7B5735E), UINT32_C(0xEF568511), UINT32_C(0xB8882F1E),
+ UINT32_C(0x86A86DB3), UINT32_C(0xAFB08D1C), UINT32_C(0xF54DE8C7),
+ UINT32_C(0x88CB9DF2), UINT32_C(0x9A683282), UINT32_C(0xA44234F1) },
+ { UINT32_C(0xA6E9AB2E), UINT32_C(0xBC1B3D3A), UINT32_C(0x87FC99EE),
+ UINT32_C(0xEFA071FB), UINT32_C(0xA102DC0F), UINT32_C(0xFA3C737D),
+ UINT32_C(0xD6A0CBD2), UINT32_C(0xDF3248A6), UINT32_C(0x1ECC1BF4),
+ UINT32_C(0x6E62A4FF), UINT32_C(0xC8F1BC17), UINT32_C(0xF718F940) } },
+ { { UINT32_C(0x4F63F026), UINT32_C(0x2C8B0AAD), UINT32_C(0x50B253CC),
+ UINT32_C(0x2AFF6238), UINT32_C(0x10C4D122), UINT32_C(0xCAB3E942),
+ UINT32_C(0x07CD2816), UINT32_C(0x52B59F04), UINT32_C(0x982C41FC),
+ UINT32_C(0x22322803), UINT32_C(0x8CF50B19), UINT32_C(0x38844E66) },
+ { UINT32_C(0xBE3264CD), UINT32_C(0x42A959F7), UINT32_C(0x6C983524),
+ UINT32_C(0xBDDC24BD), UINT32_C(0x462B8640), UINT32_C(0xA489EB0C),
+ UINT32_C(0x98029BE7), UINT32_C(0xB7C05092), UINT32_C(0xA1ADDC64),
+ UINT32_C(0xD5546B5F), UINT32_C(0xA0C655AF), UINT32_C(0xE7CAC1FC) } },
+ { { UINT32_C(0x47636F97), UINT32_C(0x14547198), UINT32_C(0xEBCDCCFF),
+ UINT32_C(0x6FA67481), UINT32_C(0x395D3258), UINT32_C(0xC164872F),
+ UINT32_C(0xEE6ACDBC), UINT32_C(0xB8CECAFE), UINT32_C(0xA933F180),
+ UINT32_C(0x3FBFE5F3), UINT32_C(0x898C3B1E), UINT32_C(0xEC20CAC2) },
+ { UINT32_C(0x87DA73F9), UINT32_C(0x6A031BEE), UINT32_C(0x5C5AF46E),
+ UINT32_C(0xD1E667D1), UINT32_C(0x1DC6EEF9), UINT32_C(0xCB3DC168),
+ UINT32_C(0x33D310C0), UINT32_C(0x2DD1BD94), UINT32_C(0x9207E438),
+ UINT32_C(0x0F78D493), UINT32_C(0xA99C0E75), UINT32_C(0xC233D544) } },
+ { { UINT32_C(0x9E2A0113), UINT32_C(0x228F19F1), UINT32_C(0x0E1A5D37),
+ UINT32_C(0x58495BE5), UINT32_C(0x38D7F364), UINT32_C(0x97E08F69),
+ UINT32_C(0x510759B0), UINT32_C(0x1EC3BA3E), UINT32_C(0xE03CD40D),
+ UINT32_C(0x3682F19A), UINT32_C(0xF9E16D68), UINT32_C(0xC87745D8) },
+ { UINT32_C(0x09A642EA), UINT32_C(0xFD527AB5), UINT32_C(0xF9C81F27),
+ UINT32_C(0x6308EEBD), UINT32_C(0x550C5D68), UINT32_C(0xFA9F666C),
+ UINT32_C(0x584AB153), UINT32_C(0xDEBA436F), UINT32_C(0x5B63E939),
+ UINT32_C(0x1D4861D3), UINT32_C(0xC9850221), UINT32_C(0x073BED9B) } },
+ { { UINT32_C(0x8B171246), UINT32_C(0x802BCCF0), UINT32_C(0x733B072F),
+ UINT32_C(0xFFF7D15A), UINT32_C(0x4CBFA4EF), UINT32_C(0xEA386266),
+ UINT32_C(0xD635946B), UINT32_C(0x9E5B5073), UINT32_C(0xFA81BE95),
+ UINT32_C(0x16E9A979), UINT32_C(0xB14F701F), UINT32_C(0x41E8716E) },
+ { UINT32_C(0x101A6719), UINT32_C(0x25782E0F), UINT32_C(0xC9D66959),
+ UINT32_C(0x442C4875), UINT32_C(0x2B85D153), UINT32_C(0x52D845D9),
+ UINT32_C(0x2E831117), UINT32_C(0xFF925138), UINT32_C(0x8E02434B),
+ UINT32_C(0x01B700CC), UINT32_C(0xEC0BAE3E), UINT32_C(0xD2DB7F8E) } },
+ { { UINT32_C(0x966A4872), UINT32_C(0x1B225300), UINT32_C(0x566F537B),
+ UINT32_C(0x40C149BE), UINT32_C(0xCB680021), UINT32_C(0x3335F4D2),
+ UINT32_C(0x778E5F5F), UINT32_C(0x773D0263), UINT32_C(0x666FA9ED),
+ UINT32_C(0x1D9B7602), UINT32_C(0x2E6200CF), UINT32_C(0x52490A10) },
+ { UINT32_C(0x961F290B), UINT32_C(0x8434C7DD), UINT32_C(0x64456446),
+ UINT32_C(0x773AC156), UINT32_C(0x47B712BB), UINT32_C(0x5E2BB789),
+ UINT32_C(0xBE0974AD), UINT32_C(0xFD3BCBFD), UINT32_C(0x791AD5D8),
+ UINT32_C(0x71AE9351), UINT32_C(0x6F4E1400), UINT32_C(0x1EE738BA) } },
+ { { UINT32_C(0x0BE8E26E), UINT32_C(0x2FA428AB), UINT32_C(0xBB4CF9FC),
+ UINT32_C(0xFEFF0600), UINT32_C(0xB2EA5FB0), UINT32_C(0x76F25CA9),
+ UINT32_C(0x6835C5F4), UINT32_C(0xAB7FECF0), UINT32_C(0x19D5F328),
+ UINT32_C(0x649D0772), UINT32_C(0xACBCB12E), UINT32_C(0xABE7B895) },
+ { UINT32_C(0xD69B1EA8), UINT32_C(0xF2D1031A), UINT32_C(0xC60B0BBB),
+ UINT32_C(0x46065D5D), UINT32_C(0x85D798FF), UINT32_C(0xB0908DC1),
+ UINT32_C(0xD2C9B18A), UINT32_C(0x4E2420F0), UINT32_C(0xD30432A2),
+ UINT32_C(0x6B3A9BDD), UINT32_C(0xC9B134AD), UINT32_C(0x501C3383) } },
+ { { UINT32_C(0x98A21284), UINT32_C(0x608F0967), UINT32_C(0x059CCEDE),
+ UINT32_C(0x5361BE86), UINT32_C(0xAFD87EF7), UINT32_C(0x3A40655C),
+ UINT32_C(0x59083AA2), UINT32_C(0x03CF3117), UINT32_C(0xB6C366D9),
+ UINT32_C(0x57DB5F61), UINT32_C(0x6DD0D232), UINT32_C(0x29DC275B) },
+ { UINT32_C(0x8FA67501), UINT32_C(0xBDAB24DD), UINT32_C(0x65D08C37),
+ UINT32_C(0x5928F775), UINT32_C(0x645D466A), UINT32_C(0x9448A856),
+ UINT32_C(0xC0E927A5), UINT32_C(0x6E6B5E2E), UINT32_C(0xE80C6871),
+ UINT32_C(0xE884D546), UINT32_C(0x53A9A851), UINT32_C(0x10C881C9) } },
+ { { UINT32_C(0x9B627AA5), UINT32_C(0x35505374), UINT32_C(0x7976677B),
+ UINT32_C(0xE7CA1B57), UINT32_C(0x4976CE17), UINT32_C(0x81239712),
+ UINT32_C(0x96DA31B9), UINT32_C(0x96E9080B), UINT32_C(0xCC64AA1F),
+ UINT32_C(0x458254AB), UINT32_C(0x48E674C9), UINT32_C(0xFEFF6821) },
+ { UINT32_C(0x021F1488), UINT32_C(0x8772F37A), UINT32_C(0xAB56345C),
+ UINT32_C(0x2E274E18), UINT32_C(0x29823B76), UINT32_C(0x7C7BE61C),
+ UINT32_C(0x9EEFB39E), UINT32_C(0x275DB7B2), UINT32_C(0xBF5CBCEF),
+ UINT32_C(0x83B10ED4), UINT32_C(0x518E5183), UINT32_C(0x40D7F5B4) } },
+ { { UINT32_C(0xF960B41B), UINT32_C(0x315CCC01), UINT32_C(0x1D99E722),
+ UINT32_C(0x90B417C9), UINT32_C(0x013463E0), UINT32_C(0x84AFAA0D),
+ UINT32_C(0x13E6D9E1), UINT32_C(0xF133C5D8), UINT32_C(0x525B7430),
+ UINT32_C(0xD95C6ADC), UINT32_C(0x7A25106A), UINT32_C(0x082C61AD) },
+ { UINT32_C(0xBA1CE179), UINT32_C(0xABC1966D), UINT32_C(0xA5DB529A),
+ UINT32_C(0xE0578B77), UINT32_C(0xEC84107D), UINT32_C(0x10988C05),
+ UINT32_C(0x1B207F83), UINT32_C(0xFCADE5D7), UINT32_C(0xC5BA83DB),
+ UINT32_C(0x0BEB6FDB), UINT32_C(0x57537E34), UINT32_C(0x1C39B86D) } },
+ },
+ {
+ { { UINT32_C(0x2A7AECED), UINT32_C(0x5B0B5D69), UINT32_C(0x01DC545F),
+ UINT32_C(0x4C03450C), UINT32_C(0x404A3458), UINT32_C(0x72AD0A4A),
+ UINT32_C(0x9F467B60), UINT32_C(0x1DE8E255), UINT32_C(0x90634809),
+ UINT32_C(0xA4B35705), UINT32_C(0x706F0178), UINT32_C(0x76F30205) },
+ { UINT32_C(0x4454F0E5), UINT32_C(0x588D21AB), UINT32_C(0x64134928),
+ UINT32_C(0xD22DF549), UINT32_C(0x241BCD90), UINT32_C(0xF4E7E73D),
+ UINT32_C(0x2FACC7CC), UINT32_C(0xB8D8A1D2), UINT32_C(0x1D25D2A0),
+ UINT32_C(0x483C35A7), UINT32_C(0x1EF9F608), UINT32_C(0x7F8D2545) } },
+ { { UINT32_C(0x54EBC926), UINT32_C(0xCB51F039), UINT32_C(0xB8D4A7BB),
+ UINT32_C(0xE235D356), UINT32_C(0xB41FE1A6), UINT32_C(0x93C8FAFA),
+ UINT32_C(0xA719F254), UINT32_C(0x6297701D), UINT32_C(0x644F5CDE),
+ UINT32_C(0x6E9165BC), UINT32_C(0x0C11C542), UINT32_C(0x6506329D) },
+ { UINT32_C(0xA92B4250), UINT32_C(0xA2564809), UINT32_C(0x889C2E3E),
+ UINT32_C(0x0E9AC173), UINT32_C(0x22B1D1BE), UINT32_C(0x286A5926),
+ UINT32_C(0x6ECDD041), UINT32_C(0x86A3D752), UINT32_C(0x649F9524),
+ UINT32_C(0x4B867E0A), UINT32_C(0x0629CB0F), UINT32_C(0x1FE7D95A) } },
+ { { UINT32_C(0xCA5BAF54), UINT32_C(0xF4F66843), UINT32_C(0xEFE7DB78),
+ UINT32_C(0x298DB357), UINT32_C(0x7365712F), UINT32_C(0xF607E86E),
+ UINT32_C(0x8A822BC0), UINT32_C(0xD5882298), UINT32_C(0xC61299B3),
+ UINT32_C(0x2CFBD63A), UINT32_C(0x67167B1A), UINT32_C(0x6F713D9B) },
+ { UINT32_C(0xDE0B077A), UINT32_C(0x750F673F), UINT32_C(0xEE2178DA),
+ UINT32_C(0x07482708), UINT32_C(0x69123C75), UINT32_C(0x5E6D5BD1),
+ UINT32_C(0xEAB99B37), UINT32_C(0x6A93D1B6), UINT32_C(0x8CAEC6A3),
+ UINT32_C(0x6EF4F7E6), UINT32_C(0xCF3ED818), UINT32_C(0x7BE411D6) } },
+ { { UINT32_C(0x63A0A7D2), UINT32_C(0xF92B3073), UINT32_C(0x881DC8CF),
+ UINT32_C(0x32DA431C), UINT32_C(0xC578E3A3), UINT32_C(0xE51BD5ED),
+ UINT32_C(0x9587FA22), UINT32_C(0xEFDA70D2), UINT32_C(0x9B2EBA85),
+ UINT32_C(0xCFEC1708), UINT32_C(0xAF7BA530), UINT32_C(0x6AB51A4B) },
+ { UINT32_C(0x98174812), UINT32_C(0x5AC155AE), UINT32_C(0xCCB076E3),
+ UINT32_C(0xCAF07A71), UINT32_C(0xC38718A7), UINT32_C(0x280E86C2),
+ UINT32_C(0xD63745B7), UINT32_C(0x9D12DE73), UINT32_C(0xBF8A79AA),
+ UINT32_C(0x0E8EA855), UINT32_C(0xBD705BF7), UINT32_C(0x5EB2BED8) } },
+ { { UINT32_C(0xAE16DE53), UINT32_C(0x33FE9578), UINT32_C(0x10BEC902),
+ UINT32_C(0x3AE85EB5), UINT32_C(0x44AF850E), UINT32_C(0xC4F49658),
+ UINT32_C(0x087DD658), UINT32_C(0x6EA222B3), UINT32_C(0xA51F1447),
+ UINT32_C(0xB255E6FD), UINT32_C(0x117E3F48), UINT32_C(0xB35E4997) },
+ { UINT32_C(0x05616CA1), UINT32_C(0x562E813B), UINT32_C(0x8A61E156),
+ UINT32_C(0xDF5925D6), UINT32_C(0x571C728B), UINT32_C(0xB2FA8125),
+ UINT32_C(0xA2F2D1CF), UINT32_C(0x00864805), UINT32_C(0x1BCCB6FF),
+ UINT32_C(0x2DC26F41), UINT32_C(0x63AE37DD), UINT32_C(0xEBD5E093) } },
+ { { UINT32_C(0x0A285611), UINT32_C(0xD2D68BB3), UINT32_C(0xDC8378F2),
+ UINT32_C(0x3EAE7596), UINT32_C(0x6CC688A3), UINT32_C(0x2DC6CCC6),
+ UINT32_C(0x011F5DFB), UINT32_C(0xC45E5713), UINT32_C(0x62D34487),
+ UINT32_C(0x6B9C4F6C), UINT32_C(0x1FC65551), UINT32_C(0xFAD6F077) },
+ { UINT32_C(0x62B23B52), UINT32_C(0x5E3266E0), UINT32_C(0xE98F4715),
+ UINT32_C(0xF1DAF319), UINT32_C(0x3ED0AE83), UINT32_C(0x064D12EA),
+ UINT32_C(0x564125CB), UINT32_C(0x5CCF9326), UINT32_C(0xC63C1E9F),
+ UINT32_C(0x09057022), UINT32_C(0xDC9B5D2E), UINT32_C(0x7171972C) } },
+ { { UINT32_C(0xEABD21B2), UINT32_C(0x2364FD9A), UINT32_C(0x9174AD6D),
+ UINT32_C(0x3CE5F4BB), UINT32_C(0xB38688C0), UINT32_C(0xA4D6D5D0),
+ UINT32_C(0x6D87FD7D), UINT32_C(0x2292A2D2), UINT32_C(0x4CA02E54),
+ UINT32_C(0x2A7D1B53), UINT32_C(0xB4185715), UINT32_C(0x7BEE6E7E) },
+ { UINT32_C(0x8FC63ACD), UINT32_C(0x73E54609), UINT32_C(0x4064E09D),
+ UINT32_C(0xF4D93A12), UINT32_C(0x2B92DAA5), UINT32_C(0xD20E157A),
+ UINT32_C(0xC4B81A00), UINT32_C(0x90D125DB), UINT32_C(0x7682DE13),
+ UINT32_C(0xCB951C9E), UINT32_C(0x27987545), UINT32_C(0x1ABE58F4) } },
+ { { UINT32_C(0x30C70C8D), UINT32_C(0x6D351640), UINT32_C(0xCE2361B8),
+ UINT32_C(0x8047D811), UINT32_C(0xDF8E2C81), UINT32_C(0x3F8B3D4F),
+ UINT32_C(0x33FA1F6C), UINT32_C(0x5D595477), UINT32_C(0xE29B8A91),
+ UINT32_C(0xF769FE5A), UINT32_C(0xD737B2A2), UINT32_C(0x26F0E606) },
+ { UINT32_C(0xB8B31C6A), UINT32_C(0x70CBFA5D), UINT32_C(0x863D3AEA),
+ UINT32_C(0x0F883B4A), UINT32_C(0xE386AE2F), UINT32_C(0x156A4479),
+ UINT32_C(0xADE8A684), UINT32_C(0xA17A2FCD), UINT32_C(0xE2A7E335),
+ UINT32_C(0x78BDF958), UINT32_C(0x3B9E3041), UINT32_C(0xD1B4E673) } },
+ { { UINT32_C(0x449A6D11), UINT32_C(0x1EAF48EC), UINT32_C(0x6D2FA7B9),
+ UINT32_C(0x6B94B8E4), UINT32_C(0x728E4C1B), UINT32_C(0x1D75D269),
+ UINT32_C(0xDD304E2C), UINT32_C(0x91123819), UINT32_C(0x88804F4B),
+ UINT32_C(0x0B34CAE3), UINT32_C(0xC5495E9A), UINT32_C(0x2BA192FB) },
+ { UINT32_C(0xFF4D24BF), UINT32_C(0xC93FF6EF), UINT32_C(0x0342BA78),
+ UINT32_C(0xF8C2C0B0), UINT32_C(0x831EB94C), UINT32_C(0x8041F769),
+ UINT32_C(0x7782985E), UINT32_C(0x35310074), UINT32_C(0x3AF84E83),
+ UINT32_C(0xC755320B), UINT32_C(0x6F497E7F), UINT32_C(0x384B6D26) } },
+ { { UINT32_C(0x17E6BD17), UINT32_C(0xEF92CD59), UINT32_C(0xA426965C),
+ UINT32_C(0xA087305B), UINT32_C(0xAC47F773), UINT32_C(0x13895CE7),
+ UINT32_C(0xE0BB2867), UINT32_C(0xB85F2A9F), UINT32_C(0x7CD7C58E),
+ UINT32_C(0x2926E6AA), UINT32_C(0x450459C5), UINT32_C(0xE544EDA6) },
+ { UINT32_C(0xB90A9849), UINT32_C(0x73DBC351), UINT32_C(0x848EBE86),
+ UINT32_C(0x961183F6), UINT32_C(0x80534712), UINT32_C(0xC45BB210),
+ UINT32_C(0xA654D9A3), UINT32_C(0x379D08D7), UINT32_C(0xBD3FFA9C),
+ UINT32_C(0x5B97CEF2), UINT32_C(0xDDC2FCE5), UINT32_C(0x0F469F34) } },
+ { { UINT32_C(0x0642F38D), UINT32_C(0x6D146108), UINT32_C(0xD21EB887),
+ UINT32_C(0x055171A0), UINT32_C(0xD0DCEB28), UINT32_C(0x28DFFAB4),
+ UINT32_C(0x98DE9CCD), UINT32_C(0x0D0E6312), UINT32_C(0x118C3C3F),
+ UINT32_C(0x750A9156), UINT32_C(0xB049D799), UINT32_C(0x8C1F1390) },
+ { UINT32_C(0x439607C5), UINT32_C(0xE4823858), UINT32_C(0x5C111EAB),
+ UINT32_C(0x947E9BA0), UINT32_C(0xA355DF2E), UINT32_C(0x39C95616),
+ UINT32_C(0x10E54BDA), UINT32_C(0xF5F6B98E), UINT32_C(0x142B876A),
+ UINT32_C(0xB0E0B33D), UINT32_C(0xEA18C90C), UINT32_C(0x71197D73) } },
+ { { UINT32_C(0xF52BE819), UINT32_C(0x36A5139D), UINT32_C(0x29A45D2B),
+ UINT32_C(0xF60DDF34), UINT32_C(0xE9220E34), UINT32_C(0x0727EFEC),
+ UINT32_C(0x4EF7F446), UINT32_C(0x431D3386), UINT32_C(0xFCC4962C),
+ UINT32_C(0xC3165A64), UINT32_C(0xD64362BB), UINT32_C(0xB7D926E1) },
+ { UINT32_C(0xD45F9350), UINT32_C(0x216BC61F), UINT32_C(0xBBAED815),
+ UINT32_C(0xA974CB2F), UINT32_C(0x86FB2F76), UINT32_C(0x31DF342D),
+ UINT32_C(0x01D78314), UINT32_C(0x3AB67E05), UINT32_C(0xDEE33ED2),
+ UINT32_C(0x7AA951E0), UINT32_C(0xCEC78D94), UINT32_C(0x318FBBBD) } },
+ { { UINT32_C(0xB8FE0204), UINT32_C(0xAD7EFB65), UINT32_C(0x230AB7F7),
+ UINT32_C(0x0432E1C5), UINT32_C(0x9C967400), UINT32_C(0x7563A62D),
+ UINT32_C(0x3524D4FF), UINT32_C(0xD88B9C74), UINT32_C(0xF1A823E3),
+ UINT32_C(0x16A1991C), UINT32_C(0xFA6F0FFB), UINT32_C(0xCF2F9BFE) },
+ { UINT32_C(0xA50CA61F), UINT32_C(0x55AAA946), UINT32_C(0xFED4CAB3),
+ UINT32_C(0x8CBBD3C8), UINT32_C(0x7651365A), UINT32_C(0x03A0FAB8),
+ UINT32_C(0x62DC3913), UINT32_C(0x46B5234B), UINT32_C(0xB558CBBD),
+ UINT32_C(0xFD875B28), UINT32_C(0x11CEB361), UINT32_C(0xA48EC3AE) } },
+ { { UINT32_C(0xB3ADBD8B), UINT32_C(0x5DD131A1), UINT32_C(0x29B45EF8),
+ UINT32_C(0xF9FBCA3A), UINT32_C(0x9341EE18), UINT32_C(0x02204866),
+ UINT32_C(0x83BF9618), UINT32_C(0x8D13B895), UINT32_C(0xE807459C),
+ UINT32_C(0x0E395BAE), UINT32_C(0xB190E7DB), UINT32_C(0xB9C110CC) },
+ { UINT32_C(0x25D25063), UINT32_C(0xA0DC3452), UINT32_C(0x02371462),
+ UINT32_C(0x2FB78EC8), UINT32_C(0x8975C2D5), UINT32_C(0xC3A9E7BB),
+ UINT32_C(0x85A78264), UINT32_C(0x94666872), UINT32_C(0x8029AA92),
+ UINT32_C(0x480D2CC2), UINT32_C(0x5655726D), UINT32_C(0x237086C7) } },
+ { { UINT32_C(0x65EB9EEE), UINT32_C(0x197F14BB), UINT32_C(0x9F12E5FD),
+ UINT32_C(0xFC93125C), UINT32_C(0x8BFBAE5E), UINT32_C(0x9C20BC53),
+ UINT32_C(0x4BC053BA), UINT32_C(0xB35E2154), UINT32_C(0x21C3898E),
+ UINT32_C(0xE5FA9CC7), UINT32_C(0xD42F950F), UINT32_C(0x502D72FF) },
+ { UINT32_C(0xD1EB8C31), UINT32_C(0x6812D38A), UINT32_C(0x080D30BB),
+ UINT32_C(0x1F77F3F1), UINT32_C(0x5A8B1E98), UINT32_C(0x18D12833),
+ UINT32_C(0x299196CE), UINT32_C(0x7FD39FA9), UINT32_C(0xCF4ED6D6),
+ UINT32_C(0xFB8C9F11), UINT32_C(0xD6363194), UINT32_C(0x4C00F604) } },
+ { { UINT32_C(0xFA2A21C2), UINT32_C(0x5C8AFCF9), UINT32_C(0x1928D133),
+ UINT32_C(0x71CBF282), UINT32_C(0x42B29506), UINT32_C(0x56BEF28E),
+ UINT32_C(0x70323DE2), UINT32_C(0xAFBA250C), UINT32_C(0x7DED2C30),
+ UINT32_C(0x3FE208D1), UINT32_C(0xCE9AA598), UINT32_C(0xBD2CD213) },
+ { UINT32_C(0xCFEED070), UINT32_C(0x52C5EC52), UINT32_C(0xD3DA336B),
+ UINT32_C(0x0A7223E7), UINT32_C(0xCE156B46), UINT32_C(0x7156A4ED),
+ UINT32_C(0xED7E6159), UINT32_C(0x9AF6C499), UINT32_C(0x13C029AD),
+ UINT32_C(0x9D7A6797), UINT32_C(0x9018DC77), UINT32_C(0xE5B5C924) } },
+ },
+ {
+ { { UINT32_C(0xDE1E4E55), UINT32_C(0x3F2EFF53), UINT32_C(0xE4D3ECC4),
+ UINT32_C(0x6B749943), UINT32_C(0x0DDE190D), UINT32_C(0xAF10B18A),
+ UINT32_C(0xA26B0409), UINT32_C(0xF491B98D), UINT32_C(0xA2B1D944),
+ UINT32_C(0x66080782), UINT32_C(0x97E8C541), UINT32_C(0x59277DC6) },
+ { UINT32_C(0x006F18AA), UINT32_C(0xFDBFC5F6), UINT32_C(0xFADD8BE1),
+ UINT32_C(0x435D165B), UINT32_C(0x57645EF4), UINT32_C(0x8E5D2638),
+ UINT32_C(0xA0258363), UINT32_C(0x31BCFDA6), UINT32_C(0xD35D2503),
+ UINT32_C(0xF5330AB8), UINT32_C(0xC7CAB285), UINT32_C(0xB71369F0) } },
+ { { UINT32_C(0x40ACC5A8), UINT32_C(0xE6A19DCC), UINT32_C(0xDBC6DBF8),
+ UINT32_C(0x1C3A1FF1), UINT32_C(0xC6455613), UINT32_C(0xB4D89B9F),
+ UINT32_C(0xA7390D0E), UINT32_C(0x6CB0FE44), UINT32_C(0x59EA135A),
+ UINT32_C(0xADE197A4), UINT32_C(0x20680982), UINT32_C(0xDA6AA865) },
+ { UINT32_C(0x5A442C1B), UINT32_C(0x03DB9BE9), UINT32_C(0x2BFB93F2),
+ UINT32_C(0x221A2D73), UINT32_C(0x753C196C), UINT32_C(0x44DEE8D4),
+ UINT32_C(0x0B7C6FF5), UINT32_C(0x59ADCC70), UINT32_C(0x4CA1B142),
+ UINT32_C(0xC6260EC2), UINT32_C(0x46CBD4F2), UINT32_C(0x4C3CB5C6) } },
+ { { UINT32_C(0xA417111F), UINT32_C(0x8A15D6FE), UINT32_C(0x71D93FCC),
+ UINT32_C(0xFE4A16BD), UINT32_C(0x55BBE732), UINT32_C(0x7A7EE38C),
+ UINT32_C(0x1FF94A9D), UINT32_C(0xEFF146A5), UINT32_C(0xDD585AB5),
+ UINT32_C(0xE572D13E), UINT32_C(0x06491A5D), UINT32_C(0xD879790E) },
+ { UINT32_C(0x2A58CB2E), UINT32_C(0x9C84E1C5), UINT32_C(0x6C938630),
+ UINT32_C(0xD79D1374), UINT32_C(0x385F06C7), UINT32_C(0xDB12CD9B),
+ UINT32_C(0x7A7759C3), UINT32_C(0x0C93EB97), UINT32_C(0x683BD706),
+ UINT32_C(0xF1F5B0FE), UINT32_C(0x85EC3D50), UINT32_C(0x541E4F72) } },
+ { { UINT32_C(0x81833608), UINT32_C(0x9A0E1535), UINT32_C(0x6E2833AC),
+ UINT32_C(0x5CCE871E), UINT32_C(0xFB29777C), UINT32_C(0xC17059EA),
+ UINT32_C(0xE354CAFD), UINT32_C(0x7E40E5FA), UINT32_C(0x4D07C371),
+ UINT32_C(0x9CF59405), UINT32_C(0xA71C3945), UINT32_C(0x64CE36B2) },
+ { UINT32_C(0x56CAF487), UINT32_C(0x69309E96), UINT32_C(0x1AE3454B),
+ UINT32_C(0x3D719E9F), UINT32_C(0xE25823B6), UINT32_C(0xF2164070),
+ UINT32_C(0x0BC27359), UINT32_C(0xEAD851BD), UINT32_C(0xB0925094),
+ UINT32_C(0x3D21BFE8), UINT32_C(0x34A97F4E), UINT32_C(0xA783B1E9) } },
+ { { UINT32_C(0x9546491A), UINT32_C(0x406B0C26), UINT32_C(0xF293C4E5),
+ UINT32_C(0x9E5E15E2), UINT32_C(0x15B164DB), UINT32_C(0xC60D6413),
+ UINT32_C(0x0C75A78E), UINT32_C(0x0DA46F53), UINT32_C(0xEA0C656B),
+ UINT32_C(0x7C599BB7), UINT32_C(0x1B1A8122), UINT32_C(0x0F07A512) },
+ { UINT32_C(0x15172686), UINT32_C(0x14C7204A), UINT32_C(0x5165625D),
+ UINT32_C(0x8FAEDFF8), UINT32_C(0x37AEDE40), UINT32_C(0x20F260CE),
+ UINT32_C(0x8F357FFE), UINT32_C(0xC81F771E), UINT32_C(0xB0912557),
+ UINT32_C(0x25499197), UINT32_C(0x4C739C74), UINT32_C(0x736197DC) } },
+ { { UINT32_C(0x381B3462), UINT32_C(0x6151BAB1), UINT32_C(0x43DBD344),
+ UINT32_C(0x27E5A078), UINT32_C(0xA1C3E9FB), UINT32_C(0x2CB05BD6),
+ UINT32_C(0x27CF2A11), UINT32_C(0x2A759760), UINT32_C(0xFF43E702),
+ UINT32_C(0x0ADCF9DB), UINT32_C(0x1F484146), UINT32_C(0x4BBF03E2) },
+ { UINT32_C(0x55B6521A), UINT32_C(0x0E74997F), UINT32_C(0xADE17086),
+ UINT32_C(0x15629231), UINT32_C(0x7493FC58), UINT32_C(0x7F143E86),
+ UINT32_C(0xAF8B9670), UINT32_C(0x60869095), UINT32_C(0x7E524869),
+ UINT32_C(0x482CFCD7), UINT32_C(0x1D454756), UINT32_C(0x9E8060C3) } },
+ { { UINT32_C(0xC88B4D3B), UINT32_C(0xE495747A), UINT32_C(0xAE8A948F),
+ UINT32_C(0xB7559835), UINT32_C(0xDEB56853), UINT32_C(0x67EEF3A9),
+ UINT32_C(0x9DEE5ADF), UINT32_C(0x0E20E269), UINT32_C(0x61F0A1AA),
+ UINT32_C(0x9031AF67), UINT32_C(0x683402BC), UINT32_C(0x76669D32) },
+ { UINT32_C(0x06718B16), UINT32_C(0x90BD2313), UINT32_C(0x864EFDAC),
+ UINT32_C(0xE1B22A21), UINT32_C(0x6620089F), UINT32_C(0xE4FFE909),
+ UINT32_C(0x3428E2D9), UINT32_C(0xB84C842E), UINT32_C(0xFE3871FC),
+ UINT32_C(0x0E28C880), UINT32_C(0x3F21C200), UINT32_C(0x8932F698) } },
+ { { UINT32_C(0x6C90EA5D), UINT32_C(0x603F00CE), UINT32_C(0x40A2F693),
+ UINT32_C(0x64739307), UINT32_C(0x2174E517), UINT32_C(0xAF65148B),
+ UINT32_C(0xF784AE74), UINT32_C(0x162FC2CA), UINT32_C(0x4D5F6458),
+ UINT32_C(0x0D9A8825), UINT32_C(0x43AACE93), UINT32_C(0x0C2D5861) },
+ { UINT32_C(0x9F73CBFC), UINT32_C(0xBF1EADDE), UINT32_C(0x9C68BBCA),
+ UINT32_C(0xDE9C34C0), UINT32_C(0x67EF8A1A), UINT32_C(0x6D95602D),
+ UINT32_C(0xA791B241), UINT32_C(0x0AF2581B), UINT32_C(0x12CAD604),
+ UINT32_C(0x14F77361), UINT32_C(0xE2ACD1AD), UINT32_C(0x19F2354D) } },
+ { { UINT32_C(0x0D60F263), UINT32_C(0x272F78F6), UINT32_C(0x208FD785),
+ UINT32_C(0xE7A8F4AF), UINT32_C(0x36554F2C), UINT32_C(0x10E191C6),
+ UINT32_C(0xFD5CD0B3), UINT32_C(0x06D88551), UINT32_C(0x57069C27),
+ UINT32_C(0x29BF8568), UINT32_C(0x28AA6FAD), UINT32_C(0x3CE7ECD8) },
+ { UINT32_C(0xE9F1A1D8), UINT32_C(0x7D8A92D0), UINT32_C(0xD30B5725),
+ UINT32_C(0xD40C7FF8), UINT32_C(0xF54CAEB8), UINT32_C(0x16BE6CB2),
+ UINT32_C(0x14CB0A91), UINT32_C(0x14CA471A), UINT32_C(0x02733CAE),
+ UINT32_C(0xD5FF15B8), UINT32_C(0xDAA76580), UINT32_C(0xCAF88D87) } },
+ { { UINT32_C(0x2C046592), UINT32_C(0x39430E22), UINT32_C(0x1AD26706),
+ UINT32_C(0x6CDAE81F), UINT32_C(0xA25D9106), UINT32_C(0x8C102159),
+ UINT32_C(0x27CA9F30), UINT32_C(0x9A440572), UINT32_C(0x70287FBC),
+ UINT32_C(0x8D34C430), UINT32_C(0x29DB8AFA), UINT32_C(0x9003A455) },
+ { UINT32_C(0x7FD971AD), UINT32_C(0x91364CC3), UINT32_C(0x9C60EDB7),
+ UINT32_C(0x7B3AA048), UINT32_C(0x526F4DD8), UINT32_C(0x58B0E008),
+ UINT32_C(0xD86D98AE), UINT32_C(0xB7674454), UINT32_C(0xB2B45747),
+ UINT32_C(0xC25F4051), UINT32_C(0xCC043E8F), UINT32_C(0x8243BF9C) } },
+ { { UINT32_C(0x43A0C387), UINT32_C(0xA89641C6), UINT32_C(0x87B9AB17),
+ UINT32_C(0x6D92205C), UINT32_C(0xDAA0E102), UINT32_C(0x37D691F4),
+ UINT32_C(0xCDE5312E), UINT32_C(0xEB3E52D7), UINT32_C(0x16F518A2),
+ UINT32_C(0x60D3C099), UINT32_C(0x8A378EEB), UINT32_C(0x7854C051) },
+ { UINT32_C(0x4BBCAAC5), UINT32_C(0x7359DB51), UINT32_C(0x1713F102),
+ UINT32_C(0xF5B1B68C), UINT32_C(0xE4398DE5), UINT32_C(0xDAEAE645),
+ UINT32_C(0xD1ABFB82), UINT32_C(0x8C8ACB6C), UINT32_C(0x136423E2),
+ UINT32_C(0x2E8B76C3), UINT32_C(0xA8BA015E), UINT32_C(0x509DCB2D) } },
+ { { UINT32_C(0x9AD9C59C), UINT32_C(0x2FF36815), UINT32_C(0x658E65B9),
+ UINT32_C(0xB189A4E8), UINT32_C(0xEA786AD2), UINT32_C(0x7D33DDBB),
+ UINT32_C(0xC0D2DC05), UINT32_C(0x96D0D648), UINT32_C(0xBFA03BE9),
+ UINT32_C(0x05E49256), UINT32_C(0x8BAF5A1C), UINT32_C(0x0EA4E7A6) },
+ { UINT32_C(0x9F9AD5A8), UINT32_C(0x3DDCE0B0), UINT32_C(0x9E49C2CB),
+ UINT32_C(0xF7809195), UINT32_C(0x21782C2F), UINT32_C(0xBFCEF29D),
+ UINT32_C(0xC41BFD97), UINT32_C(0xE57AD39F), UINT32_C(0x1355AD19),
+ UINT32_C(0xC04B93E8), UINT32_C(0x59440F9F), UINT32_C(0xAABC9E6E) } },
+ { { UINT32_C(0x5B6459DA), UINT32_C(0x7AA48103), UINT32_C(0x0166E880),
+ UINT32_C(0x83EF7477), UINT32_C(0x511CCE80), UINT32_C(0x536182B1),
+ UINT32_C(0x73CA55AA), UINT32_C(0xAFDD2EEE), UINT32_C(0xA8716143),
+ UINT32_C(0xAB910D0D), UINT32_C(0x83707250), UINT32_C(0x8BEAA42B) },
+ { UINT32_C(0x8DA2AB3D), UINT32_C(0x4BCCFD89), UINT32_C(0xEC6AA105),
+ UINT32_C(0x1DBF68A9), UINT32_C(0x68EB42DA), UINT32_C(0x32CE6108),
+ UINT32_C(0x8EA62E37), UINT32_C(0x5C2C2C85), UINT32_C(0xCD3088A7),
+ UINT32_C(0x1ED2791F), UINT32_C(0xFF05070C), UINT32_C(0x496B4FEB) } },
+ { { UINT32_C(0x0AA629C5), UINT32_C(0x9FA9121A), UINT32_C(0x57558BEC),
+ UINT32_C(0xE286CFF1), UINT32_C(0x59813A4D), UINT32_C(0x4D9D657E),
+ UINT32_C(0x26103519), UINT32_C(0xC4676A16), UINT32_C(0x2BD4DF80),
+ UINT32_C(0x616160B3), UINT32_C(0x30FBAE87), UINT32_C(0x26FB78CC) },
+ { UINT32_C(0x8F0F66BD), UINT32_C(0x09607013), UINT32_C(0x03D9B90D),
+ UINT32_C(0xDD4E2D0C), UINT32_C(0x600D1B12), UINT32_C(0x5D3A8912),
+ UINT32_C(0x4308E126), UINT32_C(0xF76DD52F), UINT32_C(0x9E4FCCA6),
+ UINT32_C(0x97CC0409), UINT32_C(0x04C4DF7B), UINT32_C(0x0CFBE311) } },
+ { { UINT32_C(0x28437A23), UINT32_C(0x6CA62C12), UINT32_C(0x40E7A003),
+ UINT32_C(0x0DAF3353), UINT32_C(0xD20F8079), UINT32_C(0x1FD07DF0),
+ UINT32_C(0x3BBC9749), UINT32_C(0xEAE7969C), UINT32_C(0x9ECAD022),
+ UINT32_C(0x55861AFA), UINT32_C(0x1FBC3D4C), UINT32_C(0xEC41DAD9) },
+ { UINT32_C(0xDA8B261B), UINT32_C(0x1FE4CB40), UINT32_C(0x427C5C9D),
+ UINT32_C(0xC2671AB6), UINT32_C(0x261D4939), UINT32_C(0xDFCDA7B8),
+ UINT32_C(0x2072C0B9), UINT32_C(0x9E7B802B), UINT32_C(0xC7828CC2),
+ UINT32_C(0x3AFEE900), UINT32_C(0xF6DE987F), UINT32_C(0x3488BF28) } },
+ { { UINT32_C(0x7BE1F89E), UINT32_C(0x33B9F2DE), UINT32_C(0x299B15C9),
+ UINT32_C(0xD4E80821), UINT32_C(0x0E13F37F), UINT32_C(0x87A3067A),
+ UINT32_C(0x55FD239F), UINT32_C(0x6D4C09ED), UINT32_C(0x92EF014F),
+ UINT32_C(0x48B1042D), UINT32_C(0xB385A759), UINT32_C(0xA382B2E0) },
+ { UINT32_C(0x7F6F84F8), UINT32_C(0xBF571BB0), UINT32_C(0x0CE87F50),
+ UINT32_C(0x25AFFA37), UINT32_C(0xFE54F1BC), UINT32_C(0x826906D3),
+ UINT32_C(0xC53AE76A), UINT32_C(0x6B0421F4), UINT32_C(0x4855EB3C),
+ UINT32_C(0x44F85A3A), UINT32_C(0x8D1F2B27), UINT32_C(0xF49E2151) } },
+ },
+ {
+ { { UINT32_C(0x5E3C647B), UINT32_C(0xC0426B77), UINT32_C(0x8CF05348),
+ UINT32_C(0xBFCBD939), UINT32_C(0x172C0D3D), UINT32_C(0x31D312E3),
+ UINT32_C(0xEE754737), UINT32_C(0x5F49FDE6), UINT32_C(0x6DA7EE61),
+ UINT32_C(0x895530F0), UINT32_C(0xE8B3A5FB), UINT32_C(0xCF281B0A) },
+ { UINT32_C(0x41B8A543), UINT32_C(0xFD149735), UINT32_C(0x3080DD30),
+ UINT32_C(0x41A625A7), UINT32_C(0x653908CF), UINT32_C(0xE2BAAE07),
+ UINT32_C(0xBA02A278), UINT32_C(0xC3D01436), UINT32_C(0x7B21B8F8),
+ UINT32_C(0xA0D0222E), UINT32_C(0xD7EC1297), UINT32_C(0xFDC270E9) } },
+ { { UINT32_C(0x9F101E64), UINT32_C(0x06A67BD2), UINT32_C(0xE1733A4A),
+ UINT32_C(0xCB6E0AC7), UINT32_C(0x97BC62D2), UINT32_C(0xEE0B5D51),
+ UINT32_C(0x24C51874), UINT32_C(0x52B17039), UINT32_C(0x82A1A0D5),
+ UINT32_C(0xFED1F423), UINT32_C(0xDB6270AC), UINT32_C(0x55D90569) },
+ { UINT32_C(0x5D73D533), UINT32_C(0x36BE4A9C), UINT32_C(0x976ED4D5),
+ UINT32_C(0xBE9266D6), UINT32_C(0xB8F8074B), UINT32_C(0xC17436D3),
+ UINT32_C(0x718545C6), UINT32_C(0x3BB4D399), UINT32_C(0x5C757D21),
+ UINT32_C(0x8E1EA355), UINT32_C(0x8C474366), UINT32_C(0xF7EDBC97) } },
+ { { UINT32_C(0x6EA83242), UINT32_C(0xEC72C650), UINT32_C(0x1B2D237F),
+ UINT32_C(0xF7DE7BE5), UINT32_C(0x1819EFB0), UINT32_C(0x3C5E2200),
+ UINT32_C(0x8CDDE870), UINT32_C(0xDF5AB6D6), UINT32_C(0x92A87AEE),
+ UINT32_C(0x75A44E9D), UINT32_C(0xBCF77F19), UINT32_C(0xBDDC46F4) },
+ { UINT32_C(0x669B674D), UINT32_C(0x8191EFBD), UINT32_C(0xED71768F),
+ UINT32_C(0x52884DF9), UINT32_C(0x65CF242C), UINT32_C(0xE62BE582),
+ UINT32_C(0x80B1D17B), UINT32_C(0xAE99A3B1), UINT32_C(0x92DE59A9),
+ UINT32_C(0x48CBB446), UINT32_C(0x2DCB3CE2), UINT32_C(0xD3C226CF) } },
+ { { UINT32_C(0x9FD94EC4), UINT32_C(0x9580CDFB), UINT32_C(0x28631AD9),
+ UINT32_C(0xED273A6C), UINT32_C(0xC327F3E7), UINT32_C(0x5D3D5F77),
+ UINT32_C(0x35353C5F), UINT32_C(0x05D5339C), UINT32_C(0x5C258EB1),
+ UINT32_C(0xC56FB5FE), UINT32_C(0xEDCE1F79), UINT32_C(0xEFF8425E) },
+ { UINT32_C(0xCF83CF9C), UINT32_C(0xAB7AA141), UINT32_C(0x207D6D4F),
+ UINT32_C(0xBD2A690A), UINT32_C(0x458D9E52), UINT32_C(0xE1241491),
+ UINT32_C(0xAA7F0F31), UINT32_C(0xDD2448CC), UINT32_C(0xF0FDA7AB),
+ UINT32_C(0xEC58D3C7), UINT32_C(0xC91BBA4D), UINT32_C(0x7B6E122D) } },
+ { { UINT32_C(0xB1B48156), UINT32_C(0x2A2DEDAF), UINT32_C(0xBB93DB87),
+ UINT32_C(0xA0A2C63A), UINT32_C(0x08ACD99E), UINT32_C(0xC6559078),
+ UINT32_C(0xFE4AC331), UINT32_C(0x03EA42AF), UINT32_C(0xEB180ED6),
+ UINT32_C(0x43D2C14A), UINT32_C(0xB1156A1A), UINT32_C(0xC2F293DD) },
+ { UINT32_C(0xA9D81249), UINT32_C(0x1FAFABF5), UINT32_C(0x9A8EEE87),
+ UINT32_C(0x39ADDEAD), UINT32_C(0x119E2E92), UINT32_C(0x21E206F2),
+ UINT32_C(0xD74DCEB6), UINT32_C(0xBC5DCC2E), UINT32_C(0x0A73A358),
+ UINT32_C(0x86647FA3), UINT32_C(0x2F53F642), UINT32_C(0xEAD8BEA4) } },
+ { { UINT32_C(0x91C09091), UINT32_C(0x636225F5), UINT32_C(0x71BDCFDF),
+ UINT32_C(0xCCF5070A), UINT32_C(0xB9668EE2), UINT32_C(0x0EF8D625),
+ UINT32_C(0xB5E04E4F), UINT32_C(0x57BDF6CD), UINT32_C(0x7C75EA43),
+ UINT32_C(0xFC6AB0A6), UINT32_C(0xF7FD6EF3), UINT32_C(0xEB6B8AFB) },
+ { UINT32_C(0x2A3DF404), UINT32_C(0x5B2AEEF0), UINT32_C(0xB9823197),
+ UINT32_C(0x31FD3B48), UINT32_C(0x83A7EB23), UINT32_C(0x56226DB6),
+ UINT32_C(0x5BB1ED2F), UINT32_C(0x3772C21E), UINT32_C(0xCD1ABA6A),
+ UINT32_C(0x3E833624), UINT32_C(0xAC672DAD), UINT32_C(0xBAE58FFA) } },
+ { { UINT32_C(0x31BA1705), UINT32_C(0xCE92224D), UINT32_C(0xF0197F63),
+ UINT32_C(0x022C6ED2), UINT32_C(0xA4DC1113), UINT32_C(0x21F18D99),
+ UINT32_C(0x03616BF1), UINT32_C(0x5CD04DE8), UINT32_C(0x9FF12E08),
+ UINT32_C(0x6F900679), UINT32_C(0x48E61DDF), UINT32_C(0xF59A3315) },
+ { UINT32_C(0xB51BD024), UINT32_C(0x9474D42C), UINT32_C(0x9051E49D),
+ UINT32_C(0x11A0A413), UINT32_C(0xDCE70EDB), UINT32_C(0x79C92705),
+ UINT32_C(0x34198426), UINT32_C(0x113CE278), UINT32_C(0xEA8616D2),
+ UINT32_C(0x8978396F), UINT32_C(0xEA894C36), UINT32_C(0x9A2A14D0) } },
+ { { UINT32_C(0x604F6E4A), UINT32_C(0x4F1E1254), UINT32_C(0x0187D585),
+ UINT32_C(0x4513B088), UINT32_C(0x19E0F482), UINT32_C(0x9022F257),
+ UINT32_C(0xE2239DBF), UINT32_C(0x51FB2A80), UINT32_C(0x998ED9D5),
+ UINT32_C(0x49940D9E), UINT32_C(0x6C932C5D), UINT32_C(0x0583D241) },
+ { UINT32_C(0xF25B73F7), UINT32_C(0x1188CEC8), UINT32_C(0x3B3D06CD),
+ UINT32_C(0xA28788CB), UINT32_C(0xA083DB5A), UINT32_C(0xDEA194EC),
+ UINT32_C(0x22DF4272), UINT32_C(0xD93A4F7E), UINT32_C(0x6A009C49),
+ UINT32_C(0x8D84E4BF), UINT32_C(0x3E3E4A9E), UINT32_C(0x893D8DD9) } },
+ { { UINT32_C(0x33D31160), UINT32_C(0x35E909EA), UINT32_C(0x57172F1E),
+ UINT32_C(0x50203168), UINT32_C(0x51F3D866), UINT32_C(0x2707FC44),
+ UINT32_C(0xD2442A5D), UINT32_C(0xEB9D2018), UINT32_C(0x5DBFE378),
+ UINT32_C(0x904D7209), UINT32_C(0x5F13CF77), UINT32_C(0x6DB132A3) },
+ { UINT32_C(0x7A3AF54B), UINT32_C(0x9D842BA6), UINT32_C(0x5AA5B4F9),
+ UINT32_C(0x4E16EA19), UINT32_C(0xAF24228E), UINT32_C(0x2BBA457C),
+ UINT32_C(0x16F3C5FE), UINT32_C(0xCC04B3BB), UINT32_C(0x77E64944),
+ UINT32_C(0xBAFAC516), UINT32_C(0xF08BCEE0), UINT32_C(0x31580A34) } },
+ { { UINT32_C(0x20C30ACA), UINT32_C(0xC6808DEE), UINT32_C(0xA3EA2056),
+ UINT32_C(0xDADD216F), UINT32_C(0x7A4A9F9D), UINT32_C(0xD331394E),
+ UINT32_C(0x424C4026), UINT32_C(0x9E0441AD), UINT32_C(0x0AEB5350),
+ UINT32_C(0xAEED102F), UINT32_C(0xD45B09DA), UINT32_C(0xC6697FBB) },
+ { UINT32_C(0xDEAC1496), UINT32_C(0x52A2590E), UINT32_C(0x250B87AF),
+ UINT32_C(0x7142B831), UINT32_C(0x6D0784A8), UINT32_C(0xBEF2E68B),
+ UINT32_C(0xA5F71CEF), UINT32_C(0x5F62593A), UINT32_C(0xB5DA51A3),
+ UINT32_C(0x3B8F7616), UINT32_C(0xB680F5FE), UINT32_C(0xC7A6FA0D) } },
+ { { UINT32_C(0x99C8227C), UINT32_C(0x36C21DE6), UINT32_C(0xC26813B1),
+ UINT32_C(0xBEE3E867), UINT32_C(0xBDD91549), UINT32_C(0x9B05F2E6),
+ UINT32_C(0xA7D1110F), UINT32_C(0x34FF2B1F), UINT32_C(0x37F67FD0),
+ UINT32_C(0x8E6953B9), UINT32_C(0xC3183E20), UINT32_C(0x56C7F18B) },
+ { UINT32_C(0x9E2019ED), UINT32_C(0x48AF46DE), UINT32_C(0xF551BBBF),
+ UINT32_C(0xDEAF972E), UINT32_C(0xCC5E3EEF), UINT32_C(0x88EE38F8),
+ UINT32_C(0x392D6BAF), UINT32_C(0xFB8D7A44), UINT32_C(0x0127187D),
+ UINT32_C(0x32293BFC), UINT32_C(0xE58647CC), UINT32_C(0x7689E767) } },
+ { { UINT32_C(0x52168013), UINT32_C(0x00CE901B), UINT32_C(0x837AAE71),
+ UINT32_C(0xC6BF8E38), UINT32_C(0x167677D8), UINT32_C(0xD6F11EFA),
+ UINT32_C(0x86C8E5CF), UINT32_C(0xE53BB485), UINT32_C(0xC48E74AB),
+ UINT32_C(0x671167CE), UINT32_C(0x8AD720A7), UINT32_C(0x8A40218C) },
+ { UINT32_C(0xE7C1191A), UINT32_C(0x81E827A6), UINT32_C(0xADDB153D),
+ UINT32_C(0x54058F8D), UINT32_C(0x0D950FA2), UINT32_C(0x0BAF2925),
+ UINT32_C(0x576DDA13), UINT32_C(0xC244674D), UINT32_C(0x41BCD13B),
+ UINT32_C(0x8C4630AE), UINT32_C(0x5A077419), UINT32_C(0x6C2127BF) } },
+ { { UINT32_C(0xA83C501F), UINT32_C(0xCF977FD5), UINT32_C(0xB6AB176F),
+ UINT32_C(0xD7C6DF36), UINT32_C(0x397BC6B5), UINT32_C(0x117F6331),
+ UINT32_C(0xF7A2D491), UINT32_C(0x72A6078B), UINT32_C(0x5242FE2E),
+ UINT32_C(0xE5A2AAED), UINT32_C(0xFEBDC212), UINT32_C(0x88ECFFDC) },
+ { UINT32_C(0xCE33BA21), UINT32_C(0xF2DBBF50), UINT32_C(0xCEB19F07),
+ UINT32_C(0xE1343B76), UINT32_C(0xD2C28F71), UINT32_C(0x1F32D4C9),
+ UINT32_C(0x18587685), UINT32_C(0x93FC64B4), UINT32_C(0xBA1F8BD1),
+ UINT32_C(0x39CEEF9B), UINT32_C(0x8D6D6BB0), UINT32_C(0x99C36A78) } },
+ { { UINT32_C(0x3E9561CF), UINT32_C(0x0D063817), UINT32_C(0x3D33704D),
+ UINT32_C(0x1D8646AA), UINT32_C(0x7A08BA33), UINT32_C(0x8C451384),
+ UINT32_C(0xE02D6624), UINT32_C(0x96446BD3), UINT32_C(0x2D6F4166),
+ UINT32_C(0x749849F0), UINT32_C(0x14268BF0), UINT32_C(0xE364DA01) },
+ { UINT32_C(0x9AEBFCFD), UINT32_C(0x7CE4587E), UINT32_C(0x56234393),
+ UINT32_C(0xD4686064), UINT32_C(0x16DF73B2), UINT32_C(0x00231D51),
+ UINT32_C(0x7279C78C), UINT32_C(0xF6A969B7), UINT32_C(0x6CB4117C),
+ UINT32_C(0x1FF1F6B6), UINT32_C(0xD3EAB680), UINT32_C(0x30AEBC39) } },
+ { { UINT32_C(0x93EF00B9), UINT32_C(0x5CC97E64), UINT32_C(0x972345AE),
+ UINT32_C(0xDAE13841), UINT32_C(0x4788F43C), UINT32_C(0x85839184),
+ UINT32_C(0xE2E6CF3E), UINT32_C(0xD0FF521E), UINT32_C(0x4B707C86),
+ UINT32_C(0xAED14A5B), UINT32_C(0xD2523CF7), UINT32_C(0x7EAAE4A6) },
+ { UINT32_C(0x024C8AC6), UINT32_C(0x266472C5), UINT32_C(0xC0170051),
+ UINT32_C(0xE47E1522), UINT32_C(0x73826BAE), UINT32_C(0x7B83DA61),
+ UINT32_C(0xCF543F0D), UINT32_C(0xE97E19F5), UINT32_C(0x20BF38E2),
+ UINT32_C(0x5D5248FA), UINT32_C(0xDF56A037), UINT32_C(0x8A7C2F7D) } },
+ { { UINT32_C(0x87B0526C), UINT32_C(0xB04659DD), UINT32_C(0x2307565E),
+ UINT32_C(0x593C604A), UINT32_C(0x7C630AB8), UINT32_C(0x49E52225),
+ UINT32_C(0xDCE9CD23), UINT32_C(0x24C1D0C6), UINT32_C(0x85177079),
+ UINT32_C(0x6FDB241C), UINT32_C(0xF250C351), UINT32_C(0x5F521D19) },
+ { UINT32_C(0xA6FB61DF), UINT32_C(0xFB56134B), UINT32_C(0xD75C07ED),
+ UINT32_C(0xA4E70D69), UINT32_C(0x7D8825A8), UINT32_C(0xB7A82448),
+ UINT32_C(0xDD64BBCC), UINT32_C(0xA3AEA7D4), UINT32_C(0x8692F539),
+ UINT32_C(0xD53E6E6C), UINT32_C(0xF7AA4BC0), UINT32_C(0x8DDDA83B) } },
+ },
+ {
+ { { UINT32_C(0xDD93D50A), UINT32_C(0x140A0F9F), UINT32_C(0x83B7ABAC),
+ UINT32_C(0x4799FFDE), UINT32_C(0x04A1F742), UINT32_C(0x78FF7C23),
+ UINT32_C(0x195BA34E), UINT32_C(0xC0568F51), UINT32_C(0x3B7F78B4),
+ UINT32_C(0xE9718360), UINT32_C(0xF9EFAA53), UINT32_C(0x9CFD1FF1) },
+ { UINT32_C(0xBB06022E), UINT32_C(0xE924D2C5), UINT32_C(0xFAA2AF6D),
+ UINT32_C(0x9987FA86), UINT32_C(0x6EE37E0F), UINT32_C(0x4B12E73F),
+ UINT32_C(0x5E5A1DDE), UINT32_C(0x1836FDFA), UINT32_C(0x9DCD6416),
+ UINT32_C(0x7F1B9225), UINT32_C(0x677544D8), UINT32_C(0xCB2C1B4D) } },
+ { { UINT32_C(0x9C213D95), UINT32_C(0x0254486D), UINT32_C(0xCB2F6E94),
+ UINT32_C(0x68A9DB56), UINT32_C(0x000F5491), UINT32_C(0xFB5858BA),
+ UINT32_C(0x34009FB6), UINT32_C(0x1315BDD9), UINT32_C(0xC42BDE30),
+ UINT32_C(0xB18A8E0A), UINT32_C(0xF1070358), UINT32_C(0xFDCF93D1) },
+ { UINT32_C(0x3022937E), UINT32_C(0xBEB1DB75), UINT32_C(0xCAC20DB4),
+ UINT32_C(0x9B9ECA7A), UINT32_C(0xE4122B20), UINT32_C(0x152214D4),
+ UINT32_C(0xAABCCC7B), UINT32_C(0xD3E673F2), UINT32_C(0xAED07571),
+ UINT32_C(0x94C50F64), UINT32_C(0xE66B4F17), UINT32_C(0xD767059A) } },
+ { { UINT32_C(0xDCD6D14B), UINT32_C(0x40336B12), UINT32_C(0xE3B4919C),
+ UINT32_C(0xF6BCFF5D), UINT32_C(0x9C841F0C), UINT32_C(0xC337048D),
+ UINT32_C(0x1D617F50), UINT32_C(0x4CE6D025), UINT32_C(0x8117D379),
+ UINT32_C(0x00FEF219), UINT32_C(0xF95BE243), UINT32_C(0x18B7C4E9) },
+ { UINT32_C(0x38DF08FF), UINT32_C(0x98DE119E), UINT32_C(0x8D772D20),
+ UINT32_C(0xDFD803BD), UINT32_C(0x0F9678BD), UINT32_C(0x94125B72),
+ UINT32_C(0x334ACE30), UINT32_C(0xFC5B57CD), UINT32_C(0xB7E86E04),
+ UINT32_C(0x09486527), UINT32_C(0x6E552039), UINT32_C(0xFE9F8BCC) } },
+ { { UINT32_C(0xD6F5A10E), UINT32_C(0x3B75C45B), UINT32_C(0xC1C35F38),
+ UINT32_C(0xFD4680F4), UINT32_C(0xF8E0A113), UINT32_C(0x5450227D),
+ UINT32_C(0x73DDBA24), UINT32_C(0x5E69F1AE), UINT32_C(0x57F24645),
+ UINT32_C(0x2007B80E), UINT32_C(0x3D159741), UINT32_C(0xC63695DC) },
+ { UINT32_C(0x4530F623), UINT32_C(0xCBE54D29), UINT32_C(0x2869586B),
+ UINT32_C(0x986AD573), UINT32_C(0x4CC39F73), UINT32_C(0xE19F7059),
+ UINT32_C(0x2B1B8DA9), UINT32_C(0x80F00AB3), UINT32_C(0x73F68D26),
+ UINT32_C(0xB765AAF9), UINT32_C(0xE993F829), UINT32_C(0xBC79A394) } },
+ { { UINT32_C(0xF310D2A0), UINT32_C(0x9C441043), UINT32_C(0xDC5EB106),
+ UINT32_C(0x2865EE58), UINT32_C(0x9CB8065C), UINT32_C(0x71A95922),
+ UINT32_C(0xA052AF0F), UINT32_C(0x8EB3A733), UINT32_C(0xB09D716E),
+ UINT32_C(0x56009F42), UINT32_C(0xABCBE6AD), UINT32_C(0xA7F923C5) },
+ { UINT32_C(0xFA375C01), UINT32_C(0x263B7669), UINT32_C(0x21EF27A2),
+ UINT32_C(0x641C47E5), UINT32_C(0xB08FFD25), UINT32_C(0xA89B474E),
+ UINT32_C(0xF0A239F3), UINT32_C(0x5BE8EC3F), UINT32_C(0x242A6C5A),
+ UINT32_C(0x0E79957A), UINT32_C(0x0C6C75F5), UINT32_C(0x1DFB26D0) } },
+ { { UINT32_C(0x9DFBF22A), UINT32_C(0x2FD97B9B), UINT32_C(0x5643532D),
+ UINT32_C(0xDEC16CC8), UINT32_C(0x60FEE7C3), UINT32_C(0xDF0E6E39),
+ UINT32_C(0x545860C8), UINT32_C(0xD09AD7B6), UINT32_C(0x73FC3B7C),
+ UINT32_C(0xCC16E984), UINT32_C(0x0D4E1555), UINT32_C(0x6CE734C1) },
+ { UINT32_C(0x4B5F6032), UINT32_C(0xC6EFE68B), UINT32_C(0x14F54073),
+ UINT32_C(0x3A64F34C), UINT32_C(0xAC44DC95), UINT32_C(0x25DA689C),
+ UINT32_C(0x5358AD8A), UINT32_C(0x990C477E), UINT32_C(0xF36DA7DE),
+ UINT32_C(0x00E958A5), UINT32_C(0xC9B6F161), UINT32_C(0x902B7360) } },
+ { { UINT32_C(0x9347B90A), UINT32_C(0x454AB42C), UINT32_C(0xA698B02B),
+ UINT32_C(0xCAEBE64A), UINT32_C(0xFB86FA40), UINT32_C(0x119CDC69),
+ UINT32_C(0xC3109281), UINT32_C(0x2E5CB7AD), UINT32_C(0xCD0C3D00),
+ UINT32_C(0x67BB1EC5), UINT32_C(0x83F25BBF), UINT32_C(0x5D430BC7) },
+ { UINT32_C(0x5CDE0ABB), UINT32_C(0x69FD84A8), UINT32_C(0x9816B688),
+ UINT32_C(0x69DA263E), UINT32_C(0x0E53CBB8), UINT32_C(0xE52D93DF),
+ UINT32_C(0xADD2D5A7), UINT32_C(0x42CF6F25), UINT32_C(0xC87CA88F),
+ UINT32_C(0x227BA59D), UINT32_C(0xDA738554), UINT32_C(0x7A1CA876) } },
+ { { UINT32_C(0x1CAC82C4), UINT32_C(0x3FA5C105), UINT32_C(0x8A78C9BE),
+ UINT32_C(0x23C76087), UINT32_C(0x1C5CFA42), UINT32_C(0xE98CDAD6),
+ UINT32_C(0x0A6C0421), UINT32_C(0x09C30252), UINT32_C(0x42FC61B9),
+ UINT32_C(0x149BAC7C), UINT32_C(0x3004A3E2), UINT32_C(0x3A1C22AC) },
+ { UINT32_C(0x202C7FED), UINT32_C(0xDE6B0D6E), UINT32_C(0xE7E63052),
+ UINT32_C(0xB2457377), UINT32_C(0x3706B3EF), UINT32_C(0x31725FD4),
+ UINT32_C(0x2B1AFDBF), UINT32_C(0xE16A347D), UINT32_C(0x8C29CF66),
+ UINT32_C(0xBE4850C4), UINT32_C(0x2939F23C), UINT32_C(0x8F51CC4D) } },
+ { { UINT32_C(0x219AE6C1), UINT32_C(0x169E025B), UINT32_C(0x116E1CA1),
+ UINT32_C(0x55FF526F), UINT32_C(0xB191F55D), UINT32_C(0x01B810A3),
+ UINT32_C(0x29588A69), UINT32_C(0x2D981272), UINT32_C(0x48B92199),
+ UINT32_C(0x53C93770), UINT32_C(0x8A85236F), UINT32_C(0x8C7DD84E) },
+ { UINT32_C(0xCAACF958), UINT32_C(0x293D48B6), UINT32_C(0x43572B30),
+ UINT32_C(0x1F084ACB), UINT32_C(0xFAD91F28), UINT32_C(0x628BFA2D),
+ UINT32_C(0x829386AF), UINT32_C(0x8D627B11), UINT32_C(0xD44A77BE),
+ UINT32_C(0x3EC1DD00), UINT32_C(0x649AC7F0), UINT32_C(0x8D3B0D08) } },
+ { { UINT32_C(0x177513BF), UINT32_C(0x00A93DAA), UINT32_C(0x42AD79E1),
+ UINT32_C(0x2EF0B96F), UINT32_C(0xA07129D9), UINT32_C(0x81F5AAF1),
+ UINT32_C(0x923F2449), UINT32_C(0xFC04B7EF), UINT32_C(0x60CDB1B7),
+ UINT32_C(0x855DA795), UINT32_C(0xAD5D61D4), UINT32_C(0xB1EB5DAB) },
+ { UINT32_C(0x353FD028), UINT32_C(0xD2CEF1AE), UINT32_C(0x9EE94847),
+ UINT32_C(0xC21D5439), UINT32_C(0x0380C1A8), UINT32_C(0x9ED552BB),
+ UINT32_C(0x2BAC328F), UINT32_C(0xB156FE7A), UINT32_C(0x7213C6A4),
+ UINT32_C(0xBB7E0196), UINT32_C(0x1701ED5B), UINT32_C(0x36002A33) } },
+ { { UINT32_C(0xDDC9EF4D), UINT32_C(0x20B1632A), UINT32_C(0x272D082B),
+ UINT32_C(0x2A35FF4C), UINT32_C(0xF6CC9BD3), UINT32_C(0x30D39923),
+ UINT32_C(0xE65C9D08), UINT32_C(0x6D879BC2), UINT32_C(0x6FA9983C),
+ UINT32_C(0xCE8274E1), UINT32_C(0x0EB7424F), UINT32_C(0x652371E8) },
+ { UINT32_C(0xC5C35282), UINT32_C(0x32B77503), UINT32_C(0xC885A931),
+ UINT32_C(0xD7306333), UINT32_C(0x72955AA8), UINT32_C(0x8A16D719),
+ UINT32_C(0x7D51F882), UINT32_C(0x5548F163), UINT32_C(0xBABA59EF),
+ UINT32_C(0xB311DC66), UINT32_C(0x0DB8F627), UINT32_C(0x773D5448) } },
+ { { UINT32_C(0x7A62EB3B), UINT32_C(0x59B1B134), UINT32_C(0xCCEEFB34),
+ UINT32_C(0x0F8CE157), UINT32_C(0xA798CB2B), UINT32_C(0x3FE842A8),
+ UINT32_C(0x0BF4161D), UINT32_C(0xD01BC626), UINT32_C(0x4D016FDB),
+ UINT32_C(0x55EF6E55), UINT32_C(0xB242B201), UINT32_C(0xCB561503) },
+ { UINT32_C(0xAF4199C1), UINT32_C(0x076EBC73), UINT32_C(0x697244F7),
+ UINT32_C(0x39DEDCBB), UINT32_C(0x040162BC), UINT32_C(0x9D184733),
+ UINT32_C(0x7F6B5FA6), UINT32_C(0x902992C1), UINT32_C(0xBB4952B5),
+ UINT32_C(0xAD1DE754), UINT32_C(0xA121F6C8), UINT32_C(0x7ACF1B93) } },
+ { { UINT32_C(0x325C9B9A), UINT32_C(0x7A56867C), UINT32_C(0xF3DC3D6A),
+ UINT32_C(0x1A143999), UINT32_C(0x03F5BCB8), UINT32_C(0xCE109590),
+ UINT32_C(0xD6EEE5B7), UINT32_C(0x034E9035), UINT32_C(0x495DF1BC),
+ UINT32_C(0x2AFA81C8), UINT32_C(0x08924D02), UINT32_C(0x5EAB52DC) },
+ { UINT32_C(0xAA181904), UINT32_C(0xEE6AA014), UINT32_C(0x310AD621),
+ UINT32_C(0xE62DEF09), UINT32_C(0xC7538A03), UINT32_C(0x6C9792FC),
+ UINT32_C(0x3E41D789), UINT32_C(0xA89D3E88), UINT32_C(0x9F94AE83),
+ UINT32_C(0xD60FA11C), UINT32_C(0xE0D6234A), UINT32_C(0x5E16A8C2) } },
+ { { UINT32_C(0xA9242F3B), UINT32_C(0x87EC053D), UINT32_C(0xF0E03545),
+ UINT32_C(0x99544637), UINT32_C(0x6B7019E9), UINT32_C(0xEA0633FF),
+ UINT32_C(0x68DDDB5B), UINT32_C(0x8CB8AE07), UINT32_C(0x1A811AC7),
+ UINT32_C(0x892E7C84), UINT32_C(0x73664249), UINT32_C(0xC7EF19EB) },
+ { UINT32_C(0xCD1489E3), UINT32_C(0xD1B5819A), UINT32_C(0xDE45D24A),
+ UINT32_C(0xF9C80FB0), UINT32_C(0x83BB7491), UINT32_C(0x045C21A6),
+ UINT32_C(0x73F7A47D), UINT32_C(0xA65325BE), UINT32_C(0x9C394F0C),
+ UINT32_C(0x08D09F0E), UINT32_C(0x268D4F08), UINT32_C(0xE7FB21C6) } },
+ { { UINT32_C(0x6CA95C18), UINT32_C(0xC4CCAB95), UINT32_C(0xBC42E040),
+ UINT32_C(0x563FFD56), UINT32_C(0xE701C604), UINT32_C(0xFA3C64D8),
+ UINT32_C(0xB0ABAFEE), UINT32_C(0xC88D4426), UINT32_C(0x8542E4C3),
+ UINT32_C(0x1A353E5E), UINT32_C(0xED726186), UINT32_C(0x9A2D8B7C) },
+ { UINT32_C(0x42D097FA), UINT32_C(0xD61CE190), UINT32_C(0x799A748B),
+ UINT32_C(0x6A63E280), UINT32_C(0x3225486B), UINT32_C(0x0F48D063),
+ UINT32_C(0x42A3C443), UINT32_C(0x848F8FE1), UINT32_C(0x8493CEF4),
+ UINT32_C(0x2CCDE250), UINT32_C(0x45E77E7C), UINT32_C(0x5450A508) } },
+ { { UINT32_C(0x03112816), UINT32_C(0xD0F4E248), UINT32_C(0xCCBE9E16),
+ UINT32_C(0xFCAD9DDB), UINT32_C(0x5AE01EA0), UINT32_C(0x177999BF),
+ UINT32_C(0xCE832DCE), UINT32_C(0xD20C78B9), UINT32_C(0x50C8C646),
+ UINT32_C(0x3CC694FB), UINT32_C(0xC93D4887), UINT32_C(0x24D75968) },
+ { UINT32_C(0x87BC08AF), UINT32_C(0x9F06366A), UINT32_C(0x7FD0DF2A),
+ UINT32_C(0x59FAB50E), UINT32_C(0x6C4CC234), UINT32_C(0x5FFCC7F7),
+ UINT32_C(0x65F52D86), UINT32_C(0x87198DD7), UINT32_C(0xA855DF04),
+ UINT32_C(0x5B9C94B0), UINT32_C(0x8A067AD7), UINT32_C(0xD8BA6C73) } },
+ },
+ {
+ { { UINT32_C(0x1C4C9D90), UINT32_C(0x9E9AF315), UINT32_C(0xD12E0A89),
+ UINT32_C(0x8665C5A9), UINT32_C(0x58286493), UINT32_C(0x204ABD92),
+ UINT32_C(0xB2E09205), UINT32_C(0x79959889), UINT32_C(0xFE56B101),
+ UINT32_C(0x0C727A3D), UINT32_C(0x8B657F26), UINT32_C(0xF366244C) },
+ { UINT32_C(0xCCA65BE2), UINT32_C(0xDE35D954), UINT32_C(0xB0FD41CE),
+ UINT32_C(0x52EE1230), UINT32_C(0x36019FEE), UINT32_C(0xFA03261F),
+ UINT32_C(0x66511D8F), UINT32_C(0xAFDA42D9), UINT32_C(0x821148B9),
+ UINT32_C(0xF63211DD), UINT32_C(0x6F13A3E1), UINT32_C(0x7B56AF7E) } },
+ { { UINT32_C(0x5913E184), UINT32_C(0x47FE4799), UINT32_C(0x82145900),
+ UINT32_C(0x5BBE584C), UINT32_C(0x9A867173), UINT32_C(0xB76CFA8B),
+ UINT32_C(0x514BF471), UINT32_C(0x9BC87BF0), UINT32_C(0x71DCF1FC),
+ UINT32_C(0x37392DCE), UINT32_C(0x3AD1EFA8), UINT32_C(0xEC3EFAE0) },
+ { UINT32_C(0x14876451), UINT32_C(0xBBEA5A34), UINT32_C(0x6217090F),
+ UINT32_C(0x96E5F543), UINT32_C(0x9B1665A9), UINT32_C(0x5B3D4ECD),
+ UINT32_C(0xE329DF22), UINT32_C(0xE7B0DF26), UINT32_C(0x0BAA808D),
+ UINT32_C(0x18FB438E), UINT32_C(0xDD516FAF), UINT32_C(0x90757EBF) } },
+ { { UINT32_C(0xD5A98D68), UINT32_C(0x1E6F9A95), UINT32_C(0x849DA828),
+ UINT32_C(0x759EA7DF), UINT32_C(0x6E8B4198), UINT32_C(0x365D5625),
+ UINT32_C(0x7A4A53F9), UINT32_C(0xE1B9C53B), UINT32_C(0xE32B9B16),
+ UINT32_C(0x55DC1D50), UINT32_C(0xBB6D5701), UINT32_C(0xA4657EBB) },
+ { UINT32_C(0xEACC76E2), UINT32_C(0x4C270249), UINT32_C(0x162B1CC7),
+ UINT32_C(0xBE49EC75), UINT32_C(0x0689902B), UINT32_C(0x19A95B61),
+ UINT32_C(0xA4CFC5A8), UINT32_C(0xDD5706BF), UINT32_C(0x14E5B424),
+ UINT32_C(0xD33BDB73), UINT32_C(0xE69EBA87), UINT32_C(0x21311BD1) } },
+ { { UINT32_C(0x72A21ACC), UINT32_C(0x75BA2F9B), UINT32_C(0xA28EDB4C),
+ UINT32_C(0x356688D4), UINT32_C(0x610D080F), UINT32_C(0x3C339E0B),
+ UINT32_C(0x33A99C2F), UINT32_C(0x614AC293), UINT32_C(0xAA580AFF),
+ UINT32_C(0xA5E23AF2), UINT32_C(0xE1FDBA3A), UINT32_C(0xA6BCB860) },
+ { UINT32_C(0xB43F9425), UINT32_C(0xAA603365), UINT32_C(0xF7EE4635),
+ UINT32_C(0xAE8D7126), UINT32_C(0x56330A32), UINT32_C(0xA2B25244),
+ UINT32_C(0x9E025AA3), UINT32_C(0xC396B5BB), UINT32_C(0xF8A0D5CF),
+ UINT32_C(0xABBF77FA), UINT32_C(0xEA31C83B), UINT32_C(0xB322EE30) } },
+ { { UINT32_C(0x7890E234), UINT32_C(0x04881384), UINT32_C(0x672E70C6),
+ UINT32_C(0x387F1159), UINT32_C(0x7B307F75), UINT32_C(0x1468A614),
+ UINT32_C(0xED85EC96), UINT32_C(0x56335B52), UINT32_C(0xD45BCAE9),
+ UINT32_C(0xDA1BB60F), UINT32_C(0xF9FAEADD), UINT32_C(0x4D94F3F0) },
+ { UINT32_C(0xFC78D86B), UINT32_C(0x6C6A7183), UINT32_C(0x3018DEC6),
+ UINT32_C(0xA425B5C7), UINT32_C(0x2D877399), UINT32_C(0xB1549C33),
+ UINT32_C(0x92B2BC37), UINT32_C(0x6C41C50C), UINT32_C(0x83EE0DDB),
+ UINT32_C(0x3A9F380C), UINT32_C(0xC4599E73), UINT32_C(0xDED5FEB6) } },
+ { { UINT32_C(0x0B7F8354), UINT32_C(0x14D34C21), UINT32_C(0x9177CE45),
+ UINT32_C(0x1475A1CD), UINT32_C(0x9B926E4B), UINT32_C(0x9F5F764A),
+ UINT32_C(0x05DD21FE), UINT32_C(0x77260D1E), UINT32_C(0xC4B937F7),
+ UINT32_C(0x3C882480), UINT32_C(0x722372F2), UINT32_C(0xC92DCD39) },
+ { UINT32_C(0xEC6F657E), UINT32_C(0xF636A1BE), UINT32_C(0x1D30DD35),
+ UINT32_C(0xB0E6C312), UINT32_C(0xE4654EFE), UINT32_C(0xFE4B0528),
+ UINT32_C(0x21D230D2), UINT32_C(0x1C4A6820), UINT32_C(0x98FA45AB),
+ UINT32_C(0x615D2E48), UINT32_C(0x01FDBABF), UINT32_C(0x1F35D6D8) } },
+ { { UINT32_C(0x3A7B10D1), UINT32_C(0xA636EEB8), UINT32_C(0xF4A29E73),
+ UINT32_C(0x4E1AE352), UINT32_C(0xE6BB1EC7), UINT32_C(0x01704F5F),
+ UINT32_C(0x0EF020AE), UINT32_C(0x75C04F72), UINT32_C(0x5A31E6A6),
+ UINT32_C(0x448D8CEE), UINT32_C(0x208F994B), UINT32_C(0xE40A9C29) },
+ { UINT32_C(0xFD8F9D5D), UINT32_C(0x69E09A30), UINT32_C(0x449BAB7E),
+ UINT32_C(0xE6A5F7EB), UINT32_C(0x2AA1768B), UINT32_C(0xF25BC18A),
+ UINT32_C(0x3C841234), UINT32_C(0x9449E404), UINT32_C(0x016A7BEF),
+ UINT32_C(0x7A3BF43E), UINT32_C(0x2A150B60), UINT32_C(0xF25803E8) } },
+ { { UINT32_C(0xB215F9E0), UINT32_C(0xE44A2A57), UINT32_C(0x19066F0A),
+ UINT32_C(0x38B34DCE), UINT32_C(0x40BB1BFB), UINT32_C(0x8BB91DAD),
+ UINT32_C(0xE67735FC), UINT32_C(0x64C9F775), UINT32_C(0x88D613CD),
+ UINT32_C(0xDE142417), UINT32_C(0x1901D88D), UINT32_C(0xC5014FF5) },
+ { UINT32_C(0xF38116B0), UINT32_C(0xA250341D), UINT32_C(0x9D6CBCB2),
+ UINT32_C(0xF96B9DD4), UINT32_C(0x76B3FAC2), UINT32_C(0x15EC6C72),
+ UINT32_C(0x8124C1E9), UINT32_C(0x88F1952F), UINT32_C(0x975BE4F5),
+ UINT32_C(0x6B72F8EA), UINT32_C(0x061F7530), UINT32_C(0x23D288FF) } },
+ { { UINT32_C(0xAFB96CE3), UINT32_C(0xEBFE3E5F), UINT32_C(0xB1979537),
+ UINT32_C(0x2275EDFB), UINT32_C(0xC97BA741), UINT32_C(0xC37AB9E8),
+ UINT32_C(0x63D7C626), UINT32_C(0x446E4B10), UINT32_C(0xD025EB02),
+ UINT32_C(0xB73E2DCE), UINT32_C(0x7669EEA7), UINT32_C(0x1F952B51) },
+ { UINT32_C(0x6069A424), UINT32_C(0xABDD00F6), UINT32_C(0xDC298BFB),
+ UINT32_C(0x1C0F9D9B), UINT32_C(0xEB757B33), UINT32_C(0x831B1FD3),
+ UINT32_C(0x59D60B32), UINT32_C(0xD7DBE183), UINT32_C(0x9EF094B3),
+ UINT32_C(0x663D1F36), UINT32_C(0x67F7F11A), UINT32_C(0x1BD5732E) } },
+ { { UINT32_C(0xC75D8892), UINT32_C(0x3C7FB3F5), UINT32_C(0xBA68DA69),
+ UINT32_C(0x2CFF9A0C), UINT32_C(0x60EC740B), UINT32_C(0x76455E8B),
+ UINT32_C(0x167B88F0), UINT32_C(0x4B8D67FF), UINT32_C(0x5A4186B1),
+ UINT32_C(0xEDEC0C02), UINT32_C(0xBEBF35AB), UINT32_C(0x127C462D) },
+ { UINT32_C(0x049430FC), UINT32_C(0x9159C67E), UINT32_C(0xE7747320),
+ UINT32_C(0x86B21DD2), UINT32_C(0x0CF27B89), UINT32_C(0x0E0E0152),
+ UINT32_C(0xCD1316B6), UINT32_C(0x705F28F5), UINT32_C(0xBEAEA8A8),
+ UINT32_C(0x76751691), UINT32_C(0x360C5B69), UINT32_C(0x4C73E282) } },
+ { { UINT32_C(0xFD7B3D74), UINT32_C(0x46BCC0D5), UINT32_C(0x0DC4F410),
+ UINT32_C(0x6F13C20E), UINT32_C(0x72F11CDF), UINT32_C(0x98A1AF7D),
+ UINT32_C(0x7928881C), UINT32_C(0x6099FD83), UINT32_C(0x371BB94B),
+ UINT32_C(0x66976356), UINT32_C(0x19B945AB), UINT32_C(0x673FBA72) },
+ { UINT32_C(0xAED00700), UINT32_C(0xE4D8FA6E), UINT32_C(0x5C71A9F7),
+ UINT32_C(0xEA2313EC), UINT32_C(0xF99D4AEA), UINT32_C(0xF9ED8268),
+ UINT32_C(0x42AB59C7), UINT32_C(0xADD89164), UINT32_C(0x3F3A2D45),
+ UINT32_C(0xB37EB26F), UINT32_C(0xA924841E), UINT32_C(0x0B39BD7A) } },
+ { { UINT32_C(0xE03CDBBB), UINT32_C(0xD811EB32), UINT32_C(0x7CC3610E),
+ UINT32_C(0x12055F1D), UINT32_C(0xA9046E3F), UINT32_C(0x6B23A1A0),
+ UINT32_C(0x9DD4A749), UINT32_C(0x4D712122), UINT32_C(0xB1BF0AC3),
+ UINT32_C(0xB0C2ACA1), UINT32_C(0xC1B0432F), UINT32_C(0x71EFF575) },
+ { UINT32_C(0x2B44E285), UINT32_C(0x6CD81492), UINT32_C(0xD87E8D20),
+ UINT32_C(0x3088BD9C), UINT32_C(0xF567E8FA), UINT32_C(0xACE218E5),
+ UINT32_C(0xCF90CBBB), UINT32_C(0xB3FA0424), UINT32_C(0x770734D3),
+ UINT32_C(0xADBDA751), UINT32_C(0x5AD6569A), UINT32_C(0xBCD78BAD) } },
+ { { UINT32_C(0x7F39641F), UINT32_C(0xCADB31FA), UINT32_C(0x825E5562),
+ UINT32_C(0x3EF3E295), UINT32_C(0xF4094C64), UINT32_C(0x4893C633),
+ UINT32_C(0x8ADDF432), UINT32_C(0x52F685F1), UINT32_C(0x7FDC9373),
+ UINT32_C(0x9FD887AB), UINT32_C(0xE8680E8B), UINT32_C(0x47A9ADA0) },
+ { UINT32_C(0xF0CD44F6), UINT32_C(0x579313B7), UINT32_C(0xE188AE2E),
+ UINT32_C(0xAC4B8668), UINT32_C(0x8FB145BD), UINT32_C(0x648F4369),
+ UINT32_C(0x74629E31), UINT32_C(0xE0460AB3), UINT32_C(0x8FF2B05F),
+ UINT32_C(0xC25F2875), UINT32_C(0x2D31EAEA), UINT32_C(0x4720C2B6) } },
+ { { UINT32_C(0x13D48F80), UINT32_C(0x4603CDF4), UINT32_C(0xA49725DA),
+ UINT32_C(0x9ADB50E2), UINT32_C(0x65DF63F0), UINT32_C(0x8CD33050),
+ UINT32_C(0xCD643003), UINT32_C(0x58D8B3BB), UINT32_C(0xB739826B),
+ UINT32_C(0x170A4F4A), UINT32_C(0x1EAD0E17), UINT32_C(0x857772B5) },
+ { UINT32_C(0xE65320F1), UINT32_C(0x01B78152), UINT32_C(0xB7503FC0),
+ UINT32_C(0xA6B4D845), UINT32_C(0x3DD50798), UINT32_C(0x0F5089B9),
+ UINT32_C(0x5690B6BE), UINT32_C(0x488F200F), UINT32_C(0x9E096F36),
+ UINT32_C(0x220B4ADF), UINT32_C(0x8CE5BC7C), UINT32_C(0x474D7C9F) } },
+ { { UINT32_C(0xC745F8C9), UINT32_C(0xFED8C058), UINT32_C(0x291262D1),
+ UINT32_C(0xB683179E), UINT32_C(0xD15EE88C), UINT32_C(0x26ABD367),
+ UINT32_C(0xF60A6249), UINT32_C(0x29E8EED3), UINT32_C(0x1E02D6E1),
+ UINT32_C(0xED6008BB), UINT32_C(0xA6B12B8D), UINT32_C(0xD82ECF4C) },
+ { UINT32_C(0xAAE4FA22), UINT32_C(0x9929D021), UINT32_C(0x336A1AB3),
+ UINT32_C(0xBE4DEF14), UINT32_C(0x8C80A312), UINT32_C(0x529B7E09),
+ UINT32_C(0xEE0EB0CE), UINT32_C(0xB059188D), UINT32_C(0x16DEAB7F),
+ UINT32_C(0x1E42979A), UINT32_C(0x84EE9477), UINT32_C(0x24110349) } },
+ { { UINT32_C(0x2BE579CC), UINT32_C(0xD6524685), UINT32_C(0xC456FDED),
+ UINT32_C(0x849316F1), UINT32_C(0x2D1B67DA), UINT32_C(0xC51B7DA4),
+ UINT32_C(0x41BC6D6A), UINT32_C(0xC25B539E), UINT32_C(0xA9BF8BED),
+ UINT32_C(0xE3B7CCA3), UINT32_C(0x045C15E4), UINT32_C(0x813EF18C) },
+ { UINT32_C(0x697982C4), UINT32_C(0x5F3789A1), UINT32_C(0x8C435566),
+ UINT32_C(0x4C125369), UINT32_C(0xDC0A92C6), UINT32_C(0x00A7AE6E),
+ UINT32_C(0x2F64A053), UINT32_C(0x1ABC929B), UINT32_C(0x38666B44),
+ UINT32_C(0xF4925C4C), UINT32_C(0x0F3DE7F6), UINT32_C(0xA81044B0) } },
+ },
+ {
+ { { UINT32_C(0xC2EC3731), UINT32_C(0xBCC88422), UINT32_C(0x10DC4EC2),
+ UINT32_C(0x78A3E4D4), UINT32_C(0x2571D6B1), UINT32_C(0x745DA1EF),
+ UINT32_C(0x739A956E), UINT32_C(0xF01C2921), UINT32_C(0xE4BFFC16),
+ UINT32_C(0xEFFD8065), UINT32_C(0xF36FE72C), UINT32_C(0x6EFE62A1) },
+ { UINT32_C(0x0F4629A4), UINT32_C(0xF49E90D2), UINT32_C(0x8CE646F4),
+ UINT32_C(0xADD1DCC7), UINT32_C(0xB7240D91), UINT32_C(0xCB78B583),
+ UINT32_C(0x03F8387F), UINT32_C(0x2E1A7C3C), UINT32_C(0x3200F2D9),
+ UINT32_C(0x16566C22), UINT32_C(0xAAF80A84), UINT32_C(0x2361B14B) } },
+ { { UINT32_C(0xB5733309), UINT32_C(0xDB1CFFD2), UINT32_C(0x0F9DD939),
+ UINT32_C(0x24BC250B), UINT32_C(0xA3C1DB85), UINT32_C(0xA4181E5A),
+ UINT32_C(0xAC55D391), UINT32_C(0xE5183E51), UINT32_C(0xEFD270D0),
+ UINT32_C(0x2793D5EF), UINT32_C(0xC0631546), UINT32_C(0x7D56F63D) },
+ { UINT32_C(0x0C1EE59D), UINT32_C(0xECB40A59), UINT32_C(0xBB5BFA2C),
+ UINT32_C(0xE613A9E4), UINT32_C(0x6C5830F9), UINT32_C(0xA89B14AB),
+ UINT32_C(0xA03F201E), UINT32_C(0x4DC477DC), UINT32_C(0xC88C54F6),
+ UINT32_C(0x5604F5DA), UINT32_C(0x2ACFC66E), UINT32_C(0xD49264DC) } },
+ { { UINT32_C(0x1C4DFA95), UINT32_C(0x283DD7F0), UINT32_C(0x62C0B160),
+ UINT32_C(0xB898CC2C), UINT32_C(0x870282AA), UINT32_C(0xBA08C095),
+ UINT32_C(0xF4E36324), UINT32_C(0xB02B00D8), UINT32_C(0x604CECF2),
+ UINT32_C(0x53AADDC0), UINT32_C(0x84DDD24E), UINT32_C(0xF1F927D3) },
+ { UINT32_C(0xE2ABC9E1), UINT32_C(0x34BC00A0), UINT32_C(0x60289F88),
+ UINT32_C(0x2DA1227D), UINT32_C(0xCEF68F74), UINT32_C(0x5228EAAA),
+ UINT32_C(0x3C029351), UINT32_C(0x40A790D2), UINT32_C(0x8442E3B7),
+ UINT32_C(0xE0E9AF5C), UINT32_C(0xA9F141E0), UINT32_C(0xA3214142) } },
+ { { UINT32_C(0xF9A58E3D), UINT32_C(0x72F4949E), UINT32_C(0xA48660A6),
+ UINT32_C(0x738C700B), UINT32_C(0x092A5805), UINT32_C(0x71B04726),
+ UINT32_C(0x0F5CDB72), UINT32_C(0xAD5C3C11), UINT32_C(0x554BFC49),
+ UINT32_C(0xD4951F9E), UINT32_C(0x6131EBE7), UINT32_C(0xEE594EE5) },
+ { UINT32_C(0x3C1AF0A9), UINT32_C(0x37DA59F3), UINT32_C(0xCB040A63),
+ UINT32_C(0xD7AFC73B), UINT32_C(0x4D89FA65), UINT32_C(0xD020962A),
+ UINT32_C(0x71D824F5), UINT32_C(0x2610C61E), UINT32_C(0x3C050E31),
+ UINT32_C(0x9C917DA7), UINT32_C(0xE6E7EBFB), UINT32_C(0x3840F92F) } },
+ { { UINT32_C(0x8D8B8CED), UINT32_C(0x50FBD7FE), UINT32_C(0x47D240AE),
+ UINT32_C(0xC7282F75), UINT32_C(0x1930FF73), UINT32_C(0x79646A47),
+ UINT32_C(0x2F7F5A77), UINT32_C(0x2E0BAC4E), UINT32_C(0x26127E0B),
+ UINT32_C(0x0EE44FA5), UINT32_C(0x82BC2AA7), UINT32_C(0x678881B7) },
+ { UINT32_C(0x67F5F497), UINT32_C(0xB9E5D384), UINT32_C(0xA9B7106B),
+ UINT32_C(0x8F94A7D4), UINT32_C(0x9D329F68), UINT32_C(0xBF7E0B07),
+ UINT32_C(0x45D192FB), UINT32_C(0x169B93EA), UINT32_C(0x20DBE8C0),
+ UINT32_C(0xCCAA9467), UINT32_C(0x938F9574), UINT32_C(0xD4513A50) } },
+ { { UINT32_C(0x054CB874), UINT32_C(0x841C96B4), UINT32_C(0xA3C26834),
+ UINT32_C(0xD75B1AF1), UINT32_C(0xEE6575F0), UINT32_C(0x7237169D),
+ UINT32_C(0x0322AADC), UINT32_C(0xD71FC7E5), UINT32_C(0x949E3A8E),
+ UINT32_C(0xD7A23F1E), UINT32_C(0xDD31D8C7), UINT32_C(0x77E2D102) },
+ { UINT32_C(0xD10F5A1F), UINT32_C(0x5AD69D09), UINT32_C(0xB99D9A0B),
+ UINT32_C(0x526C9CB4), UINT32_C(0x972B237D), UINT32_C(0x521BB10B),
+ UINT32_C(0xA326F342), UINT32_C(0x1E4CD42F), UINT32_C(0xF0F126CA),
+ UINT32_C(0x5BB6DB27), UINT32_C(0xA4A515AD), UINT32_C(0x587AF22C) } },
+ { { UINT32_C(0xB12E542F), UINT32_C(0x1123A531), UINT32_C(0xB9EB2811),
+ UINT32_C(0x1D01A64D), UINT32_C(0xF2D70F87), UINT32_C(0xA4A3515B),
+ UINT32_C(0xB4BD0270), UINT32_C(0xFA205234), UINT32_C(0x5EDA26B9),
+ UINT32_C(0x74B81830), UINT32_C(0x56578E75), UINT32_C(0x9305D6E6) },
+ { UINT32_C(0x9F11BE19), UINT32_C(0xF38E69DE), UINT32_C(0x44DBE89F),
+ UINT32_C(0x1E2A5C23), UINT32_C(0xFD286654), UINT32_C(0x1077E7BC),
+ UINT32_C(0x0FCA4741), UINT32_C(0xD3669894), UINT32_C(0x278F8497),
+ UINT32_C(0x893BF904), UINT32_C(0xEB3E14F4), UINT32_C(0xD6AC5F83) } },
+ { { UINT32_C(0x488F5F74), UINT32_C(0x327B9DAB), UINT32_C(0xCAB7364F),
+ UINT32_C(0x2B44F4B8), UINT32_C(0x19B6C6BD), UINT32_C(0xB4A6D22D),
+ UINT32_C(0xFC77CD3E), UINT32_C(0xA087E613), UINT32_C(0xB0B49BC7),
+ UINT32_C(0x4558E327), UINT32_C(0xCD835D35), UINT32_C(0x188805BE) },
+ { UINT32_C(0xC1DC1007), UINT32_C(0x592F293C), UINT32_C(0x6AF02B44),
+ UINT32_C(0xFAEE660F), UINT32_C(0x904035F2), UINT32_C(0x5BFBB3BF),
+ UINT32_C(0x79C07E70), UINT32_C(0xD7C9AE60), UINT32_C(0x234896C2),
+ UINT32_C(0xC5287DD4), UINT32_C(0xCB0E4121), UINT32_C(0xC4CE4523) } },
+ { { UINT32_C(0x58344831), UINT32_C(0x3626B406), UINT32_C(0x8E55C984),
+ UINT32_C(0xABCCE356), UINT32_C(0x77241602), UINT32_C(0x495CC81C),
+ UINT32_C(0x6D70DF8F), UINT32_C(0x4FB79676), UINT32_C(0x5B071DCA),
+ UINT32_C(0x6354B37C), UINT32_C(0x8C0FC0AD), UINT32_C(0x2CAD80A4) },
+ { UINT32_C(0xF68739B4), UINT32_C(0x18AADD51), UINT32_C(0x47F09C6C),
+ UINT32_C(0x1BFBB177), UINT32_C(0xA8FD51C4), UINT32_C(0x9355EA19),
+ UINT32_C(0xEE58DB7B), UINT32_C(0x3D512A84), UINT32_C(0xE9237640),
+ UINT32_C(0x70842AFD), UINT32_C(0xACAF858D), UINT32_C(0x36F515CA) } },
+ { { UINT32_C(0x7E768B23), UINT32_C(0x3DDEC7C4), UINT32_C(0x036D43ED),
+ UINT32_C(0x97E13C53), UINT32_C(0x3A39AB5F), UINT32_C(0x871E5925),
+ UINT32_C(0x07E68E2B), UINT32_C(0x9AF292DE), UINT32_C(0x4A40112E),
+ UINT32_C(0x41158349), UINT32_C(0x3D4D97E6), UINT32_C(0xCDBB46AF) },
+ { UINT32_C(0x3C0EBE40), UINT32_C(0x2F891293), UINT32_C(0x3EBAD1E5),
+ UINT32_C(0x696C7EEE), UINT32_C(0x33B50D99), UINT32_C(0x8A5F3B69),
+ UINT32_C(0x7ED47DDE), UINT32_C(0xB7BC4840), UINT32_C(0x1E6706D8),
+ UINT32_C(0x3A6F8E6C), UINT32_C(0x3D84BB8F), UINT32_C(0x6A147943) } },
+ { { UINT32_C(0x603AE8D1), UINT32_C(0xEC3A9C78), UINT32_C(0x228C29E5),
+ UINT32_C(0xBFE07E37), UINT32_C(0x396DBC2B), UINT32_C(0xB0385C5B),
+ UINT32_C(0xDF85F41F), UINT32_C(0x7C14FE83), UINT32_C(0xADFD463E),
+ UINT32_C(0xE2E64676), UINT32_C(0x8BF9F23D), UINT32_C(0x5BEF10AA) },
+ { UINT32_C(0xF6BAB6DA), UINT32_C(0xFA83EA0D), UINT32_C(0x966BF7E3),
+ UINT32_C(0xCD0C8BA5), UINT32_C(0x98501C2E), UINT32_C(0xD62216B4),
+ UINT32_C(0xC3E69F2D), UINT32_C(0xB7F298A4), UINT32_C(0x9C8740F4),
+ UINT32_C(0x42CEF13B), UINT32_C(0x0DD64307), UINT32_C(0xBB317E52) } },
+ { { UINT32_C(0x3FFEE775), UINT32_C(0x22B6245C), UINT32_C(0xB37CE7AA),
+ UINT32_C(0x5C3F60BE), UINT32_C(0xE1FEC0DF), UINT32_C(0xDE195D40),
+ UINT32_C(0xA0A82074), UINT32_C(0x3BFAFBC5), UINT32_C(0xC72CA86A),
+ UINT32_C(0xC36EC86A), UINT32_C(0x13FD43EA), UINT32_C(0x56062851) },
+ { UINT32_C(0x8E0B03A4), UINT32_C(0x8686BE80), UINT32_C(0xD540D440),
+ UINT32_C(0xC3BD1F93), UINT32_C(0xBF96CEC5), UINT32_C(0x13E4EBC0),
+ UINT32_C(0x9190C844), UINT32_C(0xE8E23984), UINT32_C(0x00844802),
+ UINT32_C(0x183593A6), UINT32_C(0x4D206878), UINT32_C(0x46716879) } },
+ { { UINT32_C(0xB6F63D19), UINT32_C(0x358F394D), UINT32_C(0x6B052194),
+ UINT32_C(0xA75D4849), UINT32_C(0x5C8D7975), UINT32_C(0x58403590),
+ UINT32_C(0x6CBFBD77), UINT32_C(0x86DC9B6B), UINT32_C(0x647A51E5),
+ UINT32_C(0x2DB04D77), UINT32_C(0xF8950D88), UINT32_C(0x5E9A5B02) },
+ { UINT32_C(0x017168B0), UINT32_C(0xCE69A7E5), UINT32_C(0xC4843AD3),
+ UINT32_C(0x94630FAC), UINT32_C(0x1EFC44FF), UINT32_C(0xB3B9D736),
+ UINT32_C(0xB14D7F93), UINT32_C(0xE729E9B6), UINT32_C(0xE0ED0ABC),
+ UINT32_C(0xA071FC60), UINT32_C(0x8C8D9B83), UINT32_C(0xFC1A9971) } },
+ { { UINT32_C(0xD138E975), UINT32_C(0x49686031), UINT32_C(0x5A8EF0D1),
+ UINT32_C(0x64864038), UINT32_C(0xE7F7DE49), UINT32_C(0x32679713),
+ UINT32_C(0x29D1CD1D), UINT32_C(0x59132349), UINT32_C(0x20BE9ED2),
+ UINT32_C(0x849AA23A), UINT32_C(0x284B3F33), UINT32_C(0x15D303E1) },
+ { UINT32_C(0xB63F9FE9), UINT32_C(0x37309475), UINT32_C(0x45B7256A),
+ UINT32_C(0x327BAC8B), UINT32_C(0xD17FC5D3), UINT32_C(0x291CD227),
+ UINT32_C(0xA973EDF1), UINT32_C(0x8291D8CD), UINT32_C(0x437ABA09),
+ UINT32_C(0xF3843562), UINT32_C(0x271D0785), UINT32_C(0x33FFB704) } },
+ { { UINT32_C(0x47E11E5E), UINT32_C(0x5248D6E4), UINT32_C(0x269C7ED3),
+ UINT32_C(0x0F66FC3C), UINT32_C(0x903E346E), UINT32_C(0x18C0D2B9),
+ UINT32_C(0x4BEAE1B8), UINT32_C(0xD81D9D97), UINT32_C(0xFC30FDF3),
+ UINT32_C(0x610326B0), UINT32_C(0x19A7DFCD), UINT32_C(0x2B136870) },
+ { UINT32_C(0xB9527676), UINT32_C(0xEC75F70A), UINT32_C(0x29A3D897),
+ UINT32_C(0x90829F51), UINT32_C(0x97980302), UINT32_C(0x92FE1809),
+ UINT32_C(0x68474991), UINT32_C(0xA3F2498E), UINT32_C(0x0F22BBAD),
+ UINT32_C(0x6A66307B), UINT32_C(0x20378557), UINT32_C(0x32014B91) } },
+ { { UINT32_C(0x3CD98610), UINT32_C(0x72CD7D55), UINT32_C(0x74504ADF),
+ UINT32_C(0xC3D560B0), UINT32_C(0xCEBB5D5D), UINT32_C(0x23F0A982),
+ UINT32_C(0xB839DDB8), UINT32_C(0x1431C15B), UINT32_C(0xCEB72207),
+ UINT32_C(0x7E207CD8), UINT32_C(0xE7EFB28D), UINT32_C(0x28E0A848) },
+ { UINT32_C(0x1BD96F6E), UINT32_C(0xD22561FE), UINT32_C(0x62A8236B),
+ UINT32_C(0x04812C18), UINT32_C(0x975491FA), UINT32_C(0xA0BF2334),
+ UINT32_C(0x435DF87F), UINT32_C(0x294F42A6), UINT32_C(0xA5D6F4F6),
+ UINT32_C(0x2772B783), UINT32_C(0x2724F853), UINT32_C(0x348F92ED) } },
+ },
+ {
+ { { UINT32_C(0x1A42E5E7), UINT32_C(0xC20FB911), UINT32_C(0x81D12863),
+ UINT32_C(0x075A678B), UINT32_C(0x5CC0AA89), UINT32_C(0x12BCBC6A),
+ UINT32_C(0x4FB9F01E), UINT32_C(0x5279C6AB), UINT32_C(0x11AE1B89),
+ UINT32_C(0xBC8E1789), UINT32_C(0xC290003C), UINT32_C(0xAE74A706) },
+ { UINT32_C(0x79DF3F45), UINT32_C(0x9949D6EC), UINT32_C(0x96C8D37F),
+ UINT32_C(0xBA18E262), UINT32_C(0xDD2275BF), UINT32_C(0x68DE6EE2),
+ UINT32_C(0xC419F1D5), UINT32_C(0xA9E4FFF8), UINT32_C(0xA52B5A40),
+ UINT32_C(0xBC759CA4), UINT32_C(0x63B0996D), UINT32_C(0xFF18CBD8) } },
+ { { UINT32_C(0xD7DD47E5), UINT32_C(0x73C57FDE), UINT32_C(0xD49A7F5D),
+ UINT32_C(0xB0FE5479), UINT32_C(0xCFB9821E), UINT32_C(0xD25C71F1),
+ UINT32_C(0xCF6A1D68), UINT32_C(0x9427E209), UINT32_C(0xACD24E64),
+ UINT32_C(0xBF3C3916), UINT32_C(0xBDA7B8B5), UINT32_C(0x7E9F5583) },
+ { UINT32_C(0xCF971E11), UINT32_C(0xE7C5F7C8), UINT32_C(0x3C7F035E),
+ UINT32_C(0xEC16D5D7), UINT32_C(0xE66B277C), UINT32_C(0x818DC472),
+ UINT32_C(0xB2816F1E), UINT32_C(0x4413FD47), UINT32_C(0x48383C6D),
+ UINT32_C(0x40F262AF), UINT32_C(0x4F190537), UINT32_C(0xFB057584) } },
+ { { UINT32_C(0x08962F6B), UINT32_C(0x487EDC07), UINT32_C(0x190A7E55),
+ UINT32_C(0x6002F1E7), UINT32_C(0x10FDBA0C), UINT32_C(0x7FC62BEA),
+ UINT32_C(0x2C3DBF33), UINT32_C(0xC836BBC5), UINT32_C(0x4F7D2A46),
+ UINT32_C(0x4FDFB5C3), UINT32_C(0xDCA0DF71), UINT32_C(0x824654DE) },
+ { UINT32_C(0x0C23902B), UINT32_C(0x30A07676), UINT32_C(0x77FBBF37),
+ UINT32_C(0x7F1EBB93), UINT32_C(0xFACC13DB), UINT32_C(0xD307D49D),
+ UINT32_C(0xAE1A261A), UINT32_C(0x148D673A), UINT32_C(0x52D98650),
+ UINT32_C(0xE008F95B), UINT32_C(0x9F558FDE), UINT32_C(0xC7614440) } },
+ { { UINT32_C(0x9CB16650), UINT32_C(0x17CD6AF6), UINT32_C(0x69F4EEBE),
+ UINT32_C(0x86CC27C1), UINT32_C(0x78822432), UINT32_C(0x7E495B1D),
+ UINT32_C(0x1B974525), UINT32_C(0xFED338E3), UINT32_C(0x86F3CE21),
+ UINT32_C(0x527743D3), UINT32_C(0xB515C896), UINT32_C(0x87948AD3) },
+ { UINT32_C(0xB17F2FB8), UINT32_C(0x9FDE7039), UINT32_C(0xD9B89D96),
+ UINT32_C(0xA2FA9A5F), UINT32_C(0x36FF74DC), UINT32_C(0x5D46600B),
+ UINT32_C(0x8302C3C9), UINT32_C(0x8EA74B04), UINT32_C(0xF744B5EB),
+ UINT32_C(0xD560F570), UINT32_C(0xFE762402), UINT32_C(0xC921023B) } },
+ { { UINT32_C(0xFFF4C8ED), UINT32_C(0xA35AB657), UINT32_C(0x8A5FABD7),
+ UINT32_C(0x017C6124), UINT32_C(0x09ACDA28), UINT32_C(0x56463025),
+ UINT32_C(0x14CF238A), UINT32_C(0x6038D361), UINT32_C(0xAF1B9F07),
+ UINT32_C(0x1428B1B6), UINT32_C(0x7482E95C), UINT32_C(0x5827FF44) },
+ { UINT32_C(0x780FF362), UINT32_C(0xCB997E18), UINT32_C(0xE0BCAC1E),
+ UINT32_C(0x2B89D702), UINT32_C(0xA837DDC8), UINT32_C(0xC632A0B5),
+ UINT32_C(0x59762647), UINT32_C(0xF3EFCF1F), UINT32_C(0x38B0D60A),
+ UINT32_C(0xE9BA309A), UINT32_C(0x20B5FB37), UINT32_C(0x05DEABDD) } },
+ { { UINT32_C(0xCB8AF047), UINT32_C(0xD44E5DBA), UINT32_C(0x943CFE82),
+ UINT32_C(0x15400CB4), UINT32_C(0x9DF88B67), UINT32_C(0xDBD69575),
+ UINT32_C(0xB2405A7D), UINT32_C(0x8299DB2B), UINT32_C(0x0B1D80CD),
+ UINT32_C(0x46E3BF77), UINT32_C(0xE82BA3D9), UINT32_C(0xC50CF66C) },
+ { UINT32_C(0xF2F747A9), UINT32_C(0xB2910A07), UINT32_C(0x5ADC89C1),
+ UINT32_C(0xF6B669DB), UINT32_C(0x9052B081), UINT32_C(0x3B5EF1A0),
+ UINT32_C(0xB594ACE2), UINT32_C(0x0F5D5ED3), UINT32_C(0xD5F01320),
+ UINT32_C(0xDA30B8D5), UINT32_C(0xAAFCD58F), UINT32_C(0x0D688C5E) } },
+ { { UINT32_C(0x2A161074), UINT32_C(0x5EEE3A31), UINT32_C(0xEFE2BE37),
+ UINT32_C(0x6BAAAE56), UINT32_C(0xE3D78698), UINT32_C(0xF9787F61),
+ UINT32_C(0x50630A30), UINT32_C(0xC6836B26), UINT32_C(0x1445DEF1),
+ UINT32_C(0x7445B85D), UINT32_C(0xD568A6A5), UINT32_C(0xD72016A2) },
+ { UINT32_C(0xE355614F), UINT32_C(0x9DD6F533), UINT32_C(0x91E04588),
+ UINT32_C(0x637E7E5F), UINT32_C(0xB9FB1391), UINT32_C(0x42E142F3),
+ UINT32_C(0x41AFE5DA), UINT32_C(0x0D07C05C), UINT32_C(0x1394EDF1),
+ UINT32_C(0xD7CD25C8), UINT32_C(0xB99288EE), UINT32_C(0xEBE6A0FC) } },
+ { { UINT32_C(0xBABBAD86), UINT32_C(0xB8E63B7B), UINT32_C(0x90D66766),
+ UINT32_C(0x63226A9F), UINT32_C(0x5CF26666), UINT32_C(0x26381836),
+ UINT32_C(0x4CADD0BF), UINT32_C(0xCCBD142D), UINT32_C(0x9AC29470),
+ UINT32_C(0xA070965E), UINT32_C(0x25FF23ED), UINT32_C(0x6BDCA260) },
+ { UINT32_C(0x87DCA7B3), UINT32_C(0xD4E00FD4), UINT32_C(0x9E0E8734),
+ UINT32_C(0xA5097833), UINT32_C(0x048173A4), UINT32_C(0xF73F162E),
+ UINT32_C(0x9C3C2FA2), UINT32_C(0xD23F9196), UINT32_C(0xE4AC397A),
+ UINT32_C(0x9AB98B45), UINT32_C(0x543F2D4B), UINT32_C(0x2BAA0300) } },
+ { { UINT32_C(0xC658C445), UINT32_C(0xBBBE15E7), UINT32_C(0xC28941D1),
+ UINT32_C(0xB8CBCB20), UINT32_C(0x027D6540), UINT32_C(0x65549BE2),
+ UINT32_C(0x1E8EF4F4), UINT32_C(0xEBBCA802), UINT32_C(0xD2ACA397),
+ UINT32_C(0x18214B4B), UINT32_C(0xE31784A3), UINT32_C(0xCBEC7DE2) },
+ { UINT32_C(0x0116FDF3), UINT32_C(0x96F0533F), UINT32_C(0x5C8F5EE1),
+ UINT32_C(0x68911C90), UINT32_C(0xD568603A), UINT32_C(0x7DE9A3AE),
+ UINT32_C(0x6A3AD7B7), UINT32_C(0x3F56C52C), UINT32_C(0x670B4D0E),
+ UINT32_C(0x5BE9AFCA), UINT32_C(0x375DFE2F), UINT32_C(0x628BFEEE) } },
+ { { UINT32_C(0xDD4ADDB3), UINT32_C(0x97DAE81B), UINT32_C(0x8704761B),
+ UINT32_C(0x12D2CF4E), UINT32_C(0x3247788D), UINT32_C(0x5E820B40),
+ UINT32_C(0x0051CA80), UINT32_C(0x82234B62), UINT32_C(0x6CB5EA74),
+ UINT32_C(0x0C62704D), UINT32_C(0x23941593), UINT32_C(0xDE560420) },
+ { UINT32_C(0xF1B04145), UINT32_C(0xB3912A3C), UINT32_C(0xAF93688D),
+ UINT32_C(0xE3967CD7), UINT32_C(0x58DABB4B), UINT32_C(0x2E2DCD2F),
+ UINT32_C(0x0E303911), UINT32_C(0x6564836F), UINT32_C(0xECE07C5C),
+ UINT32_C(0x1F10F19B), UINT32_C(0xD8919126), UINT32_C(0xB47F07EE) } },
+ { { UINT32_C(0xE9A2EEC9), UINT32_C(0xE3545085), UINT32_C(0x2C8E51FE),
+ UINT32_C(0x81866A97), UINT32_C(0x50027243), UINT32_C(0xD2BA7DB5),
+ UINT32_C(0x4AE87DE4), UINT32_C(0x29DAEAB5), UINT32_C(0x684F9497),
+ UINT32_C(0x5EF3D4B8), UINT32_C(0x9D5D6873), UINT32_C(0xE2DACE3B) },
+ { UINT32_C(0xFFD29C9C), UINT32_C(0xF012C951), UINT32_C(0xADBADA14),
+ UINT32_C(0x48289445), UINT32_C(0x89558C49), UINT32_C(0x8751F50D),
+ UINT32_C(0x99E35BEE), UINT32_C(0x75511A4F), UINT32_C(0x7D59AA5F),
+ UINT32_C(0xEF802D6E), UINT32_C(0xA2A795E2), UINT32_C(0x14FCAD65) } },
+ { { UINT32_C(0x08CB8F2C), UINT32_C(0xC8EB00E8), UINT32_C(0x2B45BD86),
+ UINT32_C(0x68607532), UINT32_C(0x59969713), UINT32_C(0x7A29B459),
+ UINT32_C(0xD684201B), UINT32_C(0x5FA15B9B), UINT32_C(0xB9E538EE),
+ UINT32_C(0x1A853190), UINT32_C(0xD573D043), UINT32_C(0x4150605C) },
+ { UINT32_C(0xEB9FBB68), UINT32_C(0xEF011D3B), UINT32_C(0x66AE32B6),
+ UINT32_C(0x67279982), UINT32_C(0x445DE5EC), UINT32_C(0x861B86EA),
+ UINT32_C(0xA34A50E1), UINT32_C(0x62837D18), UINT32_C(0xBF5F0663),
+ UINT32_C(0x228C006A), UINT32_C(0x396DB36A), UINT32_C(0xE007FDE7) } },
+ { { UINT32_C(0x5A916A55), UINT32_C(0xDEE4F881), UINT32_C(0xF39C82CB),
+ UINT32_C(0x20DC0370), UINT32_C(0x40F09821), UINT32_C(0xD9A71615),
+ UINT32_C(0xF7273492), UINT32_C(0xD50AD8BF), UINT32_C(0x32E7C4BF),
+ UINT32_C(0xA06F7D12), UINT32_C(0x4C5CEA36), UINT32_C(0xFA0F6154) },
+ { UINT32_C(0x5FC49CFE), UINT32_C(0xF4FD9BED), UINT32_C(0xC9291678),
+ UINT32_C(0xD8CB45D1), UINT32_C(0x7B92C9F2), UINT32_C(0x94DB86CC),
+ UINT32_C(0x73C81169), UINT32_C(0x09CA5F38), UINT32_C(0xAEED06F0),
+ UINT32_C(0x109F40B0), UINT32_C(0x14DCAA0A), UINT32_C(0x9F0360B2) } },
+ { { UINT32_C(0xE12AD3E7), UINT32_C(0x4189B70D), UINT32_C(0x10B06607),
+ UINT32_C(0x5208ADB2), UINT32_C(0xEE8497FA), UINT32_C(0xEBD8E2A2),
+ UINT32_C(0xE04F2ECB), UINT32_C(0x61B1BD67), UINT32_C(0x4F3F5F99),
+ UINT32_C(0x0E2DDA72), UINT32_C(0xF747B16D), UINT32_C(0xD5D96740) },
+ { UINT32_C(0xA6BF397F), UINT32_C(0x308A48F6), UINT32_C(0x23A93595),
+ UINT32_C(0x7021C3E5), UINT32_C(0x36470AA0), UINT32_C(0xF10B0229),
+ UINT32_C(0x4E03295B), UINT32_C(0x7761E8EC), UINT32_C(0x07339770),
+ UINT32_C(0x16EFEF58), UINT32_C(0x5DA5DAA2), UINT32_C(0x0D55D2DD) } },
+ { { UINT32_C(0x8A22F87A), UINT32_C(0x915EA6A3), UINT32_C(0x2E5A088E),
+ UINT32_C(0x191151C1), UINT32_C(0x7F1D5CBE), UINT32_C(0x190252F1),
+ UINT32_C(0x3B0EC99B), UINT32_C(0xE43F59C3), UINT32_C(0xFF2A6135),
+ UINT32_C(0xBE8588D4), UINT32_C(0x2ECB4B9F), UINT32_C(0x103877CC) },
+ { UINT32_C(0x023CF92B), UINT32_C(0x8F4147E5), UINT32_C(0x0CC2085B),
+ UINT32_C(0xC24384CC), UINT32_C(0xD082D311), UINT32_C(0x6A2DB4A2),
+ UINT32_C(0xED7BA9AE), UINT32_C(0x06283811), UINT32_C(0x2A8E1592),
+ UINT32_C(0xE9A3F532), UINT32_C(0x5A59E894), UINT32_C(0xAC20F0F4) } },
+ { { UINT32_C(0x74AAB4B1), UINT32_C(0x788CAA52), UINT32_C(0x2FEAFC7E),
+ UINT32_C(0xEB84ABA1), UINT32_C(0xAC04FF77), UINT32_C(0x31DA71DA),
+ UINT32_C(0x24E4D0BF), UINT32_C(0x39D12EB9), UINT32_C(0x87A34EF8),
+ UINT32_C(0x4F2F292F), UINT32_C(0xA237A8ED), UINT32_C(0x9B324372) },
+ { UINT32_C(0x2EE3A82D), UINT32_C(0xBB2D04B1), UINT32_C(0xD18D36B2),
+ UINT32_C(0xED4FF367), UINT32_C(0xA6EA0138), UINT32_C(0x99D231EE),
+ UINT32_C(0x4F92E04A), UINT32_C(0x7C2D4F06), UINT32_C(0xCA272FD0),
+ UINT32_C(0x78A82AB2), UINT32_C(0xAB8CDC32), UINT32_C(0x7EC41340) } },
+ },
+ {
+ { { UINT32_C(0xD2E15A8C), UINT32_C(0xD23658C8), UINT32_C(0x16BA28CA),
+ UINT32_C(0x23F93DF7), UINT32_C(0x082210F1), UINT32_C(0x6DAB10EC),
+ UINT32_C(0xBFC36490), UINT32_C(0xFB1ADD91), UINT32_C(0x9A4F2D14),
+ UINT32_C(0xEDA8B02F), UINT32_C(0x56560443), UINT32_C(0x9060318C) },
+ { UINT32_C(0x64711AB2), UINT32_C(0x6C01479E), UINT32_C(0xE337EB85),
+ UINT32_C(0x41446FC7), UINT32_C(0x71888397), UINT32_C(0x4DCF3C1D),
+ UINT32_C(0x13C34FD2), UINT32_C(0x87A9C04E), UINT32_C(0x510C15AC),
+ UINT32_C(0xFE0E08EC), UINT32_C(0xC0F495D2), UINT32_C(0xFC0D0413) } },
+ { { UINT32_C(0x156636C2), UINT32_C(0xEB05C516), UINT32_C(0x090E93FC),
+ UINT32_C(0x2F613ABA), UINT32_C(0x489576F5), UINT32_C(0xCFD573CD),
+ UINT32_C(0x535A8D57), UINT32_C(0xE6535380), UINT32_C(0x671436C4),
+ UINT32_C(0x13947314), UINT32_C(0x5F0A122D), UINT32_C(0x1172FB0C) },
+ { UINT32_C(0xC12F58F6), UINT32_C(0xAECC7EC1), UINT32_C(0x8E41AFD2),
+ UINT32_C(0xFE42F957), UINT32_C(0x3D4221AA), UINT32_C(0xDF96F652),
+ UINT32_C(0x2851996B), UINT32_C(0xFEF5649F), UINT32_C(0xD5CFB67E),
+ UINT32_C(0x46FB9F26), UINT32_C(0xEF5C4052), UINT32_C(0xB047BFC7) } },
+ { { UINT32_C(0xF4484374), UINT32_C(0x5CBDC442), UINT32_C(0xF92452EF),
+ UINT32_C(0x6B156957), UINT32_C(0xC118D02A), UINT32_C(0x58A26886),
+ UINT32_C(0x75AAF276), UINT32_C(0x87FF74E6), UINT32_C(0xF65F6EC1),
+ UINT32_C(0xB133BE95), UINT32_C(0x4B1B8D32), UINT32_C(0xA89B6284) },
+ { UINT32_C(0x09C81004), UINT32_C(0xDD8A8EF3), UINT32_C(0x0CF21991),
+ UINT32_C(0x7F8225DB), UINT32_C(0x26623FAF), UINT32_C(0xD525A6DB),
+ UINT32_C(0xBAE15453), UINT32_C(0xF2368D40), UINT32_C(0x84F89FC9),
+ UINT32_C(0x55D6A84D), UINT32_C(0x86021A3E), UINT32_C(0xAF38358A) } },
+ { { UINT32_C(0xFF52E280), UINT32_C(0xBD048BDC), UINT32_C(0x526A1795),
+ UINT32_C(0x8A51D0B2), UINT32_C(0xA985AC0F), UINT32_C(0x40AAA758),
+ UINT32_C(0xF2C7ACE9), UINT32_C(0x6039BCDC), UINT32_C(0x6AEC347D),
+ UINT32_C(0x712092CC), UINT32_C(0x6B5ACAB7), UINT32_C(0x7976D090) },
+ { UINT32_C(0x6EED9617), UINT32_C(0x1EBCF80D), UINT32_C(0xB0F404A4),
+ UINT32_C(0xB3A63149), UINT32_C(0xD0B610EF), UINT32_C(0x3FDD3D1A),
+ UINT32_C(0x98C28AC7), UINT32_C(0xDD3F6F94), UINT32_C(0x3A59750F),
+ UINT32_C(0x650B7794), UINT32_C(0x2D3991AC), UINT32_C(0xEC59BAB1) } },
+ { { UINT32_C(0x2E552766), UINT32_C(0x01F40E88), UINT32_C(0x66F5354F),
+ UINT32_C(0x1FE3D509), UINT32_C(0xB3A8EA7F), UINT32_C(0x0E46D006),
+ UINT32_C(0xF831CD6A), UINT32_C(0xF75AB629), UINT32_C(0x91465119),
+ UINT32_C(0xDAD808D7), UINT32_C(0x17EF9B10), UINT32_C(0x442405AF) },
+ { UINT32_C(0x672BDFCB), UINT32_C(0xD5FE0A96), UINT32_C(0x355DBDEC),
+ UINT32_C(0xA9DFA422), UINT32_C(0x79B25636), UINT32_C(0xFDB79AA1),
+ UINT32_C(0xEECE8AEC), UINT32_C(0xE7F26FFD), UINT32_C(0x7EDD5AA2),
+ UINT32_C(0xB5925550), UINT32_C(0x8EB3A6C2), UINT32_C(0x2C8F6FF0) } },
+ { { UINT32_C(0x757D6136), UINT32_C(0x88887756), UINT32_C(0x88B92E72),
+ UINT32_C(0xAD9AC183), UINT32_C(0x8785D3EB), UINT32_C(0x92CB2FC4),
+ UINT32_C(0x9319764B), UINT32_C(0xD1A542FE), UINT32_C(0x626A62F8),
+ UINT32_C(0xAF4CC78F), UINT32_C(0x26BFFAAE), UINT32_C(0x7F3F5FC9) },
+ { UINT32_C(0x40AE2231), UINT32_C(0x0A203D43), UINT32_C(0x387898E8),
+ UINT32_C(0xA8BFD9E0), UINT32_C(0x474B7DDD), UINT32_C(0x1A0C379C),
+ UINT32_C(0x34FD49EA), UINT32_C(0x03855E0A), UINT32_C(0xB3EF4AE1),
+ UINT32_C(0x02B26223), UINT32_C(0xE399E0A3), UINT32_C(0x804BD8CF) } },
+ { { UINT32_C(0xDE865713), UINT32_C(0x11A9F3D0), UINT32_C(0xBDE98821),
+ UINT32_C(0x81E36B6B), UINT32_C(0x6AA891D0), UINT32_C(0x324996C8),
+ UINT32_C(0x395682B5), UINT32_C(0x7B95BDC1), UINT32_C(0xC1600563),
+ UINT32_C(0x47BF2219), UINT32_C(0x643E38B4), UINT32_C(0x7A473F50) },
+ { UINT32_C(0xF5738288), UINT32_C(0x0911F50A), UINT32_C(0x6F9C415B),
+ UINT32_C(0xDF947A70), UINT32_C(0x67A067F6), UINT32_C(0xBDB994F2),
+ UINT32_C(0x88BE96CD), UINT32_C(0x3F4BEC1B), UINT32_C(0xE56DD6D9),
+ UINT32_C(0x9820E931), UINT32_C(0x0A80F419), UINT32_C(0xB138F14F) } },
+ { { UINT32_C(0x0429077A), UINT32_C(0xA11A1A8F), UINT32_C(0x10351C68),
+ UINT32_C(0x2BB1E33D), UINT32_C(0x89459A27), UINT32_C(0x3C25ABFE),
+ UINT32_C(0x6B8AC774), UINT32_C(0x2D0091B8), UINT32_C(0x3B2415D9),
+ UINT32_C(0xDAFC7853), UINT32_C(0x9201680D), UINT32_C(0xDE713CF1) },
+ { UINT32_C(0x68889D57), UINT32_C(0x8E5F445D), UINT32_C(0x60EABF5B),
+ UINT32_C(0x608B209C), UINT32_C(0xF9CFA408), UINT32_C(0x10EC0ACC),
+ UINT32_C(0x4D1EE754), UINT32_C(0xD5256B9D), UINT32_C(0x0AA6C18D),
+ UINT32_C(0xFF866BAB), UINT32_C(0xACB90A45), UINT32_C(0x9D196DB8) } },
+ { { UINT32_C(0xB9B081B2), UINT32_C(0xA46D76A9), UINT32_C(0x62163C25),
+ UINT32_C(0xFC743A10), UINT32_C(0x7761C392), UINT32_C(0xCD2A5C8D),
+ UINT32_C(0xBE808583), UINT32_C(0x39BDDE0B), UINT32_C(0xB98E4DFE),
+ UINT32_C(0x7C416021), UINT32_C(0x65913A44), UINT32_C(0xF930E563) },
+ { UINT32_C(0x7585CF3C), UINT32_C(0xC3555F7E), UINT32_C(0x3D6333D5),
+ UINT32_C(0xC737E383), UINT32_C(0xB430B03D), UINT32_C(0x5B60DBA4),
+ UINT32_C(0xE7555404), UINT32_C(0x42B715EB), UINT32_C(0x7C7796E3),
+ UINT32_C(0x571BDF5B), UINT32_C(0x6DB6331F), UINT32_C(0x33DC62C6) } },
+ { { UINT32_C(0xE61DEE59), UINT32_C(0x3FB9CCB0), UINT32_C(0x18B14DB9),
+ UINT32_C(0xC5185F23), UINT32_C(0x845EF36C), UINT32_C(0x1B2ADC4F),
+ UINT32_C(0x5C1A33AB), UINT32_C(0x195D5B50), UINT32_C(0x421F59D2),
+ UINT32_C(0x8CEA528E), UINT32_C(0xD2931CEA), UINT32_C(0x7DFCCECF) },
+ { UINT32_C(0x8CF7E3F7), UINT32_C(0x51FFA1D5), UINT32_C(0xBDC9FB43),
+ UINT32_C(0xF01B7886), UINT32_C(0x261A0D35), UINT32_C(0xD65AB610),
+ UINT32_C(0x7574A554), UINT32_C(0x84BCBAFD), UINT32_C(0xFAD70208),
+ UINT32_C(0x4B119956), UINT32_C(0x4FAB5243), UINT32_C(0xDDC329C2) } },
+ { { UINT32_C(0x9CE92177), UINT32_C(0x1A08AA57), UINT32_C(0xDC2B5C36),
+ UINT32_C(0x3395E557), UINT32_C(0x394ED04E), UINT32_C(0xFDFE7041),
+ UINT32_C(0xC6DFCDDE), UINT32_C(0xB797EB24), UINT32_C(0xCB9DE5D6),
+ UINT32_C(0x284A6B2A), UINT32_C(0x07222765), UINT32_C(0xE0BD95C8) },
+ { UINT32_C(0x9FE678A7), UINT32_C(0x114A951B), UINT32_C(0x9E4954EC),
+ UINT32_C(0xE7ECD0BD), UINT32_C(0x79F0B8A9), UINT32_C(0x7D4096FE),
+ UINT32_C(0x09724FE2), UINT32_C(0xBDB26E9A), UINT32_C(0xF787AF95),
+ UINT32_C(0x08741AD8), UINT32_C(0x24045AD8), UINT32_C(0x2BF97272) } },
+ { { UINT32_C(0xA9451D57), UINT32_C(0xAB1FEDD9), UINT32_C(0x483E38C9),
+ UINT32_C(0xDF4D91DF), UINT32_C(0x24E9CF8E), UINT32_C(0x2D54D311),
+ UINT32_C(0x7A22EEB6), UINT32_C(0x9C2A5AF8), UINT32_C(0x0A43F123),
+ UINT32_C(0xBD9861EF), UINT32_C(0x38A18B7B), UINT32_C(0x581EA6A2) },
+ { UINT32_C(0x296470A3), UINT32_C(0xAF339C85), UINT32_C(0xAFD8203E),
+ UINT32_C(0xF9603FCD), UINT32_C(0x96763C28), UINT32_C(0x95D05350),
+ UINT32_C(0x860EC831), UINT32_C(0x15445C16), UINT32_C(0x6867A323),
+ UINT32_C(0x2AFB8728), UINT32_C(0x0C4838BF), UINT32_C(0x4B152D6D) } },
+ { { UINT32_C(0x837CACBA), UINT32_C(0x45BA0E4F), UINT32_C(0xC0725275),
+ UINT32_C(0x7ADB38AE), UINT32_C(0x942D3C28), UINT32_C(0x19C82831),
+ UINT32_C(0x6D0FE7DD), UINT32_C(0x94F4731D), UINT32_C(0x4898F1E6),
+ UINT32_C(0xC3C07E13), UINT32_C(0xED410B51), UINT32_C(0x76350EAC) },
+ { UINT32_C(0xF99AACFC), UINT32_C(0x0FA8BECA), UINT32_C(0x65FAF9CF),
+ UINT32_C(0x2834D86F), UINT32_C(0x6F3866AF), UINT32_C(0x8E62846A),
+ UINT32_C(0x3DFD6A2B), UINT32_C(0xDAA9BD4F), UINT32_C(0xA6132655),
+ UINT32_C(0xC27115BB), UINT32_C(0xBD5A32C2), UINT32_C(0x83972DF7) } },
+ { { UINT32_C(0xD513B825), UINT32_C(0xA330CB5B), UINT32_C(0xEE37BEC3),
+ UINT32_C(0xAE18B2D3), UINT32_C(0xF780A902), UINT32_C(0xFC3AB80A),
+ UINT32_C(0xD607DDF1), UINT32_C(0xD7835BE2), UINT32_C(0x5B6E4C2B),
+ UINT32_C(0x8120F767), UINT32_C(0x67E78CCB), UINT32_C(0xAA8C3859) },
+ { UINT32_C(0xAA0ED321), UINT32_C(0xA8DA8CE2), UINT32_C(0xD766341A),
+ UINT32_C(0xCB8846FD), UINT32_C(0x33DC9D9A), UINT32_C(0xF2A342EE),
+ UINT32_C(0xD0A18A80), UINT32_C(0xA519E0BE), UINT32_C(0xAF48DF4C),
+ UINT32_C(0x9CDAA39C), UINT32_C(0x7E0C19EE), UINT32_C(0xA4B500CA) } },
+ { { UINT32_C(0x8217001B), UINT32_C(0x83A7FD2F), UINT32_C(0x4296A8BA),
+ UINT32_C(0x4F6FCF06), UINT32_C(0x91619927), UINT32_C(0x7D748643),
+ UINT32_C(0x941E4D41), UINT32_C(0x174C1075), UINT32_C(0xA64F5A6C),
+ UINT32_C(0x037EDEBD), UINT32_C(0x6E29DC56), UINT32_C(0xCF64DB3A) },
+ { UINT32_C(0x37C0B9F4), UINT32_C(0x150B3ACE), UINT32_C(0x7168178B),
+ UINT32_C(0x1323234A), UINT32_C(0xEF4D1879), UINT32_C(0x1CE47014),
+ UINT32_C(0x17FB4D5C), UINT32_C(0xA22E3742), UINT32_C(0xD985F794),
+ UINT32_C(0x69B81822), UINT32_C(0x081D7214), UINT32_C(0x199C21C4) } },
+ { { UINT32_C(0x8F04B4D2), UINT32_C(0x160BC7A1), UINT32_C(0xB10DE174),
+ UINT32_C(0x79CA81DD), UINT32_C(0x2DA1E9C7), UINT32_C(0xE2A280B0),
+ UINT32_C(0x1D6A0A29), UINT32_C(0xB4F6BD99), UINT32_C(0x1C5B8F27),
+ UINT32_C(0x57CF3EDD), UINT32_C(0x158C2FD4), UINT32_C(0x7E34FC57) },
+ { UINT32_C(0xCAC93459), UINT32_C(0x828CFD89), UINT32_C(0xB7AF499F),
+ UINT32_C(0x9E631B6F), UINT32_C(0xDA26C135), UINT32_C(0xF4DC8BC0),
+ UINT32_C(0x37186735), UINT32_C(0x6128ED39), UINT32_C(0x67BF0BA5),
+ UINT32_C(0xBB45538B), UINT32_C(0x0064A3AB), UINT32_C(0x1ADDD4C1) } },
+ },
+ {
+ { { UINT32_C(0xDD14D47E), UINT32_C(0xC32730E8), UINT32_C(0xC0F01E0F),
+ UINT32_C(0xCDC1FD42), UINT32_C(0x3F5CD846), UINT32_C(0x2BACFDBF),
+ UINT32_C(0x7272D4DD), UINT32_C(0x45F36416), UINT32_C(0x5EB75776),
+ UINT32_C(0xDD813A79), UINT32_C(0x50997BE2), UINT32_C(0xB57885E4) },
+ { UINT32_C(0xDB8C9829), UINT32_C(0xDA054E2B), UINT32_C(0xAAB5A594),
+ UINT32_C(0x4161D820), UINT32_C(0x026116A3), UINT32_C(0x4C428F31),
+ UINT32_C(0xDCD85E91), UINT32_C(0x372AF9A0), UINT32_C(0x673ADC2D),
+ UINT32_C(0xFDA6E903), UINT32_C(0xA8DB59E6), UINT32_C(0x4526B8AC) } },
+ { { UINT32_C(0xE23A8472), UINT32_C(0x68FE359D), UINT32_C(0x4CE3C101),
+ UINT32_C(0x43EB12BD), UINT32_C(0xFC704935), UINT32_C(0x0EC652C3),
+ UINT32_C(0x52E4E22D), UINT32_C(0x1EEFF1F9), UINT32_C(0x083E3ADA),
+ UINT32_C(0xBA6777CB), UINT32_C(0x8BEFC871), UINT32_C(0xAB52D7DC) },
+ { UINT32_C(0x497CBD59), UINT32_C(0x4EDE689F), UINT32_C(0x27577DD9),
+ UINT32_C(0xC8AE42B9), UINT32_C(0x7AB83C27), UINT32_C(0xE0F08051),
+ UINT32_C(0x2C8C1F48), UINT32_C(0x1F3D5F25), UINT32_C(0xAF241AAC),
+ UINT32_C(0x57991607), UINT32_C(0xB8A337E0), UINT32_C(0xC4458B0A) } },
+ { { UINT32_C(0x51DD1BA9), UINT32_C(0x3DBB3FA6), UINT32_C(0x545E960B),
+ UINT32_C(0xE53C1C4D), UINT32_C(0x793CE803), UINT32_C(0x35AC6574),
+ UINT32_C(0x83DBCE4F), UINT32_C(0xB2697DC7), UINT32_C(0xE13CF6B0),
+ UINT32_C(0xE35C5BF2), UINT32_C(0xB0C4A164), UINT32_C(0x35034280) },
+ { UINT32_C(0xD9C0D3C1), UINT32_C(0xAA490908), UINT32_C(0xCB4D2E90),
+ UINT32_C(0x2CCE614D), UINT32_C(0x54D504E4), UINT32_C(0xF646E96C),
+ UINT32_C(0xB73310A3), UINT32_C(0xD74E7541), UINT32_C(0x18BDE5DA),
+ UINT32_C(0xEAD71596), UINT32_C(0xAA09AEF7), UINT32_C(0x96E7F4A8) } },
+ { { UINT32_C(0x5D6E5F48), UINT32_C(0xA8393A24), UINT32_C(0xF9175CE8),
+ UINT32_C(0x2C8D7EA2), UINT32_C(0x55A20268), UINT32_C(0xD8824E02),
+ UINT32_C(0xA446BCC6), UINT32_C(0x9DD9A272), UINT32_C(0x5351499B),
+ UINT32_C(0xC929CDED), UINT32_C(0xCFE76535), UINT32_C(0xEA5AD9EC) },
+ { UINT32_C(0xDC32D001), UINT32_C(0x26F3D7D9), UINT32_C(0x43EB9689),
+ UINT32_C(0x51C3BE83), UINT32_C(0x759E6DDB), UINT32_C(0x91FDCC06),
+ UINT32_C(0xE302B891), UINT32_C(0xAC2E1904), UINT32_C(0xC207E1F7),
+ UINT32_C(0xAD25C645), UINT32_C(0xAB3DEB4A), UINT32_C(0x28A70F0D) } },
+ { { UINT32_C(0x03BEA8F1), UINT32_C(0x922D7F97), UINT32_C(0x584570BE),
+ UINT32_C(0x3AD820D4), UINT32_C(0x3CD46B43), UINT32_C(0x0CE0A850),
+ UINT32_C(0xAE66743D), UINT32_C(0x4C07911F), UINT32_C(0xFDA60023),
+ UINT32_C(0x66519EB9), UINT32_C(0xEC2ACD9C), UINT32_C(0x7F83004B) },
+ { UINT32_C(0xC3117EAD), UINT32_C(0x001E0B80), UINT32_C(0x0722BA25),
+ UINT32_C(0xBB72D541), UINT32_C(0x6E9A5078), UINT32_C(0x3AF7DB96),
+ UINT32_C(0x701B6B4C), UINT32_C(0x86C5774E), UINT32_C(0x37824DB5),
+ UINT32_C(0xBD2C0E8E), UINT32_C(0xBFAC286D), UINT32_C(0x3AE3028C) } },
+ { { UINT32_C(0xA33E071B), UINT32_C(0x83D4D4A8), UINT32_C(0x61444BB5),
+ UINT32_C(0x881C0A92), UINT32_C(0x520E3BC3), UINT32_C(0xEEA1E292),
+ UINT32_C(0x2AAAB729), UINT32_C(0x5A5F4C3C), UINT32_C(0xE63C7C94),
+ UINT32_C(0x0B766C5E), UINT32_C(0xBB2CC79C), UINT32_C(0x62BB8A9F) },
+ { UINT32_C(0xAA5DC49D), UINT32_C(0x97ADC7D2), UINT32_C(0x31718681),
+ UINT32_C(0x30CC26B3), UINT32_C(0x56E86EDE), UINT32_C(0xAC86E6FF),
+ UINT32_C(0xCD52F7F2), UINT32_C(0x37BCA7A2), UINT32_C(0x9CE6D87F),
+ UINT32_C(0x734D2C94), UINT32_C(0xC2F7E0CA), UINT32_C(0x06A71D71) } },
+ { { UINT32_C(0xC6357D33), UINT32_C(0x559DCF75), UINT32_C(0x652517DE),
+ UINT32_C(0x4616D940), UINT32_C(0x1CCF207B), UINT32_C(0x3D576B98),
+ UINT32_C(0x1979F631), UINT32_C(0x51E2D1EF), UINT32_C(0x06AE8296),
+ UINT32_C(0x57517DDD), UINT32_C(0xD6E7151F), UINT32_C(0x309A3D7F) },
+ { UINT32_C(0x0E3A6FE5), UINT32_C(0xBA2A23E6), UINT32_C(0xD28B22C3),
+ UINT32_C(0x76CF674A), UINT32_C(0xF8B808C3), UINT32_C(0xD235AD07),
+ UINT32_C(0x6B71213A), UINT32_C(0x7BBF4C58), UINT32_C(0x93271EBB),
+ UINT32_C(0x0676792E), UINT32_C(0x05B1FC31), UINT32_C(0x2CFD2C76) } },
+ { { UINT32_C(0x37A450F5), UINT32_C(0x4258E5C0), UINT32_C(0x52D2B118),
+ UINT32_C(0xC3245F1B), UINT32_C(0x82BC5963), UINT32_C(0x6DF7B484),
+ UINT32_C(0x9C273D1E), UINT32_C(0xE520DA4D), UINT32_C(0x2C3010E5),
+ UINT32_C(0xED78E012), UINT32_C(0x3C1D4C05), UINT32_C(0x11222948) },
+ { UINT32_C(0xC692B490), UINT32_C(0xE3DAE5AF), UINT32_C(0xC197F793),
+ UINT32_C(0x3272BD10), UINT32_C(0xE709ACAA), UINT32_C(0xF7EAE411),
+ UINT32_C(0x778270A6), UINT32_C(0x00B0C95F), UINT32_C(0x220D4350),
+ UINT32_C(0x4DA76EE1), UINT32_C(0xAB71E308), UINT32_C(0x521E1461) } },
+ { { UINT32_C(0x343196A3), UINT32_C(0x7B654323), UINT32_C(0xB0C95250),
+ UINT32_C(0x35D442AD), UINT32_C(0xE264FF17), UINT32_C(0x38AF50E6),
+ UINT32_C(0x2030D2EA), UINT32_C(0x28397A41), UINT32_C(0xF74EEDA1),
+ UINT32_C(0x8F1D84E9), UINT32_C(0xE6FB3C52), UINT32_C(0xD521F92D) },
+ { UINT32_C(0x95733811), UINT32_C(0xAF358D77), UINT32_C(0x93ABFE94),
+ UINT32_C(0xEBFDDD01), UINT32_C(0xD18D99DE), UINT32_C(0x05D8A028),
+ UINT32_C(0xB5D5BDD9), UINT32_C(0x5A664019), UINT32_C(0x2AA12FE8),
+ UINT32_C(0x3DF17282), UINT32_C(0xB889A28E), UINT32_C(0xB42E006F) } },
+ { { UINT32_C(0xBC35CB1A), UINT32_C(0xCF10E97D), UINT32_C(0x994DEDC5),
+ UINT32_C(0xC70A7BBD), UINT32_C(0x37D04FB9), UINT32_C(0x76A5327C),
+ UINT32_C(0xA76E0CDA), UINT32_C(0x87539F76), UINT32_C(0xCD60A6B1),
+ UINT32_C(0xE9FE493F), UINT32_C(0x132F01C0), UINT32_C(0xA4574796) },
+ { UINT32_C(0xDB70B167), UINT32_C(0xC43B85EB), UINT32_C(0x98551DFA),
+ UINT32_C(0x81D5039A), UINT32_C(0x1D979FA4), UINT32_C(0x6B56FBE9),
+ UINT32_C(0x8615098F), UINT32_C(0x49714FD7), UINT32_C(0x94DECAB5),
+ UINT32_C(0xB10E1CEA), UINT32_C(0x480EF6E3), UINT32_C(0x8342EBA3) } },
+ { { UINT32_C(0xB3677288), UINT32_C(0xE1E030B0), UINT32_C(0x8D5CE3AF),
+ UINT32_C(0x2978174C), UINT32_C(0xF7B2DE98), UINT32_C(0xAFC0271C),
+ UINT32_C(0xB99C20B5), UINT32_C(0x745BC6F3), UINT32_C(0x1E3BB4E5),
+ UINT32_C(0x9F6EDCED), UINT32_C(0x73C8C1FC), UINT32_C(0x58D3EE4E) },
+ { UINT32_C(0x7FD30124), UINT32_C(0x1F3535F4), UINT32_C(0x5FA62502),
+ UINT32_C(0xF366AC70), UINT32_C(0x965363FE), UINT32_C(0x4C4C1FDD),
+ UINT32_C(0x1DE2CA2B), UINT32_C(0x8B2C7777), UINT32_C(0x882F1173),
+ UINT32_C(0x0CB54743), UINT32_C(0x71343331), UINT32_C(0x94B6B8C0) } },
+ { { UINT32_C(0x65B8B35B), UINT32_C(0x75AF0141), UINT32_C(0x4670A1F5),
+ UINT32_C(0x6D7B8485), UINT32_C(0xA3B6D376), UINT32_C(0x6EAA3A47),
+ UINT32_C(0xCB3E5B66), UINT32_C(0xD7E673D2), UINT32_C(0x9589AB38),
+ UINT32_C(0xC0338E6C), UINT32_C(0x09440FAA), UINT32_C(0x4BE26CB3) },
+ { UINT32_C(0x394F9AA3), UINT32_C(0x82CB05E7), UINT32_C(0x7F7792EA),
+ UINT32_C(0xC45C8A8A), UINT32_C(0xB687DC70), UINT32_C(0x37E5E33B),
+ UINT32_C(0xDFE48E49), UINT32_C(0x63853219), UINT32_C(0x6D0E5C8C),
+ UINT32_C(0x087951C1), UINT32_C(0x2BC27310), UINT32_C(0x7696A8C7) } },
+ { { UINT32_C(0xB67E834A), UINT32_C(0xA05736D5), UINT32_C(0x9098D42A),
+ UINT32_C(0xDD2AA0F2), UINT32_C(0x49C69DDC), UINT32_C(0x09F0C1D8),
+ UINT32_C(0x8FF0F0F3), UINT32_C(0x81F8BC1C), UINT32_C(0x03037775),
+ UINT32_C(0x36FD3A4F), UINT32_C(0x4B06DF5C), UINT32_C(0x8286717D) },
+ { UINT32_C(0xA9079EA2), UINT32_C(0xB878F496), UINT32_C(0xD7DC796D),
+ UINT32_C(0xA5642426), UINT32_C(0x67FDAC2B), UINT32_C(0x29B9351A),
+ UINT32_C(0x1D543CDE), UINT32_C(0x93774C0E), UINT32_C(0x1A8E31C4),
+ UINT32_C(0x4F8793BA), UINT32_C(0x6C94798A), UINT32_C(0x7C9F3F3A) } },
+ { { UINT32_C(0xCB8ECDB8), UINT32_C(0x23C5AD11), UINT32_C(0x485A6A02),
+ UINT32_C(0x1E88D25E), UINT32_C(0xF1E268AE), UINT32_C(0xB27CBE84),
+ UINT32_C(0xF4CD0475), UINT32_C(0xDDA80238), UINT32_C(0x49F8EB1B),
+ UINT32_C(0x4F88857B), UINT32_C(0x52FB07F9), UINT32_C(0x91B1221F) },
+ { UINT32_C(0x8637FA67), UINT32_C(0x7CE97460), UINT32_C(0x632198D8),
+ UINT32_C(0x528B3CF4), UINT32_C(0xF6623769), UINT32_C(0x33365AB3),
+ UINT32_C(0x3A83A30F), UINT32_C(0x6FEBCFFF), UINT32_C(0x9BD341EB),
+ UINT32_C(0x398F4C99), UINT32_C(0xB33A333C), UINT32_C(0x180712BB) } },
+ { { UINT32_C(0xD93429E7), UINT32_C(0x2B8655A2), UINT32_C(0x75C8B9EE),
+ UINT32_C(0x99D600BB), UINT32_C(0x88FCA6CD), UINT32_C(0x9FC1AF8B),
+ UINT32_C(0x7C311F80), UINT32_C(0x2FB53386), UINT32_C(0xE8A71EEE),
+ UINT32_C(0x20743ECB), UINT32_C(0xE848B49E), UINT32_C(0xEC3713C4) },
+ { UINT32_C(0xBB886817), UINT32_C(0x5B2037B5), UINT32_C(0x307DBAF4),
+ UINT32_C(0x40EF5AC2), UINT32_C(0x1B3F643D), UINT32_C(0xC2888AF2),
+ UINT32_C(0x9D5A4190), UINT32_C(0x0D8252E1), UINT32_C(0x2DB52A8A),
+ UINT32_C(0x06CC0BEC), UINT32_C(0xAB94E969), UINT32_C(0xB84B98EA) } },
+ { { UINT32_C(0xA0321E0E), UINT32_C(0x2E7AC078), UINT32_C(0xEF3DAAB6),
+ UINT32_C(0x5C5A1168), UINT32_C(0xADDD454A), UINT32_C(0xD2D573CB),
+ UINT32_C(0x36259CC7), UINT32_C(0x27E149E2), UINT32_C(0xA63F47F1),
+ UINT32_C(0x1EDFD469), UINT32_C(0xF1BD2CFD), UINT32_C(0x039AD674) },
+ { UINT32_C(0x3077D3CC), UINT32_C(0xBFA633FC), UINT32_C(0x2FD64E9F),
+ UINT32_C(0x14A7C82F), UINT32_C(0x9D824999), UINT32_C(0xAAA65014),
+ UINT32_C(0x21760F2E), UINT32_C(0x41AB113B), UINT32_C(0x1CAE260A),
+ UINT32_C(0x23E646C5), UINT32_C(0x68DC5159), UINT32_C(0x08062C8F) } },
+ },
+ {
+ { { UINT32_C(0x204BE028), UINT32_C(0x2E7D0A16), UINT32_C(0xD0E41851),
+ UINT32_C(0x4F1D082E), UINT32_C(0x3EB317F9), UINT32_C(0x15F1DDC6),
+ UINT32_C(0x5ADF71D7), UINT32_C(0xF0275071), UINT32_C(0xEE858BC3),
+ UINT32_C(0x2CE33C2E), UINT32_C(0xDA73B71A), UINT32_C(0xA24C76D1) },
+ { UINT32_C(0x6C70C483), UINT32_C(0x9EF6A70A), UINT32_C(0x05CF9612),
+ UINT32_C(0xEFCF1705), UINT32_C(0x7502DE64), UINT32_C(0x9F5BF5A6),
+ UINT32_C(0xA4701973), UINT32_C(0xD11122A1), UINT32_C(0xA2EA7B24),
+ UINT32_C(0x82CFAAC2), UINT32_C(0x0A4582E1), UINT32_C(0x6CAD67CC) } },
+ { { UINT32_C(0xB4DC8600), UINT32_C(0x597A26FF), UINT32_C(0xF9288555),
+ UINT32_C(0x264A09F3), UINT32_C(0x5C27F5F6), UINT32_C(0x0B06AFF6),
+ UINT32_C(0xD8D544E6), UINT32_C(0xCE5AB665), UINT32_C(0x99275C32),
+ UINT32_C(0x92F031BE), UINT32_C(0xF42E0E7C), UINT32_C(0xAF51C5BB) },
+ { UINT32_C(0x1E37B36D), UINT32_C(0x5BB28B06), UINT32_C(0x8473543A),
+ UINT32_C(0x583FBA6A), UINT32_C(0xF93FB7DC), UINT32_C(0xE73FD299),
+ UINT32_C(0x6E2CCAD9), UINT32_C(0xFCD999A8), UINT32_C(0x334D4F57),
+ UINT32_C(0xB8C8A6DF), UINT32_C(0x9A2ACC9B), UINT32_C(0x5ADB28DD) } },
+ { { UINT32_C(0x111792B9), UINT32_C(0x5ADF3D9A), UINT32_C(0x4F1E0D09),
+ UINT32_C(0x1C77A305), UINT32_C(0xA82D3736), UINT32_C(0xF9FBCE33),
+ UINT32_C(0x718C8AA3), UINT32_C(0xF307823E), UINT32_C(0x416CCF69),
+ UINT32_C(0x860578CF), UINT32_C(0x1EF8465B), UINT32_C(0xB942ADD8) },
+ { UINT32_C(0xCD9472E1), UINT32_C(0x9EE0CF97), UINT32_C(0xB01528A8),
+ UINT32_C(0xE6792EEF), UINT32_C(0xC09DA90B), UINT32_C(0xF99B9A8D),
+ UINT32_C(0xCBF3CCB8), UINT32_C(0x1F521C2D), UINT32_C(0x91A62632),
+ UINT32_C(0x6BF66948), UINT32_C(0x854FE9DA), UINT32_C(0xCC7A9CEB) } },
+ { { UINT32_C(0x491CCB92), UINT32_C(0x46303171), UINT32_C(0x2771235B),
+ UINT32_C(0xA80A8C0D), UINT32_C(0xF172C7CF), UINT32_C(0xD8E497FF),
+ UINT32_C(0x35B193CF), UINT32_C(0x7F7009D7), UINT32_C(0xF19DF4BC),
+ UINT32_C(0x6B9FD3F7), UINT32_C(0xB46F1E37), UINT32_C(0xADA548C3) },
+ { UINT32_C(0xC7A20270), UINT32_C(0x87C6EAA9), UINT32_C(0xAE78EF99),
+ UINT32_C(0xEF2245D6), UINT32_C(0x539EAB95), UINT32_C(0x2A121042),
+ UINT32_C(0x79B8F5CC), UINT32_C(0x29A6D5D7), UINT32_C(0xB77840DC),
+ UINT32_C(0x33803A10), UINT32_C(0x11A6A30F), UINT32_C(0xFEDD3A70) } },
+ { { UINT32_C(0x142403D1), UINT32_C(0xFA070E22), UINT32_C(0x15C6F7F5),
+ UINT32_C(0x68FF3160), UINT32_C(0x223A0CE8), UINT32_C(0xE09F04E6),
+ UINT32_C(0x53E14183), UINT32_C(0x22BBD018), UINT32_C(0xCF45B75B),
+ UINT32_C(0x35D9FAFC), UINT32_C(0x7ECEEC88), UINT32_C(0x3A34819D) },
+ { UINT32_C(0xD33262D2), UINT32_C(0xD9CF7568), UINT32_C(0x841D1505),
+ UINT32_C(0x431036D5), UINT32_C(0x9EB2A79A), UINT32_C(0x0C800565),
+ UINT32_C(0x5F7EDC6A), UINT32_C(0x8E77D9F0), UINT32_C(0x65E800AA),
+ UINT32_C(0x19E12D05), UINT32_C(0xB7784E7C), UINT32_C(0x335C8D36) } },
+ { { UINT32_C(0x6484FD40), UINT32_C(0x8B2FC4E9), UINT32_C(0xA35D24EA),
+ UINT32_C(0xEE702764), UINT32_C(0xB871C3F3), UINT32_C(0x15B28AC7),
+ UINT32_C(0xE097047F), UINT32_C(0x805B4048), UINT32_C(0x647CAD2F),
+ UINT32_C(0xD6F1B8DF), UINT32_C(0xDC7DD67F), UINT32_C(0xF1D5B458) },
+ { UINT32_C(0x25148803), UINT32_C(0x324C529C), UINT32_C(0x21274FAF),
+ UINT32_C(0xF6185EBE), UINT32_C(0x95148B55), UINT32_C(0xAF14751E),
+ UINT32_C(0x28F284F4), UINT32_C(0x283ED89D), UINT32_C(0x4CBEBF1A),
+ UINT32_C(0x93AD20E7), UINT32_C(0x882935E1), UINT32_C(0x5F6EC65D) } },
+ { { UINT32_C(0xA4DCEFE9), UINT32_C(0xE222EBA4), UINT32_C(0xEC1CEB74),
+ UINT32_C(0x63AD235F), UINT32_C(0xE05B18E7), UINT32_C(0x2E0BF749),
+ UINT32_C(0xB48BDD87), UINT32_C(0x547BD050), UINT32_C(0xF5AA2FC4),
+ UINT32_C(0x0490C970), UINT32_C(0x2B431390), UINT32_C(0xCED5E4CF) },
+ { UINT32_C(0x51D2898E), UINT32_C(0x07D82704), UINT32_C(0x083B57D4),
+ UINT32_C(0x44B72442), UINT32_C(0x5037FCE8), UINT32_C(0xA4ADA230),
+ UINT32_C(0x50510DA6), UINT32_C(0x55F7905E), UINT32_C(0x8D890A98),
+ UINT32_C(0xD8EE724F), UINT32_C(0x11B85640), UINT32_C(0x925A8E7C) } },
+ { { UINT32_C(0x1CA459ED), UINT32_C(0x5BFA10CD), UINT32_C(0x6DCF56BF),
+ UINT32_C(0x593F085A), UINT32_C(0xC0579C3E), UINT32_C(0xE6F0AD9B),
+ UINT32_C(0x2527C1AD), UINT32_C(0xC11C95A2), UINT32_C(0xCF1CB8B3),
+ UINT32_C(0x7CFA71E1), UINT32_C(0x1D6DC79D), UINT32_C(0xEDCFF833) },
+ { UINT32_C(0x432521C9), UINT32_C(0x581C4BBE), UINT32_C(0x144E11A0),
+ UINT32_C(0xBF620096), UINT32_C(0xBE3A107B), UINT32_C(0x54C38B71),
+ UINT32_C(0xE2606EC0), UINT32_C(0xED555E37), UINT32_C(0xD721D034),
+ UINT32_C(0x3FB148B8), UINT32_C(0x0091BC90), UINT32_C(0x79D53DAD) } },
+ { { UINT32_C(0xB7082C80), UINT32_C(0xE32068C5), UINT32_C(0x7A144E22),
+ UINT32_C(0x4140FFD2), UINT32_C(0x9EDD9E86), UINT32_C(0x5811D2F0),
+ UINT32_C(0xC572C465), UINT32_C(0xCDD79B5F), UINT32_C(0xC97BF450),
+ UINT32_C(0x3563FED1), UINT32_C(0xF2CE5C9C), UINT32_C(0x985C1444) },
+ { UINT32_C(0x99950F1C), UINT32_C(0x260AE797), UINT32_C(0x765E9DED),
+ UINT32_C(0x659F4F40), UINT32_C(0x2E3BC286), UINT32_C(0x2A412D66),
+ UINT32_C(0xF87E0C82), UINT32_C(0xE865E62C), UINT32_C(0x6C05E7D7),
+ UINT32_C(0xD63D3A9A), UINT32_C(0x8686F89A), UINT32_C(0x96725D67) } },
+ { { UINT32_C(0xAB7EA0F5), UINT32_C(0xC99A5E4C), UINT32_C(0xC5393FA9),
+ UINT32_C(0xC9860A1A), UINT32_C(0x8FDEEFC0), UINT32_C(0x9ED83CEE),
+ UINT32_C(0x5ED6869A), UINT32_C(0xE3EA8B4C), UINT32_C(0xD2EED3A9),
+ UINT32_C(0x89A85463), UINT32_C(0xE421A622), UINT32_C(0x2CD91B6D) },
+ { UINT32_C(0x2C91C41D), UINT32_C(0x6FEC1EF3), UINT32_C(0x8171037D),
+ UINT32_C(0xB1540D1F), UINT32_C(0x1C010E5B), UINT32_C(0x4FE4991A),
+ UINT32_C(0xFC1C7368), UINT32_C(0x28A3469F), UINT32_C(0xAF118781),
+ UINT32_C(0xE1EEECD1), UINT32_C(0x99EF3531), UINT32_C(0x1BCCB977) } },
+ { { UINT32_C(0xC4DAB7B8), UINT32_C(0x63D3B638), UINT32_C(0x3F7F5BAB),
+ UINT32_C(0xD92133B6), UINT32_C(0x09FB6069), UINT32_C(0x2573EE20),
+ UINT32_C(0x890A1686), UINT32_C(0x771FABDF), UINT32_C(0xA77AFFF5),
+ UINT32_C(0x1D0BA21F), UINT32_C(0xBA3DD2C0), UINT32_C(0x83145FCC) },
+ { UINT32_C(0x2D115C20), UINT32_C(0xFA073A81), UINT32_C(0x19176F27),
+ UINT32_C(0x6AB7A9D3), UINT32_C(0x9AC639EE), UINT32_C(0xAF62CF93),
+ UINT32_C(0x2CCD1319), UINT32_C(0xF73848B9), UINT32_C(0x3C71659D),
+ UINT32_C(0x3B613234), UINT32_C(0x10AB3826), UINT32_C(0xF8E0011C) } },
+ { { UINT32_C(0x0282FFA5), UINT32_C(0x0501F036), UINT32_C(0xD9E0F15A),
+ UINT32_C(0xC39A5CF4), UINT32_C(0x9A3D1F3C), UINT32_C(0x48D8C729),
+ UINT32_C(0x64E18EDA), UINT32_C(0xB5FC136B), UINT32_C(0x7E58FEF0),
+ UINT32_C(0xE81B53D9), UINT32_C(0xF7B0F28D), UINT32_C(0x0D534055) },
+ { UINT32_C(0x7A80619B), UINT32_C(0x47B8DE12), UINT32_C(0x81F9E55D),
+ UINT32_C(0x60E2A2B3), UINT32_C(0xCF564CC5), UINT32_C(0x6E9624D7),
+ UINT32_C(0x6BDEDFFF), UINT32_C(0xFDF18A21), UINT32_C(0xC0D5FC82),
+ UINT32_C(0x3787DE38), UINT32_C(0x497A6B11), UINT32_C(0xCBCAA347) } },
+ { { UINT32_C(0xB226465A), UINT32_C(0x6E7EF35E), UINT32_C(0x5F8A2BAF),
+ UINT32_C(0x4B469919), UINT32_C(0x1120D93F), UINT32_C(0x44B3A3CF),
+ UINT32_C(0x68F34AD1), UINT32_C(0xB052C8B6), UINT32_C(0xEF7632DD),
+ UINT32_C(0x27EC574B), UINT32_C(0x685DE26F), UINT32_C(0xAEBEA108) },
+ { UINT32_C(0xE39424B6), UINT32_C(0xDA33236B), UINT32_C(0xEBCC22AD),
+ UINT32_C(0xB1BD94A9), UINT32_C(0x2CDFB5D5), UINT32_C(0x6DDEE6CC),
+ UINT32_C(0x6F14069A), UINT32_C(0xBDAED927), UINT32_C(0x2A247CB7),
+ UINT32_C(0x2ADE427C), UINT32_C(0xED156A40), UINT32_C(0xCE96B436) } },
+ { { UINT32_C(0x81F3F819), UINT32_C(0xDDDCA360), UINT32_C(0xD419B96A),
+ UINT32_C(0x4AF4A49F), UINT32_C(0x7CB966B9), UINT32_C(0x746C6525),
+ UINT32_C(0x6F610023), UINT32_C(0x01E39088), UINT32_C(0x98DD33FC),
+ UINT32_C(0x05ECB38D), UINT32_C(0x8F84EDF4), UINT32_C(0x962B971B) },
+ { UINT32_C(0x6A6F2602), UINT32_C(0xEB32C0A5), UINT32_C(0x562D60F2),
+ UINT32_C(0xF026AF71), UINT32_C(0x84615FAB), UINT32_C(0xA9E246BF),
+ UINT32_C(0x75DBAE01), UINT32_C(0xAD967092), UINT32_C(0x3ECE5D07),
+ UINT32_C(0xBF97C79B), UINT32_C(0x74EAA3D3), UINT32_C(0xE06266C7) } },
+ { { UINT32_C(0x2E6DBB6E), UINT32_C(0x161A0157), UINT32_C(0x60FA8F47),
+ UINT32_C(0xB8AF4904), UINT32_C(0x00197F22), UINT32_C(0xE4336C44),
+ UINT32_C(0x9CEDCE0E), UINT32_C(0xF811AFFA), UINT32_C(0xF94C2EF1),
+ UINT32_C(0xB1DD7685), UINT32_C(0xCA957BB0), UINT32_C(0xEEDC0F4B) },
+ { UINT32_C(0x4AA76BB1), UINT32_C(0xD319FD57), UINT32_C(0x16CD7CCB),
+ UINT32_C(0xB3525D7C), UINT32_C(0xA97DD072), UINT32_C(0x7B22DA9C),
+ UINT32_C(0x38A83E71), UINT32_C(0x99DB84BD), UINT32_C(0xC0EDD8BE),
+ UINT32_C(0x4939BC8D), UINT32_C(0x903A932C), UINT32_C(0x06D524EA) } },
+ { { UINT32_C(0x0E31F639), UINT32_C(0x4BC950EC), UINT32_C(0x6016BE30),
+ UINT32_C(0xB7ABD3DC), UINT32_C(0x6703DAD0), UINT32_C(0x3B0F4473),
+ UINT32_C(0x0AC1C4EA), UINT32_C(0xCC405F8B), UINT32_C(0x176C3FEE),
+ UINT32_C(0x9BED5E57), UINT32_C(0x36AE36C2), UINT32_C(0xF4524810) },
+ { UINT32_C(0x15D7B503), UINT32_C(0xC1EDBB83), UINT32_C(0xE30F3657),
+ UINT32_C(0x943B1156), UINT32_C(0x98377805), UINT32_C(0x984E9EEF),
+ UINT32_C(0x36CF1DEB), UINT32_C(0x291AE7AC), UINT32_C(0xA9F66DF3),
+ UINT32_C(0xFED8748C), UINT32_C(0xFEA8FA5D), UINT32_C(0xECA758BB) } },
+ },
+ {
+ { { UINT32_C(0x2DD1B249), UINT32_C(0xACC787EF), UINT32_C(0xD82976F1),
+ UINT32_C(0x736E1030), UINT32_C(0xA01B3649), UINT32_C(0x0A6940FA),
+ UINT32_C(0xC42341E7), UINT32_C(0xE00B926B), UINT32_C(0xDE8FFD6C),
+ UINT32_C(0x911508D0), UINT32_C(0x5276B0CB), UINT32_C(0x4DCF8D46) },
+ { UINT32_C(0xCC3CAD8D), UINT32_C(0x23AD0A90), UINT32_C(0xADED962A),
+ UINT32_C(0x2A92E54C), UINT32_C(0xF231BFAF), UINT32_C(0x93FBEC4D),
+ UINT32_C(0x4798987A), UINT32_C(0x9544BC77), UINT32_C(0x08E29F60),
+ UINT32_C(0x48084E25), UINT32_C(0x32DE5869), UINT32_C(0x0C0D2F43) } },
+ { { UINT32_C(0x3A9ABC13), UINT32_C(0x6778F970), UINT32_C(0x3D2B166B),
+ UINT32_C(0xFD014FAC), UINT32_C(0x3C6FED60), UINT32_C(0x1FE4FC78),
+ UINT32_C(0xAA7C69C5), UINT32_C(0x04295FA8), UINT32_C(0x7C123175),
+ UINT32_C(0xA01DE56D), UINT32_C(0x3D9A713A), UINT32_C(0x0FA0D3A8) },
+ { UINT32_C(0xE3E08ADD), UINT32_C(0xA7A6E5E3), UINT32_C(0x1AC58F85),
+ UINT32_C(0xBD77E94B), UINT32_C(0xB7321A9C), UINT32_C(0x078F6FD2),
+ UINT32_C(0x911EF6D9), UINT32_C(0x9564601E), UINT32_C(0x415C6BEF),
+ UINT32_C(0x31C5C1B2), UINT32_C(0xD3212C62), UINT32_C(0xE6C0C91E) } },
+ { { UINT32_C(0x0D16022F), UINT32_C(0xBA7BD23C), UINT32_C(0x198BE288),
+ UINT32_C(0xE9CF4750), UINT32_C(0x47DEEC65), UINT32_C(0x304E3169),
+ UINT32_C(0x96EEB288), UINT32_C(0xCF65B41F), UINT32_C(0x927E9E3B),
+ UINT32_C(0x17E99C17), UINT32_C(0xF6630A80), UINT32_C(0x82225546) },
+ { UINT32_C(0xCA067BD9), UINT32_C(0x15122B8A), UINT32_C(0xB77B4E98),
+ UINT32_C(0xE2673205), UINT32_C(0x9407CA63), UINT32_C(0x13037565),
+ UINT32_C(0x8B621602), UINT32_C(0x53624F54), UINT32_C(0xEAE4BD06),
+ UINT32_C(0x96AF2CB1), UINT32_C(0x8FA20829), UINT32_C(0x576ECD1C) } },
+ { { UINT32_C(0x7E02D2D0), UINT32_C(0xA551CE10), UINT32_C(0x9D13DBC7),
+ UINT32_C(0x1584ED24), UINT32_C(0x4DA7B6D8), UINT32_C(0x082017AD),
+ UINT32_C(0xE054BC48), UINT32_C(0x81918A8F), UINT32_C(0x572DC384),
+ UINT32_C(0x677DB48E), UINT32_C(0x6155484C), UINT32_C(0x2EF82296) },
+ { UINT32_C(0x41B9C231), UINT32_C(0xC3DB14C6), UINT32_C(0x4A766192),
+ UINT32_C(0x910A87D1), UINT32_C(0x10AB8E0F), UINT32_C(0x93D5CC86),
+ UINT32_C(0xAE57CA1B), UINT32_C(0x4194D548), UINT32_C(0x267FC37A),
+ UINT32_C(0xFAF3A1D6), UINT32_C(0x13B87C97), UINT32_C(0x70EC2364) } },
+ { { UINT32_C(0x5E12756A), UINT32_C(0x064B565B), UINT32_C(0xAE49C98E),
+ UINT32_C(0x953B7BD1), UINT32_C(0xF7001D91), UINT32_C(0xE0CE8284),
+ UINT32_C(0xF31108D0), UINT32_C(0x1546060B), UINT32_C(0x6779B6E2),
+ UINT32_C(0xDBC2C3F4), UINT32_C(0xE0DD07CF), UINT32_C(0x157AA47D) },
+ { UINT32_C(0xF23B261E), UINT32_C(0xBF4A1C6F), UINT32_C(0x654F4BE5),
+ UINT32_C(0x5B8EED30), UINT32_C(0x6B20CCD8), UINT32_C(0xDF5896D3),
+ UINT32_C(0x559ED23D), UINT32_C(0x56920E2C), UINT32_C(0xFA6E3E27),
+ UINT32_C(0x901F342E), UINT32_C(0x896CA082), UINT32_C(0x745C747C) } },
+ { { UINT32_C(0x2944EC84), UINT32_C(0xDBCCD575), UINT32_C(0xA5FF65FE),
+ UINT32_C(0x54A2A935), UINT32_C(0x1A1319B6), UINT32_C(0x88C92A5E),
+ UINT32_C(0x82DA96C1), UINT32_C(0x9537C28F), UINT32_C(0x35F93C46),
+ UINT32_C(0xB6836474), UINT32_C(0x65B0846C), UINT32_C(0xEC526A1D) },
+ { UINT32_C(0xF382C412), UINT32_C(0x6F12AFBD), UINT32_C(0x9E99FA06),
+ UINT32_C(0x5EBC81D8), UINT32_C(0x869B93BD), UINT32_C(0x97B5D672),
+ UINT32_C(0x377E12AA), UINT32_C(0x2983C310), UINT32_C(0x24D681EA),
+ UINT32_C(0x48759681), UINT32_C(0x287FD767), UINT32_C(0x1E0BD106) } },
+ { { UINT32_C(0x7231247F), UINT32_C(0x0AC75A3E), UINT32_C(0xEF27AD3A),
+ UINT32_C(0x65C20DE6), UINT32_C(0xBD02EEE5), UINT32_C(0x87EB6CF1),
+ UINT32_C(0x00147E03), UINT32_C(0x264ACA7A), UINT32_C(0xAE2A9437),
+ UINT32_C(0xEBC78581), UINT32_C(0x6316BFA5), UINT32_C(0x9929964E) },
+ { UINT32_C(0x9AF207EF), UINT32_C(0xDC09E040), UINT32_C(0x0C9D8658),
+ UINT32_C(0x3ECFFE2D), UINT32_C(0xDFB43D38), UINT32_C(0x547EA735),
+ UINT32_C(0xD04B1B20), UINT32_C(0x5485247B), UINT32_C(0xBFD8B609),
+ UINT32_C(0xB18D3F02), UINT32_C(0xCCE73705), UINT32_C(0xEEB3E805) } },
+ { { UINT32_C(0xDB93850F), UINT32_C(0xDAB1A525), UINT32_C(0x8365B7D5),
+ UINT32_C(0x18ADAA23), UINT32_C(0x113FC8C7), UINT32_C(0x58485C90),
+ UINT32_C(0x348AD323), UINT32_C(0x80C3DBB9), UINT32_C(0xE16ADCA1),
+ UINT32_C(0xAF892FB5), UINT32_C(0x979F005A), UINT32_C(0x2183C879) },
+ { UINT32_C(0x0643A99E), UINT32_C(0x20FA1A94), UINT32_C(0x1A1609CB),
+ UINT32_C(0x2741221C), UINT32_C(0x3C2FBDDC), UINT32_C(0x1C1687E5),
+ UINT32_C(0xD420D6CF), UINT32_C(0xDCCF329E), UINT32_C(0x2B7197D1),
+ UINT32_C(0x75D5577D), UINT32_C(0xC8729D9C), UINT32_C(0x4C3C3875) } },
+ { { UINT32_C(0xE5CBDCB9), UINT32_C(0x5E79F995), UINT32_C(0xA742FCC7),
+ UINT32_C(0x03139824), UINT32_C(0x239EF4A1), UINT32_C(0x6D0C214A),
+ UINT32_C(0x401A2944), UINT32_C(0x53A27952), UINT32_C(0xC10BCDF0),
+ UINT32_C(0xF42A1B34), UINT32_C(0x7CF38061), UINT32_C(0x426BAA43) },
+ { UINT32_C(0xA96AD0C8), UINT32_C(0x16A53139), UINT32_C(0x6BAD5301),
+ UINT32_C(0x627F1D31), UINT32_C(0x4ACCD627), UINT32_C(0x5AF74877),
+ UINT32_C(0xB55B0FB8), UINT32_C(0x3C58A1C5), UINT32_C(0xF4399A6A),
+ UINT32_C(0xFAA57B91), UINT32_C(0xC28094B8), UINT32_C(0xBAD283FB) } },
+ { { UINT32_C(0x83E10A93), UINT32_C(0xBA32AC61), UINT32_C(0xEC06BDB0),
+ UINT32_C(0x1C91F6B4), UINT32_C(0x65F60C93), UINT32_C(0x42E6CFBC),
+ UINT32_C(0x2C0CDCBE), UINT32_C(0xEFE33BC8), UINT32_C(0x4D6414F2),
+ UINT32_C(0xE0FE1D09), UINT32_C(0x76FA5C5B), UINT32_C(0x4C112316) },
+ { UINT32_C(0x2E26200A), UINT32_C(0x812C1DC6), UINT32_C(0xEE879D25),
+ UINT32_C(0xD6C413C5), UINT32_C(0xBCA8BAFE), UINT32_C(0xBEADE255),
+ UINT32_C(0xCE2BA0E7), UINT32_C(0x0EAF4AE2), UINT32_C(0xC4F4408A),
+ UINT32_C(0x66E9FFB0), UINT32_C(0x9782C7AD), UINT32_C(0xB36A86D7) } },
+ { { UINT32_C(0xBAD8D1C7), UINT32_C(0x10FCD1F4), UINT32_C(0x4502F645),
+ UINT32_C(0xC903816A), UINT32_C(0xA503B895), UINT32_C(0x7FAC1CC1),
+ UINT32_C(0x0778900C), UINT32_C(0x8BCD6041), UINT32_C(0x5BCF2784),
+ UINT32_C(0x5A5F2202), UINT32_C(0x10EDB896), UINT32_C(0x9B157E87) },
+ { UINT32_C(0xF602A8B1), UINT32_C(0x4C58DA69), UINT32_C(0x59EC9D7E),
+ UINT32_C(0xD55132F8), UINT32_C(0xA26D4870), UINT32_C(0x155B719A),
+ UINT32_C(0x36441746), UINT32_C(0x25AAFCA3), UINT32_C(0xDD3B6B30),
+ UINT32_C(0x01F83338), UINT32_C(0x551917CC), UINT32_C(0xD52BB5C1) } },
+ { { UINT32_C(0x6135066A), UINT32_C(0xA0B6207B), UINT32_C(0x2AEC8CBD),
+ UINT32_C(0xB3409F84), UINT32_C(0x19D87DF0), UINT32_C(0x5EBFD436),
+ UINT32_C(0xE8526DE2), UINT32_C(0xCB4C209B), UINT32_C(0x21E1A230),
+ UINT32_C(0xD764085B), UINT32_C(0x0899964A), UINT32_C(0x96F91554) },
+ { UINT32_C(0xA57D122A), UINT32_C(0xB0BEC8EF), UINT32_C(0x5D9D0B33),
+ UINT32_C(0xC572EC56), UINT32_C(0xCFA7C72C), UINT32_C(0xEBE2A780),
+ UINT32_C(0x9EF3295C), UINT32_C(0x52D40CDB), UINT32_C(0x0DE74DFE),
+ UINT32_C(0x64004584), UINT32_C(0xC0809716), UINT32_C(0xA6846432) } },
+ { { UINT32_C(0x02C979BC), UINT32_C(0x0D09E8CD), UINT32_C(0x409F4F2A),
+ UINT32_C(0xEC4B21F6), UINT32_C(0x13FB07CA), UINT32_C(0x68125C70),
+ UINT32_C(0x6FDFA72A), UINT32_C(0x1C4CFC17), UINT32_C(0x04539FCD),
+ UINT32_C(0xC9E71B9E), UINT32_C(0x8BA70797), UINT32_C(0x94B7103D) },
+ { UINT32_C(0xB33FDE83), UINT32_C(0x6B81E82F), UINT32_C(0xEABAFD4B),
+ UINT32_C(0x7CA9A8CA), UINT32_C(0xEAB819CE), UINT32_C(0xADD85A67),
+ UINT32_C(0x98E99FFC), UINT32_C(0xAEC25483), UINT32_C(0x274A07B6),
+ UINT32_C(0x938D6440), UINT32_C(0x564A6AA0), UINT32_C(0x0A5C7097) } },
+ { { UINT32_C(0x2F4FCEB6), UINT32_C(0x7284FF50), UINT32_C(0x78D0D5CB),
+ UINT32_C(0x0A28715A), UINT32_C(0xBFCE187C), UINT32_C(0xE70B7014),
+ UINT32_C(0x7A17148D), UINT32_C(0xA6B538F5), UINT32_C(0xDD427166),
+ UINT32_C(0x1DAB07C9), UINT32_C(0x149D23CA), UINT32_C(0x5C5578B0) },
+ { UINT32_C(0x875B5EDE), UINT32_C(0x875E2056), UINT32_C(0x02C893B9),
+ UINT32_C(0xCBF44B6D), UINT32_C(0x5C2993FB), UINT32_C(0x5715A77E),
+ UINT32_C(0x3410597E), UINT32_C(0xAF328146), UINT32_C(0x42DC49DF),
+ UINT32_C(0x65DF418F), UINT32_C(0xA9EE52F6), UINT32_C(0x7AC9C720) } },
+ { { UINT32_C(0x62955486), UINT32_C(0xB1C9AA07), UINT32_C(0x245061D7),
+ UINT32_C(0xCBF35BE3), UINT32_C(0x8CF4DDC0), UINT32_C(0x811E1BD3),
+ UINT32_C(0x948F7C84), UINT32_C(0xD9D4589C), UINT32_C(0xCB0F996D),
+ UINT32_C(0x30D09A0F), UINT32_C(0x590E7704), UINT32_C(0x1A1B3B7A) },
+ { UINT32_C(0x2082768D), UINT32_C(0xA848E349), UINT32_C(0x9A249DF4),
+ UINT32_C(0x9FEBD492), UINT32_C(0x5F20439A), UINT32_C(0x503420AF),
+ UINT32_C(0x8E2BFCD4), UINT32_C(0x0CBE52B6), UINT32_C(0x118C91B2),
+ UINT32_C(0xB1D5E261), UINT32_C(0x71D8F2BC), UINT32_C(0x93CFF6DA) } },
+ { { UINT32_C(0x8AB58944), UINT32_C(0x5F5BC06B), UINT32_C(0x4979882D),
+ UINT32_C(0xE4BED538), UINT32_C(0xD79B0EB1), UINT32_C(0x57C30362),
+ UINT32_C(0xEF7C56D8), UINT32_C(0x391AE2C1), UINT32_C(0xADD98625),
+ UINT32_C(0x28BC2E97), UINT32_C(0x1B257107), UINT32_C(0xFA8E86B8) },
+ { UINT32_C(0x6118C715), UINT32_C(0x5E4859F8), UINT32_C(0x524C71DD),
+ UINT32_C(0x91C83324), UINT32_C(0x6D2F5E6D), UINT32_C(0xFB209243),
+ UINT32_C(0x2A900A43), UINT32_C(0x6B4FE21F), UINT32_C(0x32A73C1F),
+ UINT32_C(0x241F75D6), UINT32_C(0x5AE89613), UINT32_C(0xF5BC4629) } },
+ }
+};
+
+/*-
+ * Finite field inversion.
+ * Computed with Bernstein-Yang algorithm.
+ * https://tches.iacr.org/index.php/TCHES/article/view/8298
+ * Based on https://github.com/mit-plv/fiat-crypto/tree/master/inversion/c
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ */
+static void
+fiat_secp384r1_inv(fe_t output, const fe_t t1)
+{
+ int i;
+ fe_t v1, r1, v2;
+ limb_t *r2 = output; /* output doubles as the r2 working buffer in the loop */
+ limb_t f1[LIMB_CNT + 1], g1[LIMB_CNT + 1], f2[LIMB_CNT + 1],
+ g2[LIMB_CNT + 1]; /* LIMB_CNT + 1 limbs each; the top limb is zeroed by hand below */
+ limb_t d2, d1 = 1;
+
+ fe_copy(g1, t1); /* g1 := input value */
+ g1[LIMB_CNT] = 0;
+ fe_copy(f1, const_psat); /* f1 := const_psat (constant defined elsewhere in the file) */
+ f1[LIMB_CNT] = 0;
+ fe_copy(r1, const_one);
+ fe_set_zero(v1);
+
+ /* 1110 divstep iterations: 555 passes of two calls each */
+ for (i = 0; i < 555; i++) {
+ fiat_secp384r1_divstep(&d2, f2, g2, v2, r2, d1, f1, g1, v1, r1); /* presumably set 1 in, set 2 out -- helper not shown here */
+ fiat_secp384r1_divstep(&d1, f1, g1, v1, r1, d2, f2, g2, v2, r2); /* presumably set 2 in, set 1 out */
+ }
+
+ fiat_secp384r1_opp(output, v1); /* output := -v1 */
+ fiat_secp384r1_selectznz(output, f1[LIMB_CNT] >> (LIMB_BITS - 1), v1, /* top bit of f1's extra limb clear: keep v1 */
+ output); /* top bit set: keep the negation computed above */
+ fiat_secp384r1_mul(output, output, const_divstep); /* final multiplication by the const_divstep constant */
+}
+
+/*-
+ * Q := 2P, both projective, Q and P same pointers OK
+ * Autogenerated: op3/dbl_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 6
+ * ASSERT: a = -3
+ */
+static void
+point_double(pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X = P->X;
+ const limb_t *Y = P->Y;
+ const limb_t *Z = P->Z;
+ limb_t *X3 = Q->X; /* results are written straight into Q */
+ limb_t *Y3 = Q->Y;
+ limb_t *Z3 = Q->Z;
+
+ /* the curve arith formula */
+ fiat_secp384r1_square(t0, X); /* t0 = X^2 */
+ fiat_secp384r1_square(t1, Y); /* t1 = Y^2 */
+ fiat_secp384r1_square(t2, Z); /* t2 = Z^2 */
+ fiat_secp384r1_mul(t3, X, Y);
+ fiat_secp384r1_add(t3, t3, t3); /* t3 = 2XY */
+ fiat_secp384r1_mul(t4, Y, Z); /* t4 = YZ */
+ fiat_secp384r1_mul(Z3, X, Z); /* first write into Q and last read of P's coordinates */
+ fiat_secp384r1_add(Z3, Z3, Z3); /* Z3 = 2XZ */
+ fiat_secp384r1_mul(Y3, b, t2);
+ fiat_secp384r1_sub(Y3, Y3, Z3);
+ fiat_secp384r1_add(X3, Y3, Y3);
+ fiat_secp384r1_add(Y3, X3, Y3); /* Y3 tripled via X3 = 2*Y3, Y3 = X3 + Y3 */
+ fiat_secp384r1_sub(X3, t1, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3);
+ fiat_secp384r1_mul(Y3, X3, Y3);
+ fiat_secp384r1_mul(X3, X3, t3);
+ fiat_secp384r1_add(t3, t2, t2);
+ fiat_secp384r1_add(t2, t2, t3); /* t2 = 3 * Z^2 */
+ fiat_secp384r1_mul(Z3, b, Z3);
+ fiat_secp384r1_sub(Z3, Z3, t2);
+ fiat_secp384r1_sub(Z3, Z3, t0);
+ fiat_secp384r1_add(t3, Z3, Z3);
+ fiat_secp384r1_add(Z3, Z3, t3); /* Z3 tripled */
+ fiat_secp384r1_add(t3, t0, t0);
+ fiat_secp384r1_add(t0, t3, t0); /* t0 = 3 * X^2 */
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t0, t0, Z3);
+ fiat_secp384r1_add(Y3, Y3, t0); /* Y3 is final from here on */
+ fiat_secp384r1_add(t0, t4, t4); /* t0 = 2YZ */
+ fiat_secp384r1_mul(Z3, t0, Z3);
+ fiat_secp384r1_sub(X3, X3, Z3); /* X3 is final from here on */
+ fiat_secp384r1_mul(Z3, t0, t1);
+ fiat_secp384r1_add(Z3, Z3, Z3);
+ fiat_secp384r1_add(Z3, Z3, Z3); /* two doublings: Z3 = 4 * t0 * t1 */
+}
+
+/*-
+ * R := Q + P where R and Q are projective, P affine.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_mixed.op3
+ * https://eprint.iacr.org/2015/1060 Alg 5
+ * ASSERT: a = -3
+ */
+static void
+point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ fe_t X3; /* the sum is built in locals and only copied to R by the final selects */
+ fe_t Y3;
+ fe_t Z3;
+ limb_t nz;
+
+ /* check P for affine inf */
+ fiat_secp384r1_nonzero(&nz, P->Y); /* only P->Y is inspected; nz == 0 is treated as "P is inf" below */
+
+ /* the curve arith formula */
+ fiat_secp384r1_mul(t0, X1, X2); /* t0 = X1 * X2 */
+ fiat_secp384r1_mul(t1, Y1, Y2); /* t1 = Y1 * Y2 */
+ fiat_secp384r1_add(t3, X2, Y2);
+ fiat_secp384r1_add(t4, X1, Y1);
+ fiat_secp384r1_mul(t3, t3, t4);
+ fiat_secp384r1_add(t4, t0, t1);
+ fiat_secp384r1_sub(t3, t3, t4); /* t3 = (X1 + Y1)(X2 + Y2) - t0 - t1 */
+ fiat_secp384r1_mul(t4, Y2, Z1);
+ fiat_secp384r1_add(t4, t4, Y1); /* t4 = Y2 * Z1 + Y1 */
+ fiat_secp384r1_mul(Y3, X2, Z1);
+ fiat_secp384r1_add(Y3, Y3, X1); /* Y3 = X2 * Z1 + X1 */
+ fiat_secp384r1_mul(Z3, b, Z1);
+ fiat_secp384r1_sub(X3, Y3, Z3);
+ fiat_secp384r1_add(Z3, X3, X3);
+ fiat_secp384r1_add(X3, X3, Z3); /* X3 tripled */
+ fiat_secp384r1_sub(Z3, t1, X3);
+ fiat_secp384r1_add(X3, t1, X3);
+ fiat_secp384r1_mul(Y3, b, Y3);
+ fiat_secp384r1_add(t1, Z1, Z1);
+ fiat_secp384r1_add(t2, t1, Z1); /* t2 = 3 * Z1 */
+ fiat_secp384r1_sub(Y3, Y3, t2);
+ fiat_secp384r1_sub(Y3, Y3, t0);
+ fiat_secp384r1_add(t1, Y3, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3); /* Y3 tripled */
+ fiat_secp384r1_add(t1, t0, t0);
+ fiat_secp384r1_add(t0, t1, t0); /* t0 tripled */
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t1, t4, Y3);
+ fiat_secp384r1_mul(t2, t0, Y3);
+ fiat_secp384r1_mul(Y3, X3, Z3);
+ fiat_secp384r1_add(Y3, Y3, t2);
+ fiat_secp384r1_mul(X3, t3, X3);
+ fiat_secp384r1_sub(X3, X3, t1);
+ fiat_secp384r1_mul(Z3, t4, Z3);
+ fiat_secp384r1_mul(t1, t3, t0);
+ fiat_secp384r1_add(Z3, Z3, t1);
+
+ /* if P is inf, throw all that away and take Q */
+ fiat_secp384r1_selectznz(R->X, nz, Q->X, X3); /* nz == 0 keeps Q, otherwise the computed sum */
+ fiat_secp384r1_selectznz(R->Y, nz, Q->Y, Y3);
+ fiat_secp384r1_selectznz(R->Z, nz, Q->Z, Z3);
+}
+
+/*-
+ * R := Q + P all projective.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 4
+ * ASSERT: a = -3
+ */
+static void
+point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4, t5;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ const limb_t *Z2 = P->Z;
+ limb_t *X3 = R->X; /* results are written straight into R */
+ limb_t *Y3 = R->Y;
+ limb_t *Z3 = R->Z;
+
+ /* the curve arith formula */
+ fiat_secp384r1_mul(t0, X1, X2); /* t0 = X1 * X2 */
+ fiat_secp384r1_mul(t1, Y1, Y2); /* t1 = Y1 * Y2 */
+ fiat_secp384r1_mul(t2, Z1, Z2); /* t2 = Z1 * Z2 */
+ fiat_secp384r1_add(t3, X1, Y1);
+ fiat_secp384r1_add(t4, X2, Y2);
+ fiat_secp384r1_mul(t3, t3, t4);
+ fiat_secp384r1_add(t4, t0, t1);
+ fiat_secp384r1_sub(t3, t3, t4); /* t3 = (X1 + Y1)(X2 + Y2) - t0 - t1 */
+ fiat_secp384r1_add(t4, Y1, Z1);
+ fiat_secp384r1_add(t5, Y2, Z2);
+ fiat_secp384r1_mul(t4, t4, t5);
+ fiat_secp384r1_add(t5, t1, t2);
+ fiat_secp384r1_sub(t4, t4, t5); /* t4 = (Y1 + Z1)(Y2 + Z2) - t1 - t2 */
+ fiat_secp384r1_add(X3, X1, Z1); /* first write into R and last read of Q, so R may alias Q */
+ fiat_secp384r1_add(Y3, X2, Z2); /* P is still read after R was written, so R must not alias P */
+ fiat_secp384r1_mul(X3, X3, Y3);
+ fiat_secp384r1_add(Y3, t0, t2);
+ fiat_secp384r1_sub(Y3, X3, Y3); /* Y3 = (X1 + Z1)(X2 + Z2) - t0 - t2 */
+ fiat_secp384r1_mul(Z3, b, t2);
+ fiat_secp384r1_sub(X3, Y3, Z3);
+ fiat_secp384r1_add(Z3, X3, X3);
+ fiat_secp384r1_add(X3, X3, Z3); /* X3 tripled */
+ fiat_secp384r1_sub(Z3, t1, X3);
+ fiat_secp384r1_add(X3, t1, X3);
+ fiat_secp384r1_mul(Y3, b, Y3);
+ fiat_secp384r1_add(t1, t2, t2);
+ fiat_secp384r1_add(t2, t1, t2); /* t2 tripled */
+ fiat_secp384r1_sub(Y3, Y3, t2);
+ fiat_secp384r1_sub(Y3, Y3, t0);
+ fiat_secp384r1_add(t1, Y3, Y3);
+ fiat_secp384r1_add(Y3, t1, Y3); /* Y3 tripled */
+ fiat_secp384r1_add(t1, t0, t0);
+ fiat_secp384r1_add(t0, t1, t0); /* t0 tripled */
+ fiat_secp384r1_sub(t0, t0, t2);
+ fiat_secp384r1_mul(t1, t4, Y3);
+ fiat_secp384r1_mul(t2, t0, Y3);
+ fiat_secp384r1_mul(Y3, X3, Z3);
+ fiat_secp384r1_add(Y3, Y3, t2);
+ fiat_secp384r1_mul(X3, t3, X3);
+ fiat_secp384r1_sub(X3, X3, t1);
+ fiat_secp384r1_mul(Z3, t4, Z3);
+ fiat_secp384r1_mul(t1, t3, t0);
+ fiat_secp384r1_add(Z3, Z3, t1);
+}
+
+/* constants for the windowed scalar recodings and their lookup tables */
+#define RADIX 5 /* window width in bits; also the number of doublings per regular-wnaf digit */
+#define DRADIX (1 << RADIX) /* 32; the precomputed tables hold DRADIX / 2 = 16 points */
+#define DRADIX_WNAF ((DRADIX) << 1) /* 64; DRADIX_WNAF - 1 is used as the window mask */
+
+/*-
+ * precomp for wnaf scalar multiplication:
+ * precomp[0] = 1P
+ * precomp[1] = 3P
+ * precomp[2] = 5P
+ * precomp[3] = 7P
+ * precomp[4] = 9P
+ * ...
+ */
+static void
+precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P)
+{
+ int i;
+
+ fe_copy(precomp[0].X, P->X); /* precomp[0] := P lifted to projective with Z = const_one */
+ fe_copy(precomp[0].Y, P->Y);
+ fe_copy(precomp[0].Z, const_one);
+ point_double(&precomp[DRADIX / 2 - 1], &precomp[0]); /* the last slot temporarily holds 2P */
+
+ for (i = 1; i < DRADIX / 2; i++)
+ point_add_proj(&precomp[i], &precomp[DRADIX / 2 - 1], &precomp[i - 1]); /* precomp[i] = 2P + precomp[i - 1]; the final pass overwrites the 2P slot in place */
+}
+
+/* fetch a scalar bit: bit (idx & 7) of byte in[idx >> 3], or 0 when idx is out of range */
+static int
+scalar_get_bit(const unsigned char in[48], int idx)
+{
+ int widx, rshift;
+
+ widx = idx >> 3; /* byte index */
+ rshift = idx & 0x7; /* bit position inside that byte */
+
+ if (idx < 0 || widx >= 48) /* outside the 48-byte scalar: reads as a zero bit */
+ return 0;
+
+ return (in[widx] >> rshift) & 0x1;
+}
+
+/*-
+ * Compute "regular" wnaf representation of a scalar.
+ * See "Exponent Recoding and Regular Exponentiation Algorithms",
+ * Tunstall et al., AfricaCrypt 2009, Alg 6.
+ * It forces an odd scalar and outputs digits in
+ * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...}
+ * i.e. signed odd digits with _no zeroes_ -- that makes it "regular".
+ */
+static void
+scalar_rwnaf(int8_t out[77], const unsigned char in[48])
+{
+ int i;
+ int8_t window, d;
+
+ window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low RADIX + 1 bits of the scalar, bit 0 forced on */
+ for (i = 0; i < 76; i++) { /* 76 digits here plus the leftover window below: 77 in total */
+ d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* recentre the masked window around zero */
+ out[i] = d;
+ window = (window - d) >> RADIX; /* remove the digit, move on by RADIX bits */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; /* pull in the next RADIX scalar bits */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5;
+ }
+ out[i] = window; /* i == 76: the top digit is whatever is left in the window */
+}
+
+/*-
+ * Compute "textbook" wnaf representation of a scalar.
+ * NB: not constant time
+ */
+static void
+scalar_wnaf(int8_t out[385], const unsigned char in[48])
+{
+ int i;
+ int8_t window, d;
+
+ window = in[0] & (DRADIX_WNAF - 1); /* low RADIX + 1 bits of the scalar */
+ for (i = 0; i < 385; i++) { /* one digit per position */
+ d = 0; /* an even window yields a zero digit */
+ if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) /* odd window: take it as the digit; if its DRADIX bit is set ... */
+ d -= DRADIX_WNAF; /* ... use the negative representative instead */
+ out[i] = d;
+ window = (window - d) >> 1; /* remove the digit, move on by one bit */
+ window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; /* shift in one new scalar bit at the top */
+ }
+}
+
+/*-
+ * Simultaneous scalar multiplication: interleaved "textbook" wnaf.
+ * NB: not constant time
+ */
+static void
+var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[48],
+ const unsigned char b[48], const pt_aff_t *P)
+{
+ int i, d, is_neg, is_inf = 1, flipped = 0; /* is_inf: Q not set yet; flipped: Q.Y currently holds the negated value */
+ int8_t anaf[385] = { 0 };
+ int8_t bnaf[385] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P); /* P is copied into precomp here, so out may alias P */
+ scalar_wnaf(anaf, a);
+ scalar_wnaf(bnaf, b);
+
+ for (i = 384; i >= 0; i--) { /* digits from most to least significant */
+ if (!is_inf) /* nothing to double until the accumulator has been set */
+ point_double(&Q, &Q);
+ if ((d = bnaf[i])) { /* nonzero digit of b: uses the table built from P */
+ if ((is_neg = d < 0) != flipped) { /* sign differs from Q's current state: negate Q rather than the table entry */
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &precomp[d].X);
+ fe_copy(Q.Y, &precomp[d].Y);
+ fe_copy(Q.Z, &precomp[d].Z);
+ is_inf = 0;
+ } else
+ point_add_proj(&Q, &Q, &precomp[d]);
+ }
+ if ((d = anaf[i])) { /* nonzero digit of a: uses the affine entries of lut_cmb[0] */
+ if ((is_neg = d < 0) != flipped) {
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* same index mapping as above */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &lut_cmb[0][d].X);
+ fe_copy(Q.Y, &lut_cmb[0][d].Y);
+ fe_copy(Q.Z, const_one); /* table entry is affine, so Z is set to const_one */
+ is_inf = 0;
+ } else
+ point_add_mixed(&Q, &Q, &lut_cmb[0][d]);
+ }
+ }
+
+ if (is_inf) {
+ /* initialize accumulator to inf: all-zero scalars */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+ }
+
+ if (flipped) {
+ /* correct sign */
+ fiat_secp384r1_opp(Q.Y, Q.Y);
+ }
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z); /* Q.Z := 1 / Q.Z */
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* out->X = X / Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* out->Y = Y / Z */
+}
+
+/*-
+ * Variable point scalar multiplication with "regular" wnaf.
+ * Here "regular" means _no zeroes_, so the sequence of
+ * EC arithmetic ops is fixed.
+ */
+static void
+var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[48],
+ const pt_aff_t *P)
+{
+ int i, j, d, diff, is_neg;
+ int8_t rnaf[77] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P); /* P is copied into precomp here; out->Y is used as scratch later */
+ scalar_rwnaf(rnaf, scalar);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ /* initialize accumulator to high digit */
+ d = (rnaf[76] - 1) >> 1; /* top digit mapped to a table index with no sign handling */
+ for (j = 0; j < DRADIX / 2; j++) { /* scan the whole table, keep only entry d */
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* diff = 1 iff d == j; NOTE: relies on >> of a negative int being arithmetic */
+ fiat_secp384r1_selectznz(Q.X, diff, Q.X, precomp[j].X); /* diff != 0 takes precomp[j], otherwise Q is left as is */
+ fiat_secp384r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
+ fiat_secp384r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
+ }
+
+ for (i = 75; i >= 0; i--) { /* remaining digits, high to low */
+ for (j = 0; j < RADIX; j++) /* RADIX doublings per digit */
+ point_double(&Q, &Q);
+ d = rnaf[i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ for (j = 0; j < DRADIX / 2; j++) { /* same full-table scan as for the top digit */
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1;
+ fiat_secp384r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
+ fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
+ fiat_secp384r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp384r1_opp(out->Y, lut.Y); /* out->Y is only a scratch buffer here */
+ fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
+ point_add_proj(&Q, &Q, &lut);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ fe_copy(lut.X, precomp[0].X); /* lut := precomp[0] with Y negated, i.e. -P */
+ fiat_secp384r1_opp(lut.Y, precomp[0].Y);
+ fe_copy(lut.Z, precomp[0].Z);
+ point_add_proj(&lut, &lut, &Q); /* lut := Q - P */
+ fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); /* low scalar bit 0 takes lut, 1 keeps Q */
+ fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
+ fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z); /* Q.Z := 1 / Q.Z */
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* out->X = X / Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* out->Y = Y / Z */
+}
+
+/*-
+ * Fixed scalar multiplication: comb with interleaving.
+ */
+static void
+fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[48])
+{
+ int i, j, k, d, diff, is_neg = 0;
+ int8_t rnaf[77] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } };
+ pt_aff_t lut = { { 0 }, { 0 } };
+
+ scalar_rwnaf(rnaf, scalar);
+
+ /* initialize accumulator to inf */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ for (i = 3; i >= 0; i--) { /* four passes; pass i handles digits j * 4 + i */
+ for (j = 0; i != 3 && j < RADIX; j++) /* RADIX doublings between passes, none before the first */
+ point_double(&Q, &Q);
+ for (j = 0; j < 21; j++) { /* one lookup table lut_cmb[j] per j */
+ if (j * 4 + i > 76) /* rnaf only has digits 0..76 */
+ continue;
+ d = rnaf[j * 4 + i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ for (k = 0; k < DRADIX / 2; k++) { /* scan the whole table, keep only entry d */
+ diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* diff = 1 iff d == k; NOTE: relies on >> of a negative int being arithmetic */
+ fiat_secp384r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X);
+ fiat_secp384r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp384r1_opp(out->Y, lut.Y); /* out->Y is only a scratch buffer here */
+ fiat_secp384r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
+ point_add_mixed(&Q, &Q, &lut);
+ }
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ fe_copy(lut.X, lut_cmb[0][0].X); /* lut := lut_cmb[0][0] with Y negated */
+ fiat_secp384r1_opp(lut.Y, lut_cmb[0][0].Y);
+ point_add_mixed(&R, &Q, &lut); /* R := Q - lut_cmb[0][0] */
+ fiat_secp384r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); /* low scalar bit 0 takes R, 1 keeps Q */
+ fiat_secp384r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y);
+ fiat_secp384r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp384r1_inv(Q.Z, Q.Z); /* Q.Z := 1 / Q.Z */
+ fiat_secp384r1_mul(out->X, Q.X, Q.Z); /* out->X = X / Z */
+ fiat_secp384r1_mul(out->Y, Q.Y, Q.Z); /* out->Y = Y / Z */
+}
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char a[48],
+ const unsigned char b[48],
+ const unsigned char inx[48],
+ const unsigned char iny[48])
+{
+ pt_aff_t P;
+
+ fiat_secp384r1_from_bytes(P.X, inx); /* bytes -> limbs */
+ fiat_secp384r1_from_bytes(P.Y, iny);
+ fiat_secp384r1_to_montgomery(P.X, P.X); /* limbs -> Montgomery form, in place */
+ fiat_secp384r1_to_montgomery(P.Y, P.Y);
+ /* simultaneous scalar multiplication */
+ var_smul_wnaf_two(&P, a, b, &P); /* P is both the input point and the output */
+
+ fiat_secp384r1_from_montgomery(P.X, P.X); /* undo the Montgomery conversion */
+ fiat_secp384r1_from_montgomery(P.Y, P.Y);
+ fiat_secp384r1_to_bytes(outx, P.X); /* limbs -> bytes */
+ fiat_secp384r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char scalar[48])
+{
+ pt_aff_t P;
+
+ /* fixed scmul function */
+ fixed_smul_cmb(&P, scalar); /* P is output only; the base point comes from lut_cmb */
+ fiat_secp384r1_from_montgomery(P.X, P.X); /* leave Montgomery form, in place */
+ fiat_secp384r1_from_montgomery(P.Y, P.Y);
+ fiat_secp384r1_to_bytes(outx, P.X); /* limbs -> bytes */
+ fiat_secp384r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char scalar[48],
+ const unsigned char inx[48],
+ const unsigned char iny[48])
+{
+ pt_aff_t P;
+
+ fiat_secp384r1_from_bytes(P.X, inx); /* bytes -> limbs */
+ fiat_secp384r1_from_bytes(P.Y, iny);
+ fiat_secp384r1_to_montgomery(P.X, P.X); /* limbs -> Montgomery form, in place */
+ fiat_secp384r1_to_montgomery(P.Y, P.Y);
+ /* var scmul function */
+ var_smul_rwnaf(&P, scalar, &P); /* P is both the input point and the output */
+ fiat_secp384r1_from_montgomery(P.X, P.X); /* undo the Montgomery conversion */
+ fiat_secp384r1_from_montgomery(P.Y, P.Y);
+ fiat_secp384r1_to_bytes(outx, P.X); /* limbs -> bytes */
+ fiat_secp384r1_to_bytes(outy, P.Y);
+}
+
+#endif /* __SIZEOF_INT128__ */
diff --git a/security/nss/lib/freebl/ecl/ecp_secp384r1.h b/security/nss/lib/freebl/ecl/ecp_secp384r1.h
new file mode 100644
index 0000000000..87f3c5a255
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp384r1.h
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecp_secp384r1_h_
+#define __ecp_secp384r1_h_
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void point_mul_two_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char a[48],
+ const unsigned char b[48],
+ const unsigned char inx[48],
+ const unsigned char iny[48]);
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_g_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char scalar[48]);
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ */
+void
+point_mul_secp384r1(unsigned char outx[48], unsigned char outy[48],
+ const unsigned char scalar[48],
+ const unsigned char inx[48],
+ const unsigned char iny[48]);
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecp_secp384r1_wrap.c b/security/nss/lib/freebl/ecl/ecp_secp384r1_wrap.c
new file mode 100644
index 0000000000..26ed14dd91
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp384r1_wrap.c
@@ -0,0 +1,228 @@
+/*-
+ * MIT License
+ * -
+ * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley
+ * -
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * -
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * -
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#undef RADIX
+#include "ecp.h"
+#include "ecp_secp384r1.h"
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/*-
+ * reverse bytes -- total hack: swaps a[i] with a[47 - i] for i = 0..23, in place (48-byte buffers only)
+ */
+#define MP_BE2LE(a) /* NOTE(review): `a` is expanded unparenthesized, so pass a plain array or pointer name */ \
+ do { /* do/while (0) wrapper: the expansion behaves as a single statement */ \
+ unsigned char z_bswap; /* scratch byte for each swap */ \
+ z_bswap = a[0]; \
+ a[0] = a[47]; \
+ a[47] = z_bswap; \
+ z_bswap = a[1]; \
+ a[1] = a[46]; \
+ a[46] = z_bswap; \
+ z_bswap = a[2]; \
+ a[2] = a[45]; \
+ a[45] = z_bswap; \
+ z_bswap = a[3]; \
+ a[3] = a[44]; \
+ a[44] = z_bswap; \
+ z_bswap = a[4]; \
+ a[4] = a[43]; \
+ a[43] = z_bswap; \
+ z_bswap = a[5]; \
+ a[5] = a[42]; \
+ a[42] = z_bswap; \
+ z_bswap = a[6]; \
+ a[6] = a[41]; \
+ a[41] = z_bswap; \
+ z_bswap = a[7]; \
+ a[7] = a[40]; \
+ a[40] = z_bswap; \
+ z_bswap = a[8]; \
+ a[8] = a[39]; \
+ a[39] = z_bswap; \
+ z_bswap = a[9]; \
+ a[9] = a[38]; \
+ a[38] = z_bswap; \
+ z_bswap = a[10]; \
+ a[10] = a[37]; \
+ a[37] = z_bswap; \
+ z_bswap = a[11]; \
+ a[11] = a[36]; \
+ a[36] = z_bswap; \
+ z_bswap = a[12]; \
+ a[12] = a[35]; \
+ a[35] = z_bswap; \
+ z_bswap = a[13]; \
+ a[13] = a[34]; \
+ a[34] = z_bswap; \
+ z_bswap = a[14]; \
+ a[14] = a[33]; \
+ a[33] = z_bswap; \
+ z_bswap = a[15]; \
+ a[15] = a[32]; \
+ a[32] = z_bswap; \
+ z_bswap = a[16]; \
+ a[16] = a[31]; \
+ a[31] = z_bswap; \
+ z_bswap = a[17]; \
+ a[17] = a[30]; \
+ a[30] = z_bswap; \
+ z_bswap = a[18]; \
+ a[18] = a[29]; \
+ a[29] = z_bswap; \
+ z_bswap = a[19]; \
+ a[19] = a[28]; \
+ a[28] = z_bswap; \
+ z_bswap = a[20]; \
+ a[20] = a[27]; \
+ a[27] = z_bswap; \
+ z_bswap = a[21]; \
+ a[21] = a[26]; \
+ a[26] = z_bswap; \
+ z_bswap = a[22]; \
+ a[22] = a[25]; \
+ a[25] = z_bswap; \
+ z_bswap = a[23]; /* last pair: the two middle bytes */ \
+ a[23] = a[24]; \
+ a[24] = z_bswap; \
+ } while (0)
+
+static mp_err /* ECGroup base_point_mul hook: (out_x, out_y) := n * G via point_mul_g_secp384r1() */
+point_mul_g_secp384r1_wrap(const mp_int *n, mp_int *out_x,
+ mp_int *out_y, const ECGroup *group) /* group is not referenced in this body */
+{
+ unsigned char b_x[48]; /* x coordinate bytes */
+ unsigned char b_y[48]; /* y coordinate bytes */
+ unsigned char b_n[48]; /* scalar bytes; NOTE(review): not wiped before return -- confirm this is acceptable for secret scalars */
+ mp_err res;
+
+ ARGCHK(n != NULL && out_x != NULL && out_y != NULL, MP_BADARG);
+
+ /* fail on out of range scalars: more than 384 significant bits, or not strictly positive */
+ if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); /* presumably stores the status in res and jumps to CLEANUP on failure */
+ MP_BE2LE(b_n); /* point_mul_g_secp384r1() is documented as LE; presumably the mp octets are big-endian */
+ point_mul_g_secp384r1(b_x, b_y, b_n);
+ MP_BE2LE(b_x); /* reverse the LE results again before mp_read_unsigned_octets() */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48));
+
+CLEANUP:
+ return res; /* status of the last MP_CHECKOK that ran */
+}
+
+static mp_err /* ECGroup point_mul hook: (out_x, out_y) := n * (in_x, in_y) via point_mul_secp384r1() */
+point_mul_secp384r1_wrap(const mp_int *n, const mp_int *in_x,
+ const mp_int *in_y, mp_int *out_x,
+ mp_int *out_y, const ECGroup *group) /* group is not referenced in this body */
+{
+ unsigned char b_x[48]; /* input x on the way in, result x on the way out */
+ unsigned char b_y[48]; /* input y on the way in, result y on the way out */
+ unsigned char b_n[48]; /* scalar bytes; NOTE(review): not wiped before return -- confirm this is acceptable for secret scalars */
+ mp_err res;
+
+ ARGCHK(n != NULL && in_x != NULL && in_y != NULL && out_x != NULL &&
+ out_y != NULL,
+ MP_BADARG);
+
+ /* fail on out of range scalars: more than 384 significant bits, or not strictly positive */
+ if (mpl_significant_bits(n) > 384 || mp_cmp_z(n) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 48)); /* presumably stores the status in res and jumps to CLEANUP on failure */
+ MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48));
+ MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48));
+ MP_BE2LE(b_x); /* point_mul_secp384r1() is documented as LE; presumably the mp octets are big-endian */
+ MP_BE2LE(b_y);
+ MP_BE2LE(b_n);
+ point_mul_secp384r1(b_x, b_y, b_n, b_x, b_y); /* b_x/b_y are passed as both the input point and the output buffers */
+ MP_BE2LE(b_x); /* reverse the LE results again before mp_read_unsigned_octets() */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48));
+
+CLEANUP:
+ return res; /* status of the last MP_CHECKOK that ran */
+}
+
+static mp_err /* ECGroup points_mul hook: (out_x, out_y) := n1 * G + n2 * (in_x, in_y) */
+point_mul_two_secp384r1_wrap(const mp_int *n1, const mp_int *n2,
+ const mp_int *in_x,
+ const mp_int *in_y, mp_int *out_x,
+ mp_int *out_y,
+ const ECGroup *group) /* only forwarded to the single-scalar wrappers */
+{
+ unsigned char b_x[48]; /* input x on the way in, result x on the way out */
+ unsigned char b_y[48]; /* input y on the way in, result y on the way out */
+ unsigned char b_n1[48]; /* scalar applied to G */
+ unsigned char b_n2[48]; /* scalar applied to (in_x, in_y) */
+ mp_err res;
+
+ /* If n2 == NULL or 0, this is just a base-point multiplication. */
+ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ)
+ return point_mul_g_secp384r1_wrap(n1, out_x, out_y, group); /* the delegate validates n1 and the outputs itself */
+
+ /* If n1 == NULL or 0, this is just an arbitrary-point multiplication. */
+ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ)
+ return point_mul_secp384r1_wrap(n2, in_x, in_y, out_x, out_y, group);
+
+ ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL,
+ MP_BADARG);
+
+ /* fail on out of range scalars: more than 384 significant bits, or not strictly positive */
+ if (mpl_significant_bits(n1) > 384 || mp_cmp_z(n1) != MP_GT ||
+ mpl_significant_bits(n2) > 384 || mp_cmp_z(n2) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 48)); /* presumably stores the status in res and jumps to CLEANUP on failure */
+ MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 48));
+ MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 48));
+ MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 48));
+ MP_BE2LE(b_x); /* point_mul_two_secp384r1() is documented as LE; presumably the mp octets are big-endian */
+ MP_BE2LE(b_y);
+ MP_BE2LE(b_n1);
+ MP_BE2LE(b_n2);
+ point_mul_two_secp384r1(b_x, b_y, b_n1, b_n2, b_x, b_y); /* b_x/b_y are passed as both the input point and the output buffers */
+ MP_BE2LE(b_x); /* reverse the LE results again before mp_read_unsigned_octets() */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 48));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 48));
+
+CLEANUP:
+ return res; /* status of the last MP_CHECKOK that ran */
+}
+
+mp_err /* points the group's three multiplication hooks at the secp384r1 wrappers; always returns MP_OKAY */
+ec_group_set_secp384r1(ECGroup *group, ECCurveName name) /* NOTE(review): group is dereferenced without a NULL check */
+{
+ if (name == ECCurve_NIST_P384) { /* any other curve name leaves group untouched */
+ group->base_point_mul = &point_mul_g_secp384r1_wrap; /* n * G */
+ group->point_mul = &point_mul_secp384r1_wrap; /* n * P */
+ group->points_mul = &point_mul_two_secp384r1_wrap; /* n1 * G + n2 * P */
+ }
+ return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_secp521r1.c b/security/nss/lib/freebl/ecl/ecp_secp521r1.c
new file mode 100644
index 0000000000..b992506d6e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp521r1.c
@@ -0,0 +1,11622 @@
+/* Autogenerated: ECCKiila https://gitlab.com/nisec/ecckiila */
+/*-
+ * MIT License
+ * -
+ * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley
+ * -
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * -
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * -
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(__SIZEOF_INT128__) && !defined(PEDANTIC)
+
+#include "ecp_secp521r1.h"
+#include <stdint.h>
+#include <string.h>
+#define LIMB_BITS 64 /* width of the C type holding one limb */
+#define LIMB_CNT 9 /* limbs per field element */
+/* Field elements: LIMB_CNT (9) uint64_t limbs; the fiat code below evaluates them at 58-bit offsets */
+typedef uint64_t fe_t[LIMB_CNT];
+typedef uint64_t limb_t;
+
+#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t)) /* copies one whole field element; d and s must not overlap (memcpy) */
+#define fe_set_zero(d) memset(d, 0, sizeof(fe_t)) /* zeroes every limb of d */
+
+/* Projective points: three field-element coordinates X, Y, Z */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+ fe_t Z;
+} pt_prj_t;
+
+/* Affine points: two field-element coordinates X, Y */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+} pt_aff_t;
+
+/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */
+/*-
+ * MIT License
+ *
+ * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file).
+ * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Autogenerated: unsaturated_solinas --static --use-value-barrier secp521r1 64 9 '2^521 - 1' */
+/* curve description: secp521r1 */
+/* machine_wordsize = 64 (from "64") */
+/* requested operations: (all) */
+/* n = 9 (from "9") */
+/* s-c = 2^521 - [(1, 1)] (from "2^521 - 1") */
+/* tight_bounds_multiplier = 1 (from "") */
+/* */
+/* Computed values: */
+/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1] */
+/* eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0) */
+/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */
+/* balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe] */
+
+#include <stdint.h>
+typedef unsigned char fiat_secp521r1_uint1; /* carry/borrow/condition flags, documented below with bounds [0x0 ~> 0x1] */
+typedef signed char fiat_secp521r1_int1; /* signed counterpart, used below for 0 / -1 intermediates */
+#ifdef __GNUC__
+#define FIAT_SECP521R1_FIAT_EXTENSION __extension__ /* prefixes the __int128 typedefs below */
+#define FIAT_SECP521R1_FIAT_INLINE __inline__
+#else
+#define FIAT_SECP521R1_FIAT_EXTENSION /* expands to nothing on other compilers */
+#define FIAT_SECP521R1_FIAT_INLINE
+#endif
+
+FIAT_SECP521R1_FIAT_EXTENSION typedef signed __int128 fiat_secp521r1_int128;
+FIAT_SECP521R1_FIAT_EXTENSION typedef unsigned __int128 fiat_secp521r1_uint128; /* holds the 64x64-bit limb products and their column sums */
+
+/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */
+/* Bounds: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]] */
+typedef uint64_t fiat_secp521r1_loose_field_element[9]; /* same C type as the tight variant: the bounds are a documented contract only */
+
+/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. */
+/* Bounds: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]] */
+typedef uint64_t fiat_secp521r1_tight_field_element[9]; /* the tight bounds are 2^58 per limb and 2^57 for limb 8 */
+
+#if (-1 & 3) != 3 /* preprocessor-time check: -1 must have its two low bits set */
+#error "This code only works on a two's complement system"
+#endif
+
+#if !defined(FIAT_SECP521R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint64_t /* returns a unchanged, after passing it through an empty asm statement */
+fiat_secp521r1_value_barrier_u64(uint64_t a)
+{
+ __asm__("" /* empty template: no instruction is emitted */
+ : "+r"(a) /* a is declared as a read-write register operand; presumably this keeps the optimizer from reasoning about its value */
+ : /* no inputs */);
+ return a;
+}
+#else
+#define fiat_secp521r1_value_barrier_u64(x) (x) /* fallback without inline asm: plain identity */
+#endif
+
+/*
+ * The function fiat_secp521r1_addcarryx_u58 is an addition with carry.
+ * arg1 is the carry-in, out1 the 58-bit sum limb, out2 the carry-out.
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^58
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x3ffffffffffffff]
+ * arg3: [0x0 ~> 0x3ffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x3ffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp521r1_addcarryx_u58(uint64_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint64_t arg2, uint64_t arg3)
+{
+ uint64_t x1;
+ uint64_t x2;
+ fiat_secp521r1_uint1 x3;
+ x1 = ((arg1 + arg2) + arg3); /* full sum; at most 2^59 - 1 under the input bounds, so it cannot wrap 64 bits */
+ x2 = (x1 & UINT64_C(0x3ffffffffffffff)); /* low 58 bits */
+ x3 = (fiat_secp521r1_uint1)(x1 >> 58); /* carry-out: whatever lies above bit 57 */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_subborrowx_u58 is a subtraction with borrow.
+ * arg1 is the borrow-in, out1 the 58-bit difference limb, out2 the borrow-out.
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^58
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x3ffffffffffffff]
+ * arg3: [0x0 ~> 0x3ffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x3ffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp521r1_subborrowx_u58(uint64_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint64_t arg2, uint64_t arg3)
+{
+ int64_t x1;
+ fiat_secp521r1_int1 x2;
+ uint64_t x3;
+ x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3); /* signed difference; negative exactly when a borrow is needed */
+ x2 = (fiat_secp521r1_int1)(x1 >> 58); /* NOTE(review): >> on a negative int64_t is implementation-defined in ISO C; the postcondition needs an arithmetic shift (0 or -1 here) */
+ x3 = (x1 & UINT64_C(0x3ffffffffffffff)); /* low 58 bits of the difference */
+ *out1 = x3;
+ *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* negates x2 to produce the borrow flag */
+}
+
+/*
+ * The function fiat_secp521r1_addcarryx_u57 is an addition with carry.
+ * 57-bit variant: arg1 is the carry-in, out1 the 57-bit sum limb, out2 the carry-out.
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^57
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x1ffffffffffffff]
+ * arg3: [0x0 ~> 0x1ffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x1ffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp521r1_addcarryx_u57(uint64_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint64_t arg2, uint64_t arg3)
+{
+ uint64_t x1;
+ uint64_t x2;
+ fiat_secp521r1_uint1 x3;
+ x1 = ((arg1 + arg2) + arg3); /* full sum; at most 2^58 - 1 under the input bounds, so it cannot wrap 64 bits */
+ x2 = (x1 & UINT64_C(0x1ffffffffffffff)); /* low 57 bits */
+ x3 = (fiat_secp521r1_uint1)(x1 >> 57); /* carry-out: whatever lies above bit 56 */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_subborrowx_u57 is a subtraction with borrow.
+ * 57-bit variant: arg1 is the borrow-in, out1 the 57-bit difference limb, out2 the borrow-out.
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^57
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x1ffffffffffffff]
+ * arg3: [0x0 ~> 0x1ffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x1ffffffffffffff]
+ * out2: [0x0 ~> 0x1]
+ */
+static void
+fiat_secp521r1_subborrowx_u57(uint64_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint64_t arg2, uint64_t arg3)
+{
+ int64_t x1;
+ fiat_secp521r1_int1 x2;
+ uint64_t x3;
+ x1 = ((int64_t)(arg2 - (int64_t)arg1) - (int64_t)arg3); /* signed difference; negative exactly when a borrow is needed */
+ x2 = (fiat_secp521r1_int1)(x1 >> 57); /* NOTE(review): >> on a negative int64_t is implementation-defined in ISO C; the postcondition needs an arithmetic shift (0 or -1 here) */
+ x3 = (x1 & UINT64_C(0x1ffffffffffffff)); /* low 57 bits of the difference */
+ *out1 = x3;
+ *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* negates x2 to produce the borrow flag */
+}
+
+/*
+ * The function fiat_secp521r1_cmovznz_u64 is a single-word conditional move.
+ * The selection is done with bit masks (AND/OR), not with an if statement.
+ * Postconditions:
+ * out1 = (if arg1 = 0 then arg2 else arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffffffffffff]
+ * arg3: [0x0 ~> 0xffffffffffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffffffffffff]
+ */
+static void
+fiat_secp521r1_cmovznz_u64(uint64_t *out1,
+ fiat_secp521r1_uint1 arg1, uint64_t arg2,
+ uint64_t arg3)
+{
+ fiat_secp521r1_uint1 x1;
+ uint64_t x2;
+ uint64_t x3;
+ x1 = (!(!arg1)); /* normalise the condition to exactly 0 or 1 */
+ x2 = ((fiat_secp521r1_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); /* mask: all ones when x1 == 1, zero when x1 == 0 */
+ x3 = ((fiat_secp521r1_value_barrier_u64(x2) & arg3) | /* mask set: take arg3 */
+ (fiat_secp521r1_value_barrier_u64((~x2)) & arg2)); /* mask clear: take arg2 */
+ *out1 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_carry_mul multiplies two field elements and reduces the result.
+ * All 81 limb products are formed, summed per output limb, then run through one carry chain; m = 2^521 - 1.
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * eval arg2) mod m
+ * arg1/arg2 are typed as loose field elements, out1 as a tight one (see the typedef bounds).
+ */
+static void
+fiat_secp521r1_carry_mul(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_loose_field_element arg1,
+ const fiat_secp521r1_loose_field_element arg2)
+{
+ fiat_secp521r1_uint128 x1;
+ fiat_secp521r1_uint128 x2;
+ fiat_secp521r1_uint128 x3;
+ fiat_secp521r1_uint128 x4;
+ fiat_secp521r1_uint128 x5;
+ fiat_secp521r1_uint128 x6;
+ fiat_secp521r1_uint128 x7;
+ fiat_secp521r1_uint128 x8;
+ fiat_secp521r1_uint128 x9;
+ fiat_secp521r1_uint128 x10;
+ fiat_secp521r1_uint128 x11;
+ fiat_secp521r1_uint128 x12;
+ fiat_secp521r1_uint128 x13;
+ fiat_secp521r1_uint128 x14;
+ fiat_secp521r1_uint128 x15;
+ fiat_secp521r1_uint128 x16;
+ fiat_secp521r1_uint128 x17;
+ fiat_secp521r1_uint128 x18;
+ fiat_secp521r1_uint128 x19;
+ fiat_secp521r1_uint128 x20;
+ fiat_secp521r1_uint128 x21;
+ fiat_secp521r1_uint128 x22;
+ fiat_secp521r1_uint128 x23;
+ fiat_secp521r1_uint128 x24;
+ fiat_secp521r1_uint128 x25;
+ fiat_secp521r1_uint128 x26;
+ fiat_secp521r1_uint128 x27;
+ fiat_secp521r1_uint128 x28;
+ fiat_secp521r1_uint128 x29;
+ fiat_secp521r1_uint128 x30;
+ fiat_secp521r1_uint128 x31;
+ fiat_secp521r1_uint128 x32;
+ fiat_secp521r1_uint128 x33;
+ fiat_secp521r1_uint128 x34;
+ fiat_secp521r1_uint128 x35;
+ fiat_secp521r1_uint128 x36;
+ fiat_secp521r1_uint128 x37;
+ fiat_secp521r1_uint128 x38;
+ fiat_secp521r1_uint128 x39;
+ fiat_secp521r1_uint128 x40;
+ fiat_secp521r1_uint128 x41;
+ fiat_secp521r1_uint128 x42;
+ fiat_secp521r1_uint128 x43;
+ fiat_secp521r1_uint128 x44;
+ fiat_secp521r1_uint128 x45;
+ fiat_secp521r1_uint128 x46;
+ fiat_secp521r1_uint128 x47;
+ fiat_secp521r1_uint128 x48;
+ fiat_secp521r1_uint128 x49;
+ fiat_secp521r1_uint128 x50;
+ fiat_secp521r1_uint128 x51;
+ fiat_secp521r1_uint128 x52;
+ fiat_secp521r1_uint128 x53;
+ fiat_secp521r1_uint128 x54;
+ fiat_secp521r1_uint128 x55;
+ fiat_secp521r1_uint128 x56;
+ fiat_secp521r1_uint128 x57;
+ fiat_secp521r1_uint128 x58;
+ fiat_secp521r1_uint128 x59;
+ fiat_secp521r1_uint128 x60;
+ fiat_secp521r1_uint128 x61;
+ fiat_secp521r1_uint128 x62;
+ fiat_secp521r1_uint128 x63;
+ fiat_secp521r1_uint128 x64;
+ fiat_secp521r1_uint128 x65;
+ fiat_secp521r1_uint128 x66;
+ fiat_secp521r1_uint128 x67;
+ fiat_secp521r1_uint128 x68;
+ fiat_secp521r1_uint128 x69;
+ fiat_secp521r1_uint128 x70;
+ fiat_secp521r1_uint128 x71;
+ fiat_secp521r1_uint128 x72;
+ fiat_secp521r1_uint128 x73;
+ fiat_secp521r1_uint128 x74;
+ fiat_secp521r1_uint128 x75;
+ fiat_secp521r1_uint128 x76;
+ fiat_secp521r1_uint128 x77;
+ fiat_secp521r1_uint128 x78;
+ fiat_secp521r1_uint128 x79;
+ fiat_secp521r1_uint128 x80;
+ fiat_secp521r1_uint128 x81;
+ fiat_secp521r1_uint128 x82;
+ fiat_secp521r1_uint128 x83;
+ uint64_t x84;
+ fiat_secp521r1_uint128 x85;
+ fiat_secp521r1_uint128 x86;
+ fiat_secp521r1_uint128 x87;
+ fiat_secp521r1_uint128 x88;
+ fiat_secp521r1_uint128 x89;
+ fiat_secp521r1_uint128 x90;
+ fiat_secp521r1_uint128 x91;
+ fiat_secp521r1_uint128 x92;
+ fiat_secp521r1_uint128 x93;
+ fiat_secp521r1_uint128 x94;
+ uint64_t x95;
+ fiat_secp521r1_uint128 x96;
+ fiat_secp521r1_uint128 x97;
+ uint64_t x98;
+ fiat_secp521r1_uint128 x99;
+ fiat_secp521r1_uint128 x100;
+ uint64_t x101;
+ fiat_secp521r1_uint128 x102;
+ fiat_secp521r1_uint128 x103;
+ uint64_t x104;
+ fiat_secp521r1_uint128 x105;
+ fiat_secp521r1_uint128 x106;
+ uint64_t x107;
+ fiat_secp521r1_uint128 x108;
+ fiat_secp521r1_uint128 x109;
+ uint64_t x110;
+ fiat_secp521r1_uint128 x111;
+ fiat_secp521r1_uint128 x112;
+ uint64_t x113;
+ fiat_secp521r1_uint128 x114;
+ fiat_secp521r1_uint128 x115;
+ uint64_t x116;
+ fiat_secp521r1_uint128 x117;
+ uint64_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ fiat_secp521r1_uint1 x121;
+ uint64_t x122;
+ uint64_t x123;
+ x1 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[8]) * 0x2)); /* x1..x36: limb pairs with i + j >= 9; their weight 2^(58*(i+j)) carries a factor 2^522 = 2 (mod m), hence the * 0x2 */
+ x2 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[7]) * 0x2));
+ x3 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[6]) * 0x2));
+ x4 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[5]) * 0x2));
+ x5 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[4]) * 0x2));
+ x6 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[3]) * 0x2));
+ x7 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[2]) * 0x2));
+ x8 = ((fiat_secp521r1_uint128)(arg1[8]) * ((arg2[1]) * 0x2));
+ x9 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[8]) * 0x2));
+ x10 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[7]) * 0x2));
+ x11 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[6]) * 0x2));
+ x12 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[5]) * 0x2));
+ x13 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[4]) * 0x2));
+ x14 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[3]) * 0x2));
+ x15 = ((fiat_secp521r1_uint128)(arg1[7]) * ((arg2[2]) * 0x2));
+ x16 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[8]) * 0x2));
+ x17 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[7]) * 0x2));
+ x18 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[6]) * 0x2));
+ x19 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[5]) * 0x2));
+ x20 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[4]) * 0x2));
+ x21 = ((fiat_secp521r1_uint128)(arg1[6]) * ((arg2[3]) * 0x2));
+ x22 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[8]) * 0x2));
+ x23 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[7]) * 0x2));
+ x24 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[6]) * 0x2));
+ x25 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[5]) * 0x2));
+ x26 = ((fiat_secp521r1_uint128)(arg1[5]) * ((arg2[4]) * 0x2));
+ x27 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[8]) * 0x2));
+ x28 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[7]) * 0x2));
+ x29 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[6]) * 0x2));
+ x30 = ((fiat_secp521r1_uint128)(arg1[4]) * ((arg2[5]) * 0x2));
+ x31 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[8]) * 0x2));
+ x32 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[7]) * 0x2));
+ x33 = ((fiat_secp521r1_uint128)(arg1[3]) * ((arg2[6]) * 0x2));
+ x34 = ((fiat_secp521r1_uint128)(arg1[2]) * ((arg2[8]) * 0x2));
+ x35 = ((fiat_secp521r1_uint128)(arg1[2]) * ((arg2[7]) * 0x2));
+ x36 = ((fiat_secp521r1_uint128)(arg1[1]) * ((arg2[8]) * 0x2));
+ x37 = ((fiat_secp521r1_uint128)(arg1[8]) * (arg2[0])); /* x37..x81: limb pairs with i + j <= 8; no wrap-around factor */
+ x38 = ((fiat_secp521r1_uint128)(arg1[7]) * (arg2[1]));
+ x39 = ((fiat_secp521r1_uint128)(arg1[7]) * (arg2[0]));
+ x40 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[2]));
+ x41 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[1]));
+ x42 = ((fiat_secp521r1_uint128)(arg1[6]) * (arg2[0]));
+ x43 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[3]));
+ x44 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[2]));
+ x45 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[1]));
+ x46 = ((fiat_secp521r1_uint128)(arg1[5]) * (arg2[0]));
+ x47 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[4]));
+ x48 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[3]));
+ x49 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[2]));
+ x50 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[1]));
+ x51 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg2[0]));
+ x52 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[5]));
+ x53 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[4]));
+ x54 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[3]));
+ x55 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[2]));
+ x56 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[1]));
+ x57 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg2[0]));
+ x58 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[6]));
+ x59 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[5]));
+ x60 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[4]));
+ x61 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[3]));
+ x62 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[2]));
+ x63 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[1]));
+ x64 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg2[0]));
+ x65 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[7]));
+ x66 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[6]));
+ x67 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[5]));
+ x68 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[4]));
+ x69 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[3]));
+ x70 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[2]));
+ x71 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[1]));
+ x72 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg2[0]));
+ x73 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[8]));
+ x74 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[7]));
+ x75 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[6]));
+ x76 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[5]));
+ x77 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[4]));
+ x78 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[3]));
+ x79 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[2]));
+ x80 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[1]));
+ x81 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg2[0]));
+ x82 = (x81 + (x36 + (x35 + (x33 + (x30 + (x26 + (x21 + (x15 + x8)))))))); /* column sum for output limb 0 */
+ x83 = (x82 >> 58); /* carry out of limb 0 */
+ x84 = (uint64_t)(x82 & UINT64_C(0x3ffffffffffffff)); /* low 58 bits of limb 0 */
+ x85 = (x73 + (x65 + (x58 + (x52 + (x47 + (x43 + (x40 + (x38 + x37)))))))); /* column sum for limb 8 */
+ x86 = (x74 + (x66 + (x59 + (x53 + (x48 + (x44 + (x41 + (x39 + x1)))))))); /* limb 7 */
+ x87 = (x75 + (x67 + (x60 + (x54 + (x49 + (x45 + (x42 + (x9 + x2)))))))); /* limb 6 */
+ x88 = (x76 + (x68 + (x61 + (x55 + (x50 + (x46 + (x16 + (x10 + x3)))))))); /* limb 5 */
+ x89 = (x77 + (x69 + (x62 + (x56 + (x51 + (x22 + (x17 + (x11 + x4)))))))); /* limb 4 */
+ x90 = (x78 + (x70 + (x63 + (x57 + (x27 + (x23 + (x18 + (x12 + x5)))))))); /* limb 3 */
+ x91 = (x79 + (x71 + (x64 + (x31 + (x28 + (x24 + (x19 + (x13 + x6)))))))); /* limb 2 */
+ x92 = (x80 + (x72 + (x34 + (x32 + (x29 + (x25 + (x20 + (x14 + x7)))))))); /* limb 1 */
+ x93 = (x83 + x92); /* carry chain: fold each carry into the next limb up, limbs 1 through 8 */
+ x94 = (x93 >> 58);
+ x95 = (uint64_t)(x93 & UINT64_C(0x3ffffffffffffff));
+ x96 = (x94 + x91);
+ x97 = (x96 >> 58);
+ x98 = (uint64_t)(x96 & UINT64_C(0x3ffffffffffffff));
+ x99 = (x97 + x90);
+ x100 = (x99 >> 58);
+ x101 = (uint64_t)(x99 & UINT64_C(0x3ffffffffffffff));
+ x102 = (x100 + x89);
+ x103 = (x102 >> 58);
+ x104 = (uint64_t)(x102 & UINT64_C(0x3ffffffffffffff));
+ x105 = (x103 + x88);
+ x106 = (x105 >> 58);
+ x107 = (uint64_t)(x105 & UINT64_C(0x3ffffffffffffff));
+ x108 = (x106 + x87);
+ x109 = (x108 >> 58);
+ x110 = (uint64_t)(x108 & UINT64_C(0x3ffffffffffffff));
+ x111 = (x109 + x86);
+ x112 = (x111 >> 58);
+ x113 = (uint64_t)(x111 & UINT64_C(0x3ffffffffffffff));
+ x114 = (x112 + x85);
+ x115 = (x114 >> 57); /* limb 8 keeps only 57 bits (8 * 58 + 57 = 521) */
+ x116 = (uint64_t)(x114 & UINT64_C(0x1ffffffffffffff));
+ x117 = (x84 + x115); /* the carry out of limb 8 has weight 2^521 = 1 (mod m), so it is added back into limb 0 */
+ x118 = (uint64_t)(x117 >> 58);
+ x119 = (uint64_t)(x117 & UINT64_C(0x3ffffffffffffff));
+ x120 = (x118 + x95); /* second pass over limbs 0 and 1, matching carry_chain = [0, ..., 8, 0, 1] */
+ x121 = (fiat_secp521r1_uint1)(x120 >> 58);
+ x122 = (x120 & UINT64_C(0x3ffffffffffffff));
+ x123 = (x121 + x98); /* final carry lands in limb 2 without a further mask */
+ out1[0] = x119;
+ out1[1] = x122;
+ out1[2] = x123;
+ out1[3] = x101;
+ out1[4] = x104;
+ out1[5] = x107;
+ out1[6] = x110;
+ out1[7] = x113;
+ out1[8] = x116;
+}
+
+/*
+ * The function fiat_secp521r1_carry_square squares a field element and reduces the result.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * eval arg1) mod m
+ *
+ */
+static void
+fiat_secp521r1_carry_square(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_loose_field_element arg1)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ fiat_secp521r1_uint128 x17;
+ fiat_secp521r1_uint128 x18;
+ fiat_secp521r1_uint128 x19;
+ fiat_secp521r1_uint128 x20;
+ fiat_secp521r1_uint128 x21;
+ fiat_secp521r1_uint128 x22;
+ fiat_secp521r1_uint128 x23;
+ fiat_secp521r1_uint128 x24;
+ fiat_secp521r1_uint128 x25;
+ fiat_secp521r1_uint128 x26;
+ fiat_secp521r1_uint128 x27;
+ fiat_secp521r1_uint128 x28;
+ fiat_secp521r1_uint128 x29;
+ fiat_secp521r1_uint128 x30;
+ fiat_secp521r1_uint128 x31;
+ fiat_secp521r1_uint128 x32;
+ fiat_secp521r1_uint128 x33;
+ fiat_secp521r1_uint128 x34;
+ fiat_secp521r1_uint128 x35;
+ fiat_secp521r1_uint128 x36;
+ fiat_secp521r1_uint128 x37;
+ fiat_secp521r1_uint128 x38;
+ fiat_secp521r1_uint128 x39;
+ fiat_secp521r1_uint128 x40;
+ fiat_secp521r1_uint128 x41;
+ fiat_secp521r1_uint128 x42;
+ fiat_secp521r1_uint128 x43;
+ fiat_secp521r1_uint128 x44;
+ fiat_secp521r1_uint128 x45;
+ fiat_secp521r1_uint128 x46;
+ fiat_secp521r1_uint128 x47;
+ fiat_secp521r1_uint128 x48;
+ fiat_secp521r1_uint128 x49;
+ fiat_secp521r1_uint128 x50;
+ fiat_secp521r1_uint128 x51;
+ fiat_secp521r1_uint128 x52;
+ fiat_secp521r1_uint128 x53;
+ fiat_secp521r1_uint128 x54;
+ fiat_secp521r1_uint128 x55;
+ fiat_secp521r1_uint128 x56;
+ fiat_secp521r1_uint128 x57;
+ fiat_secp521r1_uint128 x58;
+ fiat_secp521r1_uint128 x59;
+ fiat_secp521r1_uint128 x60;
+ fiat_secp521r1_uint128 x61;
+ fiat_secp521r1_uint128 x62;
+ fiat_secp521r1_uint128 x63;
+ uint64_t x64;
+ fiat_secp521r1_uint128 x65;
+ fiat_secp521r1_uint128 x66;
+ fiat_secp521r1_uint128 x67;
+ fiat_secp521r1_uint128 x68;
+ fiat_secp521r1_uint128 x69;
+ fiat_secp521r1_uint128 x70;
+ fiat_secp521r1_uint128 x71;
+ fiat_secp521r1_uint128 x72;
+ fiat_secp521r1_uint128 x73;
+ fiat_secp521r1_uint128 x74;
+ uint64_t x75;
+ fiat_secp521r1_uint128 x76;
+ fiat_secp521r1_uint128 x77;
+ uint64_t x78;
+ fiat_secp521r1_uint128 x79;
+ fiat_secp521r1_uint128 x80;
+ uint64_t x81;
+ fiat_secp521r1_uint128 x82;
+ fiat_secp521r1_uint128 x83;
+ uint64_t x84;
+ fiat_secp521r1_uint128 x85;
+ fiat_secp521r1_uint128 x86;
+ uint64_t x87;
+ fiat_secp521r1_uint128 x88;
+ fiat_secp521r1_uint128 x89;
+ uint64_t x90;
+ fiat_secp521r1_uint128 x91;
+ fiat_secp521r1_uint128 x92;
+ uint64_t x93;
+ fiat_secp521r1_uint128 x94;
+ fiat_secp521r1_uint128 x95;
+ uint64_t x96;
+ fiat_secp521r1_uint128 x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ fiat_secp521r1_uint1 x101;
+ uint64_t x102;
+ uint64_t x103;
+ x1 = (arg1[8]);
+ x2 = (x1 * 0x2);
+ x3 = ((arg1[8]) * 0x2);
+ x4 = (arg1[7]);
+ x5 = (x4 * 0x2);
+ x6 = ((arg1[7]) * 0x2);
+ x7 = (arg1[6]);
+ x8 = (x7 * 0x2);
+ x9 = ((arg1[6]) * 0x2);
+ x10 = (arg1[5]);
+ x11 = (x10 * 0x2);
+ x12 = ((arg1[5]) * 0x2);
+ x13 = ((arg1[4]) * 0x2);
+ x14 = ((arg1[3]) * 0x2);
+ x15 = ((arg1[2]) * 0x2);
+ x16 = ((arg1[1]) * 0x2);
+ x17 = ((fiat_secp521r1_uint128)(arg1[8]) * (x1 * 0x2));
+ x18 = ((fiat_secp521r1_uint128)(arg1[7]) * (x2 * 0x2));
+ x19 = ((fiat_secp521r1_uint128)(arg1[7]) * (x4 * 0x2));
+ x20 = ((fiat_secp521r1_uint128)(arg1[6]) * (x2 * 0x2));
+ x21 = ((fiat_secp521r1_uint128)(arg1[6]) * (x5 * 0x2));
+ x22 = ((fiat_secp521r1_uint128)(arg1[6]) * (x7 * 0x2));
+ x23 = ((fiat_secp521r1_uint128)(arg1[5]) * (x2 * 0x2));
+ x24 = ((fiat_secp521r1_uint128)(arg1[5]) * (x5 * 0x2));
+ x25 = ((fiat_secp521r1_uint128)(arg1[5]) * (x8 * 0x2));
+ x26 = ((fiat_secp521r1_uint128)(arg1[5]) * (x10 * 0x2));
+ x27 = ((fiat_secp521r1_uint128)(arg1[4]) * (x2 * 0x2));
+ x28 = ((fiat_secp521r1_uint128)(arg1[4]) * (x5 * 0x2));
+ x29 = ((fiat_secp521r1_uint128)(arg1[4]) * (x8 * 0x2));
+ x30 = ((fiat_secp521r1_uint128)(arg1[4]) * (x11 * 0x2));
+ x31 = ((fiat_secp521r1_uint128)(arg1[4]) * (arg1[4]));
+ x32 = ((fiat_secp521r1_uint128)(arg1[3]) * (x2 * 0x2));
+ x33 = ((fiat_secp521r1_uint128)(arg1[3]) * (x5 * 0x2));
+ x34 = ((fiat_secp521r1_uint128)(arg1[3]) * (x8 * 0x2));
+ x35 = ((fiat_secp521r1_uint128)(arg1[3]) * x12);
+ x36 = ((fiat_secp521r1_uint128)(arg1[3]) * x13);
+ x37 = ((fiat_secp521r1_uint128)(arg1[3]) * (arg1[3]));
+ x38 = ((fiat_secp521r1_uint128)(arg1[2]) * (x2 * 0x2));
+ x39 = ((fiat_secp521r1_uint128)(arg1[2]) * (x5 * 0x2));
+ x40 = ((fiat_secp521r1_uint128)(arg1[2]) * x9);
+ x41 = ((fiat_secp521r1_uint128)(arg1[2]) * x12);
+ x42 = ((fiat_secp521r1_uint128)(arg1[2]) * x13);
+ x43 = ((fiat_secp521r1_uint128)(arg1[2]) * x14);
+ x44 = ((fiat_secp521r1_uint128)(arg1[2]) * (arg1[2]));
+ x45 = ((fiat_secp521r1_uint128)(arg1[1]) * (x2 * 0x2));
+ x46 = ((fiat_secp521r1_uint128)(arg1[1]) * x6);
+ x47 = ((fiat_secp521r1_uint128)(arg1[1]) * x9);
+ x48 = ((fiat_secp521r1_uint128)(arg1[1]) * x12);
+ x49 = ((fiat_secp521r1_uint128)(arg1[1]) * x13);
+ x50 = ((fiat_secp521r1_uint128)(arg1[1]) * x14);
+ x51 = ((fiat_secp521r1_uint128)(arg1[1]) * x15);
+ x52 = ((fiat_secp521r1_uint128)(arg1[1]) * (arg1[1]));
+ x53 = ((fiat_secp521r1_uint128)(arg1[0]) * x3);
+ x54 = ((fiat_secp521r1_uint128)(arg1[0]) * x6);
+ x55 = ((fiat_secp521r1_uint128)(arg1[0]) * x9);
+ x56 = ((fiat_secp521r1_uint128)(arg1[0]) * x12);
+ x57 = ((fiat_secp521r1_uint128)(arg1[0]) * x13);
+ x58 = ((fiat_secp521r1_uint128)(arg1[0]) * x14);
+ x59 = ((fiat_secp521r1_uint128)(arg1[0]) * x15);
+ x60 = ((fiat_secp521r1_uint128)(arg1[0]) * x16);
+ x61 = ((fiat_secp521r1_uint128)(arg1[0]) * (arg1[0]));
+ x62 = (x61 + (x45 + (x39 + (x34 + x30))));
+ x63 = (x62 >> 58);
+ x64 = (uint64_t)(x62 & UINT64_C(0x3ffffffffffffff));
+ x65 = (x53 + (x46 + (x40 + (x35 + x31))));
+ x66 = (x54 + (x47 + (x41 + (x36 + x17))));
+ x67 = (x55 + (x48 + (x42 + (x37 + x18))));
+ x68 = (x56 + (x49 + (x43 + (x20 + x19))));
+ x69 = (x57 + (x50 + (x44 + (x23 + x21))));
+ x70 = (x58 + (x51 + (x27 + (x24 + x22))));
+ x71 = (x59 + (x52 + (x32 + (x28 + x25))));
+ x72 = (x60 + (x38 + (x33 + (x29 + x26))));
+ x73 = (x63 + x72);
+ x74 = (x73 >> 58);
+ x75 = (uint64_t)(x73 & UINT64_C(0x3ffffffffffffff));
+ x76 = (x74 + x71);
+ x77 = (x76 >> 58);
+ x78 = (uint64_t)(x76 & UINT64_C(0x3ffffffffffffff));
+ x79 = (x77 + x70);
+ x80 = (x79 >> 58);
+ x81 = (uint64_t)(x79 & UINT64_C(0x3ffffffffffffff));
+ x82 = (x80 + x69);
+ x83 = (x82 >> 58);
+ x84 = (uint64_t)(x82 & UINT64_C(0x3ffffffffffffff));
+ x85 = (x83 + x68);
+ x86 = (x85 >> 58);
+ x87 = (uint64_t)(x85 & UINT64_C(0x3ffffffffffffff));
+ x88 = (x86 + x67);
+ x89 = (x88 >> 58);
+ x90 = (uint64_t)(x88 & UINT64_C(0x3ffffffffffffff));
+ x91 = (x89 + x66);
+ x92 = (x91 >> 58);
+ x93 = (uint64_t)(x91 & UINT64_C(0x3ffffffffffffff));
+ x94 = (x92 + x65);
+ x95 = (x94 >> 57);
+ x96 = (uint64_t)(x94 & UINT64_C(0x1ffffffffffffff));
+ x97 = (x64 + x95);
+ x98 = (uint64_t)(x97 >> 58);
+ x99 = (uint64_t)(x97 & UINT64_C(0x3ffffffffffffff));
+ x100 = (x98 + x75);
+ x101 = (fiat_secp521r1_uint1)(x100 >> 58);
+ x102 = (x100 & UINT64_C(0x3ffffffffffffff));
+ x103 = (x101 + x78);
+ out1[0] = x99;
+ out1[1] = x102;
+ out1[2] = x103;
+ out1[3] = x81;
+ out1[4] = x84;
+ out1[5] = x87;
+ out1[6] = x90;
+ out1[7] = x93;
+ out1[8] = x96;
+}
+
+/*
+ * The function fiat_secp521r1_carry_add adds two field elements.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 + eval arg2) mod m
+ *
+ * Implementation notes:
+ * - x1..x9 are the limb-wise sums arg1[i] + arg2[i]; each of x2..x9 also
+ * takes in the bits at position 58 and above of the previous partial
+ * sum (x >> 58).
+ * - Bits at position 57 and above of x9 are added into limb 0 (x10), so
+ * the top limb is treated as 57 bits wide while limbs 0..7 are 58 bits
+ * wide (8 * 58 + 57 = 521).
+ * NOTE(review): this wrap-around matches m = 2^521 - 1; m is defined
+ * outside this function - confirm.
+ * - Only two more carry steps follow the wrap-around (x10 -> x11 -> x14).
+ * out1[2] (x14) is stored without a final mask; out1[0], out1[1] and
+ * out1[3..7] are masked to 58 bits and out1[8] to 57 bits.
+ * - The body is straight-line code: no branches, loops or data-dependent
+ * indexing.
+ * - NOTE(review): no input bounds are stated here; presumably the limbs of
+ * arg1 and arg2 must be small enough that the uint64_t sums cannot
+ * wrap - confirm against the bounds documented for
+ * fiat_secp521r1_tight_field_element.
+ *
+ */
+static void
+fiat_secp521r1_carry_add(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1,
+ const fiat_secp521r1_tight_field_element arg2)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ x1 = ((arg1[0]) + (arg2[0]));
+ x2 = ((x1 >> 58) + ((arg1[1]) + (arg2[1])));
+ x3 = ((x2 >> 58) + ((arg1[2]) + (arg2[2])));
+ x4 = ((x3 >> 58) + ((arg1[3]) + (arg2[3])));
+ x5 = ((x4 >> 58) + ((arg1[4]) + (arg2[4])));
+ x6 = ((x5 >> 58) + ((arg1[5]) + (arg2[5])));
+ x7 = ((x6 >> 58) + ((arg1[6]) + (arg2[6])));
+ x8 = ((x7 >> 58) + ((arg1[7]) + (arg2[7])));
+ x9 = ((x8 >> 58) + ((arg1[8]) + (arg2[8]))); /* top limb: carries out at bit 57, not 58 */
+ x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); /* overflow of the top limb re-enters at limb 0 */
+ x11 = ((fiat_secp521r1_uint1)(x10 >> 58) +
+ (x2 & UINT64_C(0x3ffffffffffffff)));
+ x12 = (x10 & UINT64_C(0x3ffffffffffffff));
+ x13 = (x11 & UINT64_C(0x3ffffffffffffff));
+ x14 = ((fiat_secp521r1_uint1)(x11 >> 58) +
+ (x3 & UINT64_C(0x3ffffffffffffff))); /* last carry step; result is not re-masked */
+ x15 = (x4 & UINT64_C(0x3ffffffffffffff));
+ x16 = (x5 & UINT64_C(0x3ffffffffffffff));
+ x17 = (x6 & UINT64_C(0x3ffffffffffffff));
+ x18 = (x7 & UINT64_C(0x3ffffffffffffff));
+ x19 = (x8 & UINT64_C(0x3ffffffffffffff));
+ x20 = (x9 & UINT64_C(0x1ffffffffffffff)); /* 57-bit mask for the top limb */
+ out1[0] = x12;
+ out1[1] = x13;
+ out1[2] = x14;
+ out1[3] = x15;
+ out1[4] = x16;
+ out1[5] = x17;
+ out1[6] = x18;
+ out1[7] = x19;
+ out1[8] = x20;
+}
+
+/*
+ * The function fiat_secp521r1_carry_sub subtracts two field elements.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 - eval arg2) mod m
+ *
+ * Implementation notes:
+ * - Before arg2[i] is subtracted, a constant is added to arg1[i]:
+ * 0x7fffffffffffffe (= 2 * 0x3ffffffffffffff) for limbs 0..7 and
+ * 0x3fffffffffffffe (= 2 * 0x1ffffffffffffff) for limb 8.
+ * NOTE(review): presumably these offsets together encode a multiple of
+ * m, so the result is unchanged mod m while each unsigned limb
+ * difference stays non-negative for inputs within the tight bounds -
+ * confirm against the field-element bounds.
+ * - Each of x2..x9 also takes in the bits at position 58 and above of the
+ * previous partial result (x >> 58).
+ * - Bits at position 57 and above of x9 are added into limb 0 (x10),
+ * followed by two more carry steps (x10 -> x11 -> x14). out1[2] (x14)
+ * is stored without a final mask.
+ * - The body is straight-line code: no branches, loops or data-dependent
+ * indexing.
+ *
+ */
+static void
+fiat_secp521r1_carry_sub(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1,
+ const fiat_secp521r1_tight_field_element arg2)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ x1 = ((UINT64_C(0x7fffffffffffffe) + (arg1[0])) - (arg2[0]));
+ x2 = ((x1 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[1])) - (arg2[1])));
+ x3 = ((x2 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[2])) - (arg2[2])));
+ x4 = ((x3 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[3])) - (arg2[3])));
+ x5 = ((x4 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[4])) - (arg2[4])));
+ x6 = ((x5 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[5])) - (arg2[5])));
+ x7 = ((x6 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[6])) - (arg2[6])));
+ x8 = ((x7 >> 58) + ((UINT64_C(0x7fffffffffffffe) + (arg1[7])) - (arg2[7])));
+ x9 = ((x8 >> 58) + ((UINT64_C(0x3fffffffffffffe) + (arg1[8])) - (arg2[8]))); /* top limb uses the smaller offset */
+ x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) + (x9 >> 57)); /* overflow of the top limb re-enters at limb 0 */
+ x11 = ((fiat_secp521r1_uint1)(x10 >> 58) +
+ (x2 & UINT64_C(0x3ffffffffffffff)));
+ x12 = (x10 & UINT64_C(0x3ffffffffffffff));
+ x13 = (x11 & UINT64_C(0x3ffffffffffffff));
+ x14 = ((fiat_secp521r1_uint1)(x11 >> 58) +
+ (x3 & UINT64_C(0x3ffffffffffffff))); /* last carry step; result is not re-masked */
+ x15 = (x4 & UINT64_C(0x3ffffffffffffff));
+ x16 = (x5 & UINT64_C(0x3ffffffffffffff));
+ x17 = (x6 & UINT64_C(0x3ffffffffffffff));
+ x18 = (x7 & UINT64_C(0x3ffffffffffffff));
+ x19 = (x8 & UINT64_C(0x3ffffffffffffff));
+ x20 = (x9 & UINT64_C(0x1ffffffffffffff)); /* 57-bit mask for the top limb */
+ out1[0] = x12;
+ out1[1] = x13;
+ out1[2] = x14;
+ out1[3] = x15;
+ out1[4] = x16;
+ out1[5] = x17;
+ out1[6] = x18;
+ out1[7] = x19;
+ out1[8] = x20;
+}
+
+/*
+ * The function fiat_secp521r1_carry_opp negates a field element.
+ *
+ * Postconditions:
+ * eval out1 mod m = -eval arg1 mod m
+ *
+ * Implementation notes:
+ * - Each limb is computed as a constant minus arg1[i]: the constant is
+ * 0x7fffffffffffffe (= 2 * 0x3ffffffffffffff) for limbs 0..7 and
+ * 0x3fffffffffffffe (= 2 * 0x1ffffffffffffff) for limb 8.
+ * NOTE(review): presumably these constants together encode a multiple
+ * of m, and the unsigned differences stay non-negative for inputs
+ * within the tight bounds - confirm against the field-element bounds.
+ * - The carry out of each partial result (x >> 58, or x9 >> 57 for the
+ * top limb) is cast to fiat_secp521r1_uint1 before being added to the
+ * next limb.
+ * - The carry out of the top limb is added into limb 0 (x10), followed by
+ * two more carry steps (x10 -> x11 -> x14). out1[2] (x14) is stored
+ * without a final mask.
+ * - The body is straight-line code: no branches, loops or data-dependent
+ * indexing.
+ *
+ */
+static void
+fiat_secp521r1_carry_opp(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ x1 = (UINT64_C(0x7fffffffffffffe) - (arg1[0]));
+ x2 = ((fiat_secp521r1_uint1)(x1 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[1])));
+ x3 = ((fiat_secp521r1_uint1)(x2 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[2])));
+ x4 = ((fiat_secp521r1_uint1)(x3 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[3])));
+ x5 = ((fiat_secp521r1_uint1)(x4 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[4])));
+ x6 = ((fiat_secp521r1_uint1)(x5 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[5])));
+ x7 = ((fiat_secp521r1_uint1)(x6 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[6])));
+ x8 = ((fiat_secp521r1_uint1)(x7 >> 58) +
+ (UINT64_C(0x7fffffffffffffe) - (arg1[7])));
+ x9 = ((fiat_secp521r1_uint1)(x8 >> 58) +
+ (UINT64_C(0x3fffffffffffffe) - (arg1[8]))); /* top limb uses the smaller constant */
+ x10 = ((x1 & UINT64_C(0x3ffffffffffffff)) +
+ (uint64_t)(fiat_secp521r1_uint1)(x9 >> 57)); /* overflow of the top limb re-enters at limb 0 */
+ x11 = ((fiat_secp521r1_uint1)(x10 >> 58) +
+ (x2 & UINT64_C(0x3ffffffffffffff)));
+ x12 = (x10 & UINT64_C(0x3ffffffffffffff));
+ x13 = (x11 & UINT64_C(0x3ffffffffffffff));
+ x14 = ((fiat_secp521r1_uint1)(x11 >> 58) +
+ (x3 & UINT64_C(0x3ffffffffffffff))); /* last carry step; result is not re-masked */
+ x15 = (x4 & UINT64_C(0x3ffffffffffffff));
+ x16 = (x5 & UINT64_C(0x3ffffffffffffff));
+ x17 = (x6 & UINT64_C(0x3ffffffffffffff));
+ x18 = (x7 & UINT64_C(0x3ffffffffffffff));
+ x19 = (x8 & UINT64_C(0x3ffffffffffffff));
+ x20 = (x9 & UINT64_C(0x1ffffffffffffff)); /* 57-bit mask for the top limb */
+ out1[0] = x12;
+ out1[1] = x13;
+ out1[2] = x14;
+ out1[3] = x15;
+ out1[4] = x16;
+ out1[5] = x17;
+ out1[6] = x18;
+ out1[7] = x19;
+ out1[8] = x20;
+}
+
+/*
+ * The function fiat_secp521r1_selectznz is a multi-limb conditional select.
+ *
+ * Postconditions:
+ * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+ *
+ * Implementation notes:
+ * - The same selector arg1 is passed to fiat_secp521r1_cmovznz_u64 once
+ * per limb, so all nine limbs come from the same source array.
+ * - All nine results are held in temporaries x1..x9 and only then written
+ * to out1, so every read of arg2/arg3 happens before any store to out1.
+ * - This function itself contains no branch on arg1.
+ * NOTE(review): whether the selection is branch-free overall depends on
+ * fiat_secp521r1_cmovznz_u64, which is defined elsewhere - confirm.
+ */
+static void
+fiat_secp521r1_selectznz(uint64_t out1[9],
+ fiat_secp521r1_uint1 arg1,
+ const uint64_t arg2[9],
+ const uint64_t arg3[9])
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ fiat_secp521r1_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
+ fiat_secp521r1_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
+ fiat_secp521r1_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
+ fiat_secp521r1_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
+ fiat_secp521r1_cmovznz_u64(&x5, arg1, (arg2[4]), (arg3[4]));
+ fiat_secp521r1_cmovznz_u64(&x6, arg1, (arg2[5]), (arg3[5]));
+ fiat_secp521r1_cmovznz_u64(&x7, arg1, (arg2[6]), (arg3[6]));
+ fiat_secp521r1_cmovznz_u64(&x8, arg1, (arg2[7]), (arg3[7]));
+ fiat_secp521r1_cmovznz_u64(&x9, arg1, (arg2[8]), (arg3[8]));
+ out1[0] = x1; /* stores begin only after all selections are done */
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+ out1[6] = x7;
+ out1[7] = x8;
+ out1[8] = x9;
+}
+
+/*
+ * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order.
+ *
+ * Postconditions:
+ * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65]
+ *
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+ */
+static void
+fiat_secp521r1_to_bytes(
+ uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1)
+{
+ uint64_t x1;
+ fiat_secp521r1_uint1 x2;
+ uint64_t x3;
+ fiat_secp521r1_uint1 x4;
+ uint64_t x5;
+ fiat_secp521r1_uint1 x6;
+ uint64_t x7;
+ fiat_secp521r1_uint1 x8;
+ uint64_t x9;
+ fiat_secp521r1_uint1 x10;
+ uint64_t x11;
+ fiat_secp521r1_uint1 x12;
+ uint64_t x13;
+ fiat_secp521r1_uint1 x14;
+ uint64_t x15;
+ fiat_secp521r1_uint1 x16;
+ uint64_t x17;
+ fiat_secp521r1_uint1 x18;
+ uint64_t x19;
+ uint64_t x20;
+ fiat_secp521r1_uint1 x21;
+ uint64_t x22;
+ fiat_secp521r1_uint1 x23;
+ uint64_t x24;
+ fiat_secp521r1_uint1 x25;
+ uint64_t x26;
+ fiat_secp521r1_uint1 x27;
+ uint64_t x28;
+ fiat_secp521r1_uint1 x29;
+ uint64_t x30;
+ fiat_secp521r1_uint1 x31;
+ uint64_t x32;
+ fiat_secp521r1_uint1 x33;
+ uint64_t x34;
+ fiat_secp521r1_uint1 x35;
+ uint64_t x36;
+ fiat_secp521r1_uint1 x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint8_t x44;
+ uint64_t x45;
+ uint8_t x46;
+ uint64_t x47;
+ uint8_t x48;
+ uint64_t x49;
+ uint8_t x50;
+ uint64_t x51;
+ uint8_t x52;
+ uint64_t x53;
+ uint8_t x54;
+ uint64_t x55;
+ uint8_t x56;
+ uint8_t x57;
+ uint64_t x58;
+ uint8_t x59;
+ uint64_t x60;
+ uint8_t x61;
+ uint64_t x62;
+ uint8_t x63;
+ uint64_t x64;
+ uint8_t x65;
+ uint64_t x66;
+ uint8_t x67;
+ uint64_t x68;
+ uint8_t x69;
+ uint64_t x70;
+ uint8_t x71;
+ uint8_t x72;
+ uint64_t x73;
+ uint8_t x74;
+ uint64_t x75;
+ uint8_t x76;
+ uint64_t x77;
+ uint8_t x78;
+ uint64_t x79;
+ uint8_t x80;
+ uint64_t x81;
+ uint8_t x82;
+ uint64_t x83;
+ uint8_t x84;
+ uint64_t x85;
+ uint8_t x86;
+ uint8_t x87;
+ uint64_t x88;
+ uint8_t x89;
+ uint64_t x90;
+ uint8_t x91;
+ uint64_t x92;
+ uint8_t x93;
+ uint64_t x94;
+ uint8_t x95;
+ uint64_t x96;
+ uint8_t x97;
+ uint64_t x98;
+ uint8_t x99;
+ uint64_t x100;
+ uint8_t x101;
+ uint8_t x102;
+ uint8_t x103;
+ uint64_t x104;
+ uint8_t x105;
+ uint64_t x106;
+ uint8_t x107;
+ uint64_t x108;
+ uint8_t x109;
+ uint64_t x110;
+ uint8_t x111;
+ uint64_t x112;
+ uint8_t x113;
+ uint64_t x114;
+ uint8_t x115;
+ uint8_t x116;
+ uint64_t x117;
+ uint8_t x118;
+ uint64_t x119;
+ uint8_t x120;
+ uint64_t x121;
+ uint8_t x122;
+ uint64_t x123;
+ uint8_t x124;
+ uint64_t x125;
+ uint8_t x126;
+ uint64_t x127;
+ uint8_t x128;
+ uint64_t x129;
+ uint8_t x130;
+ uint8_t x131;
+ uint64_t x132;
+ uint8_t x133;
+ uint64_t x134;
+ uint8_t x135;
+ uint64_t x136;
+ uint8_t x137;
+ uint64_t x138;
+ uint8_t x139;
+ uint64_t x140;
+ uint8_t x141;
+ uint64_t x142;
+ uint8_t x143;
+ uint64_t x144;
+ uint8_t x145;
+ uint8_t x146;
+ uint64_t x147;
+ uint8_t x148;
+ uint64_t x149;
+ uint8_t x150;
+ uint64_t x151;
+ uint8_t x152;
+ uint64_t x153;
+ uint8_t x154;
+ uint64_t x155;
+ uint8_t x156;
+ uint64_t x157;
+ uint8_t x158;
+ uint64_t x159;
+ uint8_t x160;
+ uint8_t x161;
+ uint8_t x162;
+ uint64_t x163;
+ uint8_t x164;
+ uint64_t x165;
+ uint8_t x166;
+ uint64_t x167;
+ uint8_t x168;
+ uint64_t x169;
+ uint8_t x170;
+ uint64_t x171;
+ uint8_t x172;
+ uint64_t x173;
+ uint8_t x174;
+ fiat_secp521r1_uint1 x175;
+ fiat_secp521r1_subborrowx_u58(&x1, &x2, 0x0, (arg1[0]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x3, &x4, x2, (arg1[1]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x5, &x6, x4, (arg1[2]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x7, &x8, x6, (arg1[3]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x9, &x10, x8, (arg1[4]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x11, &x12, x10, (arg1[5]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x13, &x14, x12, (arg1[6]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u58(&x15, &x16, x14, (arg1[7]),
+ UINT64_C(0x3ffffffffffffff));
+ fiat_secp521r1_subborrowx_u57(&x17, &x18, x16, (arg1[8]),
+ UINT64_C(0x1ffffffffffffff));
+ fiat_secp521r1_cmovznz_u64(&x19, x18, 0x0, UINT64_C(0xffffffffffffffff));
+ fiat_secp521r1_addcarryx_u58(&x20, &x21, 0x0, x1,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x22, &x23, x21, x3,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x24, &x25, x23, x5,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x26, &x27, x25, x7,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x28, &x29, x27, x9,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x30, &x31, x29, x11,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x32, &x33, x31, x13,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u58(&x34, &x35, x33, x15,
+ (x19 & UINT64_C(0x3ffffffffffffff)));
+ fiat_secp521r1_addcarryx_u57(&x36, &x37, x35, x17,
+ (x19 & UINT64_C(0x1ffffffffffffff)));
+ x38 = (x34 << 6);
+ x39 = (x32 << 4);
+ x40 = (x30 << 2);
+ x41 = (x26 << 6);
+ x42 = (x24 << 4);
+ x43 = (x22 << 2);
+ x44 = (uint8_t)(x20 & UINT8_C(0xff));
+ x45 = (x20 >> 8);
+ x46 = (uint8_t)(x45 & UINT8_C(0xff));
+ x47 = (x45 >> 8);
+ x48 = (uint8_t)(x47 & UINT8_C(0xff));
+ x49 = (x47 >> 8);
+ x50 = (uint8_t)(x49 & UINT8_C(0xff));
+ x51 = (x49 >> 8);
+ x52 = (uint8_t)(x51 & UINT8_C(0xff));
+ x53 = (x51 >> 8);
+ x54 = (uint8_t)(x53 & UINT8_C(0xff));
+ x55 = (x53 >> 8);
+ x56 = (uint8_t)(x55 & UINT8_C(0xff));
+ x57 = (uint8_t)(x55 >> 8);
+ x58 = (x43 + (uint64_t)x57);
+ x59 = (uint8_t)(x58 & UINT8_C(0xff));
+ x60 = (x58 >> 8);
+ x61 = (uint8_t)(x60 & UINT8_C(0xff));
+ x62 = (x60 >> 8);
+ x63 = (uint8_t)(x62 & UINT8_C(0xff));
+ x64 = (x62 >> 8);
+ x65 = (uint8_t)(x64 & UINT8_C(0xff));
+ x66 = (x64 >> 8);
+ x67 = (uint8_t)(x66 & UINT8_C(0xff));
+ x68 = (x66 >> 8);
+ x69 = (uint8_t)(x68 & UINT8_C(0xff));
+ x70 = (x68 >> 8);
+ x71 = (uint8_t)(x70 & UINT8_C(0xff));
+ x72 = (uint8_t)(x70 >> 8);
+ x73 = (x42 + (uint64_t)x72);
+ x74 = (uint8_t)(x73 & UINT8_C(0xff));
+ x75 = (x73 >> 8);
+ x76 = (uint8_t)(x75 & UINT8_C(0xff));
+ x77 = (x75 >> 8);
+ x78 = (uint8_t)(x77 & UINT8_C(0xff));
+ x79 = (x77 >> 8);
+ x80 = (uint8_t)(x79 & UINT8_C(0xff));
+ x81 = (x79 >> 8);
+ x82 = (uint8_t)(x81 & UINT8_C(0xff));
+ x83 = (x81 >> 8);
+ x84 = (uint8_t)(x83 & UINT8_C(0xff));
+ x85 = (x83 >> 8);
+ x86 = (uint8_t)(x85 & UINT8_C(0xff));
+ x87 = (uint8_t)(x85 >> 8);
+ x88 = (x41 + (uint64_t)x87);
+ x89 = (uint8_t)(x88 & UINT8_C(0xff));
+ x90 = (x88 >> 8);
+ x91 = (uint8_t)(x90 & UINT8_C(0xff));
+ x92 = (x90 >> 8);
+ x93 = (uint8_t)(x92 & UINT8_C(0xff));
+ x94 = (x92 >> 8);
+ x95 = (uint8_t)(x94 & UINT8_C(0xff));
+ x96 = (x94 >> 8);
+ x97 = (uint8_t)(x96 & UINT8_C(0xff));
+ x98 = (x96 >> 8);
+ x99 = (uint8_t)(x98 & UINT8_C(0xff));
+ x100 = (x98 >> 8);
+ x101 = (uint8_t)(x100 & UINT8_C(0xff));
+ x102 = (uint8_t)(x100 >> 8);
+ x103 = (uint8_t)(x28 & UINT8_C(0xff));
+ x104 = (x28 >> 8);
+ x105 = (uint8_t)(x104 & UINT8_C(0xff));
+ x106 = (x104 >> 8);
+ x107 = (uint8_t)(x106 & UINT8_C(0xff));
+ x108 = (x106 >> 8);
+ x109 = (uint8_t)(x108 & UINT8_C(0xff));
+ x110 = (x108 >> 8);
+ x111 = (uint8_t)(x110 & UINT8_C(0xff));
+ x112 = (x110 >> 8);
+ x113 = (uint8_t)(x112 & UINT8_C(0xff));
+ x114 = (x112 >> 8);
+ x115 = (uint8_t)(x114 & UINT8_C(0xff));
+ x116 = (uint8_t)(x114 >> 8);
+ x117 = (x40 + (uint64_t)x116);
+ x118 = (uint8_t)(x117 & UINT8_C(0xff));
+ x119 = (x117 >> 8);
+ x120 = (uint8_t)(x119 & UINT8_C(0xff));
+ x121 = (x119 >> 8);
+ x122 = (uint8_t)(x121 & UINT8_C(0xff));
+ x123 = (x121 >> 8);
+ x124 = (uint8_t)(x123 & UINT8_C(0xff));
+ x125 = (x123 >> 8);
+ x126 = (uint8_t)(x125 & UINT8_C(0xff));
+ x127 = (x125 >> 8);
+ x128 = (uint8_t)(x127 & UINT8_C(0xff));
+ x129 = (x127 >> 8);
+ x130 = (uint8_t)(x129 & UINT8_C(0xff));
+ x131 = (uint8_t)(x129 >> 8);
+ x132 = (x39 + (uint64_t)x131);
+ x133 = (uint8_t)(x132 & UINT8_C(0xff));
+ x134 = (x132 >> 8);
+ x135 = (uint8_t)(x134 & UINT8_C(0xff));
+ x136 = (x134 >> 8);
+ x137 = (uint8_t)(x136 & UINT8_C(0xff));
+ x138 = (x136 >> 8);
+ x139 = (uint8_t)(x138 & UINT8_C(0xff));
+ x140 = (x138 >> 8);
+ x141 = (uint8_t)(x140 & UINT8_C(0xff));
+ x142 = (x140 >> 8);
+ x143 = (uint8_t)(x142 & UINT8_C(0xff));
+ x144 = (x142 >> 8);
+ x145 = (uint8_t)(x144 & UINT8_C(0xff));
+ x146 = (uint8_t)(x144 >> 8);
+ x147 = (x38 + (uint64_t)x146);
+ x148 = (uint8_t)(x147 & UINT8_C(0xff));
+ x149 = (x147 >> 8);
+ x150 = (uint8_t)(x149 & UINT8_C(0xff));
+ x151 = (x149 >> 8);
+ x152 = (uint8_t)(x151 & UINT8_C(0xff));
+ x153 = (x151 >> 8);
+ x154 = (uint8_t)(x153 & UINT8_C(0xff));
+ x155 = (x153 >> 8);
+ x156 = (uint8_t)(x155 & UINT8_C(0xff));
+ x157 = (x155 >> 8);
+ x158 = (uint8_t)(x157 & UINT8_C(0xff));
+ x159 = (x157 >> 8);
+ x160 = (uint8_t)(x159 & UINT8_C(0xff));
+ x161 = (uint8_t)(x159 >> 8);
+ x162 = (uint8_t)(x36 & UINT8_C(0xff));
+ x163 = (x36 >> 8);
+ x164 = (uint8_t)(x163 & UINT8_C(0xff));
+ x165 = (x163 >> 8);
+ x166 = (uint8_t)(x165 & UINT8_C(0xff));
+ x167 = (x165 >> 8);
+ x168 = (uint8_t)(x167 & UINT8_C(0xff));
+ x169 = (x167 >> 8);
+ x170 = (uint8_t)(x169 & UINT8_C(0xff));
+ x171 = (x169 >> 8);
+ x172 = (uint8_t)(x171 & UINT8_C(0xff));
+ x173 = (x171 >> 8);
+ x174 = (uint8_t)(x173 & UINT8_C(0xff));
+ x175 = (fiat_secp521r1_uint1)(x173 >> 8);
+ out1[0] = x44;
+ out1[1] = x46;
+ out1[2] = x48;
+ out1[3] = x50;
+ out1[4] = x52;
+ out1[5] = x54;
+ out1[6] = x56;
+ out1[7] = x59;
+ out1[8] = x61;
+ out1[9] = x63;
+ out1[10] = x65;
+ out1[11] = x67;
+ out1[12] = x69;
+ out1[13] = x71;
+ out1[14] = x74;
+ out1[15] = x76;
+ out1[16] = x78;
+ out1[17] = x80;
+ out1[18] = x82;
+ out1[19] = x84;
+ out1[20] = x86;
+ out1[21] = x89;
+ out1[22] = x91;
+ out1[23] = x93;
+ out1[24] = x95;
+ out1[25] = x97;
+ out1[26] = x99;
+ out1[27] = x101;
+ out1[28] = x102;
+ out1[29] = x103;
+ out1[30] = x105;
+ out1[31] = x107;
+ out1[32] = x109;
+ out1[33] = x111;
+ out1[34] = x113;
+ out1[35] = x115;
+ out1[36] = x118;
+ out1[37] = x120;
+ out1[38] = x122;
+ out1[39] = x124;
+ out1[40] = x126;
+ out1[41] = x128;
+ out1[42] = x130;
+ out1[43] = x133;
+ out1[44] = x135;
+ out1[45] = x137;
+ out1[46] = x139;
+ out1[47] = x141;
+ out1[48] = x143;
+ out1[49] = x145;
+ out1[50] = x148;
+ out1[51] = x150;
+ out1[52] = x152;
+ out1[53] = x154;
+ out1[54] = x156;
+ out1[55] = x158;
+ out1[56] = x160;
+ out1[57] = x161;
+ out1[58] = x162;
+ out1[59] = x164;
+ out1[60] = x166;
+ out1[61] = x168;
+ out1[62] = x170;
+ out1[63] = x172;
+ out1[64] = x174;
+ out1[65] = x175;
+}
+
+/*
+ * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order.
+ *
+ * Postconditions:
+ * eval out1 mod m = bytes_eval arg1 mod m
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+ */
+static void
+fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1,
+ const uint8_t arg1[66])
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint8_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ uint64_t x21;
+ uint64_t x22;
+ uint64_t x23;
+ uint64_t x24;
+ uint64_t x25;
+ uint64_t x26;
+ uint64_t x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint8_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ uint64_t x45;
+ uint64_t x46;
+ uint64_t x47;
+ uint64_t x48;
+ uint64_t x49;
+ uint64_t x50;
+ uint64_t x51;
+ uint64_t x52;
+ uint64_t x53;
+ uint64_t x54;
+ uint64_t x55;
+ uint64_t x56;
+ uint64_t x57;
+ uint64_t x58;
+ uint64_t x59;
+ uint64_t x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint8_t x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint8_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ uint64_t x82;
+ uint64_t x83;
+ uint8_t x84;
+ uint64_t x85;
+ uint64_t x86;
+ uint64_t x87;
+ uint64_t x88;
+ uint64_t x89;
+ uint64_t x90;
+ uint64_t x91;
+ uint64_t x92;
+ uint8_t x93;
+ uint64_t x94;
+ uint64_t x95;
+ uint64_t x96;
+ uint64_t x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ uint64_t x101;
+ uint64_t x102;
+ uint64_t x103;
+ uint64_t x104;
+ uint64_t x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint8_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ uint64_t x117;
+ uint8_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ uint64_t x121;
+ uint64_t x122;
+ uint64_t x123;
+ uint64_t x124;
+ uint64_t x125;
+ uint64_t x126;
+ uint8_t x127;
+ uint64_t x128;
+ uint64_t x129;
+ uint64_t x130;
+ uint64_t x131;
+ uint64_t x132;
+ uint64_t x133;
+ uint64_t x134;
+ uint64_t x135;
+ uint64_t x136;
+ uint64_t x137;
+ uint64_t x138;
+ uint64_t x139;
+ uint64_t x140;
+ uint64_t x141;
+ x1 = ((uint64_t)(fiat_secp521r1_uint1)(arg1[65]) << 56);
+ x2 = ((uint64_t)(arg1[64]) << 48);
+ x3 = ((uint64_t)(arg1[63]) << 40);
+ x4 = ((uint64_t)(arg1[62]) << 32);
+ x5 = ((uint64_t)(arg1[61]) << 24);
+ x6 = ((uint64_t)(arg1[60]) << 16);
+ x7 = ((uint64_t)(arg1[59]) << 8);
+ x8 = (arg1[58]);
+ x9 = ((uint64_t)(arg1[57]) << 50);
+ x10 = ((uint64_t)(arg1[56]) << 42);
+ x11 = ((uint64_t)(arg1[55]) << 34);
+ x12 = ((uint64_t)(arg1[54]) << 26);
+ x13 = ((uint64_t)(arg1[53]) << 18);
+ x14 = ((uint64_t)(arg1[52]) << 10);
+ x15 = ((uint64_t)(arg1[51]) << 2);
+ x16 = ((uint64_t)(arg1[50]) << 52);
+ x17 = ((uint64_t)(arg1[49]) << 44);
+ x18 = ((uint64_t)(arg1[48]) << 36);
+ x19 = ((uint64_t)(arg1[47]) << 28);
+ x20 = ((uint64_t)(arg1[46]) << 20);
+ x21 = ((uint64_t)(arg1[45]) << 12);
+ x22 = ((uint64_t)(arg1[44]) << 4);
+ x23 = ((uint64_t)(arg1[43]) << 54);
+ x24 = ((uint64_t)(arg1[42]) << 46);
+ x25 = ((uint64_t)(arg1[41]) << 38);
+ x26 = ((uint64_t)(arg1[40]) << 30);
+ x27 = ((uint64_t)(arg1[39]) << 22);
+ x28 = ((uint64_t)(arg1[38]) << 14);
+ x29 = ((uint64_t)(arg1[37]) << 6);
+ x30 = ((uint64_t)(arg1[36]) << 56);
+ x31 = ((uint64_t)(arg1[35]) << 48);
+ x32 = ((uint64_t)(arg1[34]) << 40);
+ x33 = ((uint64_t)(arg1[33]) << 32);
+ x34 = ((uint64_t)(arg1[32]) << 24);
+ x35 = ((uint64_t)(arg1[31]) << 16);
+ x36 = ((uint64_t)(arg1[30]) << 8);
+ x37 = (arg1[29]);
+ x38 = ((uint64_t)(arg1[28]) << 50);
+ x39 = ((uint64_t)(arg1[27]) << 42);
+ x40 = ((uint64_t)(arg1[26]) << 34);
+ x41 = ((uint64_t)(arg1[25]) << 26);
+ x42 = ((uint64_t)(arg1[24]) << 18);
+ x43 = ((uint64_t)(arg1[23]) << 10);
+ x44 = ((uint64_t)(arg1[22]) << 2);
+ x45 = ((uint64_t)(arg1[21]) << 52);
+ x46 = ((uint64_t)(arg1[20]) << 44);
+ x47 = ((uint64_t)(arg1[19]) << 36);
+ x48 = ((uint64_t)(arg1[18]) << 28);
+ x49 = ((uint64_t)(arg1[17]) << 20);
+ x50 = ((uint64_t)(arg1[16]) << 12);
+ x51 = ((uint64_t)(arg1[15]) << 4);
+ x52 = ((uint64_t)(arg1[14]) << 54);
+ x53 = ((uint64_t)(arg1[13]) << 46);
+ x54 = ((uint64_t)(arg1[12]) << 38);
+ x55 = ((uint64_t)(arg1[11]) << 30);
+ x56 = ((uint64_t)(arg1[10]) << 22);
+ x57 = ((uint64_t)(arg1[9]) << 14);
+ x58 = ((uint64_t)(arg1[8]) << 6);
+ x59 = ((uint64_t)(arg1[7]) << 56);
+ x60 = ((uint64_t)(arg1[6]) << 48);
+ x61 = ((uint64_t)(arg1[5]) << 40);
+ x62 = ((uint64_t)(arg1[4]) << 32);
+ x63 = ((uint64_t)(arg1[3]) << 24);
+ x64 = ((uint64_t)(arg1[2]) << 16);
+ x65 = ((uint64_t)(arg1[1]) << 8);
+ x66 = (arg1[0]);
+ x67 = (x65 + (uint64_t)x66);
+ x68 = (x64 + x67);
+ x69 = (x63 + x68);
+ x70 = (x62 + x69);
+ x71 = (x61 + x70);
+ x72 = (x60 + x71);
+ x73 = (x59 + x72);
+ x74 = (x73 & UINT64_C(0x3ffffffffffffff));
+ x75 = (uint8_t)(x73 >> 58);
+ x76 = (x58 + (uint64_t)x75);
+ x77 = (x57 + x76);
+ x78 = (x56 + x77);
+ x79 = (x55 + x78);
+ x80 = (x54 + x79);
+ x81 = (x53 + x80);
+ x82 = (x52 + x81);
+ x83 = (x82 & UINT64_C(0x3ffffffffffffff));
+ x84 = (uint8_t)(x82 >> 58);
+ x85 = (x51 + (uint64_t)x84);
+ x86 = (x50 + x85);
+ x87 = (x49 + x86);
+ x88 = (x48 + x87);
+ x89 = (x47 + x88);
+ x90 = (x46 + x89);
+ x91 = (x45 + x90);
+ x92 = (x91 & UINT64_C(0x3ffffffffffffff));
+ x93 = (uint8_t)(x91 >> 58);
+ x94 = (x44 + (uint64_t)x93);
+ x95 = (x43 + x94);
+ x96 = (x42 + x95);
+ x97 = (x41 + x96);
+ x98 = (x40 + x97);
+ x99 = (x39 + x98);
+ x100 = (x38 + x99);
+ x101 = (x36 + (uint64_t)x37);
+ x102 = (x35 + x101);
+ x103 = (x34 + x102);
+ x104 = (x33 + x103);
+ x105 = (x32 + x104);
+ x106 = (x31 + x105);
+ x107 = (x30 + x106);
+ x108 = (x107 & UINT64_C(0x3ffffffffffffff));
+ x109 = (uint8_t)(x107 >> 58);
+ x110 = (x29 + (uint64_t)x109);
+ x111 = (x28 + x110);
+ x112 = (x27 + x111);
+ x113 = (x26 + x112);
+ x114 = (x25 + x113);
+ x115 = (x24 + x114);
+ x116 = (x23 + x115);
+ x117 = (x116 & UINT64_C(0x3ffffffffffffff));
+ x118 = (uint8_t)(x116 >> 58);
+ x119 = (x22 + (uint64_t)x118);
+ x120 = (x21 + x119);
+ x121 = (x20 + x120);
+ x122 = (x19 + x121);
+ x123 = (x18 + x122);
+ x124 = (x17 + x123);
+ x125 = (x16 + x124);
+ x126 = (x125 & UINT64_C(0x3ffffffffffffff));
+ x127 = (uint8_t)(x125 >> 58);
+ x128 = (x15 + (uint64_t)x127);
+ x129 = (x14 + x128);
+ x130 = (x13 + x129);
+ x131 = (x12 + x130);
+ x132 = (x11 + x131);
+ x133 = (x10 + x132);
+ x134 = (x9 + x133);
+ x135 = (x7 + (uint64_t)x8);
+ x136 = (x6 + x135);
+ x137 = (x5 + x136);
+ x138 = (x4 + x137);
+ x139 = (x3 + x138);
+ x140 = (x2 + x139);
+ x141 = (x1 + x140);
+ out1[0] = x74;
+ out1[1] = x83;
+ out1[2] = x92;
+ out1[3] = x100;
+ out1[4] = x108;
+ out1[5] = x117;
+ out1[6] = x126;
+ out1[7] = x134;
+ out1[8] = x141;
+}
+
+/* END verbatim fiat code */
+
+/* curve-related constants */
+
+/*
+ * The integer 1 as a nine-limb value: only the least-significant limb
+ * (index 0) is non-zero.
+ */
+static const limb_t const_one[9] = {
+ UINT64_C(0x0000000000000001), /* limb 0 */
+ UINT64_C(0x0000000000000000), /* limb 1 */
+ UINT64_C(0x0000000000000000), /* limb 2 */
+ UINT64_C(0x0000000000000000), /* limb 3 */
+ UINT64_C(0x0000000000000000), /* limb 4 */
+ UINT64_C(0x0000000000000000), /* limb 5 */
+ UINT64_C(0x0000000000000000), /* limb 6 */
+ UINT64_C(0x0000000000000000), /* limb 7 */
+ UINT64_C(0x0000000000000000)  /* limb 8 */
+};
+
+/*
+ * Nine-limb constant named "b", least-significant limb first. Limbs 0..7
+ * are below 2^58 and limb 8 is below 2^57, matching the limb widths used
+ * by the field routines above.
+ * NOTE(review): presumably the coefficient b of the curve equation -
+ * confirm against the curve parameters.
+ */
+static const limb_t const_b[9] = {
+ UINT64_C(0x03451FD46B503F00), /* limb 0 */
+ UINT64_C(0x00F7E20F4B0D3C7B), /* limb 1 */
+ UINT64_C(0x000BD3BB1BF07357), /* limb 2 */
+ UINT64_C(0x0147B1FA4DEC594B), /* limb 3 */
+ UINT64_C(0x018EF109E1561939), /* limb 4 */
+ UINT64_C(0x026CC57CEE2D2264), /* limb 5 */
+ UINT64_C(0x00540EEA2DA725B9), /* limb 6 */
+ UINT64_C(0x02687E4A688682DA), /* limb 7 */
+ UINT64_C(0x0051953EB9618E1C)  /* limb 8 */
+};
+
+/* LUT for scalar multiplication by comb interleaving */
+static const pt_aff_t lut_cmb[13][16] = {
+ {
+ { { UINT64_C(0x017E7E31C2E5BD66), UINT64_C(0x022CF0615A90A6FE),
+ UINT64_C(0x00127A2FFA8DE334), UINT64_C(0x01DFBF9D64A3F877),
+ UINT64_C(0x006B4D3DBAA14B5E), UINT64_C(0x014FED487E0A2BD8),
+ UINT64_C(0x015B4429C6481390), UINT64_C(0x03A73678FB2D988E),
+ UINT64_C(0x00C6858E06B70404) },
+ { UINT64_C(0x00BE94769FD16650), UINT64_C(0x031C21A89CB09022),
+ UINT64_C(0x039013FAD0761353), UINT64_C(0x02657BD099031542),
+ UINT64_C(0x03273E662C97EE72), UINT64_C(0x01E6D11A05EBEF45),
+ UINT64_C(0x03D1BD998F544495), UINT64_C(0x03001172297ED0B1),
+ UINT64_C(0x011839296A789A3B) } },
+ { { UINT64_C(0x01919D2EDE37AD7D), UINT64_C(0x0124218B0CBA8169),
+ UINT64_C(0x03D16B59FE21BAEB), UINT64_C(0x0128E920C814769A),
+ UINT64_C(0x012D7A8DD1AD3F16), UINT64_C(0x008F66AE796B5E84),
+ UINT64_C(0x0159479B52A6E5B1), UINT64_C(0x0065776475A992D6),
+ UINT64_C(0x01A73D352443DE29) },
+ { UINT64_C(0x03588CA1EE86C0E5), UINT64_C(0x01726F24E9641097),
+ UINT64_C(0x00ED1DEC3C70CF10), UINT64_C(0x033E3715D6C0B56B),
+ UINT64_C(0x03A355CEEC2E2DD4), UINT64_C(0x02A740C5F4BE2AC7),
+ UINT64_C(0x03814F2F1557FA82), UINT64_C(0x0377665E7E1B1B2A),
+ UINT64_C(0x013E9B03B97DFA62) } },
+ { { UINT64_C(0x01AB5096EC8F3078), UINT64_C(0x01F879B624C5CE35),
+ UINT64_C(0x03EAF137E79A329D), UINT64_C(0x01B578C0508DC44B),
+ UINT64_C(0x00F177ACE4383C0C), UINT64_C(0x014FC34933C0F6AE),
+ UINT64_C(0x00EB0BF7A596EFDB), UINT64_C(0x00CB1CF6F0CE4701),
+ UINT64_C(0x00652BF3C52927A4) },
+ { UINT64_C(0x033CC3E8DEB090CB), UINT64_C(0x0001C95CD53DFE05),
+ UINT64_C(0x000211CF5FF79D1F), UINT64_C(0x03241CB3CDD0C455),
+ UINT64_C(0x01A0347087BB6897), UINT64_C(0x01CB80147B7605F2),
+ UINT64_C(0x00112911CD8FE8E8), UINT64_C(0x035BB228ADCC452A),
+ UINT64_C(0x015BE6EF1BDD6601) } },
+ { { UINT64_C(0x01CEAD882816ECD4), UINT64_C(0x014FD43F70986680),
+ UINT64_C(0x01F30DCE3BBC46F9), UINT64_C(0x002AFF1A6363269B),
+ UINT64_C(0x02F7114C5D8C308D), UINT64_C(0x01520C8A3C0634B0),
+ UINT64_C(0x0073A0C5F22E0E8F), UINT64_C(0x018D1BBAD97F682C),
+ UINT64_C(0x0056D5D1D99D5B7F) },
+ { UINT64_C(0x006B8BC90525251B), UINT64_C(0x019C4A9777BF1ED7),
+ UINT64_C(0x0234591CE1A5F9E7), UINT64_C(0x024F37B278AE548E),
+ UINT64_C(0x0226CBDE556BD0F2), UINT64_C(0x02093C375C76F662),
+ UINT64_C(0x0168478B5C582D02), UINT64_C(0x0284434760C5E8E7),
+ UINT64_C(0x003D2D1B7D9BAAA2) } },
+ { { UINT64_C(0x0345627967CBE207), UINT64_C(0x002EAF61734A1987),
+ UINT64_C(0x016DF725A318F4F5), UINT64_C(0x00E584D368D7CF15),
+ UINT64_C(0x01B8C6B6657429E1), UINT64_C(0x0221D1A64B12AC51),
+ UINT64_C(0x016D488ED34541B9), UINT64_C(0x00609A8BD6FC55C5),
+ UINT64_C(0x01585389E359E1E2) },
+ { UINT64_C(0x02A0EA86B9AD2A4E), UINT64_C(0x030ABA4A2203CD0E),
+ UINT64_C(0x02ECF4ABFD87D736), UINT64_C(0x01D5815EB2103FD5),
+ UINT64_C(0x023DDB446E0D69E5), UINT64_C(0x03873AEDB2096E89),
+ UINT64_C(0x02E938E3088A654E), UINT64_C(0x03CE7C2D5555E89E),
+ UINT64_C(0x002A2E618C9A8AED) } },
+ { { UINT64_C(0x00C0E02DDA0CDB9A), UINT64_C(0x030093E9326A40BB),
+ UINT64_C(0x01AEBE3191085015), UINT64_C(0x00CC998F686F466C),
+ UINT64_C(0x00F2991652F3DBC5), UINT64_C(0x0305E12550FBCB15),
+ UINT64_C(0x00315CFED5DC7ED7), UINT64_C(0x03FD51BC68E55CED),
+ UINT64_C(0x008A75841259FDED) },
+ { UINT64_C(0x00874F92CE48C808), UINT64_C(0x032038FD2066D756),
+ UINT64_C(0x0331914A95336DCA), UINT64_C(0x003A2D0A92ACE248),
+ UINT64_C(0x00E0B9B82B1BC8A9), UINT64_C(0x002F4124FB4BA575),
+ UINT64_C(0x00FB2293ADD56621), UINT64_C(0x00A6127432A1DC15),
+ UINT64_C(0x0096FB303FCBBA21) } },
+ { { UINT64_C(0x0087848D32FBCDA7), UINT64_C(0x030EC02ACE3BFE06),
+ UINT64_C(0x025E79AB88EE94BE), UINT64_C(0x002380F265A8D542),
+ UINT64_C(0x02AF5B866132C459), UINT64_C(0x006D308E13BB74AF),
+ UINT64_C(0x024861A93F736CDE), UINT64_C(0x02B6735E1974AD24),
+ UINT64_C(0x007E3E98F984C396) },
+ { UINT64_C(0x011A01FB022A71C9), UINT64_C(0x027AABE445FA7DCA),
+ UINT64_C(0x01D351CBFBBC3619), UINT64_C(0x0160E2F1D8FC9B7F),
+ UINT64_C(0x025C1E212AC1BD5D), UINT64_C(0x03550871A71E99EB),
+ UINT64_C(0x02D5A08CED50A386), UINT64_C(0x03B6A468649B6A8F),
+ UINT64_C(0x0108EE58EB6D781F) } },
+ { { UINT64_C(0x01AFE337BCB8DB55), UINT64_C(0x0365A6078FE4AF7A),
+ UINT64_C(0x03D1C8FC0331D9B8), UINT64_C(0x009F6F403FF9E1D6),
+ UINT64_C(0x02DF128E11B91CCE), UINT64_C(0x01028214B5A5ED4C),
+ UINT64_C(0x014300FB8FBCC30B), UINT64_C(0x0197C105563F151B),
+ UINT64_C(0x006B6AD89ABCB924) },
+ { UINT64_C(0x02343480A1475465), UINT64_C(0x036433111AAF7655),
+ UINT64_C(0x022232C96C99246F), UINT64_C(0x0322651C2A008523),
+ UINT64_C(0x0197485ED57E9062), UINT64_C(0x02B4832E92D8841A),
+ UINT64_C(0x02DBF63DF0496A9B), UINT64_C(0x0075A9F399348CCF),
+ UINT64_C(0x01B468DA27157139) } },
+ { { UINT64_C(0x02F817A853110AE0), UINT64_C(0x00C10ABC3469041D),
+ UINT64_C(0x0399B5681380FF8C), UINT64_C(0x0399D3F80A1F7D39),
+ UINT64_C(0x0269250858760A69), UINT64_C(0x03E8ACED3599493C),
+ UINT64_C(0x023906A99EE9E269), UINT64_C(0x03684E82E1D19164),
+ UINT64_C(0x01B00DDB707F130E) },
+ { UINT64_C(0x01B9CB7C70E64647), UINT64_C(0x00156530ADD57D4D),
+ UINT64_C(0x0357F16ADF420E69), UINT64_C(0x013BDB742FC34BD9),
+ UINT64_C(0x0322A1323DF9DA56), UINT64_C(0x01A6442A635A2B0A),
+ UINT64_C(0x01DD106B799534CF), UINT64_C(0x01DB6F04475392BB),
+ UINT64_C(0x0085683F1D7DB165) } },
+ { { UINT64_C(0x00FF0B2418D6A19B), UINT64_C(0x03D0C79C96EF791E),
+ UINT64_C(0x0157D7A45970DFEC), UINT64_C(0x0258D899A59E48C9),
+ UINT64_C(0x033790E7F1FA3B30), UINT64_C(0x0177D51FBFFC2B36),
+ UINT64_C(0x021A07245B77E075), UINT64_C(0x00D21F03E5230B56),
+ UINT64_C(0x00998DCCE486419C) },
+ { UINT64_C(0x01091A695BFD0575), UINT64_C(0x013627AA7EFF912A),
+ UINT64_C(0x039991631C377F5A), UINT64_C(0x00FFCBAE33E6C3B0),
+ UINT64_C(0x036545772773AD96), UINT64_C(0x02DEF3D2B3143BB8),
+ UINT64_C(0x01B245D67D28AEE2), UINT64_C(0x03B5730E50925D4D),
+ UINT64_C(0x0137D5DA0626A021) } },
+ { { UINT64_C(0x02EF399693C8C9ED), UINT64_C(0x032480E4E91B4B50),
+ UINT64_C(0x03EAED827D75B37A), UINT64_C(0x02B9358A8C276525),
+ UINT64_C(0x019C467FA946257E), UINT64_C(0x03B457A606548F9D),
+ UINT64_C(0x02D3B10268BB98C2), UINT64_C(0x034BECF321542167),
+ UINT64_C(0x01A1CBB2C11A742B) },
+ { UINT64_C(0x020BC43C9CBA4DF5), UINT64_C(0x02C3C5D92732D879),
+ UINT64_C(0x03A372C63EEC57C9), UINT64_C(0x014F6920CA56FAD0),
+ UINT64_C(0x036BAFA7F7DF741A), UINT64_C(0x01464F9B06028A5B),
+ UINT64_C(0x000CE62E83C0059C), UINT64_C(0x00F520B04B69F179),
+ UINT64_C(0x011A209D7D4F8EEB) } },
+ { { UINT64_C(0x01C6A5ECE2AF535C), UINT64_C(0x007C6B09AB9601A8),
+ UINT64_C(0x038E9A5EC53E207E), UINT64_C(0x03F26BD6C2BFA78F),
+ UINT64_C(0x010CDD45101F6F83), UINT64_C(0x0217ECA0924348D3),
+ UINT64_C(0x0147B8EEE7A39BA7), UINT64_C(0x024DDB6C72B3B17D),
+ UINT64_C(0x01AE0B275D729015) },
+ { UINT64_C(0x0015C3536FA0D000), UINT64_C(0x02D1142A348E15B6),
+ UINT64_C(0x0327BB07DD0C2213), UINT64_C(0x0187BA5FF3D0F09E),
+ UINT64_C(0x0044C2DC0E108433), UINT64_C(0x0034160CAD0C591E),
+ UINT64_C(0x028471C7D759FF89), UINT64_C(0x00E019A28A163F01),
+ UINT64_C(0x00F2C97A825E5385) } },
+ { { UINT64_C(0x038C2460BF70ACE0), UINT64_C(0x0383AC70974FEC4F),
+ UINT64_C(0x03E2AA648FF27E41), UINT64_C(0x0245F0DBB9355BA1),
+ UINT64_C(0x005499994AA91856), UINT64_C(0x006C41EC471DCB23),
+ UINT64_C(0x01FF9D2007310265), UINT64_C(0x0060D28D61D29BD7),
+ UINT64_C(0x0154E84C6D5C5A9A) },
+ { UINT64_C(0x0325BCE404C78230), UINT64_C(0x038A9519CB9ADB50),
+ UINT64_C(0x0370A6A5972F5EED), UINT64_C(0x00D5CBEF06834788),
+ UINT64_C(0x00151666A6DEE354), UINT64_C(0x0008A831FD9B0A22),
+ UINT64_C(0x0360D3F15A923EB0), UINT64_C(0x011CEB88A8A3E02E),
+ UINT64_C(0x00CD0FDCE9171910) } },
+ { { UINT64_C(0x017643017002D68B), UINT64_C(0x01581124BB115A0D),
+ UINT64_C(0x03AEDA0D3163CB21), UINT64_C(0x00F69C67520D44D4),
+ UINT64_C(0x03E135854D80B212), UINT64_C(0x0393E18B0CFCD461),
+ UINT64_C(0x01E646F8739535D0), UINT64_C(0x02DA9D8A9353AE22),
+ UINT64_C(0x0160373EDF8218F9) },
+ { UINT64_C(0x03E6AECA5D90B740), UINT64_C(0x03FF9C27516B2CFC),
+ UINT64_C(0x034F4A8BB572E463), UINT64_C(0x007B64BAF1504EE1),
+ UINT64_C(0x021A1B22011EFA49), UINT64_C(0x03D4B0EED295BDE3),
+ UINT64_C(0x006A3FA9FD193C5C), UINT64_C(0x038717960A1006B0),
+ UINT64_C(0x00F1597050014DCF) } },
+ { { UINT64_C(0x003927618EDA25DC), UINT64_C(0x0361657547DB658B),
+ UINT64_C(0x02B8E847FFB9EF33), UINT64_C(0x001A1DB5CA45000E),
+ UINT64_C(0x037664A1305CA9BC), UINT64_C(0x0218997B0A2FBCE3),
+ UINT64_C(0x01A085FF9F45131E), UINT64_C(0x00A1F6CF07EFF2D9),
+ UINT64_C(0x0174C644D6C94B68) },
+ { UINT64_C(0x007BBBC4821A0C30), UINT64_C(0x02649F09BAEFEF46),
+ UINT64_C(0x0332D706D303F067), UINT64_C(0x0254B383642D4309),
+ UINT64_C(0x0395AD34B7BE0E21), UINT64_C(0x02D9107F2D73D7AD),
+ UINT64_C(0x037B7820233EF8FC), UINT64_C(0x0279A016B3256D06),
+ UINT64_C(0x011AF3A7C2F87F41) } },
+ { { UINT64_C(0x0257D0E0C16A8803), UINT64_C(0x03ED792238920488),
+ UINT64_C(0x001AC09CD6B220DC), UINT64_C(0x02A4132750A7F053),
+ UINT64_C(0x00A5E7726CD65543), UINT64_C(0x01F0A9985C982A0F),
+ UINT64_C(0x0307B7DB57458965), UINT64_C(0x01985401A96336DC),
+ UINT64_C(0x00D8E9920CF30F0C) },
+ { UINT64_C(0x024677C739792D19), UINT64_C(0x02F65F1ED50C62B2),
+ UINT64_C(0x0068CAE4CC263AA1), UINT64_C(0x00C913451E404E6A),
+ UINT64_C(0x00BED1AA30F76B8C), UINT64_C(0x03C4320182BBEDCB),
+ UINT64_C(0x00A30EC8B5406328), UINT64_C(0x00E61F7C2704E885),
+ UINT64_C(0x0127B023B5454A66) } },
+ },
+ {
+ { { UINT64_C(0x00E9E114E43C6A8B), UINT64_C(0x027E2C20105A2E23),
+ UINT64_C(0x03D5B5FA745094EE), UINT64_C(0x01337080223BD7FF),
+ UINT64_C(0x00D8CCA5AD4589D8), UINT64_C(0x0132DCA140336E19),
+ UINT64_C(0x0302098FAB8EE167), UINT64_C(0x00625B5791BF1AAD),
+ UINT64_C(0x01ECCAEB2EF79CDB) },
+ { UINT64_C(0x01886BBC26B04438), UINT64_C(0x004F43B6559C663D),
+ UINT64_C(0x035D8CA99B91E616), UINT64_C(0x01354ED06659D27A),
+ UINT64_C(0x0054DF4765586194), UINT64_C(0x021052BBF70881C7),
+ UINT64_C(0x031C5EA1F6288A8B), UINT64_C(0x018AC1ACD36CBDFF),
+ UINT64_C(0x002E5EDF2873FF52) } },
+ { { UINT64_C(0x0192DA26804ED5E3), UINT64_C(0x019DEC17F31925DE),
+ UINT64_C(0x01585208EBD95AC4), UINT64_C(0x039C6674D066C682),
+ UINT64_C(0x000715A11D1C0CFA), UINT64_C(0x032AD56C1F218BD5),
+ UINT64_C(0x0310FABD23E934F9), UINT64_C(0x009AF7F574942B50),
+ UINT64_C(0x005E0976782CAEF4) },
+ { UINT64_C(0x038B0A7A2A7D5B37), UINT64_C(0x0315653FB7DA77BD),
+ UINT64_C(0x023F157F76616F31), UINT64_C(0x03C8C103329ACAE7),
+ UINT64_C(0x005A72502EE9CFA2), UINT64_C(0x03204345A2A46FC3),
+ UINT64_C(0x03666DC71F8A5B63), UINT64_C(0x01671725C07390A9),
+ UINT64_C(0x01E82DA80D6C216A) } },
+ { { UINT64_C(0x02F28395A29D2024), UINT64_C(0x031A09859C9B6A2D),
+ UINT64_C(0x0047073FD20F177A), UINT64_C(0x03D961594C7CA571),
+ UINT64_C(0x019555237A9B2EC3), UINT64_C(0x029EFFFB7289E9D9),
+ UINT64_C(0x008D541E497546F7), UINT64_C(0x0270E93D46DCEE94),
+ UINT64_C(0x00396B23A204BEFD) },
+ { UINT64_C(0x024295052DDD93A9), UINT64_C(0x0081670F33C07709),
+ UINT64_C(0x00B1D851D4CDFDA9), UINT64_C(0x014DF8329142BB29),
+ UINT64_C(0x00CDDB9A15F7FCFB), UINT64_C(0x0225454F3A1F5B86),
+ UINT64_C(0x01A46C8B126C191D), UINT64_C(0x03D3D3229D104DF9),
+ UINT64_C(0x018B36AD8A91DE12) } },
+ { { UINT64_C(0x008FA75A590E92D6), UINT64_C(0x02223AFBB681AD2D),
+ UINT64_C(0x000DD9E71FEC0AB1), UINT64_C(0x03B4A988F4ABFEC5),
+ UINT64_C(0x02BDD3FD9A8FB4C8), UINT64_C(0x037A5B9AD9171110),
+ UINT64_C(0x0225D2934ADB68F2), UINT64_C(0x008BA6F5E067B164),
+ UINT64_C(0x014EA0A8B0C5768B) },
+ { UINT64_C(0x000AB8407662F537), UINT64_C(0x02F781E22DFF31BF),
+ UINT64_C(0x03E22656A1F21F75), UINT64_C(0x01054C62C579B73D),
+ UINT64_C(0x0177A8529E6C1116), UINT64_C(0x03211865DCC5D46F),
+ UINT64_C(0x012706123E7C2723), UINT64_C(0x0396C31AADED99AB),
+ UINT64_C(0x01637E315762AAD0) } },
+ { { UINT64_C(0x03847D336B9839DA), UINT64_C(0x02200E98133D266E),
+ UINT64_C(0x0039A8261B62D7DC), UINT64_C(0x033295F072A9D5EA),
+ UINT64_C(0x000C3FE4DCCB2B2A), UINT64_C(0x03907B7861011A91),
+ UINT64_C(0x023BC0EFEDB5EE58), UINT64_C(0x0288D6CD63BC03CD),
+ UINT64_C(0x01280E54E8A553CA) },
+ { UINT64_C(0x036493BB1C1962CE), UINT64_C(0x0361F9CAD30DAC24),
+ UINT64_C(0x023856E058F7248C), UINT64_C(0x01EBC4CE9BBA1951),
+ UINT64_C(0x00FE14205169D78D), UINT64_C(0x01237D85837C8C98),
+ UINT64_C(0x017C4E2A95E40B90), UINT64_C(0x004E446F2E2C7819),
+ UINT64_C(0x0007FA80EDA9F2C8) } },
+ { { UINT64_C(0x009A65815D2BF9A7), UINT64_C(0x027CB047E8DF8668),
+ UINT64_C(0x0391C32A60456677), UINT64_C(0x01CBC26A69AB3F09),
+ UINT64_C(0x0334D4D8DE25229B), UINT64_C(0x0383C0FA69B0DD79),
+ UINT64_C(0x01D206CDCC54B9E2), UINT64_C(0x02E51DE738338588),
+ UINT64_C(0x006112D5229EA977) },
+ { UINT64_C(0x03CE85BEE20C30CB), UINT64_C(0x02FEBC02D12BC9D5),
+ UINT64_C(0x02AEDC3A968E7052), UINT64_C(0x02090B846E5AD878),
+ UINT64_C(0x00E4B6AEE2DDC2E3), UINT64_C(0x00269BE91139208A),
+ UINT64_C(0x02FEA688006D25C9), UINT64_C(0x002F5EFACF2F785D),
+ UINT64_C(0x009FE82D05CAC96A) } },
+ { { UINT64_C(0x02EE8F69AB2E6D92), UINT64_C(0x0213F64F73B9A354),
+ UINT64_C(0x000A9DDA2E925D3C), UINT64_C(0x0192E31297313B4F),
+ UINT64_C(0x02B3145C4DD947AF), UINT64_C(0x03401B39394615DA),
+ UINT64_C(0x01C98D9DFBE6AE7D), UINT64_C(0x02BB8069EC7A7746),
+ UINT64_C(0x00A8BDC9CF002A7B) },
+ { UINT64_C(0x00A3BF702EB71C5F), UINT64_C(0x00A25EDAE6446CE2),
+ UINT64_C(0x00108D65D5F288B8), UINT64_C(0x02FF972C1494ABED),
+ UINT64_C(0x0398342A5B4A102C), UINT64_C(0x00CD83A6E3855FF3),
+ UINT64_C(0x02D6848441981C93), UINT64_C(0x0335A209E0E8D9AA),
+ UINT64_C(0x01ED6F04D42258A5) } },
+ { { UINT64_C(0x01FC3B47C1490429), UINT64_C(0x01B9A21B27B6F4B1),
+ UINT64_C(0x0193FF421EE32901), UINT64_C(0x03CC9F551147E445),
+ UINT64_C(0x01773B6B14BB7010), UINT64_C(0x005040A2326FD6EA),
+ UINT64_C(0x01949206C0BB7211), UINT64_C(0x02643DEA7E3C37CC),
+ UINT64_C(0x01725F6694BF623F) },
+ { UINT64_C(0x014D9BD8587CA374), UINT64_C(0x020B8D6C1F3C983C),
+ UINT64_C(0x0395B0E3A7CCCE2F), UINT64_C(0x0071FCA214298293),
+ UINT64_C(0x038CF96F2462B942), UINT64_C(0x00DD1C97E2E6BCA4),
+ UINT64_C(0x00DEC4ACF114C9D6), UINT64_C(0x005DCE68C5288587),
+ UINT64_C(0x017B1DC591DEA2A9) } },
+ { { UINT64_C(0x01A03D95A3ACF0F9), UINT64_C(0x0123031B8850C86B),
+ UINT64_C(0x0269AB94408A086E), UINT64_C(0x0181DEF245438334),
+ UINT64_C(0x00AB4F62CC0E7BA5), UINT64_C(0x0294A03CC0C2A98D),
+ UINT64_C(0x02234FBFCCAA23F6), UINT64_C(0x0304B9AF501D1961),
+ UINT64_C(0x0037258E9F9B8667) },
+ { UINT64_C(0x0344657939436D81), UINT64_C(0x010709812083B7CE),
+ UINT64_C(0x00DBCA5B5A81714D), UINT64_C(0x00396E25D33E3896),
+ UINT64_C(0x00C0A65FA9547A23), UINT64_C(0x03F6796EDC3F72D8),
+ UINT64_C(0x022AA55EA0053589), UINT64_C(0x031E838C917FDA1B),
+ UINT64_C(0x014AF707C515D93F) } },
+ { { UINT64_C(0x00E48C0436C8D427), UINT64_C(0x02A85992128BD380),
+ UINT64_C(0x03861C4538E26A42), UINT64_C(0x027A6E7784D042DB),
+ UINT64_C(0x0129555575E66B0A), UINT64_C(0x017362D6E2713125),
+ UINT64_C(0x00A08F82306ED961), UINT64_C(0x007FCDDA0F78CBC0),
+ UINT64_C(0x010F4598B67DA097) },
+ { UINT64_C(0x03448C05AD400463), UINT64_C(0x03CB26D3975CCFCD),
+ UINT64_C(0x0067B9FD99A88F1D), UINT64_C(0x001F257A56DADDC1),
+ UINT64_C(0x03AEAFB6384BA84C), UINT64_C(0x0010C9301FE7F887),
+ UINT64_C(0x03D65C213A46C68C), UINT64_C(0x029BB4A1F8A5E81E),
+ UINT64_C(0x00C1838AFD6E3F39) } },
+ { { UINT64_C(0x03CE07505924C15F), UINT64_C(0x0043A08ED31A1B99),
+ UINT64_C(0x0339C4C25E8B8B88), UINT64_C(0x0380DFF73DEBF4DA),
+ UINT64_C(0x031FBA11E366BE60), UINT64_C(0x001D2B7C0FA8BD42),
+ UINT64_C(0x009DE3ACE8B8A24D), UINT64_C(0x02B5F07FB5B5BD4F),
+ UINT64_C(0x018247CA534C6F7F) },
+ { UINT64_C(0x01E0A02B3DBEEE78), UINT64_C(0x001E200666AB15CD),
+ UINT64_C(0x0186BEA684E8C48E), UINT64_C(0x00F3F1894CDB68E0),
+ UINT64_C(0x032ECC59DF1BBB85), UINT64_C(0x02D06C53B9B53209),
+ UINT64_C(0x004A86739B90C8A8), UINT64_C(0x03AD8A97D98C89BC),
+ UINT64_C(0x00F01344204A1E2F) } },
+ { { UINT64_C(0x03582A68690F8C80), UINT64_C(0x012E151E3D7485DA),
+ UINT64_C(0x02527AD70F6AC0B4), UINT64_C(0x018B935CB107A3CD),
+ UINT64_C(0x036AA37D7A7E3625), UINT64_C(0x034CFB229578C67F),
+ UINT64_C(0x00A3FBC7740B7E16), UINT64_C(0x03D0C73BF6F5756D),
+ UINT64_C(0x009FFA51FEAC33FA) },
+ { UINT64_C(0x0208A8D791982847), UINT64_C(0x03EDDBD997642B6C),
+ UINT64_C(0x025D551977914C26), UINT64_C(0x02DD352759CA1376),
+ UINT64_C(0x00654090371E1000), UINT64_C(0x004AC720BEC03C34),
+ UINT64_C(0x03C06BE7F6C95884), UINT64_C(0x01FA475777DF0765),
+ UINT64_C(0x00A99275E15E46C7) } },
+ { { UINT64_C(0x016A50E0A643409F), UINT64_C(0x0122617180184D38),
+ UINT64_C(0x0105E92945AC97AB), UINT64_C(0x01A1B865FE31BAD8),
+ UINT64_C(0x033E0DC143E2D46B), UINT64_C(0x03DD157DF58A1946),
+ UINT64_C(0x02DF8E8C2EC7FB6D), UINT64_C(0x00E031916AFF1478),
+ UINT64_C(0x017A7BE92C9A8A1C) },
+ { UINT64_C(0x02063F9B1AF2F29D), UINT64_C(0x0275AF845DF62346),
+ UINT64_C(0x010016B05B22BD9F), UINT64_C(0x03772DD9DE8A3F70),
+ UINT64_C(0x011B489BE6C04500), UINT64_C(0x0122DEDE177B839E),
+ UINT64_C(0x008B9ED1DBF81860), UINT64_C(0x00CDA67D0D8CEDC1),
+ UINT64_C(0x01984030C18BF083) } },
+ { { UINT64_C(0x02791762137B93A2), UINT64_C(0x01F9DE3C5491E823),
+ UINT64_C(0x01E50243877F23E4), UINT64_C(0x0144F0B0081F37BC),
+ UINT64_C(0x00D7A781DD6DE5E2), UINT64_C(0x036A5EFE959E26D1),
+ UINT64_C(0x03A51922038AEEA2), UINT64_C(0x0054D452C10BD4F0),
+ UINT64_C(0x01B8A51151884AEF) },
+ { UINT64_C(0x0241D85F77A00331), UINT64_C(0x023528AF19A313C4),
+ UINT64_C(0x0176DFC98292A79E), UINT64_C(0x03AADEBB4F7B06B1),
+ UINT64_C(0x00DAB141E4CE727F), UINT64_C(0x0388E18953348B42),
+ UINT64_C(0x03FD5A751265E468), UINT64_C(0x024673750B3DB1AB),
+ UINT64_C(0x00E57DD5F1A23923) } },
+ { { UINT64_C(0x019D69A891328CE4), UINT64_C(0x008F01053E7A765C),
+ UINT64_C(0x030B5EE16F612292), UINT64_C(0x020A99C1AB590289),
+ UINT64_C(0x01D62D438BE82D64), UINT64_C(0x037D8D3250B87A70),
+ UINT64_C(0x03ACF90A3316DB71), UINT64_C(0x011F2D638816284F),
+ UINT64_C(0x000D63B1CF94E578) },
+ { UINT64_C(0x026288694B620A88), UINT64_C(0x01D7EC9688B643F5),
+ UINT64_C(0x0329AC344C36F494), UINT64_C(0x01F7C91E725E18A1),
+ UINT64_C(0x02FEB98C58EA0341), UINT64_C(0x00A508DDA6BF1EC5),
+ UINT64_C(0x00733B2463BD7A85), UINT64_C(0x0384EBC8AB299B36),
+ UINT64_C(0x00074909BD45312A) } },
+ { { UINT64_C(0x03E08C2C5C95FF29), UINT64_C(0x00C670644C808211),
+ UINT64_C(0x012D8021671FE338), UINT64_C(0x039F033363AA44CD),
+ UINT64_C(0x0337E7DB83662796), UINT64_C(0x03DDF327E2706223),
+ UINT64_C(0x005FBC050700CAFA), UINT64_C(0x020FC3C9D5CBB556),
+ UINT64_C(0x0105E1BC0BF33DC4) },
+ { UINT64_C(0x03F3D06894519732), UINT64_C(0x029248D001BE65FE),
+ UINT64_C(0x011EC77A8F1A11E3), UINT64_C(0x0365A31B2279F38F),
+ UINT64_C(0x014E2577747A12CD), UINT64_C(0x0160E01F73DAA243),
+ UINT64_C(0x01E3B9CC567EDCCC), UINT64_C(0x03E1B7F6A7B42960),
+ UINT64_C(0x01809B863B2F3F5A) } },
+ },
+ {
+ { { UINT64_C(0x0373B24CDED2EB86), UINT64_C(0x02402CCFAA26116F),
+ UINT64_C(0x005073857CDB6102), UINT64_C(0x01AE6F89575C7623),
+ UINT64_C(0x022FF653B3A939A7), UINT64_C(0x0190B7CB0A3545D4),
+ UINT64_C(0x02353B26D8170467), UINT64_C(0x003C64522D17855F),
+ UINT64_C(0x01E5D565F776B34F) },
+ { UINT64_C(0x025185A2C4B5DE1E), UINT64_C(0x02B3AFFAB7E382B2),
+ UINT64_C(0x0194B86479736527), UINT64_C(0x026B4BE5E81594AE),
+ UINT64_C(0x01D6960578E25220), UINT64_C(0x00993E60F26C1FF2),
+ UINT64_C(0x019B938479BA949D), UINT64_C(0x01FCA32034CAD7A3),
+ UINT64_C(0x017759280D580A6A) } },
+ { { UINT64_C(0x02346AE90C2CA70B), UINT64_C(0x013757CC55F070F5),
+ UINT64_C(0x017E107D86CA7681), UINT64_C(0x005AD490EBA565E0),
+ UINT64_C(0x02C9C614514CB60C), UINT64_C(0x03BEAF2AC475AF2B),
+ UINT64_C(0x008C591B4CE3CC44), UINT64_C(0x014A9DDFA491CE57),
+ UINT64_C(0x001268735793A719) },
+ { UINT64_C(0x007F97B31426994D), UINT64_C(0x01A96DF191B418F1),
+ UINT64_C(0x027DF055755518F4), UINT64_C(0x025DAAC2254C5D3C),
+ UINT64_C(0x0262D34E340FC2C3), UINT64_C(0x01F14824C8F72557),
+ UINT64_C(0x02A4819301BACB9F), UINT64_C(0x0268E03E6BEAB510),
+ UINT64_C(0x00EA805018D6E199) } },
+ { { UINT64_C(0x00FEA5E6ABEE1F7B), UINT64_C(0x00538DB9B2D8E2D4),
+ UINT64_C(0x0305BA64218318A0), UINT64_C(0x022BD39A67AA3F20),
+ UINT64_C(0x01157632723B17F9), UINT64_C(0x00C8DAAF646E78C9),
+ UINT64_C(0x0158EFBD367A27CD), UINT64_C(0x011375E95CB4F12F),
+ UINT64_C(0x005E40D5A4D44054) },
+ { UINT64_C(0x0297475C1D71A4FA), UINT64_C(0x03C1DABD876A7908),
+ UINT64_C(0x0038CB20D99CAE76), UINT64_C(0x03D63A3A005959E9),
+ UINT64_C(0x02AF78B93B764B6F), UINT64_C(0x0109A0342CFC2D30),
+ UINT64_C(0x01C301BEC294E434), UINT64_C(0x01972384DAD5FD67),
+ UINT64_C(0x01C3F5C9DF46F8D3) } },
+ { { UINT64_C(0x03C115A0432574BE), UINT64_C(0x01495DBDA1F302E5),
+ UINT64_C(0x010568069CC94673), UINT64_C(0x000A2EEAB0E37751),
+ UINT64_C(0x033EE9D566902CC4), UINT64_C(0x006B34AFED584340),
+ UINT64_C(0x02B50803E9B165A1), UINT64_C(0x03E38D1CBBEC3EC2),
+ UINT64_C(0x0023CF19CC14F82C) },
+ { UINT64_C(0x01CCAAFE462EC0F0), UINT64_C(0x02E714845D028EE6),
+ UINT64_C(0x02DCB47FF5021595), UINT64_C(0x030908AA9B079880),
+ UINT64_C(0x00371B5A69854385), UINT64_C(0x0185FE540E9AE9FF),
+ UINT64_C(0x02EE86F4F1A83CE4), UINT64_C(0x03AB730574E67F57),
+ UINT64_C(0x01F85953DB252C4B) } },
+ { { UINT64_C(0x02EC254BFD8CB3CC), UINT64_C(0x01DFEE8DE5F7858B),
+ UINT64_C(0x019C8AD2711F9096), UINT64_C(0x00B1E57CC4C26707),
+ UINT64_C(0x03511BB53983E402), UINT64_C(0x02A4019CDD626E9F),
+ UINT64_C(0x03BA2E0AC5C44D84), UINT64_C(0x00A965FE7663AD49),
+ UINT64_C(0x01739420DA2DD7E5) },
+ { UINT64_C(0x001E59C7B82FB619), UINT64_C(0x007B29CCEEF8AD83),
+ UINT64_C(0x02907C71BFFAE931), UINT64_C(0x003F110EC15CB5CF),
+ UINT64_C(0x02A76ECA58531793), UINT64_C(0x02D8D0EB5EA2FA03),
+ UINT64_C(0x0302231943B524FC), UINT64_C(0x01EBC24F8F0A0C29),
+ UINT64_C(0x019802CBF5F3CE73) } },
+ { { UINT64_C(0x01852168BF26ECDA), UINT64_C(0x03BA5FFA1597B73C),
+ UINT64_C(0x00E55E47A88BF735), UINT64_C(0x03EF5511C575EFAA),
+ UINT64_C(0x03BEAAED274CB2F4), UINT64_C(0x01A2B7AEE5E82012),
+ UINT64_C(0x00161524928CEDED), UINT64_C(0x0243FB8CEB1DB1ED),
+ UINT64_C(0x00A939AAE7662875) },
+ { UINT64_C(0x035FC996431E0BB4), UINT64_C(0x03871F05A029588C),
+ UINT64_C(0x024685D44F302D5A), UINT64_C(0x03D65DBBB0A24C64),
+ UINT64_C(0x031CCDBD89C13824), UINT64_C(0x03EEC80794841ADF),
+ UINT64_C(0x02BDD19433E827DB), UINT64_C(0x025D0DEF338BCA12),
+ UINT64_C(0x019DD1E057A3957F) } },
+ { { UINT64_C(0x028221686CEBC7BE), UINT64_C(0x00550CAC829C5C56),
+ UINT64_C(0x024473DA711003E5), UINT64_C(0x01D2D356A63016BD),
+ UINT64_C(0x016B5C937B93F5AA), UINT64_C(0x016BA509AE911631),
+ UINT64_C(0x03BB387F2983AA08), UINT64_C(0x0087050F624145D1),
+ UINT64_C(0x00430D39E6B578E6) },
+ { UINT64_C(0x02E690EFE2E3859D), UINT64_C(0x021D189217E0C7B9),
+ UINT64_C(0x03BC89797B1B794C), UINT64_C(0x01D6B16B566AB9D7),
+ UINT64_C(0x02935CEB8993E4D1), UINT64_C(0x03C0BF4C7D6967AE),
+ UINT64_C(0x00EA7B0862929371), UINT64_C(0x014624F22194B5D9),
+ UINT64_C(0x00D68221B3478C47) } },
+ { { UINT64_C(0x03BEC558C2EB8133), UINT64_C(0x031106A5F911659D),
+ UINT64_C(0x00D07C39AEFB3CBE), UINT64_C(0x02F06E730A651F25),
+ UINT64_C(0x0183C527F019A937), UINT64_C(0x0153E778C8608775),
+ UINT64_C(0x0214C61DB43A7203), UINT64_C(0x00CD284ED5892F97),
+ UINT64_C(0x0198EB083CFD5B2B) },
+ { UINT64_C(0x0393B136D6835A15), UINT64_C(0x03ED1013491B6647),
+ UINT64_C(0x00702068040A8E55), UINT64_C(0x0136DD3C55BF5BE4),
+ UINT64_C(0x03D053D6F8B28F3A), UINT64_C(0x00FAF9585D310B40),
+ UINT64_C(0x002690874B88A2A9), UINT64_C(0x02651384F1D8C181),
+ UINT64_C(0x00E5D3BFA7EC53DE) } },
+ { { UINT64_C(0x033F039A91D85118), UINT64_C(0x03A170E9A74E89EC),
+ UINT64_C(0x03EBE8F17E2B4C68), UINT64_C(0x032E08DD52962FFF),
+ UINT64_C(0x01F682C887362E38), UINT64_C(0x02848A835A72A2EE),
+ UINT64_C(0x00AFA36F7A88966F), UINT64_C(0x02D505E8ED473B2D),
+ UINT64_C(0x007B6EF0E4DAA123) },
+ { UINT64_C(0x03F322E8CD472029), UINT64_C(0x009B31F349123C63),
+ UINT64_C(0x024396A463AE29B2), UINT64_C(0x035A559411C8D9B7),
+ UINT64_C(0x0302AAF84FEF53A7), UINT64_C(0x00322717487DC79C),
+ UINT64_C(0x02CA6AE27A92266C), UINT64_C(0x03E6B6580391B525),
+ UINT64_C(0x00647CC677EE4353) } },
+ { { UINT64_C(0x0015F4FB3CE12393), UINT64_C(0x013D9CD65B87D1CA),
+ UINT64_C(0x03ED1458BDACF05A), UINT64_C(0x011BC2A44D7A03F7),
+ UINT64_C(0x00D1E2748EE247CF), UINT64_C(0x025C05134193D6D7),
+ UINT64_C(0x03D8D4701057B20F), UINT64_C(0x03CD86409D914C19),
+ UINT64_C(0x0123EE9725146150) },
+ { UINT64_C(0x03B85772CCE5DBF5), UINT64_C(0x024E60E34E33C627),
+ UINT64_C(0x00CEB58FBCFD7F20), UINT64_C(0x0213A9AF85D15B81),
+ UINT64_C(0x00879FD075FE76EA), UINT64_C(0x01883D1962AC7DA6),
+ UINT64_C(0x0041CDD770D92E82), UINT64_C(0x024CF83E19940701),
+ UINT64_C(0x0001A7D69F562E49) } },
+ { { UINT64_C(0x03F06D3661D1EEDB), UINT64_C(0x01062600B09B6B3E),
+ UINT64_C(0x01A0A640D07EFC7A), UINT64_C(0x0317F67E20F296A1),
+ UINT64_C(0x034843017C701C3C), UINT64_C(0x033891152A103E33),
+ UINT64_C(0x01C00AE12BC93968), UINT64_C(0x0280A3403412AA1F),
+ UINT64_C(0x0111DA6A8E2C4EE1) },
+ { UINT64_C(0x0138BBADC5A4238D), UINT64_C(0x02BB1A5504498DAF),
+ UINT64_C(0x03D55FD7A02F99F7), UINT64_C(0x030B36D2716AAE98),
+ UINT64_C(0x00846799916170BE), UINT64_C(0x021843A1130EBD86),
+ UINT64_C(0x01602A0048ED7277), UINT64_C(0x010F628883F5C170),
+ UINT64_C(0x00A879F20138FE97) } },
+ { { UINT64_C(0x010B697E6BB71E17), UINT64_C(0x00A5FF1EE44F8A1A),
+ UINT64_C(0x02F0A65F0594ADDF), UINT64_C(0x01B97DFF3B989E00),
+ UINT64_C(0x02EBB1D34E1BC0B6), UINT64_C(0x0318AB0F908D45CA),
+ UINT64_C(0x006D84E0ECA51F49), UINT64_C(0x022CBEFDFAF29F0C),
+ UINT64_C(0x019FF3250EDA2D48) },
+ { UINT64_C(0x0247BD9A1791633D), UINT64_C(0x001017CA6D44DB39),
+ UINT64_C(0x001392DBCF3C08AE), UINT64_C(0x00BBFD8C9245DBED),
+ UINT64_C(0x03C6094D363A2A9B), UINT64_C(0x0026C46C1B980722),
+ UINT64_C(0x014C00915831C495), UINT64_C(0x03480A51EA642A61),
+ UINT64_C(0x018A2CD0EE26C545) } },
+ { { UINT64_C(0x00179F4F97812A25), UINT64_C(0x02A5E9E3F33BC581),
+ UINT64_C(0x000BD5248493D239), UINT64_C(0x02B7DE8E94D0B6E5),
+ UINT64_C(0x01D8674B49C2359A), UINT64_C(0x020163E368BE3C3B),
+ UINT64_C(0x0332717F9505C7C1), UINT64_C(0x035A143000B7EC9C),
+ UINT64_C(0x00C999A3E0BCCAF1) },
+ { UINT64_C(0x007B047729EF75E3), UINT64_C(0x02CC12EE110A5B9B),
+ UINT64_C(0x0330E2E6286E55F0), UINT64_C(0x00C6FC4CB1CD5C12),
+ UINT64_C(0x014B93EA65F0CCE4), UINT64_C(0x01E5A20D3788D937),
+ UINT64_C(0x039AB1AC6BF17BFB), UINT64_C(0x0397FE82B1886D3A),
+ UINT64_C(0x000C112A21CE8FCD) } },
+ { { UINT64_C(0x02B7C1C48CF8D334), UINT64_C(0x0078EAF1E0B9AA5A),
+ UINT64_C(0x0397B9A209EF9EF0), UINT64_C(0x001CFFAFD847B222),
+ UINT64_C(0x0321A14F818F0142), UINT64_C(0x0214D3F98F9D0ED8),
+ UINT64_C(0x011305B71C04D0D3), UINT64_C(0x03DE98EACA808006),
+ UINT64_C(0x01360AA21413198A) },
+ { UINT64_C(0x028D3F07FD51E170), UINT64_C(0x023F03474306CBA2),
+ UINT64_C(0x034205D496752F99), UINT64_C(0x02D4BC03F380060F),
+ UINT64_C(0x01E2CE3EBF008299), UINT64_C(0x03EE2B7C9CF44A54),
+ UINT64_C(0x022CB7C6BCE06379), UINT64_C(0x03934E9100F4AD3F),
+ UINT64_C(0x001B8D6D7EA30D7F) } },
+ { { UINT64_C(0x0175E6F14594D02E), UINT64_C(0x0107CFBBB666C104),
+ UINT64_C(0x0043C920F3FC7184), UINT64_C(0x01D3F596321DF679),
+ UINT64_C(0x034FBFA8E62660AC), UINT64_C(0x02F07B7B2F64B7D6),
+ UINT64_C(0x020B7A4B1CB30890), UINT64_C(0x0027370AF3A01ACE),
+ UINT64_C(0x004C3DF94ED57F1B) },
+ { UINT64_C(0x02F7E28D420891BB), UINT64_C(0x00A165AF3355D551),
+ UINT64_C(0x03E2077F4C7840E2), UINT64_C(0x010A42F1F956CFC2),
+ UINT64_C(0x01586FF6FC545309), UINT64_C(0x00E2A2E3F8A44D6A),
+ UINT64_C(0x01BCD7CFAB0CD9EA), UINT64_C(0x02CD7B5AA257EF8B),
+ UINT64_C(0x01E161EB6461E56F) } },
+ { { UINT64_C(0x03AA1E440B1B7656), UINT64_C(0x02DB3F4D449DEBD4),
+ UINT64_C(0x025617A010F1A335), UINT64_C(0x010C03757E20D72C),
+ UINT64_C(0x01EA95F9EFACD59B), UINT64_C(0x0126D8DDDE17B239),
+ UINT64_C(0x02DBF2D291F6AEC7), UINT64_C(0x02F6100FC8834353),
+ UINT64_C(0x00C18C83BB58FB77) },
+ { UINT64_C(0x03754C15A7EEE80E), UINT64_C(0x00247AB9412690FE),
+ UINT64_C(0x016E9C7BD742F5DF), UINT64_C(0x02361FAE95827D75),
+ UINT64_C(0x029E41CC30EA15A1), UINT64_C(0x005F53D5863CB83F),
+ UINT64_C(0x0025C9FC701A2B9B), UINT64_C(0x0389C7702E9DAFBA),
+ UINT64_C(0x00ED3C35310B5895) } },
+ },
+ {
+ { { UINT64_C(0x0373C85A8201C48B), UINT64_C(0x000BE293272BB8C3),
+ UINT64_C(0x0299641D84048EF5), UINT64_C(0x012EE83CEE0A37DD),
+ UINT64_C(0x00D6A81ED893F8A3), UINT64_C(0x01988A5103EE9A5B),
+ UINT64_C(0x01495F90BE6C8319), UINT64_C(0x00954437A6A3C821),
+ UINT64_C(0x010E12D843E6580B) },
+ { UINT64_C(0x007820FBE51DE678), UINT64_C(0x013364C5E0C684D4),
+ UINT64_C(0x009D1721196C2E40), UINT64_C(0x01933769A5FD2063),
+ UINT64_C(0x00BAB8B58BEFA01A), UINT64_C(0x012866F6B7334CBC),
+ UINT64_C(0x025340A51AC6E1FB), UINT64_C(0x03B1135009A4FD38),
+ UINT64_C(0x018AD6567590AFBB) } },
+ { { UINT64_C(0x03F7CC1DCD9C3B89), UINT64_C(0x03F2238DF027BB54),
+ UINT64_C(0x014C7FD4BA95DD01), UINT64_C(0x01DBD8CC489F6AB6),
+ UINT64_C(0x03A6066BFEA7BAB5), UINT64_C(0x0065E8AD52465D5E),
+ UINT64_C(0x03E8F9DA8D525106), UINT64_C(0x001A6869F0B37603),
+ UINT64_C(0x016D47A0587C292E) },
+ { UINT64_C(0x0374FC0618A5170B), UINT64_C(0x0152FB1A3C0C1CC0),
+ UINT64_C(0x01710A373C6A380E), UINT64_C(0x00845789535E37A3),
+ UINT64_C(0x035D0DA356C25D05), UINT64_C(0x00C2670CA5FED688),
+ UINT64_C(0x010367DAE1D930AA), UINT64_C(0x0109B528D8B5E2DD),
+ UINT64_C(0x0160EAA2FD7C6C7E) } },
+ { { UINT64_C(0x02EB058989126FAC), UINT64_C(0x03391866A50E5BF0),
+ UINT64_C(0x0249D99C7ECCC796), UINT64_C(0x031F124A928D03B2),
+ UINT64_C(0x0106FA952E20ED57), UINT64_C(0x001BC6E7D0224A59),
+ UINT64_C(0x00CE05E4690915C9), UINT64_C(0x020A90266CA1AD52),
+ UINT64_C(0x0094293617B76FE5) },
+ { UINT64_C(0x034B04313831CD9D), UINT64_C(0x03B7732D91E90928),
+ UINT64_C(0x014A1E82A9C3D51E), UINT64_C(0x02AEC53126F32DDD),
+ UINT64_C(0x028AC8F7A359BD6C), UINT64_C(0x01B3A0EDE3DB4B4B),
+ UINT64_C(0x028EB875F2FBF434), UINT64_C(0x01AE764FB3A07035),
+ UINT64_C(0x006701271A1304D0) } },
+ { { UINT64_C(0x0015B0C258BC45E5), UINT64_C(0x00500CF779654876),
+ UINT64_C(0x00D61185031EC91A), UINT64_C(0x0237D26B8AB4ABC0),
+ UINT64_C(0x0303DB5DD0B1113F), UINT64_C(0x02C21386988E1A69),
+ UINT64_C(0x002A78FA27F52A38), UINT64_C(0x02373FFEB8A111FB),
+ UINT64_C(0x01ED316A4A837D78) },
+ { UINT64_C(0x02151FA30AE71753), UINT64_C(0x018559984522D236),
+ UINT64_C(0x02AA1CED8D6E9D2C), UINT64_C(0x0336B3277D457875),
+ UINT64_C(0x01FEB5FD684C784F), UINT64_C(0x0312F506AD5C57EB),
+ UINT64_C(0x026506BE8AA4F453), UINT64_C(0x0334630A573CB20E),
+ UINT64_C(0x00AA6EBCFBE68959) } },
+ { { UINT64_C(0x0339D37CD0D9229F), UINT64_C(0x0170E57961291D98),
+ UINT64_C(0x029AE28566E91600), UINT64_C(0x02402C0C57E9B401),
+ UINT64_C(0x01EC520A49429756), UINT64_C(0x02A2CF079E7747FF),
+ UINT64_C(0x03751BAC838751C0), UINT64_C(0x021ED034A3B7C53C),
+ UINT64_C(0x0118500D09678BBC) },
+ { UINT64_C(0x007E207E14E4C072), UINT64_C(0x039277F4D05B1F1F),
+ UINT64_C(0x02A052EAB5B31E63), UINT64_C(0x02B6A467E3451DEA),
+ UINT64_C(0x001613AC11B73C00), UINT64_C(0x00C5A6FA0FE24B0C),
+ UINT64_C(0x034F01404D69886A), UINT64_C(0x00324E28B3CA9FD4),
+ UINT64_C(0x005A3181E5A8A0B8) } },
+ { { UINT64_C(0x02CE6BA9219403A6), UINT64_C(0x030DFB5CBE0CA405),
+ UINT64_C(0x039D700EFB6B4704), UINT64_C(0x0365CAD8F9D06BE7),
+ UINT64_C(0x00FE6873B0456CD8), UINT64_C(0x0090EC1026095A01),
+ UINT64_C(0x016F3A2CC5EC6B62), UINT64_C(0x001AD035AE2286FC),
+ UINT64_C(0x018819632B44D890) },
+ { UINT64_C(0x039574FA6B48EFBA), UINT64_C(0x029D9BE545F8EFA2),
+ UINT64_C(0x00F42C7789B73AA2), UINT64_C(0x03CB90D731504D3E),
+ UINT64_C(0x0202ACD7E2DE6E8A), UINT64_C(0x02C8AD45BF6E2A24),
+ UINT64_C(0x0067A40E7FC99B4D), UINT64_C(0x03E0738CFADACE29),
+ UINT64_C(0x01177C98831102AA) } },
+ { { UINT64_C(0x030A8610AC5E165D), UINT64_C(0x014AA32172C55EC2),
+ UINT64_C(0x027CE551CABE6211), UINT64_C(0x02477F69861DB6E6),
+ UINT64_C(0x01E8FF337E7E36EC), UINT64_C(0x0054ACDF3E1C9EF7),
+ UINT64_C(0x03DED626009E6F01), UINT64_C(0x02E49BFEF7555C32),
+ UINT64_C(0x002E4F1C3DB00152) },
+ { UINT64_C(0x0332D8B606C8A9BC), UINT64_C(0x03AD929E6D810A1A),
+ UINT64_C(0x02C0030394592734), UINT64_C(0x02442FE9824BDA03),
+ UINT64_C(0x03CBAC9513FF99FB), UINT64_C(0x03B3D4E910EDA5AD),
+ UINT64_C(0x005A6F83029FFE7F), UINT64_C(0x02F6FF8D9E1F29A6),
+ UINT64_C(0x0188A1C08A99132D) } },
+ { { UINT64_C(0x001F1A68F391B195), UINT64_C(0x00F016D21D573BA5),
+ UINT64_C(0x00EB4A4B11B13F56), UINT64_C(0x0390443801100BE8),
+ UINT64_C(0x00CDF1786689F09F), UINT64_C(0x008708E6F68D807B),
+ UINT64_C(0x00CFC70B63E2B318), UINT64_C(0x02DA65CABECA51A9),
+ UINT64_C(0x01BB4CC16417876B) },
+ { UINT64_C(0x002270E155C4416F), UINT64_C(0x0275E82A3EE6287C),
+ UINT64_C(0x019550DEBAE641A6), UINT64_C(0x0189E9D792313D48),
+ UINT64_C(0x022E11801B0D93FC), UINT64_C(0x006308C9DD555E4E),
+ UINT64_C(0x02F9EBC6E275E976), UINT64_C(0x00011D5E55FC63C6),
+ UINT64_C(0x01D3E16AA048085F) } },
+ { { UINT64_C(0x01C6845EE45C5FF5), UINT64_C(0x03B6D8ADC4E97112),
+ UINT64_C(0x0068C305E2731ED0), UINT64_C(0x037AFCABEDF2C8B5),
+ UINT64_C(0x016C0203DF9F154E), UINT64_C(0x03FF6DCCA97B1A6C),
+ UINT64_C(0x019D691BB5C8CD06), UINT64_C(0x022C5EA48F6FE25F),
+ UINT64_C(0x00553B7F4065FABA) },
+ { UINT64_C(0x006009B918BF712A), UINT64_C(0x0087FAC6655FF7A7),
+ UINT64_C(0x039DB19E2FDB3477), UINT64_C(0x014389D0D15C2072),
+ UINT64_C(0x02B3AB48E4A3E0DF), UINT64_C(0x00D55CD68B325E8D),
+ UINT64_C(0x020332F2B62898A4), UINT64_C(0x019DB12158F6D4D6),
+ UINT64_C(0x010E1F4D65633E42) } },
+ { { UINT64_C(0x035FDBF97A66FBB8), UINT64_C(0x0397FDA15F48E249),
+ UINT64_C(0x0314912B73A0AD12), UINT64_C(0x018B5A1F5856CC06),
+ UINT64_C(0x026DB1F90C057E46), UINT64_C(0x02BC203FE8141974),
+ UINT64_C(0x032698D0DBE8152C), UINT64_C(0x01BC802ED9745CEA),
+ UINT64_C(0x00B1E80CFCF35D14) },
+ { UINT64_C(0x026A4890175570A1), UINT64_C(0x03DEFA508892558E),
+ UINT64_C(0x00D274862CB6E1EF), UINT64_C(0x02F12D3DF3D2916D),
+ UINT64_C(0x01D9AF2100AA8841), UINT64_C(0x024123BB5E94517B),
+ UINT64_C(0x00CEA1686B604BBF), UINT64_C(0x007E9A1A2F8E072B),
+ UINT64_C(0x012919949C3170DE) } },
+ { { UINT64_C(0x028CFBD7509B3F23), UINT64_C(0x0341392CF0D37CE2),
+ UINT64_C(0x03BB3B849E04FCBA), UINT64_C(0x004BCCA7E7C71C3F),
+ UINT64_C(0x007EAF927839C8E2), UINT64_C(0x0061602F3DAFE11E),
+ UINT64_C(0x01D0F1831E9A3AE7), UINT64_C(0x032630A59BC245BA),
+ UINT64_C(0x00C9122EE0775F54) },
+ { UINT64_C(0x027706840C226E2C), UINT64_C(0x021FC974C3A78386),
+ UINT64_C(0x0254E3803EE94792), UINT64_C(0x02763098FB07712F),
+ UINT64_C(0x03085BE39396F8D2), UINT64_C(0x039CDBB83C0DCAE5),
+ UINT64_C(0x0275170CD909C685), UINT64_C(0x02A48EFA2F7CBD9D),
+ UINT64_C(0x0151800A47F18A8F) } },
+ { { UINT64_C(0x0266B421EDA35EBF), UINT64_C(0x016EE661AEE22D67),
+ UINT64_C(0x02189CC63A33934C), UINT64_C(0x02035BBEEF2E6505),
+ UINT64_C(0x03A21BDAB12827FF), UINT64_C(0x010837E5E86E37F7),
+ UINT64_C(0x000889F4FF18C641), UINT64_C(0x00B83D668CF5F701),
+ UINT64_C(0x00A90A0E4C84A45C) },
+ { UINT64_C(0x014A9DB7546020F0), UINT64_C(0x026B8123F183E007),
+ UINT64_C(0x014172F8A29A74BC), UINT64_C(0x03ECB113DDF05CC6),
+ UINT64_C(0x0056019B554AE591), UINT64_C(0x01C3E5A8AC670B45),
+ UINT64_C(0x0328112932236FCD), UINT64_C(0x0147D09F4CAD8D13),
+ UINT64_C(0x007CA80EB751C2E8) } },
+ { { UINT64_C(0x03260C3CA6A09384), UINT64_C(0x01A2DAEF9F24A534),
+ UINT64_C(0x01FA415780AE38B6), UINT64_C(0x02FE728B02BEADE2),
+ UINT64_C(0x031F71486AA63A4A), UINT64_C(0x021F907074346F6D),
+ UINT64_C(0x00225A4DA564511F), UINT64_C(0x02CC4C97BC497C99),
+ UINT64_C(0x01C2DD5CCD878296) },
+ { UINT64_C(0x03CD4A619B2264B8), UINT64_C(0x03093FC7F1583EA2),
+ UINT64_C(0x02B47AD7D9A2FB6F), UINT64_C(0x00C0D0B440BCA2A9),
+ UINT64_C(0x00B22B3DB051C447), UINT64_C(0x01CEC4D502303875),
+ UINT64_C(0x0340F66A4D33C79A), UINT64_C(0x00C02F44477E4379),
+ UINT64_C(0x01A54038DE4CD448) } },
+ { { UINT64_C(0x036F26FDD184B415), UINT64_C(0x0077144A843CA00F),
+ UINT64_C(0x012DE3D50936A2A0), UINT64_C(0x00F1A915BEF669FD),
+ UINT64_C(0x02A728B908D36285), UINT64_C(0x023009A8F3585930),
+ UINT64_C(0x01AFE37F5F6903E6), UINT64_C(0x015BE42AC69043A0),
+ UINT64_C(0x0029A3961324FE67) },
+ { UINT64_C(0x03744629EA87B468), UINT64_C(0x01B1B421D820F115),
+ UINT64_C(0x009DEF11D39EF564), UINT64_C(0x002A1D3B4419573F),
+ UINT64_C(0x00558617DEFBD955), UINT64_C(0x03E4BE19D9F46F14),
+ UINT64_C(0x012A38F1BF3ED4C3), UINT64_C(0x00B5C5CD4AC51A53),
+ UINT64_C(0x00A0E10EBF360168) } },
+ { { UINT64_C(0x011616DEF784F95B), UINT64_C(0x02677312C6AD8D2D),
+ UINT64_C(0x03F3EF6B22617C90), UINT64_C(0x029E26932332F57D),
+ UINT64_C(0x0285AE820DE6D58A), UINT64_C(0x014C9337216D597B),
+ UINT64_C(0x00A6F170854E55AF), UINT64_C(0x010EA56E5DFB91ED),
+ UINT64_C(0x012F8DBABA868C11) },
+ { UINT64_C(0x015249FC91DCCF70), UINT64_C(0x0306C5CB46C7DD02),
+ UINT64_C(0x021954201045F6CB), UINT64_C(0x00E2B058688BC602),
+ UINT64_C(0x002D5DDCF79B78E3), UINT64_C(0x03AF429058EAD023),
+ UINT64_C(0x016A3FA5F7DB5234), UINT64_C(0x01EAFE34B82E4D26),
+ UINT64_C(0x0095115BD2F5AE74) } },
+ { { UINT64_C(0x01C1741308F9B528), UINT64_C(0x011456D2FA27C256),
+ UINT64_C(0x029EE8BA38AC33BC), UINT64_C(0x0162AD2DF7E46CB7),
+ UINT64_C(0x01239C1DD2198564), UINT64_C(0x00D634D586B52D14),
+ UINT64_C(0x00362033A3D5AE2B), UINT64_C(0x00F403720300250C),
+ UINT64_C(0x0134664850978D32) },
+ { UINT64_C(0x032ECC2C4837554E), UINT64_C(0x008F4BC077701F7F),
+ UINT64_C(0x002D0F7435107071), UINT64_C(0x015A21A6D90E61B2),
+ UINT64_C(0x03E1B78AD2E928DC), UINT64_C(0x02A2214D7306E1AF),
+ UINT64_C(0x01C4FCA92A1694C1), UINT64_C(0x00656FBD23561E1B),
+ UINT64_C(0x013FF3454072CB98) } },
+ },
+ {
+ { { UINT64_C(0x003C182D851368EE), UINT64_C(0x0128CF55F2467CB0),
+ UINT64_C(0x00767E333ACE3BB9), UINT64_C(0x011F65D379FE73C3),
+ UINT64_C(0x038B18FA5C037C7D), UINT64_C(0x01B3CD7DFA5B80B3),
+ UINT64_C(0x0086C596F1A3E912), UINT64_C(0x00A8AD1EBFF700CD),
+ UINT64_C(0x00E12C370BFEEC8C) },
+ { UINT64_C(0x00E5DE2C18A3F84B), UINT64_C(0x02D9CB8AB50B28B7),
+ UINT64_C(0x01D7EDD0731B2C4B), UINT64_C(0x0328A026B1FAD960),
+ UINT64_C(0x02189B0FF8B6CA46), UINT64_C(0x03FD18C777A3B6E8),
+ UINT64_C(0x0004BCBA72EE3E81), UINT64_C(0x0214C7D12A3F1BC4),
+ UINT64_C(0x01CA103DD1B9C887) } },
+ { { UINT64_C(0x00A781D5DE024391), UINT64_C(0x01D4AC6B9AA04C66),
+ UINT64_C(0x0298088919924A4E), UINT64_C(0x02295F237B9E2B5F),
+ UINT64_C(0x0228FA8EA8570017), UINT64_C(0x01AE7F1814C6B59C),
+ UINT64_C(0x008FF64625C08899), UINT64_C(0x002A626C4EECF6A1),
+ UINT64_C(0x0118A9AD8CEFC12E) },
+ { UINT64_C(0x014B05DA9E9AB68C), UINT64_C(0x036EDCE530984903),
+ UINT64_C(0x03147DF5F527C318), UINT64_C(0x0196BC1DED347CDD),
+ UINT64_C(0x01BB4AC96E14A591), UINT64_C(0x03C4F3EDF23B9460),
+ UINT64_C(0x03547D14C90381B8), UINT64_C(0x03693FA10D27208C),
+ UINT64_C(0x003B75AA5EA458F7) } },
+ { { UINT64_C(0x02779CC419496A3E), UINT64_C(0x01D3BB2E4FE62409),
+ UINT64_C(0x032F4C70FCAE21C4), UINT64_C(0x013310DA0ECE14A3),
+ UINT64_C(0x03F3B3593FC9DDBB), UINT64_C(0x0051822EF8CFB99D),
+ UINT64_C(0x012D89EA3AE1C997), UINT64_C(0x00D12E2856922EAE),
+ UINT64_C(0x00E81549D787C4C8) },
+ { UINT64_C(0x02337896D4B88B67), UINT64_C(0x00A59FC2D1584FBE),
+ UINT64_C(0x02FAA1ED2840EB09), UINT64_C(0x02061203F2AA6499),
+ UINT64_C(0x03BF834C1997385E), UINT64_C(0x02274588F3F24162),
+ UINT64_C(0x001CC1FD4A622D5A), UINT64_C(0x0044FEAA4FA76E84),
+ UINT64_C(0x00B3619A1E813DA3) } },
+ { { UINT64_C(0x0276BEE0D076683D), UINT64_C(0x030210C875AFAF69),
+ UINT64_C(0x0011EDC7657E64F0), UINT64_C(0x02488D3166D94F20),
+ UINT64_C(0x011EA313A85E0E01), UINT64_C(0x032E12BF7FFAF1B4),
+ UINT64_C(0x00327C5A8CCEF85B), UINT64_C(0x0252EF23E4C30C4E),
+ UINT64_C(0x01CC6A9EB749B839) },
+ { UINT64_C(0x02B00795BB99594F), UINT64_C(0x01F383BC6F8BE7AA),
+ UINT64_C(0x00760524F18BF5F2), UINT64_C(0x013AA36073E7DDA9),
+ UINT64_C(0x025A0A5A67DE0097), UINT64_C(0x01A61B644AB9486A),
+ UINT64_C(0x0313B98AABF5EA94), UINT64_C(0x003BB89B65E51F0D),
+ UINT64_C(0x01776B040E0F32AB) } },
+ { { UINT64_C(0x01721BA5B2662A6A), UINT64_C(0x0215447AF117F66C),
+ UINT64_C(0x03DB83ECC5D3D99A), UINT64_C(0x0215A6C6CE2794E3),
+ UINT64_C(0x010BE3489ECF31F8), UINT64_C(0x012B3FA3634CDEF2),
+ UINT64_C(0x017C1F03CDFBCD8A), UINT64_C(0x02EE6A91A626677E),
+ UINT64_C(0x003FF1568F6BE74E) },
+ { UINT64_C(0x01995519CD76A58E), UINT64_C(0x02DC3A3040585EF5),
+ UINT64_C(0x0061DDCAE3A68494), UINT64_C(0x025E1A1EF3C2AAA5),
+ UINT64_C(0x00CA54B0D55B6CE8), UINT64_C(0x00543A97F9E4CC22),
+ UINT64_C(0x01F7F09EDEFF8BFA), UINT64_C(0x00168473D37DD44E),
+ UINT64_C(0x00FE410E086ACD40) } },
+ { { UINT64_C(0x006AF7630DA09D54), UINT64_C(0x010ABA844C57F2B5),
+ UINT64_C(0x03C9AC1832567F47), UINT64_C(0x00B3CFD3C603E8BB),
+ UINT64_C(0x01A04969EEACA1C9), UINT64_C(0x02E57B7E17E4591D),
+ UINT64_C(0x03E68AB3619DA17B), UINT64_C(0x00ECCA930F030279),
+ UINT64_C(0x01B2C98B4036BF1D) },
+ { UINT64_C(0x0077C78B045007F6), UINT64_C(0x03CCE2791A0C0815),
+ UINT64_C(0x01688DB89F24D07A), UINT64_C(0x0017DBDDD43EAD41),
+ UINT64_C(0x033A80BF740D6693), UINT64_C(0x02F768ED65974242),
+ UINT64_C(0x026B74A3E2B11EFF), UINT64_C(0x023E110BE2C45B38),
+ UINT64_C(0x00B98CD56F7AB2CD) } },
+ { { UINT64_C(0x0383E5A50FB0D3ED), UINT64_C(0x034513587B8AB555),
+ UINT64_C(0x03B1C6783B97BD45), UINT64_C(0x0062B781B344D4E1),
+ UINT64_C(0x00FD5DFB5083FED9), UINT64_C(0x00CF4B880197BC29),
+ UINT64_C(0x02084C42BE014183), UINT64_C(0x01C81317B056C149),
+ UINT64_C(0x016318E131F69642) },
+ { UINT64_C(0x019B4B41240FA002), UINT64_C(0x0312BAA4E914151E),
+ UINT64_C(0x0180907D9FACF5B0), UINT64_C(0x007774B33895C1D0),
+ UINT64_C(0x017E17EBCCA7FA72), UINT64_C(0x030812EEB0BC890A),
+ UINT64_C(0x02294B1CB2912B73), UINT64_C(0x03835B7F1FA5A17D),
+ UINT64_C(0x001712AC45AB3EC9) } },
+ { { UINT64_C(0x006603D4F696BA83), UINT64_C(0x00D22CAFE710B52F),
+ UINT64_C(0x00A86019255DD155), UINT64_C(0x03D9E86EE758D999),
+ UINT64_C(0x024051D5CE463A6D), UINT64_C(0x02906D0203D86E6E),
+ UINT64_C(0x02B53E1EA3B77733), UINT64_C(0x01298EBA501720C6),
+ UINT64_C(0x00A49AB3D5669F64) },
+ { UINT64_C(0x00C3477F5E8C01EF), UINT64_C(0x02CFF8B3EED1F46C),
+ UINT64_C(0x02588DBF2A1259EE), UINT64_C(0x01BC0AE8F9969F27),
+ UINT64_C(0x0284232123DA5F9F), UINT64_C(0x03E79C894325C436),
+ UINT64_C(0x00FE809311DA7F3B), UINT64_C(0x0102255D12EBA535),
+ UINT64_C(0x01F50E25AE34114E) } },
+ { { UINT64_C(0x0277D803646C1FB6), UINT64_C(0x02488A5E5052BBB1),
+ UINT64_C(0x000391356EAC8F11), UINT64_C(0x01646437C00A834F),
+ UINT64_C(0x02EAB8F940B93B40), UINT64_C(0x024958DF1C74ED20),
+ UINT64_C(0x03F2F1AF37BD1D73), UINT64_C(0x011FE3F5381F17F4),
+ UINT64_C(0x00EF826DAE390184) },
+ { UINT64_C(0x00D2D6B4BA78B572), UINT64_C(0x0073D6C96322203E),
+ UINT64_C(0x018C7B2E976AA1E5), UINT64_C(0x026E3F6920E5F016),
+ UINT64_C(0x01E846537687AFF5), UINT64_C(0x017563948203FD81),
+ UINT64_C(0x019F1D17DABC8810), UINT64_C(0x00F8ED530C4E3A67),
+ UINT64_C(0x0196F10721B62324) } },
+ { { UINT64_C(0x032F87D12878503F), UINT64_C(0x03648B98DC48ECC8),
+ UINT64_C(0x0184FD4C8EF53242), UINT64_C(0x01333846A9EEDB04),
+ UINT64_C(0x02C1DF317872BBBF), UINT64_C(0x002D6E1FAF12E7FB),
+ UINT64_C(0x039480C808CCDA38), UINT64_C(0x02845D8F6413B928),
+ UINT64_C(0x01979462C493957E) },
+ { UINT64_C(0x02E38CCA2947A480), UINT64_C(0x00298B225770DDF9),
+ UINT64_C(0x02859B366A105BC5), UINT64_C(0x00C80C32E8803179),
+ UINT64_C(0x01DEC1627A49675D), UINT64_C(0x018FD7B10ED2384C),
+ UINT64_C(0x00CE729C9A700811), UINT64_C(0x00B9251157C6408C),
+ UINT64_C(0x00D18FB5EDB29090) } },
+ { { UINT64_C(0x0019C27F1002FA40), UINT64_C(0x0187B6686A1976EA),
+ UINT64_C(0x03089E6ABFDCA1BA), UINT64_C(0x01E3A9276DAB6A31),
+ UINT64_C(0x01010381B56E1374), UINT64_C(0x02059C3444CA22AD),
+ UINT64_C(0x0340D48C52418852), UINT64_C(0x001C397FEACAD014),
+ UINT64_C(0x00A9B91476DE1E3B) },
+ { UINT64_C(0x01B18811D2203C97), UINT64_C(0x006802C3244A5143),
+ UINT64_C(0x034CC7484B00B0C2), UINT64_C(0x02D138E88D39FE0E),
+ UINT64_C(0x00035A355C8D48A2), UINT64_C(0x01257073943DE7F1),
+ UINT64_C(0x003B2AA49BD592AC), UINT64_C(0x03D7C1DBA4418663),
+ UINT64_C(0x01A24E3A67DAF410) } },
+ { { UINT64_C(0x02B819FA06A8409F), UINT64_C(0x004A52ACCE9D798F),
+ UINT64_C(0x0342BCE5E942F51F), UINT64_C(0x01499CF92BE85899),
+ UINT64_C(0x03ACD69B9655760D), UINT64_C(0x020F4E9A7813F0D0),
+ UINT64_C(0x03880853D5E05E02), UINT64_C(0x02B0666045F612A7),
+ UINT64_C(0x00302D53FFFEEF1D) },
+ { UINT64_C(0x025294489593BC03), UINT64_C(0x013D42D26192AAEB),
+ UINT64_C(0x010D09630D5F95E5), UINT64_C(0x02152684A6D53F7C),
+ UINT64_C(0x022DD5DAD7C7B4A8), UINT64_C(0x02966500C48498D3),
+ UINT64_C(0x03D763E4EB3C2E33), UINT64_C(0x027FAC6AFEDC5F61),
+ UINT64_C(0x0074EA2C83E52FE7) } },
+ { { UINT64_C(0x01DB9F78868172DA), UINT64_C(0x0100A5C0A0C25D2E),
+ UINT64_C(0x023587D7C3E66CE7), UINT64_C(0x0234D19B042FCCD7),
+ UINT64_C(0x0059721B0F60680E), UINT64_C(0x03A0B2DF23AB3A42),
+ UINT64_C(0x0177AFB700329CAC), UINT64_C(0x03D5A5CFAF392AE7),
+ UINT64_C(0x00CF59BC96ECDBA2) },
+ { UINT64_C(0x03CE38933BF1C993), UINT64_C(0x0388C35CC45F89F5),
+ UINT64_C(0x039286D1ED3DB46C), UINT64_C(0x0061947308D0F830),
+ UINT64_C(0x0307100E3F7C9C8E), UINT64_C(0x00967048E8CC7CC9),
+ UINT64_C(0x03CAD0590370F457), UINT64_C(0x0110D9420ECE3996),
+ UINT64_C(0x009955E94586B830) } },
+ { { UINT64_C(0x03B6822745F0E5DA), UINT64_C(0x03120B5D07E9C6A5),
+ UINT64_C(0x01F88B173B2A0839), UINT64_C(0x0245CA639869EE96),
+ UINT64_C(0x0199F585B26F8120), UINT64_C(0x01D2153C5D41B782),
+ UINT64_C(0x009EAD730F2E3B2D), UINT64_C(0x007E27FEF3F3388E),
+ UINT64_C(0x01DD0BBF32960B2B) },
+ { UINT64_C(0x0298F45E5931C0F0), UINT64_C(0x012A6F48D3898EAD),
+ UINT64_C(0x01EFD537B310CFED), UINT64_C(0x030390CD48666C4B),
+ UINT64_C(0x01DCF41DD16073BB), UINT64_C(0x035CF923EABD525A),
+ UINT64_C(0x00DDF48F41B47311), UINT64_C(0x0316E0000BFFF7E2),
+ UINT64_C(0x003C6A0632821286) } },
+ { { UINT64_C(0x006FA434852228CC), UINT64_C(0x03EE279533E093C6),
+ UINT64_C(0x03C215EE36B974E7), UINT64_C(0x02FA330552481892),
+ UINT64_C(0x01ABFC67F3C2F700), UINT64_C(0x000945F47832719D),
+ UINT64_C(0x01BA378921E29D68), UINT64_C(0x0364936B83B66609),
+ UINT64_C(0x0137B7B2011DE260) },
+ { UINT64_C(0x00A7EBAC8BA1E090), UINT64_C(0x0343E15BB9BADFCE),
+ UINT64_C(0x01C5AFA1059527D8), UINT64_C(0x039CE94C694D78AB),
+ UINT64_C(0x020EE7FF8C758AFB), UINT64_C(0x03859CF409F61041),
+ UINT64_C(0x033F2682BABD9F38), UINT64_C(0x0344ED7AA22D40CE),
+ UINT64_C(0x00C59BE4543774E1) } },
+ { { UINT64_C(0x01B5777A8F1CAC2C), UINT64_C(0x001A1BB0AB5E6822),
+ UINT64_C(0x011BC043646DAF27), UINT64_C(0x03F711C68F6A2900),
+ UINT64_C(0x001C279115DF5830), UINT64_C(0x017D6649CFD4D909),
+ UINT64_C(0x02270B8E48C4FC60), UINT64_C(0x01D402B5FB5683E0),
+ UINT64_C(0x001F8DB87807BBF7) },
+ { UINT64_C(0x00C9DAC0A9244F78), UINT64_C(0x02B03A3698AE7AB0),
+ UINT64_C(0x02CCF3FF50BC045B), UINT64_C(0x03BCD2148E821FFF),
+ UINT64_C(0x035E87616BD7E71C), UINT64_C(0x034B54F4034B6093),
+ UINT64_C(0x02C5BEA4BCD01770), UINT64_C(0x0219F4B5BD513DB4),
+ UINT64_C(0x01DF5AC58C13B575) } },
+ },
+ {
+ { { UINT64_C(0x019885D110E10587), UINT64_C(0x0225E6982614E90C),
+ UINT64_C(0x03FE389B08EF52DA), UINT64_C(0x02986A5F6773FA41),
+ UINT64_C(0x02D7E3FB92A3A338), UINT64_C(0x02804DB8E96B46A6),
+ UINT64_C(0x02ED29A77A3BFC07), UINT64_C(0x021EDA658D1622A9),
+ UINT64_C(0x00DC41F148BEEF47) },
+ { UINT64_C(0x00671195EBF698BD), UINT64_C(0x02DA5978A5D3B8AE),
+ UINT64_C(0x0067084C20702323), UINT64_C(0x01BAE92F07B45047),
+ UINT64_C(0x01EECFF9A6840B39), UINT64_C(0x00B5A0A6F615E949),
+ UINT64_C(0x02CE02C0AFAD4F4D), UINT64_C(0x02CCCE13BD8C56FD),
+ UINT64_C(0x001BC38FE857CCC6) } },
+ { { UINT64_C(0x00081356B6965640), UINT64_C(0x006CE26431E83C07),
+ UINT64_C(0x01BA4874007EE7A0), UINT64_C(0x02537377BE8BDCBF),
+ UINT64_C(0x0248DB2FA66BD85D), UINT64_C(0x028C676B603EF79F),
+ UINT64_C(0x011FB7160B2BE1C4), UINT64_C(0x02E60E65885FEFB9),
+ UINT64_C(0x012B85F1B13BE0ED) },
+ { UINT64_C(0x0353AA14ECFB1D0D), UINT64_C(0x01FF0DDD82885F37),
+ UINT64_C(0x0331E99B56FBDDD7), UINT64_C(0x03AEB28F8419966F),
+ UINT64_C(0x021F907EA8D0F042), UINT64_C(0x013BD7D21430856E),
+ UINT64_C(0x0386870C6BB892CA), UINT64_C(0x03E04B0EFADCEFFA),
+ UINT64_C(0x007C04B740BD4123) } },
+ { { UINT64_C(0x0003B2CD3E0BF039), UINT64_C(0x00C735DA6B8581E9),
+ UINT64_C(0x0012D9341E1131F3), UINT64_C(0x03D2B2BBE7116022),
+ UINT64_C(0x00A056CCF73BDC37), UINT64_C(0x027C9AA3BBBDE400),
+ UINT64_C(0x02165FF6E36E8907), UINT64_C(0x0139C88969C85A96),
+ UINT64_C(0x00C7B0F49EEA4A8D) },
+ { UINT64_C(0x01F03CD678EAF6EB), UINT64_C(0x01BF3F1E8FBD78DF),
+ UINT64_C(0x00857FD3BFA434E9), UINT64_C(0x008641B0E586D15E),
+ UINT64_C(0x021227FC18AF0795), UINT64_C(0x022F892EEA381B7A),
+ UINT64_C(0x00B3FA1F0F06E680), UINT64_C(0x01EAB02BC55C4EE1),
+ UINT64_C(0x01116BB9BA45D30F) } },
+ { { UINT64_C(0x03B557A9EDCBF5E2), UINT64_C(0x00B1DFD3ECC7A54C),
+ UINT64_C(0x02DCE258E5A7E8D4), UINT64_C(0x00CA7703C434FC01),
+ UINT64_C(0x038801282507AB56), UINT64_C(0x025FD9FA5A9E7C74),
+ UINT64_C(0x0084D0CBBC9F71D9), UINT64_C(0x00D621CCEBB93EC1),
+ UINT64_C(0x007E0D7D26AF06B2) },
+ { UINT64_C(0x02584763447D2B4B), UINT64_C(0x00E02402AF814CEB),
+ UINT64_C(0x01A0946A66DEBE3C), UINT64_C(0x025BDCD462246772),
+ UINT64_C(0x032E9062B0C5E215), UINT64_C(0x037BCF49D9FBECDC),
+ UINT64_C(0x001F56138C539278), UINT64_C(0x000AEA3CABF951BB),
+ UINT64_C(0x007AA80F0C621590) } },
+ { { UINT64_C(0x00B8EEBBBD959BD9), UINT64_C(0x001BE3997D083340),
+ UINT64_C(0x01B3F063154C5C54), UINT64_C(0x0258C476F7A9A983),
+ UINT64_C(0x0042A485E75D36E5), UINT64_C(0x034928BB28AF526A),
+ UINT64_C(0x01BA009661FE033D), UINT64_C(0x039E10035E2FEDA5),
+ UINT64_C(0x01AFFCC1198129AF) },
+ { UINT64_C(0x030AD5348384E611), UINT64_C(0x01579499B7C9277C),
+ UINT64_C(0x01969EE33931346F), UINT64_C(0x025C5C1EBDB572DA),
+ UINT64_C(0x033A65D217266A39), UINT64_C(0x026F0D4AD6360EAB),
+ UINT64_C(0x037599346289BDA2), UINT64_C(0x0092404E9E02CE9C),
+ UINT64_C(0x01D0C694EC0434A7) } },
+ { { UINT64_C(0x0099723AA10FBD04), UINT64_C(0x03F7E7474E4B9E21),
+ UINT64_C(0x03ECBDF12C367638), UINT64_C(0x009B6D83C1B5EFBE),
+ UINT64_C(0x03E6CE2FC3522A5D), UINT64_C(0x0083A6DEF388FDCF),
+ UINT64_C(0x0001D8542F4EA36B), UINT64_C(0x035D032BD68C8381),
+ UINT64_C(0x0131DF4BF7A79938) },
+ { UINT64_C(0x008A14C7B9493BE8), UINT64_C(0x0273BD54452391FF),
+ UINT64_C(0x035758B804AAD2E8), UINT64_C(0x0218D8B66AABA8CD),
+ UINT64_C(0x0013BC5120CE58B7), UINT64_C(0x027C6BF5C3CF36BB),
+ UINT64_C(0x0325B4A1E773C0D4), UINT64_C(0x01C2F7A449EA2D3B),
+ UINT64_C(0x01C6E6D30CAF29F6) } },
+ { { UINT64_C(0x0321B0EB2DAA2FB7), UINT64_C(0x001AF441996ABD26),
+ UINT64_C(0x0075B82E9704E625), UINT64_C(0x00FD42C4DDFBEF6D),
+ UINT64_C(0x0199707C61408809), UINT64_C(0x017F62CF54E5FBA8),
+ UINT64_C(0x03E8914D3356B6E7), UINT64_C(0x010B415870E01C17),
+ UINT64_C(0x01B8D0304825F773) },
+ { UINT64_C(0x01AA92433FDAA949), UINT64_C(0x01186BD47A9D105F),
+ UINT64_C(0x03D995A63573F12F), UINT64_C(0x032129C097A55B0D),
+ UINT64_C(0x01817B31A05D6C77), UINT64_C(0x03D1CAF9B4BCAF81),
+ UINT64_C(0x01524CCC3B01B281), UINT64_C(0x0296DAA6FDAA7E18),
+ UINT64_C(0x002F1DC74BE29F0C) } },
+ { { UINT64_C(0x02171F9BDC8D6167), UINT64_C(0x03D306F736B287BD),
+ UINT64_C(0x021943224F5B91BE), UINT64_C(0x02B6BA63BB681A7A),
+ UINT64_C(0x003527F99B16E603), UINT64_C(0x00CC933DC7095468),
+ UINT64_C(0x0265D81677BFCEEF), UINT64_C(0x028AA225CE78ABEA),
+ UINT64_C(0x00837C63F321EE01) },
+ { UINT64_C(0x00A4B775684BF04E), UINT64_C(0x00AB33042AB3CA3F),
+ UINT64_C(0x019796F5B70DA12B), UINT64_C(0x00CD06B6726983AD),
+ UINT64_C(0x002698B98D097375), UINT64_C(0x03BB3A2632FF6007),
+ UINT64_C(0x00B02BB6915F2608), UINT64_C(0x0267E64CB1F79BA2),
+ UINT64_C(0x01DAB183858DB0F4) } },
+ { { UINT64_C(0x01D545A21757C756), UINT64_C(0x001D934F1E31FF52),
+ UINT64_C(0x023B0285CE4B1861), UINT64_C(0x031354B83A06220D),
+ UINT64_C(0x017177FFE06AFE14), UINT64_C(0x019E6D07584A960E),
+ UINT64_C(0x0119B9405A4BEA49), UINT64_C(0x019D70486EC70531),
+ UINT64_C(0x00D7844A95DDF521) },
+ { UINT64_C(0x02045C5C7288CF7B), UINT64_C(0x00677CB68405B1B1),
+ UINT64_C(0x01845055E3EA0793), UINT64_C(0x035EFB9C55059FBD),
+ UINT64_C(0x038843F3AF91E7EA), UINT64_C(0x00822747CA170235),
+ UINT64_C(0x037B132A90F3A94C), UINT64_C(0x00526CF439B472A8),
+ UINT64_C(0x00132F18D93B62FB) } },
+ { { UINT64_C(0x01D84FC9D0CF69E7), UINT64_C(0x006503AA38D2A5EE),
+ UINT64_C(0x03A94DFC118DD98F), UINT64_C(0x03B7F19AE7F392FF),
+ UINT64_C(0x007287A7DC1849A3), UINT64_C(0x00067A7B188F6CE5),
+ UINT64_C(0x02A347BDE0D7D087), UINT64_C(0x0268E88CC6AAFE02),
+ UINT64_C(0x010F44A365B11B99) },
+ { UINT64_C(0x018F73AC92AE7427), UINT64_C(0x0371CC00B812BB06),
+ UINT64_C(0x0093D3088101FF62), UINT64_C(0x00C8613B7544141B),
+ UINT64_C(0x01AF7C6201945AC7), UINT64_C(0x030C7CA555FE097F),
+ UINT64_C(0x025B2E6EDA00AB31), UINT64_C(0x0214A3B6A76443D0),
+ UINT64_C(0x0040A360259C7CDD) } },
+ { { UINT64_C(0x006047E27F3DE4D2), UINT64_C(0x01FC4A47DA6A0A53),
+ UINT64_C(0x015A543BD0BC352A), UINT64_C(0x014AACDA98A2B65E),
+ UINT64_C(0x036FE6BD165C71A3), UINT64_C(0x02DF772BAC823A1F),
+ UINT64_C(0x00416598B2CD1443), UINT64_C(0x032CA3B1D0CAEDD0),
+ UINT64_C(0x0032FB284CCCEF17) },
+ { UINT64_C(0x006DC83E96A2607F), UINT64_C(0x013B7280B80B6341),
+ UINT64_C(0x004551B88CA47813), UINT64_C(0x01849A56EE6AB37F),
+ UINT64_C(0x00C3074BC3D0074A), UINT64_C(0x0049915404661EF6),
+ UINT64_C(0x017F0B8543807006), UINT64_C(0x01235802E0AA61E9),
+ UINT64_C(0x016866C456C5454B) } },
+ { { UINT64_C(0x0397A466381DC2A6), UINT64_C(0x00CD4D54FE413A43),
+ UINT64_C(0x0320035D8FD47311), UINT64_C(0x03FEF7B90109A77E),
+ UINT64_C(0x01FF2C161A6CFCBA), UINT64_C(0x014089BF152955D6),
+ UINT64_C(0x00595A7ADB79909F), UINT64_C(0x02E10BC4FB022F89),
+ UINT64_C(0x012739D14BF39AB2) },
+ { UINT64_C(0x03045804E123BA29), UINT64_C(0x037196AFA31BDBE1),
+ UINT64_C(0x01A3BADADE7D8795), UINT64_C(0x005FE72D3736F1F7),
+ UINT64_C(0x00B261A79C9F5DAE), UINT64_C(0x00CC055F3C4A27EA),
+ UINT64_C(0x018DD7C9E5958FC2), UINT64_C(0x0096748344CCC75E),
+ UINT64_C(0x0065ADD88400A218) } },
+ { { UINT64_C(0x033557744356B52C), UINT64_C(0x03DD368D0EA0209F),
+ UINT64_C(0x02EA630FD3CCDE4D), UINT64_C(0x037A07B902382B40),
+ UINT64_C(0x000B7AF2CF41C092), UINT64_C(0x0221D85556DCC533),
+ UINT64_C(0x03C92114F14EA6E1), UINT64_C(0x006813B827858B16),
+ UINT64_C(0x011933B0203B754D) },
+ { UINT64_C(0x03A2396D5A659158), UINT64_C(0x0350A8E07708486E),
+ UINT64_C(0x0306EEBAE2B49C8B), UINT64_C(0x00EC9E65F76A5B29),
+ UINT64_C(0x03CECDD7F9A47F6A), UINT64_C(0x024DB8B97AA04533),
+ UINT64_C(0x028D089D2C8EBEAE), UINT64_C(0x01959F5D1CB2E7ED),
+ UINT64_C(0x0024A23BD4403D34) } },
+ { { UINT64_C(0x038B31C4EED9CDF5), UINT64_C(0x0185AFF2C98A930A),
+ UINT64_C(0x0245E4B7D7DD3E7E), UINT64_C(0x00232AA32609076B),
+ UINT64_C(0x023F2A9E6F982A24), UINT64_C(0x03087A8E3FF2F39E),
+ UINT64_C(0x02F6CA050121ACCC), UINT64_C(0x03568930B3D90B8C),
+ UINT64_C(0x01C922F3A5335B36) },
+ { UINT64_C(0x032AD6EEE92B1FE6), UINT64_C(0x02FC436D7BD6B2C7),
+ UINT64_C(0x023EDD35035286A3), UINT64_C(0x003D77B6144EB9BC),
+ UINT64_C(0x0304C9A105C2BAEE), UINT64_C(0x01ADB987C7CA786C),
+ UINT64_C(0x0132676ADD1D742E), UINT64_C(0x02A9E9CB749E88B9),
+ UINT64_C(0x00A99A53E3A5AC0A) } },
+ { { UINT64_C(0x03639306E80DE633), UINT64_C(0x01AB767B97949EED),
+ UINT64_C(0x006F4BAA789B6820), UINT64_C(0x039D5F497550BD7A),
+ UINT64_C(0x00B4B2B380BC772D), UINT64_C(0x03022AD28F3A1DD0),
+ UINT64_C(0x0017950F61ACF7EB), UINT64_C(0x019CAC6E06DC1B93),
+ UINT64_C(0x008470E16670F97A) },
+ { UINT64_C(0x03C11D39EE5D0D74), UINT64_C(0x01C090F08CC26FEC),
+ UINT64_C(0x0006AD970C46C574), UINT64_C(0x015907C555DF013E),
+ UINT64_C(0x0070AB35D20A91F0), UINT64_C(0x00C0481F822220A4),
+ UINT64_C(0x03A92E8B413E83FE), UINT64_C(0x00C3982C5F8D922E),
+ UINT64_C(0x017CB1B97D4ED7B4) } },
+ { { UINT64_C(0x0057D40664DA7708), UINT64_C(0x00D1DC31FC3ED514),
+ UINT64_C(0x01C1C72DE7D6ECFF), UINT64_C(0x00DAEABFA1F9C5DE),
+ UINT64_C(0x0027EE8200E32455), UINT64_C(0x00F2A2064D51F4F3),
+ UINT64_C(0x0087C336FD335B37), UINT64_C(0x0350C7F9A0D4FC4D),
+ UINT64_C(0x01D53465439099CD) },
+ { UINT64_C(0x01B27DD4E9031706), UINT64_C(0x0197F1275CBBB42C),
+ UINT64_C(0x015ABB1962BC7CE5), UINT64_C(0x015AEBA4FCC2D21C),
+ UINT64_C(0x01DB34AC91849D8B), UINT64_C(0x02168D50E8D52313),
+ UINT64_C(0x024C7BCFFA60FB49), UINT64_C(0x00653790EC4A5122),
+ UINT64_C(0x0021ECA115250E74) } },
+ },
+ {
+ { { UINT64_C(0x01017ED5F1C86157), UINT64_C(0x01C5FACEAAF3291A),
+ UINT64_C(0x01980E57AC2978AD), UINT64_C(0x012E4C78C1EF8537),
+ UINT64_C(0x019080B37DC2F0DA), UINT64_C(0x0104D379379FF55E),
+ UINT64_C(0x0019CF345BF6F641), UINT64_C(0x01CE7973781C9EB0),
+ UINT64_C(0x00E6B4E5C2E7863E) },
+ { UINT64_C(0x014E085628E15F36), UINT64_C(0x03113ED189D82402),
+ UINT64_C(0x0198521CB21CCF92), UINT64_C(0x03CB794E55F64866),
+ UINT64_C(0x01B6C417EBCEDCD4), UINT64_C(0x001D79C7600B1BE5),
+ UINT64_C(0x02EC6810EA41A2B6), UINT64_C(0x0083606535BEC6E7),
+ UINT64_C(0x01CA8E7CD41F2E03) } },
+ { { UINT64_C(0x01BA87BAF1C9C2EC), UINT64_C(0x00D55499AAADC0DE),
+ UINT64_C(0x019712C990B590E5), UINT64_C(0x00384B1ACA78C747),
+ UINT64_C(0x03563BCAB01E0B5D), UINT64_C(0x0190C274005354FF),
+ UINT64_C(0x00B9D6C425986F2F), UINT64_C(0x038E491D7F2754C6),
+ UINT64_C(0x01B202739C50FF59) },
+ { UINT64_C(0x03F58DFC16F1CACC), UINT64_C(0x00EE939AC23381A2),
+ UINT64_C(0x020399FE184301C9), UINT64_C(0x0351F7998C95E6D7),
+ UINT64_C(0x03713D0FEFC9F67B), UINT64_C(0x02651504977BC9CC),
+ UINT64_C(0x039962831BD8B37B), UINT64_C(0x03398A2CADA7CFCE),
+ UINT64_C(0x00D4F08A7E5A3118) } },
+ { { UINT64_C(0x03C9826425A2D6F0), UINT64_C(0x00ECC054CD119CA9),
+ UINT64_C(0x00C8AF9373A85F21), UINT64_C(0x03167F72CB478C61),
+ UINT64_C(0x01CE9F2616361F7A), UINT64_C(0x03FB08CCEB9E536B),
+ UINT64_C(0x0319FD98C00E9131), UINT64_C(0x0010725A47005067),
+ UINT64_C(0x01D7C9A8F84C990D) },
+ { UINT64_C(0x029CA261BAF35FA1), UINT64_C(0x0220865C1BFEF071),
+ UINT64_C(0x0115DF412660A5A4), UINT64_C(0x02257646F5EF524C),
+ UINT64_C(0x019648D3BF5907D4), UINT64_C(0x03B8287D6BB4E923),
+ UINT64_C(0x00C1831BA518EF96), UINT64_C(0x01147F1EC444000D),
+ UINT64_C(0x001BEB2743E8CF72) } },
+ { { UINT64_C(0x017385BC9719C87C), UINT64_C(0x038E9A8AC23E84A0),
+ UINT64_C(0x03B86FA4168B29E6), UINT64_C(0x0259140D286A2701),
+ UINT64_C(0x0248D5F9426712B4), UINT64_C(0x01E876B4EE205101),
+ UINT64_C(0x016F0D598FB30248), UINT64_C(0x020D4EEE450E3327),
+ UINT64_C(0x0075F0EB2FEC4E8C) },
+ { UINT64_C(0x02999066B392D834), UINT64_C(0x03A4F34FCBCA75D9),
+ UINT64_C(0x029F3E28ABFA2CC4), UINT64_C(0x0207E1A7B58B1513),
+ UINT64_C(0x036C4EE93B0C1C40), UINT64_C(0x038D0C53869B6127),
+ UINT64_C(0x02203321AF3FCDF2), UINT64_C(0x0016E986CD98C912),
+ UINT64_C(0x019AB5DBF8618B76) } },
+ { { UINT64_C(0x02775F5E811FA55B), UINT64_C(0x002FF97CDF8F7EDE),
+ UINT64_C(0x00AA05F646486F8F), UINT64_C(0x0357ABB8FF5CB222),
+ UINT64_C(0x0047A8176117A59D), UINT64_C(0x01ED8538F6CBC1A6),
+ UINT64_C(0x0209FE9034A7F53F), UINT64_C(0x0364120EC4B9D3CF),
+ UINT64_C(0x019B67A37C660EDC) },
+ { UINT64_C(0x0038B0D828C7A5B7), UINT64_C(0x015D9C74EAC7C806),
+ UINT64_C(0x0118152AAA9222B5), UINT64_C(0x01B83339A6AA2783),
+ UINT64_C(0x01993B4601A314EF), UINT64_C(0x0325A7A416B3D288),
+ UINT64_C(0x019D7FD16DD01F3A), UINT64_C(0x021D190386BFFC60),
+ UINT64_C(0x011CF2C0B0E2A983) } },
+ { { UINT64_C(0x00D7DE7D18D8BE36), UINT64_C(0x02F0734BAAC04BF5),
+ UINT64_C(0x0048BB9E44C3F40B), UINT64_C(0x035994B7094672F1),
+ UINT64_C(0x02BD0CFD78BD4138), UINT64_C(0x0015A28B8F06A61A),
+ UINT64_C(0x014D5DF2A7F95274), UINT64_C(0x028141F42EAB92B1),
+ UINT64_C(0x00B25EF25C149754) },
+ { UINT64_C(0x0057378C324BFA00), UINT64_C(0x001F4C62175258AF),
+ UINT64_C(0x03153B4FD5FCA3E4), UINT64_C(0x000682DC5C05BE3E),
+ UINT64_C(0x0330954DA1D1973A), UINT64_C(0x01BC1D711118932D),
+ UINT64_C(0x0168D97A2A9692FD), UINT64_C(0x012BBEB288330777),
+ UINT64_C(0x00E133BE00A38BE4) } },
+ { { UINT64_C(0x03F431A945F8022D), UINT64_C(0x01CDF8AABB4F5212),
+ UINT64_C(0x02CC1D637215E00A), UINT64_C(0x03D36BA40B447ED7),
+ UINT64_C(0x02513AB7E6956FDD), UINT64_C(0x008D5E83EDDB9727),
+ UINT64_C(0x01B75785B4FDC3C7), UINT64_C(0x01EAB35E8B3CAE24),
+ UINT64_C(0x01339E1C87AA8ECC) },
+ { UINT64_C(0x02D325A33450FD39), UINT64_C(0x00322202FEDA09D5),
+ UINT64_C(0x024827340C12DF41), UINT64_C(0x01E66CCCF20D3B06),
+ UINT64_C(0x02001372B74C978F), UINT64_C(0x012C696C6F55CD58),
+ UINT64_C(0x02D10F2EED8A9308), UINT64_C(0x02688747F53110D6),
+ UINT64_C(0x0188C13D0F26D624) } },
+ { { UINT64_C(0x0239E7FBF9FFF942), UINT64_C(0x024391DE07C9C0A8),
+ UINT64_C(0x03BB90544685654F), UINT64_C(0x010453EE881DA06B),
+ UINT64_C(0x02D2A672E21ACDCD), UINT64_C(0x0047CF596F209D90),
+ UINT64_C(0x0321D4C73047EE1B), UINT64_C(0x008011F4FFA1ADC5),
+ UINT64_C(0x0051B7DD6F083F62) },
+ { UINT64_C(0x00B4E0D173BF30CF), UINT64_C(0x0142CF0DBD8DD71C),
+ UINT64_C(0x02FE7953062D3E36), UINT64_C(0x02A5AB5A7D6604A9),
+ UINT64_C(0x03CC08A13AACC423), UINT64_C(0x024662C655FF1A2F),
+ UINT64_C(0x0179D6E29B6B1FCA), UINT64_C(0x03C8D9EF4E5B76E6),
+ UINT64_C(0x00CD341C315CEB11) } },
+ { { UINT64_C(0x00CC4030AC8B2AF6), UINT64_C(0x016D6A39FA7E9D4C),
+ UINT64_C(0x0392D441BAE14C3A), UINT64_C(0x038840FEA9B7D65B),
+ UINT64_C(0x02398CE4933605AF), UINT64_C(0x022CD8745AC294D0),
+ UINT64_C(0x00B8391D34172B85), UINT64_C(0x035C1A0D5C360EA4),
+ UINT64_C(0x00B2CE02EA54ADC4) },
+ { UINT64_C(0x004B32E432779E4D), UINT64_C(0x0396A43E6B80B056),
+ UINT64_C(0x035AEFC64CE26A3C), UINT64_C(0x01E9181F393D3B2C),
+ UINT64_C(0x0224B7B616D6F2A9), UINT64_C(0x0127AF2D0AF23C91),
+ UINT64_C(0x000AD7965D20EADA), UINT64_C(0x0379FD4481124D87),
+ UINT64_C(0x01BB6F3DFED6FF8E) } },
+ { { UINT64_C(0x001E54056209B80C), UINT64_C(0x01535B3A19C72F26),
+ UINT64_C(0x0160AA689BA423E2), UINT64_C(0x0188ECB5D9CC3A27),
+ UINT64_C(0x02349FCF75CC0736), UINT64_C(0x0298585615D70FD1),
+ UINT64_C(0x03A32918B91165DF), UINT64_C(0x022291948224D8DA),
+ UINT64_C(0x0099F8E69358E726) },
+ { UINT64_C(0x01F00247AE9F76E1), UINT64_C(0x0128BAD6165EB802),
+ UINT64_C(0x01B045052E08E61D), UINT64_C(0x032D595886F8C4D8),
+ UINT64_C(0x00186E393A2F7214), UINT64_C(0x016991BB5064F4DD),
+ UINT64_C(0x02AD9C4CF5574CEF), UINT64_C(0x0255AD5071D22CCE),
+ UINT64_C(0x01456916FD8D5687) } },
+ { { UINT64_C(0x0133F0C2BD45283F), UINT64_C(0x01B7E6242FDEFD97),
+ UINT64_C(0x035D6B97C76FCAF7), UINT64_C(0x01DEAC7652ACAD19),
+ UINT64_C(0x03C4E3BEA33C8BB3), UINT64_C(0x0217A37165F99AD5),
+ UINT64_C(0x0269B9B99EC2F11A), UINT64_C(0x028A7868FC6E7D80),
+ UINT64_C(0x01D15668B929808B) },
+ { UINT64_C(0x028D12F5F8D82B0E), UINT64_C(0x03E7880D363FAA5E),
+ UINT64_C(0x00437A04942C06CB), UINT64_C(0x0049CD3A9C99AEE3),
+ UINT64_C(0x015E2D9B6B404613), UINT64_C(0x0162924B16171DEA),
+ UINT64_C(0x00D5B19300B07C85), UINT64_C(0x02FDE0650EE6F8B2),
+ UINT64_C(0x00BB3143583D139C) } },
+ { { UINT64_C(0x009BBB9CD613AC50), UINT64_C(0x0128ACBF00659E30),
+ UINT64_C(0x003847B178A6C039), UINT64_C(0x03CE96D95CB2F3AB),
+ UINT64_C(0x0319F2188F1C72FB), UINT64_C(0x0082FCC27E7E96A0),
+ UINT64_C(0x00E32363BCE8DAB7), UINT64_C(0x0014FD07C4ADAC1E),
+ UINT64_C(0x0130440FC8AE58D8) },
+ { UINT64_C(0x0065ADF64359ED2E), UINT64_C(0x037ED7D5FA4BC647),
+ UINT64_C(0x03FF76F3555C909F), UINT64_C(0x03512196FF57D59B),
+ UINT64_C(0x00299F8EAAC04382), UINT64_C(0x0329BF8D6A784DA0),
+ UINT64_C(0x0175E680B9D87F6E), UINT64_C(0x000779614D617559),
+ UINT64_C(0x0091C31FD7BBAA02) } },
+ { { UINT64_C(0x007961B4B2C087ED), UINT64_C(0x019162C863ECAFF8),
+ UINT64_C(0x02BAA68FEDC62170), UINT64_C(0x00E14BEB5E7390A9),
+ UINT64_C(0x014BD12090B0D96E), UINT64_C(0x01E7BB1B54107513),
+ UINT64_C(0x023B8205C7A4AC9C), UINT64_C(0x0077AA83FD6A3B9F),
+ UINT64_C(0x00B556918DDE426E) },
+ { UINT64_C(0x007982C0406E7D53), UINT64_C(0x00514C5527392914),
+ UINT64_C(0x030F83C68AD1F365), UINT64_C(0x01248844664ABB22),
+ UINT64_C(0x00E9372C39E53CD3), UINT64_C(0x019288EBDD26390E),
+ UINT64_C(0x0175B25020B2C5E2), UINT64_C(0x01BE6F3235A8D35E),
+ UINT64_C(0x01BF2B1514039839) } },
+ { { UINT64_C(0x00ACAC37A302E505), UINT64_C(0x027765CE9E34F2E4),
+ UINT64_C(0x02EC67D63AAF96D8), UINT64_C(0x000F998F38DDD8C4),
+ UINT64_C(0x01F09C36E648CC10), UINT64_C(0x00F522A0C94D1ACD),
+ UINT64_C(0x01621C139782CB28), UINT64_C(0x002ADC14FDA30F4F),
+ UINT64_C(0x000AFE14E60E403A) },
+ { UINT64_C(0x03F6E66F873938D8), UINT64_C(0x008370549C4A240B),
+ UINT64_C(0x019BCDB6FBB27AB2), UINT64_C(0x03968D48A1554399),
+ UINT64_C(0x02AE029F24D2343E), UINT64_C(0x008518D4096DF4BA),
+ UINT64_C(0x011410655CE49E44), UINT64_C(0x030585BCC07AC55D),
+ UINT64_C(0x00DBC52BEF1D2C2E) } },
+ { { UINT64_C(0x031E0D6D77452267), UINT64_C(0x02FDA38F6A949512),
+ UINT64_C(0x01F65ED3128F260F), UINT64_C(0x0268DE30B333E479),
+ UINT64_C(0x03FD84E6AC2E676C), UINT64_C(0x0393B320720BDA53),
+ UINT64_C(0x009EDD5FCCBB47ED), UINT64_C(0x01B82B4900272372),
+ UINT64_C(0x01D21A307BE4561F) },
+ { UINT64_C(0x01FB6C41FDBC2674), UINT64_C(0x02FC0F6001620C6D),
+ UINT64_C(0x009450A0F3C6CB0F), UINT64_C(0x015385B69A47DECA),
+ UINT64_C(0x026E2296F08B9474), UINT64_C(0x0194DEC7BE891DCB),
+ UINT64_C(0x008B5DA06C5F46EF), UINT64_C(0x019F5A58030A2A18),
+ UINT64_C(0x00207771A8172F5B) } },
+ { { UINT64_C(0x02D0EED2AA2FCC67), UINT64_C(0x028799FC7DD58724),
+ UINT64_C(0x01664BF5933707D3), UINT64_C(0x039B5E487A0167D1),
+ UINT64_C(0x02767C865F544F76), UINT64_C(0x012879933B9C8060),
+ UINT64_C(0x03EBB40C5524547A), UINT64_C(0x0173A7851D6D690E),
+ UINT64_C(0x01CF4AB59422F25D) },
+ { UINT64_C(0x02E0C44B926C197B), UINT64_C(0x021DCFA310FAD65B),
+ UINT64_C(0x03309DFCCBCED9CA), UINT64_C(0x02A11F05E3D88EA0),
+ UINT64_C(0x039FE02B0CE3AE95), UINT64_C(0x023B5E3CAC5E3536),
+ UINT64_C(0x02C9903F85BF51A2), UINT64_C(0x018141A1EBBB4D03),
+ UINT64_C(0x01B6F9AE1517FBCC) } },
+ },
+ {
+ { { UINT64_C(0x01CE126EEC3D1383), UINT64_C(0x03E60292016C63B4),
+ UINT64_C(0x01086FC1B1F4E0C7), UINT64_C(0x02B824B832819651),
+ UINT64_C(0x018B5EE5C0AC1703), UINT64_C(0x03467EED60D31DFE),
+ UINT64_C(0x0370BD13E722F576), UINT64_C(0x01C406BA2A512BD9),
+ UINT64_C(0x00D7E1D110502A7C) },
+ { UINT64_C(0x02029FD2CA303000), UINT64_C(0x031CB26B2D4BB358),
+ UINT64_C(0x001AACC8DD8A2366), UINT64_C(0x02FD746E61373E27),
+ UINT64_C(0x01D1A80D5295C235), UINT64_C(0x01FA56B74D0D3443),
+ UINT64_C(0x0203660094D0A8F7), UINT64_C(0x006ACC0E24009F44),
+ UINT64_C(0x007532FAF2732979) } },
+ { { UINT64_C(0x00CC8937C5CFE5E0), UINT64_C(0x036CA3F94D098379),
+ UINT64_C(0x0127E76C1F2F6B01), UINT64_C(0x03F376385910CC44),
+ UINT64_C(0x005AE2B93F0F4F7C), UINT64_C(0x001F51D975E23E7E),
+ UINT64_C(0x0159FF4F64431F80), UINT64_C(0x0215FECEB62BCA1C),
+ UINT64_C(0x00168401E32600A7) },
+ { UINT64_C(0x01B5A301E78A8DB5), UINT64_C(0x00FF512D35D3F2D2),
+ UINT64_C(0x0354D19F77E5A97B), UINT64_C(0x0271EFC5E9AFD789),
+ UINT64_C(0x006980179F908FBC), UINT64_C(0x034A31A6FEF922C2),
+ UINT64_C(0x01832DCC33A8480C), UINT64_C(0x02589E9D28BAFB44),
+ UINT64_C(0x0115572B5F3957D4) } },
+ { { UINT64_C(0x02B1A9337E8401D3), UINT64_C(0x0290DCDD374D1722),
+ UINT64_C(0x03B06DFC52EC6DB8), UINT64_C(0x0230EA32F50E3F05),
+ UINT64_C(0x00FF74654453A452), UINT64_C(0x01A248F21E47C014),
+ UINT64_C(0x01E2CED97C15ABF4), UINT64_C(0x0283D12E9548735C),
+ UINT64_C(0x011DE7FF5CC44367) },
+ { UINT64_C(0x0397C8B2CA828FA8), UINT64_C(0x023C2C16EF221608),
+ UINT64_C(0x0079F7CCDCEE62D1), UINT64_C(0x02ABBC4A12FA2ABB),
+ UINT64_C(0x02D3E0D3AF058906), UINT64_C(0x016EE5FFCAFF1F4D),
+ UINT64_C(0x0383A01497A17543), UINT64_C(0x015456C9C2BA3AA0),
+ UINT64_C(0x00833A7F70B8DB1E) } },
+ { { UINT64_C(0x02874A121147F509), UINT64_C(0x00814720ED638371),
+ UINT64_C(0x03306823E9395088), UINT64_C(0x02A5E552F8389554),
+ UINT64_C(0x00F06CF7F0BA5751), UINT64_C(0x030415DEE1815B81),
+ UINT64_C(0x00E24A9DB057CA02), UINT64_C(0x0130F23B0BDFF500),
+ UINT64_C(0x00CD32356D2FBCF3) },
+ { UINT64_C(0x031835514BB690A0), UINT64_C(0x011475889E6369E4),
+ UINT64_C(0x02A366B8DA44B373), UINT64_C(0x01336BAE9A4C91D2),
+ UINT64_C(0x0321F6D6C8947D98), UINT64_C(0x0331E2910F0F8ECA),
+ UINT64_C(0x01F6B3937B0234FE), UINT64_C(0x016C792D27998656),
+ UINT64_C(0x009729CAFA8B37BB) } },
+ { { UINT64_C(0x030BF08BF55F34E4), UINT64_C(0x01472A877A6E6046),
+ UINT64_C(0x03502971975705FE), UINT64_C(0x00F5A66B1DDF090E),
+ UINT64_C(0x01DD9C80102CADCC), UINT64_C(0x004EB57A202D88C1),
+ UINT64_C(0x0383DED93A003D31), UINT64_C(0x00DF42EE4835E279),
+ UINT64_C(0x010B2A2DF2E8CDFC) },
+ { UINT64_C(0x00E3757112860379), UINT64_C(0x0049E41486F1D305),
+ UINT64_C(0x007F50407D2B699F), UINT64_C(0x0186CFF64543014A),
+ UINT64_C(0x015D637AD6EB6B8D), UINT64_C(0x03EDC1A07906ADD6),
+ UINT64_C(0x025B1CE8EFA6E451), UINT64_C(0x0281938DC6CCB3C0),
+ UINT64_C(0x01E95BF35241E85F) } },
+ { { UINT64_C(0x01900B5C8B1B724E), UINT64_C(0x00091B0E23027016),
+ UINT64_C(0x033EA7B567F8D8DD), UINT64_C(0x0149CA26370EF3C0),
+ UINT64_C(0x0224F7CCEEAEB621), UINT64_C(0x01056822C07633BE),
+ UINT64_C(0x02682C8A34D4C312), UINT64_C(0x017F1D80C56ACAFB),
+ UINT64_C(0x000D28BD510F85EC) },
+ { UINT64_C(0x0031C759D505A0E7), UINT64_C(0x00695B369E0D5C70),
+ UINT64_C(0x007414EC503E140D), UINT64_C(0x02998878F14B0559),
+ UINT64_C(0x03EB48B235BD02B9), UINT64_C(0x02030C241863472E),
+ UINT64_C(0x00302A0DF1BDB378), UINT64_C(0x02ADB25754F52D99),
+ UINT64_C(0x01EBEAF9E9BDE9AC) } },
+ { { UINT64_C(0x0016D2E6C4CB8040), UINT64_C(0x0251BE4AB3BBC8D1),
+ UINT64_C(0x00979A86B1EA6004), UINT64_C(0x03197F4F1967EFAE),
+ UINT64_C(0x03A8E572D3878481), UINT64_C(0x0175BC0B4A3D453E),
+ UINT64_C(0x0067A078B9E4BDD5), UINT64_C(0x00C290F9DB5CD51A),
+ UINT64_C(0x00C8A1050BE75174) },
+ { UINT64_C(0x0138FA01526AE111), UINT64_C(0x01E92EC50AC0E2D9),
+ UINT64_C(0x03430EFE4DD66F27), UINT64_C(0x027E3E362221AF89),
+ UINT64_C(0x0065DC30B6D8ED5E), UINT64_C(0x0194B4AA3299C658),
+ UINT64_C(0x03FCCBD1A1EE5AFC), UINT64_C(0x0011C786A00C112C),
+ UINT64_C(0x01770EC65BD04CBD) } },
+ { { UINT64_C(0x0219978F485193F0), UINT64_C(0x0169EF77837E1846),
+ UINT64_C(0x039A4F73B9DC8ADB), UINT64_C(0x0060DDE7E026EABA),
+ UINT64_C(0x033EDEE638C66335), UINT64_C(0x0296BFF6A6D575A3),
+ UINT64_C(0x01B793FCB261CF96), UINT64_C(0x00066B2DAA6E8B8E),
+ UINT64_C(0x00FAA4EE0DF08936) },
+ { UINT64_C(0x0082665D53161177), UINT64_C(0x00BF125BA82F6D39),
+ UINT64_C(0x022B5DABCDFDBE3B), UINT64_C(0x021CD6983941E0F2),
+ UINT64_C(0x010414D9EC902549), UINT64_C(0x03C8E709DAE4453B),
+ UINT64_C(0x03B39712A9467665), UINT64_C(0x01718D188F0108E5),
+ UINT64_C(0x0001E683E6E53299) } },
+ { { UINT64_C(0x026BEC9ED63E2975), UINT64_C(0x02445B0FA3670F21),
+ UINT64_C(0x01B0436EA7FA88A2), UINT64_C(0x01B3E0317834AC34),
+ UINT64_C(0x0370A51D7EBF7519), UINT64_C(0x028FE5E7A5374634),
+ UINT64_C(0x004F9C7DD9D61B9E), UINT64_C(0x024629F3A018136E),
+ UINT64_C(0x01B14207DD17A593) },
+ { UINT64_C(0x02B49CBF0B981980), UINT64_C(0x03D510AA4EE52E56),
+ UINT64_C(0x0223FC5E38C54336), UINT64_C(0x006CECAD3BD995A0),
+ UINT64_C(0x01C1E9CE9CFF80F2), UINT64_C(0x03F2A4F91A9DFFC4),
+ UINT64_C(0x023C10907D4D0C02), UINT64_C(0x0266DE5575DC75DB),
+ UINT64_C(0x00C42F22C54D0AE1) } },
+ { { UINT64_C(0x02CA7240C82B5AA4), UINT64_C(0x009FC67BD6157E6E),
+ UINT64_C(0x0237AEA0E986F61E), UINT64_C(0x0295536DA6F6D324),
+ UINT64_C(0x03CCCEAED7D090D6), UINT64_C(0x02AEB5185AD3ED8F),
+ UINT64_C(0x01709E10CC89909F), UINT64_C(0x02104E7DD9DB3C2E),
+ UINT64_C(0x018FBE92AA69FDDA) },
+ { UINT64_C(0x019CC5A0410AA767), UINT64_C(0x01BD2A1F9D7CB636),
+ UINT64_C(0x016925EEC5FA539B), UINT64_C(0x030EE211BCC86603),
+ UINT64_C(0x02286DD13B9B314D), UINT64_C(0x019EE14925C53864),
+ UINT64_C(0x03BA30594CCCD2C4), UINT64_C(0x03CF135ECF524017),
+ UINT64_C(0x009675B7F38F7A5F) } },
+ { { UINT64_C(0x034097FDD5C529C4), UINT64_C(0x022BABC53852C90D),
+ UINT64_C(0x005FA5449B2CFEAE), UINT64_C(0x0213E3712D2D891B),
+ UINT64_C(0x01EC7B3EEE99C138), UINT64_C(0x027C357D0B9CBABB),
+ UINT64_C(0x025A19E877887A6F), UINT64_C(0x00D4CD3E5DC97F03),
+ UINT64_C(0x01A0BD7971FE9BC8) },
+ { UINT64_C(0x01302079C035FA1B), UINT64_C(0x03A553C1D7472F9E),
+ UINT64_C(0x01A4254310460FA3), UINT64_C(0x00172E37209ED67F),
+ UINT64_C(0x01598766A435004B), UINT64_C(0x015F6DA2FE9089F7),
+ UINT64_C(0x03D7A8AD6610ED72), UINT64_C(0x00218A47CD395F7C),
+ UINT64_C(0x01CEBC586BD69C42) } },
+ { { UINT64_C(0x005E156C633E8718), UINT64_C(0x036F6921E8311E5A),
+ UINT64_C(0x012516B3E4747664), UINT64_C(0x016B6481265AF56F),
+ UINT64_C(0x005B9CA959873FB0), UINT64_C(0x01215A2E38706CDD),
+ UINT64_C(0x00C64AAAEE1FE5AB), UINT64_C(0x009494AE29DD5833),
+ UINT64_C(0x001DE0FFFA144A84) },
+ { UINT64_C(0x01AB0B04D7864A53), UINT64_C(0x03B6589B739D3720),
+ UINT64_C(0x0342AE6EE03B4D2D), UINT64_C(0x0366C4CD40B083D3),
+ UINT64_C(0x02E583D735216939), UINT64_C(0x028069A08705938A),
+ UINT64_C(0x03470E4558BB0247), UINT64_C(0x037269A3A352E23F),
+ UINT64_C(0x000A1B500F437A69) } },
+ { { UINT64_C(0x017C93D92A097CC4), UINT64_C(0x001BA88CC46C7150),
+ UINT64_C(0x01AE786C3A4D3E20), UINT64_C(0x028BF5869DC58997),
+ UINT64_C(0x02E52726A122777F), UINT64_C(0x00972F198872159B),
+ UINT64_C(0x02552DD5544B0BA5), UINT64_C(0x009FAC089C64945A),
+ UINT64_C(0x00A926F159FE26EE) },
+ { UINT64_C(0x003998CBAECC32F4), UINT64_C(0x01BD7CE18DCAAA28),
+ UINT64_C(0x00A1F5FB988BB383), UINT64_C(0x03AEB19DEFD835C2),
+ UINT64_C(0x00244E47BC8D865E), UINT64_C(0x0038157724E1BB10),
+ UINT64_C(0x007BD8BF38E25231), UINT64_C(0x00C5E24E2CD69DAB),
+ UINT64_C(0x01A779CC34494897) } },
+ { { UINT64_C(0x004BD43B7D176E2E), UINT64_C(0x005E93AB83087469),
+ UINT64_C(0x03E80C170CBB6730), UINT64_C(0x02CA4F7C8BEDBE63),
+ UINT64_C(0x02A85DD542AB5799), UINT64_C(0x0066D2B71D97D372),
+ UINT64_C(0x03558E6854EDDBC6), UINT64_C(0x01014B87714911B3),
+ UINT64_C(0x0150C0A4F996E45F) },
+ { UINT64_C(0x01E0E94EA8A05AA1), UINT64_C(0x02AFE47CFC92BB70),
+ UINT64_C(0x0203EC4D3CE6EAF1), UINT64_C(0x024771DB1D696301),
+ UINT64_C(0x0196D9AA529C496E), UINT64_C(0x03B56E31398127F0),
+ UINT64_C(0x0387E08D7862B4A2), UINT64_C(0x032941073AE64CE3),
+ UINT64_C(0x0000E769C78F3C16) } },
+ { { UINT64_C(0x034AFDE7FF46E9D5), UINT64_C(0x01174874945BB22A),
+ UINT64_C(0x0315AE08354CD33E), UINT64_C(0x020944101FCD5584),
+ UINT64_C(0x02AD3EF0CDDE6E15), UINT64_C(0x030A2698AB480B82),
+ UINT64_C(0x03BF15403C92749F), UINT64_C(0x025EFF1408AEDEF4),
+ UINT64_C(0x00853B2112F03584) },
+ { UINT64_C(0x017A76C60E367447), UINT64_C(0x031C3B84E9CFE4B6),
+ UINT64_C(0x0383807320E00DD1), UINT64_C(0x02152F5E5EE3BE00),
+ UINT64_C(0x035287A9CC92FA2D), UINT64_C(0x0007C4F52ABBB00A),
+ UINT64_C(0x006B2558DC7D9071), UINT64_C(0x0266DBFFAED357E3),
+ UINT64_C(0x007E76EA86C8A78C) } },
+ { { UINT64_C(0x00DA97D33D831A04), UINT64_C(0x0273CA87AB20DA80),
+ UINT64_C(0x004C77C7C118ED92), UINT64_C(0x00F87131473BDF57),
+ UINT64_C(0x036EC3E2E0DE7125), UINT64_C(0x00C7E8EADB491D0D),
+ UINT64_C(0x0299CB19B912B7BF), UINT64_C(0x0399A443D4E010F6),
+ UINT64_C(0x0098FCF8A99C2A16) },
+ { UINT64_C(0x030D9571D49B2FC3), UINT64_C(0x02127D20D334D6E9),
+ UINT64_C(0x00CF98756BB05081), UINT64_C(0x02A955A34EA7C78A),
+ UINT64_C(0x0099BBA4C82FA729), UINT64_C(0x03B80CA8EED74492),
+ UINT64_C(0x03A7668CD742B7C3), UINT64_C(0x039AA1A4CD0B2F61),
+ UINT64_C(0x01769BB74BE7BFCF) } },
+ },
+ {
+ { { UINT64_C(0x01AE6D7AF8ECE594), UINT64_C(0x004BD233382C1067),
+ UINT64_C(0x02FC7E73749707AD), UINT64_C(0x01A0C47D78BA765F),
+ UINT64_C(0x02BB7416407B8B16), UINT64_C(0x02F996A9035A29ED),
+ UINT64_C(0x01C78A5F9EA3DEA9), UINT64_C(0x03997AA8F9A04684),
+ UINT64_C(0x0062155AD4E50AC6) },
+ { UINT64_C(0x0136D4FEFEBBFAD7), UINT64_C(0x03C498A8C3B5B196),
+ UINT64_C(0x03AF4B2081A7DC94), UINT64_C(0x02FE1693A20D804F),
+ UINT64_C(0x0019DBDAD1684FFD), UINT64_C(0x03E47903EABFC90E),
+ UINT64_C(0x00EA7078F3484441), UINT64_C(0x037A0851741BD87B),
+ UINT64_C(0x004DEB7A4980ECBA) } },
+ { { UINT64_C(0x02A998A0008164D4), UINT64_C(0x014B73504FD3FC3A),
+ UINT64_C(0x00C19E4FF76A915D), UINT64_C(0x00D30C3B2FD0EC60),
+ UINT64_C(0x01518FD432879FDC), UINT64_C(0x018585905FB0DE73),
+ UINT64_C(0x002E0E88A51BB32E), UINT64_C(0x011E824BA1621756),
+ UINT64_C(0x008F5503550AE008) },
+ { UINT64_C(0x01F4C5CC039B003C), UINT64_C(0x034FE4F1205365F7),
+ UINT64_C(0x029B502075F020C8), UINT64_C(0x02E622483E3884F2),
+ UINT64_C(0x0096DBF1B7347D87), UINT64_C(0x03E49F71A5BBC472),
+ UINT64_C(0x028F694B092BA1CC), UINT64_C(0x03911DA84B731F41),
+ UINT64_C(0x00AEE98DB68D16A6) } },
+ { { UINT64_C(0x03335FA8EB78796F), UINT64_C(0x02878D6632487FA2),
+ UINT64_C(0x023DC13EBB873632), UINT64_C(0x0328E4AB268A2A07),
+ UINT64_C(0x017A111FE36EA0A1), UINT64_C(0x02DD260BC4AB23DF),
+ UINT64_C(0x02BD012E8019E481), UINT64_C(0x02DAEA5C2102ACDC),
+ UINT64_C(0x0191F08F46778030) },
+ { UINT64_C(0x01DAFF85FF6CA70B), UINT64_C(0x00C20C713262D23C),
+ UINT64_C(0x0002F4B44F09083A), UINT64_C(0x014BFF17F10ECF45),
+ UINT64_C(0x025ADB2237EA42A8), UINT64_C(0x03E47544193ED683),
+ UINT64_C(0x016D405A3F97D5CE), UINT64_C(0x03412AAA28009BC3),
+ UINT64_C(0x0061A9DB41BEFEDC) } },
+ { { UINT64_C(0x02DE586F26762E69), UINT64_C(0x016435D71514BA52),
+ UINT64_C(0x016D7A3D17B63A4D), UINT64_C(0x026D50DCE42619B6),
+ UINT64_C(0x0071889F59482029), UINT64_C(0x011CE57167125C3C),
+ UINT64_C(0x00A0EA2BE409EA4A), UINT64_C(0x009EDE87052C5E58),
+ UINT64_C(0x01024A33C8A03073) },
+ { UINT64_C(0x0190FE7C2B54A6C6), UINT64_C(0x006AD6F23DFB4339),
+ UINT64_C(0x01A290051C927B4A), UINT64_C(0x001E3AB0900247C6),
+ UINT64_C(0x02F0CF556BD9F5D6), UINT64_C(0x0044A9D7E6F09A3D),
+ UINT64_C(0x03647C4823C77404), UINT64_C(0x0174246A05A125F4),
+ UINT64_C(0x005046F70E49B3B4) } },
+ { { UINT64_C(0x0168F14947F5FEA0), UINT64_C(0x00769E99AB9E6CB3),
+ UINT64_C(0x0132518C89E21038), UINT64_C(0x01B680C1A8696720),
+ UINT64_C(0x002ED6053CD44327), UINT64_C(0x01D30DD43B7E58A9),
+ UINT64_C(0x00944E2E081D9491), UINT64_C(0x006831ACBEAD123C),
+ UINT64_C(0x0152C11DC5777195) },
+ { UINT64_C(0x00241773802E1A49), UINT64_C(0x01BAF7037807F846),
+ UINT64_C(0x03D3C7A48FA494BE), UINT64_C(0x011E5017010FAAB7),
+ UINT64_C(0x02754857375E5F4A), UINT64_C(0x03779B43EFE7F8E1),
+ UINT64_C(0x0012FF3BABC982CB), UINT64_C(0x00FFF200A782A57D),
+ UINT64_C(0x01525BFCB1CE27F1) } },
+ { { UINT64_C(0x03E552EA093A81E5), UINT64_C(0x0289B3D7E8ED9281),
+ UINT64_C(0x0342009AC81D0D79), UINT64_C(0x03AD34454A991783),
+ UINT64_C(0x01E2910F69599605), UINT64_C(0x03D879F03BB2582D),
+ UINT64_C(0x027BC06449C49ACB), UINT64_C(0x008DC219F862EDC8),
+ UINT64_C(0x01C5BFA6129C1E94) },
+ { UINT64_C(0x026A51D1748353E7), UINT64_C(0x0181475224C056F6),
+ UINT64_C(0x00C626EAA883505E), UINT64_C(0x0279EE327830A7B4),
+ UINT64_C(0x0320D8F515A684E8), UINT64_C(0x00C3F8E23CD44D3F),
+ UINT64_C(0x02C122EE12C67CA1), UINT64_C(0x00E99C91530D5183),
+ UINT64_C(0x0021144C6B142C61) } },
+ { { UINT64_C(0x011D351AD93C77DA), UINT64_C(0x03AA1509EA474780),
+ UINT64_C(0x018659BD1EF489E2), UINT64_C(0x003305C7CD548712),
+ UINT64_C(0x0274078260A570D7), UINT64_C(0x0053143C92277CEB),
+ UINT64_C(0x002C9848EA865C9F), UINT64_C(0x02CCE08E86A1AEA9),
+ UINT64_C(0x017387D78B16B104) },
+ { UINT64_C(0x004AA27AD541016D), UINT64_C(0x018249526E484E54),
+ UINT64_C(0x02AB312423D0089E), UINT64_C(0x0219D7F11A43C693),
+ UINT64_C(0x02063682A176BD49), UINT64_C(0x03B53A444F4AA295),
+ UINT64_C(0x00795B99C8C7C949), UINT64_C(0x03E13055864354E1),
+ UINT64_C(0x00AD0290F60CD7D0) } },
+ { { UINT64_C(0x012D2A436D526DD9), UINT64_C(0x01CD402DD6D978C6),
+ UINT64_C(0x00A58E861B88A485), UINT64_C(0x02D5660B63C2B513),
+ UINT64_C(0x00AC661A50344950), UINT64_C(0x005912EC7C3046DF),
+ UINT64_C(0x00386C50A42C0A1A), UINT64_C(0x03AB81C1B172201D),
+ UINT64_C(0x00C7E276190DAFE0) },
+ { UINT64_C(0x02C2EF02CE4F4EFB), UINT64_C(0x036C62A28EE8E529),
+ UINT64_C(0x007713DEA66609AC), UINT64_C(0x0335AC64B1B06D35),
+ UINT64_C(0x030C33E87E4697D9), UINT64_C(0x02A8B6DA5FD2C060),
+ UINT64_C(0x00A7681837DA7123), UINT64_C(0x034383051138278A),
+ UINT64_C(0x0100BA5CB675B5C3) } },
+ { { UINT64_C(0x007A90498A37CD61), UINT64_C(0x00C21A3950646D6E),
+ UINT64_C(0x00E24CC900B23BA5), UINT64_C(0x00177482F428680B),
+ UINT64_C(0x008C265BAA81CF89), UINT64_C(0x035D3B4D224FFF8E),
+ UINT64_C(0x036D6B85A5B0977B), UINT64_C(0x00D1075A6C1311DD),
+ UINT64_C(0x01CE20C3E0DE4C26) },
+ { UINT64_C(0x03983305308A7408), UINT64_C(0x034CC1C79BB9BDAE),
+ UINT64_C(0x02079940C900D507), UINT64_C(0x011184B7705AB688),
+ UINT64_C(0x00BE018DECC7C858), UINT64_C(0x00059833EA10EFD5),
+ UINT64_C(0x03D3C58726A0CFF9), UINT64_C(0x03FAC56BC268E09A),
+ UINT64_C(0x00AF6C171D653277) } },
+ { { UINT64_C(0x01151276D19DDB66), UINT64_C(0x00BE849EE9A2D3A8),
+ UINT64_C(0x02C6A7580CC1CD5D), UINT64_C(0x03AE7FCF32E2402D),
+ UINT64_C(0x0077F3388646E57B), UINT64_C(0x0321275FFC38AED4),
+ UINT64_C(0x035220194FAC16E6), UINT64_C(0x00AC60DD1664CBF4),
+ UINT64_C(0x005C9F4FAEB1E475) },
+ { UINT64_C(0x03454E2FDA228C02), UINT64_C(0x03CE54CE918B9E80),
+ UINT64_C(0x01E6700CB1251E2C), UINT64_C(0x004D9EF2E269258E),
+ UINT64_C(0x0271A9DFD10397F8), UINT64_C(0x01D68E1301C08065),
+ UINT64_C(0x0255D3F4888FC07C), UINT64_C(0x01EA14C32D6DB6C1),
+ UINT64_C(0x00641A5E7FF0CED4) } },
+ { { UINT64_C(0x03D2DB7494E80EB1), UINT64_C(0x03429AAC7DF50EDF),
+ UINT64_C(0x0193B4233D776372), UINT64_C(0x00FA6676BCB0445B),
+ UINT64_C(0x00962AF93FA06ADE), UINT64_C(0x00ED262149C44EC5),
+ UINT64_C(0x00DD0F0802C2CD3B), UINT64_C(0x0349A7F09C0CD9BA),
+ UINT64_C(0x019BCEE240624924) },
+ { UINT64_C(0x0301B8CB30F92986), UINT64_C(0x02FBD5618F84FCAA),
+ UINT64_C(0x020844CC6DEA56EF), UINT64_C(0x0399AC423AE9922A),
+ UINT64_C(0x0304B577679CF04F), UINT64_C(0x033A00D5B3E1E90B),
+ UINT64_C(0x02E0EA5DF7501CB6), UINT64_C(0x01AEEBA7909CF3AB),
+ UINT64_C(0x00D1F739C1192316) } },
+ { { UINT64_C(0x03FBED19829AE558), UINT64_C(0x018A508538E70057),
+ UINT64_C(0x00CB16FE844A9E7C), UINT64_C(0x02A5D97534D7DBBC),
+ UINT64_C(0x005769E43FDAB701), UINT64_C(0x02371B260F0C6E67),
+ UINT64_C(0x0088CED91D562ACB), UINT64_C(0x03FF0E5F0D26F719),
+ UINT64_C(0x009911094F5E4AA4) },
+ { UINT64_C(0x014DA634DAAD22D1), UINT64_C(0x0126CD74DB263614),
+ UINT64_C(0x00B20F1368A80FE1), UINT64_C(0x01C40150F01BDEEF),
+ UINT64_C(0x036B7B115D665EA4), UINT64_C(0x00E64D810EAB1790),
+ UINT64_C(0x037432C58B6DDE4A), UINT64_C(0x02689716E469337C),
+ UINT64_C(0x009023B703EED1A4) } },
+ { { UINT64_C(0x0168DF986EB8B398), UINT64_C(0x0373053537795BF1),
+ UINT64_C(0x018911988685F26D), UINT64_C(0x0387383FA6C93770),
+ UINT64_C(0x019704736EAD528F), UINT64_C(0x0271A2FD2A7AB31F),
+ UINT64_C(0x016F759D385DF60B), UINT64_C(0x00588A673CE9E385),
+ UINT64_C(0x00F00D2C74D140B1) },
+ { UINT64_C(0x037761186D05FF6A), UINT64_C(0x021D5810D7AE7578),
+ UINT64_C(0x032F7D951B6FE596), UINT64_C(0x00F101711823BB39),
+ UINT64_C(0x028DE92770998580), UINT64_C(0x037C0C99F0D97BF8),
+ UINT64_C(0x030EB60AA7504E10), UINT64_C(0x038624C9A9EBB17E),
+ UINT64_C(0x0117D8E0506A5993) } },
+ { { UINT64_C(0x02D315A154D9F1F8), UINT64_C(0x00A34DBD30332164),
+ UINT64_C(0x0306F497C34DB615), UINT64_C(0x03599315A4DB339F),
+ UINT64_C(0x007E9E0F8E2399AC), UINT64_C(0x003A93148F4FA95A),
+ UINT64_C(0x011F62B5F0DC45EF), UINT64_C(0x02C2CA027E1C8CCA),
+ UINT64_C(0x017EDB2AB60DCF2F) },
+ { UINT64_C(0x03D0BE47BDAF0C41), UINT64_C(0x0261770EA9BAF337),
+ UINT64_C(0x00123C9A8D5C885C), UINT64_C(0x02304942CA223A54),
+ UINT64_C(0x027514FEE2CC680A), UINT64_C(0x02845D9CADE7E084),
+ UINT64_C(0x037BF3E603649E24), UINT64_C(0x00221D7FD1EC9BB3),
+ UINT64_C(0x019ABE2E017E3282) } },
+ { { UINT64_C(0x022C310986DBC74A), UINT64_C(0x016910C9D8D292FA),
+ UINT64_C(0x0168FBA7C0C784B2), UINT64_C(0x02F0C2E785D2A006),
+ UINT64_C(0x01AE45ADAA754923), UINT64_C(0x0340D3039A77094C),
+ UINT64_C(0x028C800560A74DE4), UINT64_C(0x0209DAB7CF99A92A),
+ UINT64_C(0x01A7AE95C3D65A81) },
+ { UINT64_C(0x03D0EF28C4FA3D53), UINT64_C(0x01C7BD38B1347859),
+ UINT64_C(0x0005A7461F21783E), UINT64_C(0x01367207E2FE3122),
+ UINT64_C(0x033746BBB79E2E44), UINT64_C(0x0279FE17A5803572),
+ UINT64_C(0x03015592FFEC7617), UINT64_C(0x02742174C25F4D16),
+ UINT64_C(0x00E410A0B89682D7) } },
+ { { UINT64_C(0x02B22FBEE727DDB2), UINT64_C(0x024FD40DFE0DC5F9),
+ UINT64_C(0x015C3DCCFE2E8278), UINT64_C(0x029992449755EB6E),
+ UINT64_C(0x03FD36B4574277E1), UINT64_C(0x02D49C964F2299EE),
+ UINT64_C(0x021CD67B9805D246), UINT64_C(0x0157D17DBA6DBB8F),
+ UINT64_C(0x014315532B63B009) },
+ { UINT64_C(0x0192F41C11B068CF), UINT64_C(0x013ADE386B9A6252),
+ UINT64_C(0x0023510A4F9C5B28), UINT64_C(0x027BD3DC9B9B0039),
+ UINT64_C(0x02377F19B4B907D4), UINT64_C(0x0292B925A6106638),
+ UINT64_C(0x01058CF22E01616A), UINT64_C(0x017799C00E576B04),
+ UINT64_C(0x00A289A954F56291) } },
+ },
+ {
+ { { UINT64_C(0x00C4AC143FFE4858), UINT64_C(0x0306D22EAAC4A5AD),
+ UINT64_C(0x01F0A5791E3783D9), UINT64_C(0x03A0A974CB2ACA2D),
+ UINT64_C(0x02E76FB3F03AA34D), UINT64_C(0x0217400AE3A40C22),
+ UINT64_C(0x0040CD3B74A7ED3C), UINT64_C(0x00FCB122891AAD96),
+ UINT64_C(0x01B8C8494718771D) },
+ { UINT64_C(0x03F57D14A28DA023), UINT64_C(0x022E364741E3E46C),
+ UINT64_C(0x01A7ABA67F27FDBC), UINT64_C(0x030FF1837DC3E97D),
+ UINT64_C(0x00618486CF4908AD), UINT64_C(0x02CF161553F374F8),
+ UINT64_C(0x019DD012E725571E), UINT64_C(0x033EDF6BF47BD717),
+ UINT64_C(0x0125806554EE19B9) } },
+ { { UINT64_C(0x018E9A7BA994A7B1), UINT64_C(0x02AC0D7BEC6A8983),
+ UINT64_C(0x03D38D705E07CD01), UINT64_C(0x005566DD3C426505),
+ UINT64_C(0x0067EB2AB8C5C6E4), UINT64_C(0x02833D0E2656CD6B),
+ UINT64_C(0x01DDCA9C78AA1909), UINT64_C(0x00EDF1FB3DAA7F12),
+ UINT64_C(0x0166F72F3DE51C63) },
+ { UINT64_C(0x02B78FAEB96F6D73), UINT64_C(0x02052F35A5545293),
+ UINT64_C(0x005CD62AD9BF553E), UINT64_C(0x00B728FA50CC968E),
+ UINT64_C(0x019295FA16301250), UINT64_C(0x0287D8B59A13D480),
+ UINT64_C(0x0316813DDF4A21F3), UINT64_C(0x01769E5723184C7C),
+ UINT64_C(0x0066E0E7009AE7B5) } },
+ { { UINT64_C(0x021F2EE46CDE12CD), UINT64_C(0x003D0000412CCD1F),
+ UINT64_C(0x02C67E761CB63537), UINT64_C(0x02C1A38D4F403A59),
+ UINT64_C(0x03B812F8D1F26B87), UINT64_C(0x029994AD5ACE97AC),
+ UINT64_C(0x026C55C785488093), UINT64_C(0x01869CEF172A91D6),
+ UINT64_C(0x01661593B4702F1D) },
+ { UINT64_C(0x0197935A2366B021), UINT64_C(0x01C8C53ECC9EEE7B),
+ UINT64_C(0x02C636CFB825AB8B), UINT64_C(0x02EEC0E46E96B427),
+ UINT64_C(0x00525F145382F270), UINT64_C(0x0133F597DCA61576),
+ UINT64_C(0x0237ACF913367D38), UINT64_C(0x02C6B96EB5398F41),
+ UINT64_C(0x0088A6A556F6EF14) } },
+ { { UINT64_C(0x03AE1C8DCCD34315), UINT64_C(0x0157B6DF5CCF4DF6),
+ UINT64_C(0x02191AB191DCA071), UINT64_C(0x01897CF46F10173C),
+ UINT64_C(0x02767320BD61533A), UINT64_C(0x01A9DAB7019D6315),
+ UINT64_C(0x01911BB32715F1BB), UINT64_C(0x001C7F74F8A656CA),
+ UINT64_C(0x0009C70F08ACB68E) },
+ { UINT64_C(0x0072A1ED9356A25A), UINT64_C(0x01556970A7D5EEF6),
+ UINT64_C(0x0350BEDB0F71D649), UINT64_C(0x03EA3565DDFF826F),
+ UINT64_C(0x013B29E08B1AF8F4), UINT64_C(0x0331B92ACB74C5CA),
+ UINT64_C(0x03A4E6E26F5AAC1D), UINT64_C(0x036F06A79D110118),
+ UINT64_C(0x00631FDFA318D2BC) } },
+ { { UINT64_C(0x035871450EAD4FF9), UINT64_C(0x0045783A9CFF37E4),
+ UINT64_C(0x03713AE92AC33512), UINT64_C(0x009A3896CE34EF6D),
+ UINT64_C(0x03A8EE82555DC9D1), UINT64_C(0x002C620829E4335D),
+ UINT64_C(0x0375E016D1AE1B50), UINT64_C(0x016D891B140E00CD),
+ UINT64_C(0x00097FE78FE880E9) },
+ { UINT64_C(0x01A323FFCB8B195A), UINT64_C(0x014E7DA6CA0AAFF4),
+ UINT64_C(0x00C88E8E6528DDB5), UINT64_C(0x01A720372EE878E6),
+ UINT64_C(0x015A2426F3EF9BB8), UINT64_C(0x01604A559CF4A620),
+ UINT64_C(0x02C8F10B967488E1), UINT64_C(0x028191262B209448),
+ UINT64_C(0x019E5661C083C48E) } },
+ { { UINT64_C(0x01D1ED07D6920A2A), UINT64_C(0x03909AA105A814DB),
+ UINT64_C(0x029B1BBB7F2ECAC2), UINT64_C(0x03BB4096CC1FBE27),
+ UINT64_C(0x0382CAD68C150CCC), UINT64_C(0x00F1CBB480EE5E69),
+ UINT64_C(0x03933B382F4CE45C), UINT64_C(0x0283D1969E6EC1D6),
+ UINT64_C(0x008C6BE4F8FBF5F9) },
+ { UINT64_C(0x00C2A30AF1CA3CCC), UINT64_C(0x02FF4D4359C3CABE),
+ UINT64_C(0x020AA78B337657B0), UINT64_C(0x01C5C613D10C423A),
+ UINT64_C(0x003249BB2418CB6D), UINT64_C(0x00CAB4378A53687C),
+ UINT64_C(0x0147E31B6118850C), UINT64_C(0x02D08DC29C2D596C),
+ UINT64_C(0x00409A1F9C9C0372) } },
+ { { UINT64_C(0x03985FC5DEB5DCD3), UINT64_C(0x02328F30C46302C2),
+ UINT64_C(0x00260388D4747802), UINT64_C(0x03BFBB0240E60F52),
+ UINT64_C(0x03B209042D288213), UINT64_C(0x00F7BBEE239C04F6),
+ UINT64_C(0x039A7EE4CF9007B4), UINT64_C(0x01BFEC97A07FF7ED),
+ UINT64_C(0x00F46BA7F4461BE4) },
+ { UINT64_C(0x02FF04BE53B68E6C), UINT64_C(0x01CA69133AC1C9A1),
+ UINT64_C(0x001C0711D4BE94AE), UINT64_C(0x02E7507B45945E53),
+ UINT64_C(0x011B7A5F7EC81DBE), UINT64_C(0x0329BFC6DA7CDB63),
+ UINT64_C(0x01FCD3B287A0A497), UINT64_C(0x01F250F924D3B826),
+ UINT64_C(0x0174EABAF5F90BA0) } },
+ { { UINT64_C(0x0288B8614B07B1BF), UINT64_C(0x00AE0C951E1C4290),
+ UINT64_C(0x01FC49AB7CD0CA2F), UINT64_C(0x0139ED7FA367ECE7),
+ UINT64_C(0x007ACFF8F0933B14), UINT64_C(0x01BE527A6CE02D5F),
+ UINT64_C(0x03F3D3A06B11DFFE), UINT64_C(0x021959D14B1DF4BB),
+ UINT64_C(0x01BC6741AD8DA8F8) },
+ { UINT64_C(0x034CD028C42166D8), UINT64_C(0x0185807E32738495),
+ UINT64_C(0x005883F1CCD9FD2E), UINT64_C(0x03CA0BFCEE08ED5A),
+ UINT64_C(0x03EAF8CDFF12C8BC), UINT64_C(0x039F9E6871AF8AEE),
+ UINT64_C(0x0109893E423B3304), UINT64_C(0x0120DC6E783F51AB),
+ UINT64_C(0x011A855D5413AED9) } },
+ { { UINT64_C(0x03EC078648AA3834), UINT64_C(0x022666BDFBC08928),
+ UINT64_C(0x020CD318C559ED79), UINT64_C(0x031A1F3F1113AB91),
+ UINT64_C(0x0225DA57498B9B85), UINT64_C(0x00501D2B9387A084),
+ UINT64_C(0x01462ED6150B49FB), UINT64_C(0x0270A359C4EB430D),
+ UINT64_C(0x01AD03ACD7F1F2DA) },
+ { UINT64_C(0x00577220553E08C6), UINT64_C(0x02711DCC2A6176C2),
+ UINT64_C(0x00D41E0F942DF9B3), UINT64_C(0x032019849BF44B40),
+ UINT64_C(0x006F6F65E6AF51C1), UINT64_C(0x02192F8FD6395745),
+ UINT64_C(0x0369C64E6D49408A), UINT64_C(0x01C1CA82AADBB384),
+ UINT64_C(0x00252180D9240A33) } },
+ { { UINT64_C(0x03B36603F69B34EA), UINT64_C(0x023601EA98DB7FF6),
+ UINT64_C(0x0119384D5B4D0084), UINT64_C(0x009CB1557E1A2117),
+ UINT64_C(0x0120F29FC187E5AB), UINT64_C(0x020795FEFEF91AF3),
+ UINT64_C(0x01654BD2C20FF213), UINT64_C(0x0193B09B2AFFB3A3),
+ UINT64_C(0x01F2DBD41C09A92B) },
+ { UINT64_C(0x0190B8EB79047156), UINT64_C(0x002863629F98DF90),
+ UINT64_C(0x0131D825BFCD5C94), UINT64_C(0x012459BCEEE81461),
+ UINT64_C(0x012AEB328B250B06), UINT64_C(0x031E1C2DAC09694B),
+ UINT64_C(0x000530A4AD5276F9), UINT64_C(0x02B3D1F18BB7C853),
+ UINT64_C(0x01E8BD2FCCA04F6F) } },
+ { { UINT64_C(0x02834F110665B1CF), UINT64_C(0x017AA90109CDC18A),
+ UINT64_C(0x009242A3E1F2E720), UINT64_C(0x02D5A60BD5F8954E),
+ UINT64_C(0x03508324EB838D5B), UINT64_C(0x02EDD0C3ED33B190),
+ UINT64_C(0x00AAD5DC3A119996), UINT64_C(0x01CD04A457847144),
+ UINT64_C(0x008F9F585EE51416) },
+ { UINT64_C(0x0353544CA94CC511), UINT64_C(0x03C458B74ECFBB85),
+ UINT64_C(0x00DFB34B9CF940F6), UINT64_C(0x025DDCAA8FA2C670),
+ UINT64_C(0x005DE224A75FEDB1), UINT64_C(0x0133692E8F60712D),
+ UINT64_C(0x0273753106CAA7BE), UINT64_C(0x01408D58EA2D6196),
+ UINT64_C(0x00E26553508F8448) } },
+ { { UINT64_C(0x01A3A4F60BB13D25), UINT64_C(0x0023ED9ED8B71298),
+ UINT64_C(0x03FFC9A520FCC5AA), UINT64_C(0x0045A041830B9268),
+ UINT64_C(0x00CC9DB2983FF213), UINT64_C(0x0121E74580D3BD97),
+ UINT64_C(0x03180DFFF5302191), UINT64_C(0x017F708B61C069C2),
+ UINT64_C(0x00AFC5190BADFB44) },
+ { UINT64_C(0x0059EAFDA4B66F01), UINT64_C(0x007705DA965D6F67),
+ UINT64_C(0x020B87871134FA29), UINT64_C(0x01AD088735B31B4F),
+ UINT64_C(0x018012C061713383), UINT64_C(0x0284C3C51E97DE38),
+ UINT64_C(0x011439AE9AC5E3B5), UINT64_C(0x0201A73CE2ADC421),
+ UINT64_C(0x013663825C862321) } },
+ { { UINT64_C(0x018D68C0B140A004), UINT64_C(0x01BFAA6599011216),
+ UINT64_C(0x01E7950576D7B0B1), UINT64_C(0x0078B24B131D0E5F),
+ UINT64_C(0x02AD5C3FFEDF02C1), UINT64_C(0x0322CFD3147C6177),
+ UINT64_C(0x038BD27915C61C9C), UINT64_C(0x02F37687B9498DE9),
+ UINT64_C(0x00EBB6AC6E166ECF) },
+ { UINT64_C(0x01DE078E81F8F797), UINT64_C(0x036F3FD0C148612A),
+ UINT64_C(0x00D42800CEE62CC8), UINT64_C(0x02EF08C94C9988E1),
+ UINT64_C(0x02A200E24C7221CE), UINT64_C(0x0087BB91FBA9446C),
+ UINT64_C(0x01AEF9F64351AA5D), UINT64_C(0x0379F61D1F515F5C),
+ UINT64_C(0x01D6BBEA838FBDE0) } },
+ { { UINT64_C(0x029C5257AC98DFAE), UINT64_C(0x033122DA34CA0C86),
+ UINT64_C(0x02E5AEB04EB596D8), UINT64_C(0x01866E31FF449E97),
+ UINT64_C(0x01EFC618512D868E), UINT64_C(0x02AB8DD8A2E422DD),
+ UINT64_C(0x0315FBBF0AB5F678), UINT64_C(0x029B64EE769245C7),
+ UINT64_C(0x006C6C12185D61E3) },
+ { UINT64_C(0x008781A5F0C92FB5), UINT64_C(0x02186CDBC76A7DC2),
+ UINT64_C(0x02BF30F2AE35EBF2), UINT64_C(0x02A9033768598F59),
+ UINT64_C(0x026D8F763CE2DDB2), UINT64_C(0x000096A41DC06247),
+ UINT64_C(0x0378DBDD308791A2), UINT64_C(0x0303B0E7D471E5F3),
+ UINT64_C(0x0047B4CFEAEEA101) } },
+ { { UINT64_C(0x03329136A629DD22), UINT64_C(0x00E5BE3AD1E98750),
+ UINT64_C(0x00E718574118A518), UINT64_C(0x0001BFD334A31B85),
+ UINT64_C(0x010ACC7BD56131AD), UINT64_C(0x01BAE8680FF31AF2),
+ UINT64_C(0x033BF365D3656538), UINT64_C(0x01275681F6A3E780),
+ UINT64_C(0x01D9134C0EBA1F9E) },
+ { UINT64_C(0x03FC0784F75200EB), UINT64_C(0x02505880E37CB45D),
+ UINT64_C(0x02D012B6F4AEDF75), UINT64_C(0x0239FE68EEDA06B2),
+ UINT64_C(0x0214FD97D35A83E1), UINT64_C(0x0161FD60913389DA),
+ UINT64_C(0x02E06AA08A955A74), UINT64_C(0x00A478BB3A540872),
+ UINT64_C(0x0194213360ACA782) } },
+ { { UINT64_C(0x01C7D837402145D7), UINT64_C(0x029A3987EA8CF574),
+ UINT64_C(0x017B7322E3920EED), UINT64_C(0x01DA90CCE8A07229),
+ UINT64_C(0x019966632762CF1A), UINT64_C(0x02EA82E975BFDBB2),
+ UINT64_C(0x00D089776CD7C2DA), UINT64_C(0x01094FFA3D38BAB2),
+ UINT64_C(0x00ED9425E7C61A8F) },
+ { UINT64_C(0x030890ADFDDB406F), UINT64_C(0x02F38194427778C1),
+ UINT64_C(0x02645A577E29DB0B), UINT64_C(0x02B73BB5A04F839F),
+ UINT64_C(0x02CBE569872B94D6), UINT64_C(0x034D3051E8314100),
+ UINT64_C(0x0228FAA39358328C), UINT64_C(0x00F6B458D19C41F5),
+ UINT64_C(0x01B60D6BFFF120A1) } },
+ },
+ {
+ { { UINT64_C(0x03B0D91DCEF34144), UINT64_C(0x0240FE90ACAA2EEA),
+ UINT64_C(0x02F5638E4C5FABC5), UINT64_C(0x0279B56C13AF89E7),
+ UINT64_C(0x007BB923CEB3416E), UINT64_C(0x024528E9111E0646),
+ UINT64_C(0x0019F3658FEFA212), UINT64_C(0x007942C115ACBB8B),
+ UINT64_C(0x00B3176361BBE92C) },
+ { UINT64_C(0x0056A1AF824FDE34), UINT64_C(0x03EFECC262943F2F),
+ UINT64_C(0x00F55AB9CFA7333B), UINT64_C(0x02E423937E89B9C8),
+ UINT64_C(0x0177865B2FF1E104), UINT64_C(0x00D9D0346E5AE2AF),
+ UINT64_C(0x0250F4369EB257AA), UINT64_C(0x02479F5CEE51B49A),
+ UINT64_C(0x007A588E4A1470CD) } },
+ { { UINT64_C(0x006FD0B27FF5FDD9), UINT64_C(0x0315207EADCA6EB7),
+ UINT64_C(0x038531FDE9E82663), UINT64_C(0x03E9C7DA1307DC24),
+ UINT64_C(0x007FCF66FC293D27), UINT64_C(0x0073411170172CF4),
+ UINT64_C(0x03FA0B1709D86BA1), UINT64_C(0x0023FC735B565525),
+ UINT64_C(0x00C65EABD8A0D474) },
+ { UINT64_C(0x001EA477B6B64713), UINT64_C(0x03CAD4127E803700),
+ UINT64_C(0x02F97EFCE2EC6148), UINT64_C(0x021B881732700041),
+ UINT64_C(0x01A6D874ACACA115), UINT64_C(0x00A7CA705835C220),
+ UINT64_C(0x01191B137DD5C14D), UINT64_C(0x02CB4161AB1B2384),
+ UINT64_C(0x01EA96470F229677) } },
+ { { UINT64_C(0x016F41AA44BE78BD), UINT64_C(0x00DBC87805312BB8),
+ UINT64_C(0x0318156EA17D7B54), UINT64_C(0x026CDF0148DE5C45),
+ UINT64_C(0x03F974EA0D77EB08), UINT64_C(0x02136BB03794FF4E),
+ UINT64_C(0x01B53A227C4C2E9C), UINT64_C(0x02B0229F1C11498E),
+ UINT64_C(0x01CDAB34CEF9122C) },
+ { UINT64_C(0x01942B2B520FED74), UINT64_C(0x0278BB0606178C91),
+ UINT64_C(0x03C70799A5848E33), UINT64_C(0x01024AF0188FBCA7),
+ UINT64_C(0x017502FD5E81CD21), UINT64_C(0x0341AC8FD5BE6E9F),
+ UINT64_C(0x03807308C0C55507), UINT64_C(0x02DA9120D7D39BD9),
+ UINT64_C(0x0078E0C0ADC9F3B8) } },
+ { { UINT64_C(0x0249E4056736B7A8), UINT64_C(0x000AD5FD0E326A32),
+ UINT64_C(0x00F1D8DD5BD49BAE), UINT64_C(0x03C65D240FD61C7B),
+ UINT64_C(0x0348AA1A2246B05E), UINT64_C(0x03D6D10E55244A30),
+ UINT64_C(0x02E9906E8F8D085E), UINT64_C(0x0187FD8BEFA8BFBF),
+ UINT64_C(0x00F8ECD06F55C492) },
+ { UINT64_C(0x003A56FE1DEF19D6), UINT64_C(0x0197C74F933E6798),
+ UINT64_C(0x005694559A51C48D), UINT64_C(0x028423114901AE4B),
+ UINT64_C(0x006C134B2FD133CC), UINT64_C(0x01F5B1FDE595A9F1),
+ UINT64_C(0x037CDF87E407C290), UINT64_C(0x01C9430D19026B6E),
+ UINT64_C(0x00AE4EBC0B91EEC4) } },
+ { { UINT64_C(0x0027F5A2CFACC519), UINT64_C(0x0007D8CA3F95188A),
+ UINT64_C(0x02386E76D1ED1FA2), UINT64_C(0x012CFC615ECB44AE),
+ UINT64_C(0x02BAC8E16C4EECC0), UINT64_C(0x030FC8B6EACB48A4),
+ UINT64_C(0x0356F1C94FF8F3DD), UINT64_C(0x00E7898C9228D80E),
+ UINT64_C(0x0100391DE5D28C45) },
+ { UINT64_C(0x00DDA167BAEA3E6E), UINT64_C(0x024E9B6238591A96),
+ UINT64_C(0x000B124B20D76C9C), UINT64_C(0x00844E80DAD85B15),
+ UINT64_C(0x006322B9CC9CFBC9), UINT64_C(0x03C3F3E68B0EC1FB),
+ UINT64_C(0x0198C8988C8CDF43), UINT64_C(0x012F63F58B2E6769),
+ UINT64_C(0x0146D6A4BBF8FA16) } },
+ { { UINT64_C(0x025929A379C36058), UINT64_C(0x03AA8D69D0F228FC),
+ UINT64_C(0x03137C58503106D0), UINT64_C(0x031D3407BEC09250),
+ UINT64_C(0x012A5E9F3CB78FCD), UINT64_C(0x03C89A97F7DE8B2F),
+ UINT64_C(0x03FFA336D8C2CB9D), UINT64_C(0x03CDFCCBE0B2ABB7),
+ UINT64_C(0x018DB520A44381C3) },
+ { UINT64_C(0x037F91B7E71EFA02), UINT64_C(0x02CD2A4F8F2A0051),
+ UINT64_C(0x03247FBAA82739BD), UINT64_C(0x004F7652DC5CA6F6),
+ UINT64_C(0x0247D54BFA1094B5), UINT64_C(0x01201F41A5F24EA8),
+ UINT64_C(0x036AE048899075C8), UINT64_C(0x008DE5B2C2092D5F),
+ UINT64_C(0x01A05D1DEF90E6C9) } },
+ { { UINT64_C(0x009C63F00DDEF055), UINT64_C(0x029E867514AE17BD),
+ UINT64_C(0x0071477B7FA6548A), UINT64_C(0x01DCF23B30CCB894),
+ UINT64_C(0x039F3EAF10214846), UINT64_C(0x0131314742EE42E6),
+ UINT64_C(0x025A42537B162041), UINT64_C(0x0344D321CAEDE286),
+ UINT64_C(0x00C49346566A2F80) },
+ { UINT64_C(0x00AC1057A1A2F1BD), UINT64_C(0x01B16F3F4CF6D85A),
+ UINT64_C(0x00470A35FA26D12C), UINT64_C(0x02FDF7EC571664A6),
+ UINT64_C(0x00357DE22954AF5D), UINT64_C(0x01CB9B6C3295D89E),
+ UINT64_C(0x02A6D5E003D32198), UINT64_C(0x02BCFEFCD08395C8),
+ UINT64_C(0x0024E3256C9EC29E) } },
+ { { UINT64_C(0x02E3E3726899A80A), UINT64_C(0x0026F9277D12E5D8),
+ UINT64_C(0x03A9F147B7CC784D), UINT64_C(0x02D1E1BE2785B816),
+ UINT64_C(0x035FD35148DBC7EB), UINT64_C(0x008735EF566F4D0B),
+ UINT64_C(0x023A56774FF10ABF), UINT64_C(0x02650BA6B7B26925),
+ UINT64_C(0x016ADF49024BBCF1) },
+ { UINT64_C(0x003AD342E4E67976), UINT64_C(0x03C92192D00DAB16),
+ UINT64_C(0x020460FDED50A384), UINT64_C(0x034C8C7A7CCCB477),
+ UINT64_C(0x026F1F63625979C2), UINT64_C(0x01C81B4E10D5FC66),
+ UINT64_C(0x036A3D003DC0490C), UINT64_C(0x012B902A026C1347),
+ UINT64_C(0x01F7B86A36390DAD) } },
+ { { UINT64_C(0x000691E2EC112CB8), UINT64_C(0x024EF99D143B7D60),
+ UINT64_C(0x0115A42EEFCFA47F), UINT64_C(0x01E802D725D2BBE5),
+ UINT64_C(0x0121B37EFA442937), UINT64_C(0x0017BB506D32E10E),
+ UINT64_C(0x026AAA87600CCD57), UINT64_C(0x016CF4C8E0A70FF4),
+ UINT64_C(0x009FFBF163AE94B4) },
+ { UINT64_C(0x0295886926814D18), UINT64_C(0x03A0FBF4C1A9E1DB),
+ UINT64_C(0x03C42214E510B980), UINT64_C(0x01795048E2D2FBCB),
+ UINT64_C(0x007E6ECA8AF45230), UINT64_C(0x03B7348F6C6F8B62),
+ UINT64_C(0x0082EEE297D2810F), UINT64_C(0x001262A01DEC143A),
+ UINT64_C(0x01B9903A2D05B891) } },
+ { { UINT64_C(0x023634A86BE77EA4), UINT64_C(0x00A0B41ED63F1BFE),
+ UINT64_C(0x0275C4824374C264), UINT64_C(0x02608A7A328E460A),
+ UINT64_C(0x00FED89AAE8DD2B7), UINT64_C(0x02109029EF3CE021),
+ UINT64_C(0x011969F67E04BEBE), UINT64_C(0x01A57DE74BB6D7CF),
+ UINT64_C(0x0032260FF5FAEF2A) },
+ { UINT64_C(0x02058C1764B8EB93), UINT64_C(0x034A7BEAEE142796),
+ UINT64_C(0x01C4178E14455ABA), UINT64_C(0x0089C0C3FD3F4E75),
+ UINT64_C(0x006C6AD7C0E981DA), UINT64_C(0x0228FCA3E86007B0),
+ UINT64_C(0x025CE2ECCA48B8F4), UINT64_C(0x01E5A636E10EA6E7),
+ UINT64_C(0x00B998D460C196E1) } },
+ { { UINT64_C(0x0160926185730C8D), UINT64_C(0x032DE7C19EF3EB5F),
+ UINT64_C(0x01B89DB78DA4AF19), UINT64_C(0x03E8BF1A8A7D683F),
+ UINT64_C(0x00C74484F132486E), UINT64_C(0x0020C78A33777ADF),
+ UINT64_C(0x028B418FCCA39E1E), UINT64_C(0x03C6B30F7BDFA864),
+ UINT64_C(0x012E1D3651FF3815) },
+ { UINT64_C(0x023FC40DA01A8D36), UINT64_C(0x0396DC8A8E0AC356),
+ UINT64_C(0x0257ECBA277518BE), UINT64_C(0x015E0BE8CDCF0B5F),
+ UINT64_C(0x017CA95C0BC967EE), UINT64_C(0x0305AA19591EC746),
+ UINT64_C(0x00ECEE9B1C5E531F), UINT64_C(0x017F62DDF7CD8C93),
+ UINT64_C(0x01843F3A5D58D681) } },
+ { { UINT64_C(0x008235BF1CE87EAC), UINT64_C(0x0337B13BA7D5C15E),
+ UINT64_C(0x03846B02056DE241), UINT64_C(0x033C6CAEB5DEAB90),
+ UINT64_C(0x030248638020D787), UINT64_C(0x0224F8D01B9221DD),
+ UINT64_C(0x01F402C62FF58E8A), UINT64_C(0x03AAD9850E5506F5),
+ UINT64_C(0x003902A9875C05FB) },
+ { UINT64_C(0x0020DA18AA01F6F0), UINT64_C(0x030A6715F4E78D18),
+ UINT64_C(0x037807033B777232), UINT64_C(0x01B7606FD787D415),
+ UINT64_C(0x008A9CC327698B87), UINT64_C(0x0061BCA066C82FF1),
+ UINT64_C(0x01BFA28EB25A2709), UINT64_C(0x024D6272DC7593CB),
+ UINT64_C(0x00EC0BB76A281871) } },
+ { { UINT64_C(0x032999435C8AA41D), UINT64_C(0x01A489157A228E17),
+ UINT64_C(0x0156F793B6B0E956), UINT64_C(0x028D96D92EBD33D6),
+ UINT64_C(0x0359740492EFE167), UINT64_C(0x015A71262E572E91),
+ UINT64_C(0x01FA4485B8FC6399), UINT64_C(0x0347A0956647A542),
+ UINT64_C(0x010E38E5A425F12F) },
+ { UINT64_C(0x00AEFDFC244C41BB), UINT64_C(0x003952945BE8B3B5),
+ UINT64_C(0x0319FE9C6BCFD1F0), UINT64_C(0x03F504A658EDEE0B),
+ UINT64_C(0x02ED873A43F5A1E1), UINT64_C(0x02712F6EE0434187),
+ UINT64_C(0x03F8F26F084CADB4), UINT64_C(0x0037A2587E5D9BC4),
+ UINT64_C(0x007E3E8815CB75BB) } },
+ { { UINT64_C(0x00D0B08F2FB80E07), UINT64_C(0x001F1C3F02C8AA99),
+ UINT64_C(0x02C965AB70A7B621), UINT64_C(0x02934839B849A6F8),
+ UINT64_C(0x003F88BA718D98ED), UINT64_C(0x02899A10EC155762),
+ UINT64_C(0x0019825E2EA0BBFE), UINT64_C(0x031BADAF50BB1556),
+ UINT64_C(0x00C2052564BF2D01) },
+ { UINT64_C(0x02BBD600B64986F4), UINT64_C(0x0001308CBE96F1C1),
+ UINT64_C(0x00C849F303B9F9E3), UINT64_C(0x02D14076FC63D1DE),
+ UINT64_C(0x0236169D2D35EA78), UINT64_C(0x0264B3B8EE95BD05),
+ UINT64_C(0x002F66E82F19619B), UINT64_C(0x0095E5BD3AAECF3F),
+ UINT64_C(0x004DAC1BA614BE0C) } },
+ { { UINT64_C(0x031F00ED67DF6D6E), UINT64_C(0x03D70047AC4E0BA7),
+ UINT64_C(0x02D8711992AA1754), UINT64_C(0x036ECAEB89D30859),
+ UINT64_C(0x0036A42A32CE3566), UINT64_C(0x01D98A9D0A6301E2),
+ UINT64_C(0x0254343364F9506D), UINT64_C(0x00BA44E9D5246E7C),
+ UINT64_C(0x01A19768E78BDB19) },
+ { UINT64_C(0x01612B559D4C1CFE), UINT64_C(0x00FD06AC0FA53998),
+ UINT64_C(0x01000FCBA8F910A9), UINT64_C(0x02941E6AFC5E6D3F),
+ UINT64_C(0x00CAEFF18F01E2A7), UINT64_C(0x00C3611A9DC5189A),
+ UINT64_C(0x004BD42C721A7B6E), UINT64_C(0x02CFCE0AB6DE8255),
+ UINT64_C(0x0157E0604D9A6299) } },
+ { { UINT64_C(0x004C36A17F3F00C1), UINT64_C(0x03AAE85897960B4C),
+ UINT64_C(0x00162519D94A771E), UINT64_C(0x00EFA894195CFB14),
+ UINT64_C(0x0377393E0BEA5785), UINT64_C(0x01275D68934C0C3C),
+ UINT64_C(0x020E33D09CE0D489), UINT64_C(0x00636664BBECE0A2),
+ UINT64_C(0x01D94E3BA2F10531) },
+ { UINT64_C(0x00F1D932B72461C9), UINT64_C(0x030803CCCD33A980),
+ UINT64_C(0x03D527D0F91F6DBE), UINT64_C(0x032A75271076B0B3),
+ UINT64_C(0x00618C0762DDDF10), UINT64_C(0x0023381E1F452B93),
+ UINT64_C(0x02E55888093553F9), UINT64_C(0x0179B91A78A3270C),
+ UINT64_C(0x008109452184E2A2) } },
+ },
+ {
+ { { UINT64_C(0x039BF352B2648196), UINT64_C(0x0255A7410BF9D82B),
+ UINT64_C(0x00E69B9D9444400A), UINT64_C(0x0115B8CE4ADD0E15),
+ UINT64_C(0x0286C0702CA01A26), UINT64_C(0x0343E585D0F62B8D),
+ UINT64_C(0x0270AB3B658EDEED), UINT64_C(0x00BDF019DAC3BE2C),
+ UINT64_C(0x01DA71CEBA8F0207) },
+ { UINT64_C(0x031B398D4D9BC7BB), UINT64_C(0x000CF24C3929C7AB),
+ UINT64_C(0x01B421C8D3FD5E6F), UINT64_C(0x007CC4196EE4E246),
+ UINT64_C(0x020BD4BEA34DCA8A), UINT64_C(0x0290B50CAE9698DF),
+ UINT64_C(0x00FCD1330F886EB9), UINT64_C(0x01E1AC79F03E8C00),
+ UINT64_C(0x00DA9DFFAC1D7299) } },
+ { { UINT64_C(0x023B6F4171DE62A2), UINT64_C(0x02483565211B08E1),
+ UINT64_C(0x03590C48E9F4C557), UINT64_C(0x0300655D7CA7761E),
+ UINT64_C(0x000FC94679705CC8), UINT64_C(0x03F1F51E4C554176),
+ UINT64_C(0x02F4AA91C9B85DEC), UINT64_C(0x01830B06FDF1C0BD),
+ UINT64_C(0x01705BC114A4818F) },
+ { UINT64_C(0x026AF34683BFC242), UINT64_C(0x02704B0386A138E6),
+ UINT64_C(0x0201A2D902335BC5), UINT64_C(0x00F97548337FE82F),
+ UINT64_C(0x0068481E95BAAC46), UINT64_C(0x02198BC38D3244C8),
+ UINT64_C(0x02FB3AE37E76F25B), UINT64_C(0x0051FD7A6C46B763),
+ UINT64_C(0x00BB4F63544525E2) } },
+ { { UINT64_C(0x0184463DCFE3927A), UINT64_C(0x038592C4A5446C69),
+ UINT64_C(0x00820DA1FCA22B30), UINT64_C(0x01BE68F5BD638385),
+ UINT64_C(0x01820BD08BDBAACC), UINT64_C(0x02A44306C3D5797E),
+ UINT64_C(0x0038CCA1AA697778), UINT64_C(0x00C7C5B9FA5A6346),
+ UINT64_C(0x00AF09862D4121FA) },
+ { UINT64_C(0x01CB3F3FBEBC6638), UINT64_C(0x037E0A83514FED33),
+ UINT64_C(0x03EACD5523409D6F), UINT64_C(0x020D6BA55D786340),
+ UINT64_C(0x01CCC13F9ADFA032), UINT64_C(0x0019CA4869978150),
+ UINT64_C(0x039E387EBA3B5F3E), UINT64_C(0x02E531E4CE95EAED),
+ UINT64_C(0x019F9D4B6C1E271A) } },
+ { { UINT64_C(0x03D9C637E6B4D0F2), UINT64_C(0x02F39727B4A2B4A9),
+ UINT64_C(0x03B1C91C466BE1FF), UINT64_C(0x0002CA1D422DB470),
+ UINT64_C(0x035959F6F8064E3B), UINT64_C(0x01A06409B64B70C1),
+ UINT64_C(0x0138166589198416), UINT64_C(0x01E4D2E6E69DFBF6),
+ UINT64_C(0x01235B6CCAD8ED3A) },
+ { UINT64_C(0x036BC004511EBBDB), UINT64_C(0x03C77128404EB6AD),
+ UINT64_C(0x02C7DBC63944D083), UINT64_C(0x00A0B83D92DC53A7),
+ UINT64_C(0x0236B4A39AE88503), UINT64_C(0x03A8D6E5C0E1C279),
+ UINT64_C(0x029FE38FA8BE1456), UINT64_C(0x03585B0A0A7CC668),
+ UINT64_C(0x00A7641453F65799) } },
+ { { UINT64_C(0x00158306BEA400A9), UINT64_C(0x007F40534A2A445F),
+ UINT64_C(0x01C35C303D86F4A4), UINT64_C(0x00EDDE592FDFA8FD),
+ UINT64_C(0x0103A9EFC14289AA), UINT64_C(0x03407BDDBE6E50BA),
+ UINT64_C(0x009401AB57CFB13E), UINT64_C(0x0399C8A12EA5A5B1),
+ UINT64_C(0x00FC6AFA631B2401) },
+ { UINT64_C(0x03676F7FA3EA1F68), UINT64_C(0x0292D21900F132BA),
+ UINT64_C(0x023C1FDE32777454), UINT64_C(0x016AD44E9E4A043B),
+ UINT64_C(0x034CE0B6BF5A83B8), UINT64_C(0x007C5DBECEE12BCA),
+ UINT64_C(0x034C6521C9D71204), UINT64_C(0x0295DA0F38E7DE8B),
+ UINT64_C(0x0062381F9092A871) } },
+ { { UINT64_C(0x021E20A63FBBA24C), UINT64_C(0x036388882DF52B55),
+ UINT64_C(0x00530F2F7C7C2371), UINT64_C(0x03643DB108CC955E),
+ UINT64_C(0x024B18165F1B6107), UINT64_C(0x02769559E8B8FA46),
+ UINT64_C(0x00ABDA3964357585), UINT64_C(0x006A3DE26D6BDE65),
+ UINT64_C(0x00FA0EF45FF0F7F0) },
+ { UINT64_C(0x0328AF72F4ADEFE3), UINT64_C(0x00F209DB1F3C181A),
+ UINT64_C(0x01A0AC16B36B8052), UINT64_C(0x03FE68F1AFEB358F),
+ UINT64_C(0x011BB7B356C432BB), UINT64_C(0x03D087AF0D447953),
+ UINT64_C(0x00088B00BECEF91E), UINT64_C(0x0330A2DA3B763B85),
+ UINT64_C(0x01CC26379FF0902A) } },
+ { { UINT64_C(0x02451A0F72841A85), UINT64_C(0x0354FC0056ED797F),
+ UINT64_C(0x03F4EAB6EB12B346), UINT64_C(0x0032B842273C8FB8),
+ UINT64_C(0x024B836D935DD874), UINT64_C(0x0090627CCD9E0492),
+ UINT64_C(0x0244927C3C49DF5D), UINT64_C(0x0042534A4E5AA66E),
+ UINT64_C(0x00B4C23CB62729C6) },
+ { UINT64_C(0x00295DE15E7B0D82), UINT64_C(0x003481AED4B38216),
+ UINT64_C(0x020CB574DA2A8CEB), UINT64_C(0x03DB292DC006EFC3),
+ UINT64_C(0x03153DE3966C31DB), UINT64_C(0x0398C0D13BB538D2),
+ UINT64_C(0x00D2735B5509DAE6), UINT64_C(0x00BBE1C7422AD656),
+ UINT64_C(0x006495E2F55306CC) } },
+ { { UINT64_C(0x00FC0E58752517BF), UINT64_C(0x0287DC3FE2714AA6),
+ UINT64_C(0x024BBBD332D8AADB), UINT64_C(0x000BF6FA0D08504F),
+ UINT64_C(0x02E724A624D71D7E), UINT64_C(0x01F16EF435B7F288),
+ UINT64_C(0x024E6F71370923F3), UINT64_C(0x00C2B9525922566C),
+ UINT64_C(0x005733338A43CFE0) },
+ { UINT64_C(0x0372270A8BB6E5C0), UINT64_C(0x0023295E1C578E27),
+ UINT64_C(0x01EA019B1BDD171A), UINT64_C(0x0243564F2EC5E9B6),
+ UINT64_C(0x01283B58FFA9DAE7), UINT64_C(0x00215CCB462BFC41),
+ UINT64_C(0x03E3900D562119A3), UINT64_C(0x0273C10EF622442D),
+ UINT64_C(0x00D7B5F5A5718A0A) } },
+ { { UINT64_C(0x03E792204254F3D7), UINT64_C(0x0197A7FB52460AD3),
+ UINT64_C(0x0387DC97132E1376), UINT64_C(0x00D82DE34F7F5873),
+ UINT64_C(0x03B853655C8CF8AC), UINT64_C(0x0173E013A8BD55E9),
+ UINT64_C(0x008A7D4896369A87), UINT64_C(0x024DBCC16EA9BB3A),
+ UINT64_C(0x010910C0CEC40352) },
+ { UINT64_C(0x03B95A34F108C612), UINT64_C(0x0333E2F3D8672331),
+ UINT64_C(0x028C77D48D5C235B), UINT64_C(0x0233CC3106C11962),
+ UINT64_C(0x03EBBF90DDDA15FE), UINT64_C(0x0369066DD81ED647),
+ UINT64_C(0x03BD05AA96CD4304), UINT64_C(0x039E3FFACDB3BA32),
+ UINT64_C(0x01EAC4B260DDEC7F) } },
+ { { UINT64_C(0x035858F23BBE227D), UINT64_C(0x00EAE5030697E923),
+ UINT64_C(0x02368A87F3DE71C5), UINT64_C(0x0168E7B6DEE0F7C3),
+ UINT64_C(0x00527543ED139D52), UINT64_C(0x0127219B1CDD187E),
+ UINT64_C(0x023DB1516D99AC2E), UINT64_C(0x008101C88F395DB5),
+ UINT64_C(0x00C6A87659F9030E) },
+ { UINT64_C(0x039C69A3A7EC3A20), UINT64_C(0x02842173900384B8),
+ UINT64_C(0x0136BA0852E2F7FE), UINT64_C(0x034921364764BE1F),
+ UINT64_C(0x02C74764840F38B3), UINT64_C(0x02F37D32908AE4DC),
+ UINT64_C(0x0138C24B162396AC), UINT64_C(0x02A70AD1A514245D),
+ UINT64_C(0x00C442ABF244BFAF) } },
+ { { UINT64_C(0x02A6B09F093E7BB1), UINT64_C(0x027395A268EC7AC7),
+ UINT64_C(0x028CC643D554CA43), UINT64_C(0x0035243849E2C949),
+ UINT64_C(0x03CF25745B571D36), UINT64_C(0x00F8968B891A06D4),
+ UINT64_C(0x03F9158462DF4912), UINT64_C(0x0277B23F176B632C),
+ UINT64_C(0x0100FDC9203FE38B) },
+ { UINT64_C(0x024667E35C0213B3), UINT64_C(0x001C9D8E55C59D73),
+ UINT64_C(0x03C67911C028CE7C), UINT64_C(0x01D6BE78640D4CA8),
+ UINT64_C(0x024E359FD8B3F600), UINT64_C(0x03240449153262A6),
+ UINT64_C(0x03B253E7A16A83A5), UINT64_C(0x02FDB9879C3019FF),
+ UINT64_C(0x01D5771531A45180) } },
+ { { UINT64_C(0x02FFF1EEAD72BA02), UINT64_C(0x01773B2AD40CD7B5),
+ UINT64_C(0x00B549067C93A24B), UINT64_C(0x0040E568D769A5B9),
+ UINT64_C(0x01CBA8C547CFD559), UINT64_C(0x01B900D1740D29F8),
+ UINT64_C(0x0153A5FEC2807EDD), UINT64_C(0x003616B13CBFDC6E),
+ UINT64_C(0x014FA30FBEC2B9FF) },
+ { UINT64_C(0x03CEBD84555A3B73), UINT64_C(0x011642C087A74BA4),
+ UINT64_C(0x03FAF4C90C28B568), UINT64_C(0x00D2B6FE13831FC3),
+ UINT64_C(0x02F1845F4A404C99), UINT64_C(0x03031352DB2945ED),
+ UINT64_C(0x0192B108B24A2CC8), UINT64_C(0x008B79F2C497B8AE),
+ UINT64_C(0x016844B1F9A48A1A) } },
+ { { UINT64_C(0x033F1B159EA0B318), UINT64_C(0x015BA4F73890FCA5),
+ UINT64_C(0x03AB1671767AEB58), UINT64_C(0x0190DE3F4B53983C),
+ UINT64_C(0x01C67D39EE1606B7), UINT64_C(0x02092898897E0832),
+ UINT64_C(0x016BC61B17E221D9), UINT64_C(0x0302B2A3F7863F1A),
+ UINT64_C(0x0153FC11A3315E45) },
+ { UINT64_C(0x02AC9E25352466CC), UINT64_C(0x03A49408E6FA3892),
+ UINT64_C(0x03B3B7FC83F96BAA), UINT64_C(0x02447E01B52DE677),
+ UINT64_C(0x01EB6353F032192D), UINT64_C(0x00910C3CF3E5926D),
+ UINT64_C(0x02261F650A5EA2DB), UINT64_C(0x03AA8819EC45E274),
+ UINT64_C(0x01F274F4B47595FA) } },
+ { { UINT64_C(0x0026282EB3F78C83), UINT64_C(0x00C28C0709CFCB19),
+ UINT64_C(0x01821376CE1FE0A2), UINT64_C(0x01FDCED392DF4511),
+ UINT64_C(0x007CEFA4CDFC46EC), UINT64_C(0x01C18D201835A1D3),
+ UINT64_C(0x021190BA9D0FC1B3), UINT64_C(0x01CF1181F215C327),
+ UINT64_C(0x0144F63DC1DC2337) },
+ { UINT64_C(0x02467154F82AE76F), UINT64_C(0x00A8E4BC6B21A6C1),
+ UINT64_C(0x003C5960D11DFC29), UINT64_C(0x02CCE05B7F97DFEA),
+ UINT64_C(0x0155EBEF61A21A64), UINT64_C(0x02E5A1DD22DB3809),
+ UINT64_C(0x008CACD3BAEA4ADC), UINT64_C(0x01AF102BA92E48C7),
+ UINT64_C(0x0060B7381DB1721E) } },
+ { { UINT64_C(0x03861A0264B1FB35), UINT64_C(0x02F8C8B3CD33A6FA),
+ UINT64_C(0x030806F41BBA295F), UINT64_C(0x0164D82631325495),
+ UINT64_C(0x00CE9EA6FF0E358B), UINT64_C(0x0079012DD18DCC6B),
+ UINT64_C(0x000CC353D3BB1AC0), UINT64_C(0x03AB6D47DE397D50),
+ UINT64_C(0x00AD096897EA08E2) },
+ { UINT64_C(0x023B78EFC3812C10), UINT64_C(0x0089EFA9532A659C),
+ UINT64_C(0x0281A0EB9A3DF013), UINT64_C(0x03AE4559CDF48DB0),
+ UINT64_C(0x00CF5D05BA21B5A4), UINT64_C(0x000FB2B315217C86),
+ UINT64_C(0x018D07209C8D7927), UINT64_C(0x0142BF514B4FAA4C),
+ UINT64_C(0x002374D59706AD5B) } },
+ { { UINT64_C(0x00C15F67DD00894F), UINT64_C(0x0365718AE78487A2),
+ UINT64_C(0x01F5CF8A8DD7221A), UINT64_C(0x00B966824944DA72),
+ UINT64_C(0x039495E53E96A028), UINT64_C(0x017A489926C99CDF),
+ UINT64_C(0x03E7DBA2A6042AD8), UINT64_C(0x0070896FE2C77ED8),
+ UINT64_C(0x01DE2D3E99009396) },
+ { UINT64_C(0x02CDACE519305F18), UINT64_C(0x0199321FCFA0FFC9),
+ UINT64_C(0x01FDEB80C6DC481C), UINT64_C(0x02944307EF501A18),
+ UINT64_C(0x0007F535095DB6A0), UINT64_C(0x01898CF112F16E56),
+ UINT64_C(0x00CB5741AFE7E00B), UINT64_C(0x01926B1FD8D17FCB),
+ UINT64_C(0x015E5CD28BDE5A59) } },
+ },
+ {
+ { { UINT64_C(0x0287283D0F0DB502), UINT64_C(0x01F7D518BD1DEC47),
+ UINT64_C(0x0110E901D0288278), UINT64_C(0x000A9C8AA5A57C0C),
+ UINT64_C(0x03B765C5FA16BDCF), UINT64_C(0x03E5DF4E7DE798D7),
+ UINT64_C(0x00F43CD382F586CB), UINT64_C(0x016DF729B4C5BFE2),
+ UINT64_C(0x00F84CAB1D3D3490) },
+ { UINT64_C(0x03C62F43F45CE248), UINT64_C(0x01779CCA073E2076),
+ UINT64_C(0x003E7EB22E4B1573), UINT64_C(0x0192926CE48BFBEA),
+ UINT64_C(0x00AEAE190B45D381), UINT64_C(0x02BD36FBE7AB443A),
+ UINT64_C(0x00906E0CD124F126), UINT64_C(0x025881B2A14C49E4),
+ UINT64_C(0x016E768F54273911) } },
+ { { UINT64_C(0x0339D7B298B06389), UINT64_C(0x00171C63E44DC1B1),
+ UINT64_C(0x00C31B1589FD2080), UINT64_C(0x00B27F131898A9FA),
+ UINT64_C(0x0342FE5ADE76B5A2), UINT64_C(0x01090D97105A2655),
+ UINT64_C(0x0388BB1432187198), UINT64_C(0x02D27D0C82BF52D7),
+ UINT64_C(0x00807B9F1B11A583) },
+ { UINT64_C(0x01F3344975177EBC), UINT64_C(0x00D1C4854243F6DB),
+ UINT64_C(0x00CF85E1839AB312), UINT64_C(0x00D9C19A12D20012),
+ UINT64_C(0x01709110819085E7), UINT64_C(0x011FEDA170483D5C),
+ UINT64_C(0x01B28F055EEB31A0), UINT64_C(0x02289D0F2CBAB0E6),
+ UINT64_C(0x000867BA2963A0E1) } },
+ { { UINT64_C(0x03F6911B90581DC0), UINT64_C(0x01F1FB19987F20FB),
+ UINT64_C(0x0134E22EFA2F437F), UINT64_C(0x00398E1EB156A4E0),
+ UINT64_C(0x0325F4C0DBD2FAF4), UINT64_C(0x0204D252D5C55B5B),
+ UINT64_C(0x00E279F64EA373DA), UINT64_C(0x01DB9B5CD34A8E6F),
+ UINT64_C(0x00D14F2FC1B2EE3D) },
+ { UINT64_C(0x0391CF084FAB453E), UINT64_C(0x016D9E632F3C4388),
+ UINT64_C(0x01D15FD339420C4A), UINT64_C(0x026356CC61C907C7),
+ UINT64_C(0x026E23E3D6197795), UINT64_C(0x0142F5E058DB2B6C),
+ UINT64_C(0x020EFE8EAFF59180), UINT64_C(0x00A481A4F4563A8C),
+ UINT64_C(0x012FEE21C8B4C4E9) } },
+ { { UINT64_C(0x02056DCD3DB8A57B), UINT64_C(0x0317AAE4B46AB720),
+ UINT64_C(0x031833D064C1F1CD), UINT64_C(0x03A3CC17BEBD056B),
+ UINT64_C(0x03F05A7034003715), UINT64_C(0x009FAC41671C58C9),
+ UINT64_C(0x01BEE4D8BD8671CA), UINT64_C(0x0004BC6DBD8A8392),
+ UINT64_C(0x01F15A2D6E92E74A) },
+ { UINT64_C(0x010933993D4BD6B6), UINT64_C(0x028502613D6FDD77),
+ UINT64_C(0x0134D55E73D97A09), UINT64_C(0x001DB5E602D2AA86),
+ UINT64_C(0x00FE1E6979BF531F), UINT64_C(0x02AC99028117960B),
+ UINT64_C(0x03849A42EAAB4E66), UINT64_C(0x0190FBBD3B94D87F),
+ UINT64_C(0x011CAB9AC249065C) } },
+ { { UINT64_C(0x03000D01D5AD0B4E), UINT64_C(0x01E094F415439045),
+ UINT64_C(0x0071645EF32A823C), UINT64_C(0x013C18E27FCF9EA5),
+ UINT64_C(0x00B2733886CDC7A9), UINT64_C(0x02902330EF732EA5),
+ UINT64_C(0x003C25CEA5C5686B), UINT64_C(0x029DF5773028F0CD),
+ UINT64_C(0x016FB941FCD6583D) },
+ { UINT64_C(0x01DEA99AF3494AD9), UINT64_C(0x03BA2C1B9C712901),
+ UINT64_C(0x02E32E4B0A8430F2), UINT64_C(0x00CB695E8BF6F96B),
+ UINT64_C(0x0161F767B32907C2), UINT64_C(0x002FC8531B5E7CEC),
+ UINT64_C(0x00298C1304153AFA), UINT64_C(0x0189BCBF02EE4544),
+ UINT64_C(0x0035592EC7CAC39B) } },
+ { { UINT64_C(0x0359513866647B76), UINT64_C(0x00DB6945523879DD),
+ UINT64_C(0x0349C662AF030344), UINT64_C(0x03638440AAB5A275),
+ UINT64_C(0x02A0720FE9DC8A6B), UINT64_C(0x011CEE4DF271AE5F),
+ UINT64_C(0x00BC676869500BE5), UINT64_C(0x02F5135FF9B7674F),
+ UINT64_C(0x00142511483B55E9) },
+ { UINT64_C(0x02DE083E6D8A2C33), UINT64_C(0x014C0545D4B8062F),
+ UINT64_C(0x01AD94143AC28589), UINT64_C(0x01AEBAA37C00A634),
+ UINT64_C(0x0078E06973DA0209), UINT64_C(0x03F56A237FA0E6B0),
+ UINT64_C(0x02879F4A94D49E71), UINT64_C(0x01BE6BF822D1FD4F),
+ UINT64_C(0x00F9E2018F9FBF87) } },
+ { { UINT64_C(0x025B8DCB938F6A40), UINT64_C(0x0026725B42FA4F9B),
+ UINT64_C(0x039198D12A999847), UINT64_C(0x010A9C957A1EFA18),
+ UINT64_C(0x012FAA8E7E5D1356), UINT64_C(0x0205AB8BB7E3A8BA),
+ UINT64_C(0x015652F190E95489), UINT64_C(0x0231452E385A88C6),
+ UINT64_C(0x0096A500D25B0C46) },
+ { UINT64_C(0x01B6696514F1EAD3), UINT64_C(0x026BE39E6BD0E127),
+ UINT64_C(0x01725DEFE2C66DD3), UINT64_C(0x01FEAE05ECA5B5BB),
+ UINT64_C(0x015AA101430609C7), UINT64_C(0x0274AAB1807123A3),
+ UINT64_C(0x02A446B243B7DBAC), UINT64_C(0x007DC3A911987A6B),
+ UINT64_C(0x005309D7E2813F76) } },
+ { { UINT64_C(0x01966924104023FD), UINT64_C(0x0020B1F67AD27833),
+ UINT64_C(0x03DFD742FB1D5AC6), UINT64_C(0x017F6DD6D843D1C9),
+ UINT64_C(0x01DEAB06F70CFD0B), UINT64_C(0x00F3AAA1D84BA46E),
+ UINT64_C(0x01535D03B00F23FA), UINT64_C(0x02F223786ADE70A7),
+ UINT64_C(0x00DC3F149A4B2AAE) },
+ { UINT64_C(0x0318A8079CA626DD), UINT64_C(0x00A1DE38CE5C6BE6),
+ UINT64_C(0x032F55E2E4E50992), UINT64_C(0x0192257A6FB7EED9),
+ UINT64_C(0x020B9106C175FDEB), UINT64_C(0x001ACA988C739470),
+ UINT64_C(0x02A12D0A78C3DAD7), UINT64_C(0x02A0BFDBC1802E4D),
+ UINT64_C(0x0138CB75E6BBB8BA) } },
+ { { UINT64_C(0x00B271637F32AB3F), UINT64_C(0x02196867BE3CDC78),
+ UINT64_C(0x00647C1710CC4F5D), UINT64_C(0x00A0EDE0B8D8DB71),
+ UINT64_C(0x0092AB51B9BB942A), UINT64_C(0x030CEE5FF47C8C77),
+ UINT64_C(0x0172B6296758CE89), UINT64_C(0x03FBF70A184CFE5F),
+ UINT64_C(0x0101B88E67F1E05D) },
+ { UINT64_C(0x02FFBCD12737D38E), UINT64_C(0x02754305441EA3F7),
+ UINT64_C(0x0174766ADA98B6A0), UINT64_C(0x00EEEAD822C29CD7),
+ UINT64_C(0x02D88F6B991FA26B), UINT64_C(0x02CB655B1E5DF95B),
+ UINT64_C(0x03DD0BD505307E4F), UINT64_C(0x010182FDFC359D4A),
+ UINT64_C(0x00755C3675A01A9E) } },
+ { { UINT64_C(0x00371ACBFD4D4113), UINT64_C(0x01CD0CEE90EDA0C0),
+ UINT64_C(0x023F0667BA099F71), UINT64_C(0x0122476EC028AFF8),
+ UINT64_C(0x0057490C1B9D3C8E), UINT64_C(0x0037D1A2CAFBC030),
+ UINT64_C(0x0357613B144BA059), UINT64_C(0x030B5ED5F7E2DFAA),
+ UINT64_C(0x00C03407E66571BC) },
+ { UINT64_C(0x015B2051592A3113), UINT64_C(0x033C0B977FE1CA61),
+ UINT64_C(0x0114564ECE17F466), UINT64_C(0x02770F5D995C1ECC),
+ UINT64_C(0x01D8797648C617E7), UINT64_C(0x00B30F6FB78CAD34),
+ UINT64_C(0x036CD504495109EC), UINT64_C(0x02EA78A9F6758E7F),
+ UINT64_C(0x007A71C9E769E9C6) } },
+ { { UINT64_C(0x011D5BE35201CD59), UINT64_C(0x0209D1C58765C0EE),
+ UINT64_C(0x01D25192839B1DB8), UINT64_C(0x03EAD38ED4A2B60E),
+ UINT64_C(0x0057B36709A7B7AA), UINT64_C(0x0085B62AF338BC2B),
+ UINT64_C(0x030F3BEF5577F894), UINT64_C(0x0390BAA242140FD9),
+ UINT64_C(0x011B9BF27FA21CD6) },
+ { UINT64_C(0x031FF60458FFB263), UINT64_C(0x00D71C9EC589C2CE),
+ UINT64_C(0x006C50B6449B7493), UINT64_C(0x034EF7D63824AD56),
+ UINT64_C(0x038578A6820938F3), UINT64_C(0x00843B021ED27247),
+ UINT64_C(0x02672B0B7E864C01), UINT64_C(0x00FE28A0AD914F56),
+ UINT64_C(0x01870F7E6544AD26) } },
+ { { UINT64_C(0x03FABFF21E593E49), UINT64_C(0x01EB902CACEDCD38),
+ UINT64_C(0x010907F07EA1634E), UINT64_C(0x013A3B3D20F1ACCD),
+ UINT64_C(0x035F3C751269190C), UINT64_C(0x02F6BAE3746C46A6),
+ UINT64_C(0x00097CBB9F7B998C), UINT64_C(0x016B88BF2C151BD8),
+ UINT64_C(0x01317587E7C4BAF5) },
+ { UINT64_C(0x027516E2062B46F6), UINT64_C(0x01703ECD4583F2AB),
+ UINT64_C(0x007D01ABE67B4364), UINT64_C(0x00F1753628034E7C),
+ UINT64_C(0x0108FF0FECD3BD76), UINT64_C(0x033B697531A2F0AC),
+ UINT64_C(0x010AC9943B9A6425), UINT64_C(0x020BC633526FFAA7),
+ UINT64_C(0x0006E03EC9A132B1) } },
+ { { UINT64_C(0x016BC247531FFCBB), UINT64_C(0x02EE2DDBF721D516),
+ UINT64_C(0x0052E0725E10638A), UINT64_C(0x013566F49B1AAC88),
+ UINT64_C(0x007343ED5106C60D), UINT64_C(0x02985C4AAAB232AC),
+ UINT64_C(0x0113830C6312DE7A), UINT64_C(0x0136F1CF05895FFF),
+ UINT64_C(0x01ED7817C0B0027B) },
+ { UINT64_C(0x02716A42F749B010), UINT64_C(0x039DC807B7BDBC44),
+ UINT64_C(0x035DFD64A2C7F19C), UINT64_C(0x00AFE5B488D67F84),
+ UINT64_C(0x03831B1AD5D8B241), UINT64_C(0x00FEF3BA557CC901),
+ UINT64_C(0x0082C2A38F96B970), UINT64_C(0x027380F80F3D96E5),
+ UINT64_C(0x014FDF6544812C07) } },
+ { { UINT64_C(0x03600187B0C6A752), UINT64_C(0x019E405A0263FA53),
+ UINT64_C(0x000E0EA369E1C1BF), UINT64_C(0x0130C422E3895E24),
+ UINT64_C(0x035F4072E884BDCB), UINT64_C(0x0284B4DBC9FDB267),
+ UINT64_C(0x0159D4401B2054DE), UINT64_C(0x03649FACE16E526C),
+ UINT64_C(0x0100AC3AAFFE225D) },
+ { UINT64_C(0x03BA224ACAFA8C2B), UINT64_C(0x031E5C26E31FAF8C),
+ UINT64_C(0x00B183566D47E97E), UINT64_C(0x0020C64F9C9C2688),
+ UINT64_C(0x02F6655D04CC893B), UINT64_C(0x03908BE8D4648FE4),
+ UINT64_C(0x02F14F85922DC116), UINT64_C(0x031D345610C10114),
+ UINT64_C(0x00FC287447A5FA2D) } },
+ { { UINT64_C(0x020880798CEE5802), UINT64_C(0x03BE370A4C38C7FF),
+ UINT64_C(0x00934BE76CF041A3), UINT64_C(0x011B7A12BC50EEE4),
+ UINT64_C(0x0301BD4FC9636CD4), UINT64_C(0x03C53C2A0264C2CE),
+ UINT64_C(0x0347FF0A389DC319), UINT64_C(0x03A848048891AD07),
+ UINT64_C(0x0110D35394388CFB) },
+ { UINT64_C(0x0042E86EE18DA0C0), UINT64_C(0x0359DB5D730A12EE),
+ UINT64_C(0x03D8CD72D5690026), UINT64_C(0x01FD191FD18F2690),
+ UINT64_C(0x00B8691FD8727A16), UINT64_C(0x0135130205267C55),
+ UINT64_C(0x011FDBAF57A304DB), UINT64_C(0x012D7FC9DED7342D),
+ UINT64_C(0x01BFE56058019C74) } },
+ { { UINT64_C(0x00ADCF21754184BF), UINT64_C(0x02532EC18F101A1B),
+ UINT64_C(0x02E7AA58B7598AF4), UINT64_C(0x0297C67528666348),
+ UINT64_C(0x022BAF11DF85DAD5), UINT64_C(0x0097F7BCDA9CFFA7),
+ UINT64_C(0x03F0C563228A2E65), UINT64_C(0x0316126723B57D49),
+ UINT64_C(0x019B45ECCD3F5983) },
+ { UINT64_C(0x02B86D25E0A95EDC), UINT64_C(0x027ED42D9C73BD22),
+ UINT64_C(0x0385F10181D77392), UINT64_C(0x02C8AA05E16378DB),
+ UINT64_C(0x02962E884B04947C), UINT64_C(0x00A054D788CF48A9),
+ UINT64_C(0x006616654F6E2CF7), UINT64_C(0x021848D66B0ACC97),
+ UINT64_C(0x00E73704171C5696) } },
+ }
+};
+
+/*-
+ * Finite field inversion.
+ * Computed with exponentiation via FLT.
+ * Autogenerated: ecp/secp521r1/fe_inv.op3
+ * custom repunit addition chain
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ */
+static void
+fiat_secp521r1_inv(fe_t output, const fe_t t1)
+{
+ int i;
+ /* temporary variables */
+ fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8;
+
+ fiat_secp521r1_carry_square(acc, t1);
+ fiat_secp521r1_carry_mul(t2, acc, t1);
+ fiat_secp521r1_carry_square(acc, t2);
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t4, acc, t2);
+ fiat_secp521r1_carry_square(acc, t4);
+ for (i = 0; i < 3; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t8, acc, t4);
+ fiat_secp521r1_carry_square(acc, t8);
+ for (i = 0; i < 7; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t16, acc, t8);
+ fiat_secp521r1_carry_square(acc, t16);
+ for (i = 0; i < 15; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t32, acc, t16);
+ fiat_secp521r1_carry_square(acc, t32);
+ for (i = 0; i < 31; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t64, acc, t32);
+ fiat_secp521r1_carry_square(acc, t64);
+ for (i = 0; i < 63; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t128, acc, t64);
+ fiat_secp521r1_carry_square(acc, t128);
+ for (i = 0; i < 127; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t256, acc, t128);
+ fiat_secp521r1_carry_square(acc, t256);
+ for (i = 0; i < 255; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t512, acc, t256);
+ fiat_secp521r1_carry_square(acc, t512);
+ for (i = 0; i < 3; i++)
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t516, acc, t4);
+ fiat_secp521r1_carry_square(acc, t516);
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t518, acc, t2);
+ fiat_secp521r1_carry_square(acc, t518);
+ fiat_secp521r1_carry_mul(t519, acc, t1);
+ fiat_secp521r1_carry_square(acc, t519);
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(output, acc, t1);
+}
+
+/*-
+ * Q := 2P, both projective, Q and P same pointers OK
+ * Autogenerated: op3/dbl_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 6
+ * ASSERT: a = -3
+ */
+static void
+point_double(pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X = P->X;
+ const limb_t *Y = P->Y;
+ const limb_t *Z = P->Z;
+ limb_t *X3 = Q->X;
+ limb_t *Y3 = Q->Y;
+ limb_t *Z3 = Q->Z;
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_square(t0, X);
+ fiat_secp521r1_carry_square(t1, Y);
+ fiat_secp521r1_carry_square(t2, Z);
+ fiat_secp521r1_carry_mul(t3, X, Y);
+ fiat_secp521r1_carry_add(t3, t3, t3);
+ fiat_secp521r1_carry_mul(t4, Y, Z);
+ fiat_secp521r1_carry_mul(Z3, X, Z);
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+ fiat_secp521r1_carry_mul(Y3, b, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, Z3);
+ fiat_secp521r1_carry_add(X3, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, X3, Y3);
+ fiat_secp521r1_carry_sub(X3, t1, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Y3);
+ fiat_secp521r1_carry_mul(X3, X3, t3);
+ fiat_secp521r1_carry_add(t3, t2, t2);
+ fiat_secp521r1_carry_add(t2, t2, t3);
+ fiat_secp521r1_carry_mul(Z3, b, Z3);
+ fiat_secp521r1_carry_sub(Z3, Z3, t2);
+ fiat_secp521r1_carry_sub(Z3, Z3, t0);
+ fiat_secp521r1_carry_add(t3, Z3, Z3);
+ fiat_secp521r1_carry_add(Z3, Z3, t3);
+ fiat_secp521r1_carry_add(t3, t0, t0);
+ fiat_secp521r1_carry_add(t0, t3, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t0, t0, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t0, t4, t4);
+ fiat_secp521r1_carry_mul(Z3, t0, Z3);
+ fiat_secp521r1_carry_sub(X3, X3, Z3);
+ fiat_secp521r1_carry_mul(Z3, t0, t1);
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+}
+
+/*-
+ * out1 = (arg1 == 0) ? 0 : nz
+ * NB: this is not a "mod p equiv" 0, but literal 0
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ */
+static void
+fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1)
+{
+ limb_t x1 = 0;
+ int i;
+
+ for (i = 0; i < LIMB_CNT; i++)
+ x1 |= arg1[i];
+ *out1 = x1;
+}
+
+/*-
+ * R := Q + P where R and Q are projective, P affine.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_mixed.op3
+ * https://eprint.iacr.org/2015/1060 Alg 5
+ * ASSERT: a = -3
+ */
+static void
+point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ fe_t X3;
+ fe_t Y3;
+ fe_t Z3;
+ limb_t nz;
+
+ /* check P for affine inf */
+ fiat_secp521r1_nonzero(&nz, P->Y);
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_mul(t0, X1, X2);
+ fiat_secp521r1_carry_mul(t1, Y1, Y2);
+ fiat_secp521r1_carry_add(t3, X2, Y2);
+ fiat_secp521r1_carry_add(t4, X1, Y1);
+ fiat_secp521r1_carry_mul(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, t0, t1);
+ fiat_secp521r1_carry_sub(t3, t3, t4);
+ fiat_secp521r1_carry_mul(t4, Y2, Z1);
+ fiat_secp521r1_carry_add(t4, t4, Y1);
+ fiat_secp521r1_carry_mul(Y3, X2, Z1);
+ fiat_secp521r1_carry_add(Y3, Y3, X1);
+ fiat_secp521r1_carry_mul(Z3, b, Z1);
+ fiat_secp521r1_carry_sub(X3, Y3, Z3);
+ fiat_secp521r1_carry_add(Z3, X3, X3);
+ fiat_secp521r1_carry_add(X3, X3, Z3);
+ fiat_secp521r1_carry_sub(Z3, t1, X3);
+ fiat_secp521r1_carry_add(X3, t1, X3);
+ fiat_secp521r1_carry_mul(Y3, b, Y3);
+ fiat_secp521r1_carry_add(t1, Z1, Z1);
+ fiat_secp521r1_carry_add(t2, t1, Z1);
+ fiat_secp521r1_carry_sub(Y3, Y3, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t1, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_add(t1, t0, t0);
+ fiat_secp521r1_carry_add(t0, t1, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t1, t4, Y3);
+ fiat_secp521r1_carry_mul(t2, t0, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t2);
+ fiat_secp521r1_carry_mul(X3, t3, X3);
+ fiat_secp521r1_carry_sub(X3, X3, t1);
+ fiat_secp521r1_carry_mul(Z3, t4, Z3);
+ fiat_secp521r1_carry_mul(t1, t3, t0);
+ fiat_secp521r1_carry_add(Z3, Z3, t1);
+
+ /* if P is inf, throw all that away and take Q */
+ fiat_secp521r1_selectznz(R->X, nz, Q->X, X3);
+ fiat_secp521r1_selectznz(R->Y, nz, Q->Y, Y3);
+ fiat_secp521r1_selectznz(R->Z, nz, Q->Z, Z3);
+}
+
+/*-
+ * R := Q + P all projective.
+ * R and Q same pointers OK
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_proj.op3
+ * https://eprint.iacr.org/2015/1060 Alg 4
+ * ASSERT: a = -3
+ */
+static void
+point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4, t5;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ const limb_t *Z2 = P->Z;
+ limb_t *X3 = R->X;
+ limb_t *Y3 = R->Y;
+ limb_t *Z3 = R->Z;
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_mul(t0, X1, X2);
+ fiat_secp521r1_carry_mul(t1, Y1, Y2);
+ fiat_secp521r1_carry_mul(t2, Z1, Z2);
+ fiat_secp521r1_carry_add(t3, X1, Y1);
+ fiat_secp521r1_carry_add(t4, X2, Y2);
+ fiat_secp521r1_carry_mul(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, t0, t1);
+ fiat_secp521r1_carry_sub(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, Y1, Z1);
+ fiat_secp521r1_carry_add(t5, Y2, Z2);
+ fiat_secp521r1_carry_mul(t4, t4, t5);
+ fiat_secp521r1_carry_add(t5, t1, t2);
+ fiat_secp521r1_carry_sub(t4, t4, t5);
+ fiat_secp521r1_carry_add(X3, X1, Z1);
+ fiat_secp521r1_carry_add(Y3, X2, Z2);
+ fiat_secp521r1_carry_mul(X3, X3, Y3);
+ fiat_secp521r1_carry_add(Y3, t0, t2);
+ fiat_secp521r1_carry_sub(Y3, X3, Y3);
+ fiat_secp521r1_carry_mul(Z3, b, t2);
+ fiat_secp521r1_carry_sub(X3, Y3, Z3);
+ fiat_secp521r1_carry_add(Z3, X3, X3);
+ fiat_secp521r1_carry_add(X3, X3, Z3);
+ fiat_secp521r1_carry_sub(Z3, t1, X3);
+ fiat_secp521r1_carry_add(X3, t1, X3);
+ fiat_secp521r1_carry_mul(Y3, b, Y3);
+ fiat_secp521r1_carry_add(t1, t2, t2);
+ fiat_secp521r1_carry_add(t2, t1, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t1, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_add(t1, t0, t0);
+ fiat_secp521r1_carry_add(t0, t1, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t1, t4, Y3);
+ fiat_secp521r1_carry_mul(t2, t0, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t2);
+ fiat_secp521r1_carry_mul(X3, t3, X3);
+ fiat_secp521r1_carry_sub(X3, X3, t1);
+ fiat_secp521r1_carry_mul(Z3, t4, Z3);
+ fiat_secp521r1_carry_mul(t1, t3, t0);
+ fiat_secp521r1_carry_add(Z3, Z3, t1);
+}
+
+/*
+ * constants
+ *
+ * RADIX       window width in bits: the regular recoding below consumes the
+ *             scalar RADIX bits per digit.
+ * DRADIX      2^RADIX (32); the lookup tables hold DRADIX / 2 (16) points,
+ *             one per odd digit magnitude 1, 3, ..., DRADIX - 1.
+ * DRADIX_WNAF 2^(RADIX + 1) (64); (DRADIX_WNAF - 1) masks the RADIX + 1 bit
+ *             window used while recoding.
+ */
+#define RADIX 5
+#define DRADIX (1 << RADIX)
+#define DRADIX_WNAF ((DRADIX) << 1)
+
+/*-
+ * precomp for wnaf scalar multiplication:
+ * precomp[0] = 1P
+ * precomp[1] = 3P
+ * precomp[2] = 5P
+ * precomp[3] = 7P
+ * precomp[4] = 9P
+ * ...
+ *
+ * precomp: output table of DRADIX / 2 projective points.
+ * P:       affine input point; it is lifted to projective form by setting
+ *          Z to const_one.
+ *
+ * The last table slot doubles as scratch: it receives the result of
+ * point_double on 1P (defined outside this excerpt) and is read as the
+ * fixed addend in every loop iteration. The final iteration (i equal to
+ * DRADIX / 2 - 1) overwrites it with its real table value, so that call
+ * has its output aliasing one of its inputs.
+ */
+static void
+precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P)
+{
+ int i;
+
+ fe_copy(precomp[0].X, P->X);
+ fe_copy(precomp[0].Y, P->Y);
+ fe_copy(precomp[0].Z, const_one);
+ point_double(&precomp[DRADIX / 2 - 1], &precomp[0]); /* scratch slot := double of 1P */
+
+ for (i = 1; i < DRADIX / 2; i++) /* precomp[i] := scratch + precomp[i - 1] */
+ point_add_proj(&precomp[i], &precomp[DRADIX / 2 - 1], &precomp[i - 1]);
+}
+
+/*
+ * fetch a scalar bit
+ *
+ * in:  66-byte scalar; bit idx is taken from byte idx / 8 at bit position
+ *      idx % 8, so byte 0 carries the lowest-numbered bits.
+ * idx: bit index. A negative index, or one past the last byte, reads as 0,
+ *      which lets the recoding loops index beyond the top of the scalar.
+ *
+ * Returns 0 or 1.
+ */
+static int
+scalar_get_bit(const unsigned char in[66], int idx)
+{
+ int widx, rshift;
+
+ widx = idx >> 3; /* byte that holds the bit */
+ rshift = idx & 0x7; /* position of the bit inside that byte */
+
+ if (idx < 0 || widx >= 66)
+ return 0;
+
+ return (in[widx] >> rshift) & 0x1;
+}
+
+/*-
+ * Compute "regular" wnaf representation of a scalar.
+ * See "Exponent Recoding and Regular Exponentiation Algorithms",
+ * Tunstall et al., AfricaCrypt 2009, Alg 6.
+ * It forces an odd scalar and outputs digits in
+ * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...}
+ * i.e. signed odd digits with _no zeroes_ -- that makes it "regular".
+ *
+ * out: receives 106 digits, least significant first; digit i is taken
+ *      RADIX bits above digit i - 1. The loop writes digits 0..104 and the
+ *      leftover window becomes digit 105.
+ * in:  66-byte scalar, read through scalar_get_bit.
+ *
+ * Because the low bit is forced to 1 here, callers correct the result
+ * afterwards when scalar[0] & 1 is 0 (see the "conditionally subtract"
+ * steps in the scalar multiplication routines).
+ * The loop has a fixed trip count and no branch on scalar bits.
+ */
+static void
+scalar_rwnaf(int8_t out[106], const unsigned char in[66])
+{
+ int i;
+ int8_t window, d;
+
+ window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low RADIX + 1 bits, forced odd */
+ for (i = 0; i < 105; i++) {
+ d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* odd window minus 2^RADIX: odd, nonzero digit */
+ out[i] = d;
+ window = (window - d) >> RADIX; /* remove the digit and move up one digit position */
+ /* refill bits 1..5 of the window with the next RADIX scalar bits */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5;
+ }
+ out[i] = window; /* i == 105 here: most significant digit */
+}
+
+/*-
+ * Compute "textbook" wnaf representation of a scalar.
+ * NB: not constant time
+ *
+ * out: receives 529 digits, one per bit position, least significant first.
+ *      Each digit is either 0 or an odd value in (-DRADIX, DRADIX).
+ * in:  66-byte scalar, read through scalar_get_bit.
+ *
+ * The digit computation branches on the window contents, i.e. on scalar
+ * bits, which is why this recoding must not be used with secret scalars.
+ */
+static void
+scalar_wnaf(int8_t out[529], const unsigned char in[66])
+{
+ int i;
+ int8_t window, d;
+
+ window = in[0] & (DRADIX_WNAF - 1); /* low RADIX + 1 bits of the scalar */
+ for (i = 0; i < 529; i++) {
+ d = 0;
+ /* odd window: emit it as a digit, mapped into the signed range */
+ if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX))
+ d -= DRADIX_WNAF;
+ out[i] = d;
+ window = (window - d) >> 1; /* remove the digit and advance one bit */
+ window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; /* shift in the next scalar bit at the top */
+ }
+}
+
+/*-
+ * Simultaneous scalar multiplication: interleaved "textbook" wnaf.
+ * NB: not constant time
+ *
+ * out: affine result; per the public wrapper this is a * G + b * P.
+ *      It is written only at the very end, so it may alias P.
+ * a:   66-byte scalar whose digits index lut_cmb[0] (defined outside this
+ *      excerpt; presumably the odd multiples of the generator G -- confirm).
+ * b:   66-byte scalar whose digits index the table built from P.
+ * P:   affine input point.
+ *
+ * Bookkeeping:
+ *   is_inf  - 1 until the first nonzero digit is seen; while set, doublings
+ *             are skipped and the first table entry is copied instead of
+ *             added.
+ *   flipped - 1 while Q.Y currently holds the negated accumulator. To apply
+ *             a negative digit the accumulator is negated instead of the
+ *             table entry, so only positive table entries are ever added;
+ *             the sign is put right again after the loop.
+ */
+static void
+var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66],
+ const unsigned char b[66], const pt_aff_t *P)
+{
+ int i, d, is_neg, is_inf = 1, flipped = 0;
+ int8_t anaf[529] = { 0 };
+ int8_t bnaf[529] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P);
+ scalar_wnaf(anaf, a);
+ scalar_wnaf(bnaf, b);
+
+ for (i = 528; i >= 0; i--) {
+ if (!is_inf)
+ point_double(&Q, &Q);
+ if ((d = bnaf[i])) { /* nonzero digit of b: add a multiple of P */
+ if ((is_neg = d < 0) != flipped) {
+ /* digit sign differs from accumulator sign state: negate accumulator */
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &precomp[d].X);
+ fe_copy(Q.Y, &precomp[d].Y);
+ fe_copy(Q.Z, &precomp[d].Z);
+ is_inf = 0;
+ } else
+ point_add_proj(&Q, &Q, &precomp[d]);
+ }
+ if ((d = anaf[i])) { /* nonzero digit of a: add an entry of lut_cmb[0] */
+ if ((is_neg = d < 0) != flipped) {
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &lut_cmb[0][d].X);
+ fe_copy(Q.Y, &lut_cmb[0][d].Y);
+ fe_copy(Q.Z, const_one); /* table entries are affine: lift with Z = 1 */
+ is_inf = 0;
+ } else
+ point_add_mixed(&Q, &Q, &lut_cmb[0][d]);
+ }
+ }
+
+ if (is_inf) {
+ /* initialize accumulator to inf: all-zero scalars */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+ }
+
+ if (flipped) {
+ /* correct sign */
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ }
+
+ /* convert to affine -- NB depends on coordinate system */
+ /* here: x = X * Z^-1, y = Y * Z^-1 */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Variable point scalar multiplication with "regular" wnaf.
+ * Here "regular" means _no zeroes_, so the sequence of
+ * EC arithmetic ops is fixed.
+ *
+ * out:    affine result; per the public wrapper this is scalar * P.
+ *         NB out->Y is also used as scratch inside the main loop. That is
+ *         safe when out aliases P only because P is fully consumed by
+ *         precomp_wnaf before the loop starts.
+ * scalar: 66-byte scalar.
+ * P:      affine input point.
+ *
+ * Table lookups scan all DRADIX / 2 entries and pick one with
+ * fiat_secp521r1_selectznz (defined outside this excerpt; presumably the
+ * same "zero selects the first operand, nonzero the second" convention as
+ * fiat_secp521r1_cmovznz_u32 -- confirm), so the access pattern does not
+ * depend on the digit value.
+ * Despite its name, diff evaluates to 1 when d == j and to 0 otherwise; the
+ * expression relies on right-shifting a negative int being an arithmetic
+ * shift.
+ */
+static void
+var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66],
+ const pt_aff_t *P)
+{
+ int i, j, d, diff, is_neg;
+ int8_t rnaf[106] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P);
+ scalar_rwnaf(rnaf, scalar);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ /* initialize accumulator to high digit */
+ d = (rnaf[105] - 1) >> 1; /* digit -> table index (no sign handling for the top digit) */
+ for (j = 0; j < DRADIX / 2; j++) {
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == j */
+ fiat_secp521r1_selectznz(Q.X, diff, Q.X, precomp[j].X);
+ fiat_secp521r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
+ fiat_secp521r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
+ }
+
+ for (i = 104; i >= 0; i--) {
+ for (j = 0; j < RADIX; j++) /* shift the accumulator up by one digit */
+ point_double(&Q, &Q);
+ d = rnaf[i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ for (j = 0; j < DRADIX / 2; j++) {
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == j */
+ fiat_secp521r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
+ fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
+ fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp521r1_carry_opp(out->Y, lut.Y); /* out->Y is scratch here: always compute -Y ... */
+ fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y); /* ... and keep it only when is_neg */
+ point_add_proj(&Q, &Q, &lut);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ /* (scalar_rwnaf forced the low bit to 1, so an even scalar is off by one P) */
+ fe_copy(lut.X, precomp[0].X);
+ fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y);
+ fe_copy(lut.Z, precomp[0].Z);
+ point_add_proj(&lut, &lut, &Q); /* lut := Q - P, computed unconditionally */
+ fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X);
+ fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
+ fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ /* here: x = X * Z^-1, y = Y * Z^-1 */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Fixed scalar multiplication: comb with interleaving.
+ *
+ * out:    affine result; per the public wrapper this is scalar * G.
+ *         NB out->Y is also used as scratch inside the main loop.
+ * scalar: 66-byte scalar.
+ *
+ * The scalar is recoded into 106 regular digits. Digit number j * 9 + i is
+ * looked up in table lut_cmb[j] (defined outside this excerpt; presumably
+ * table j holds the odd multiples of G pre-scaled for digit position
+ * j * 9 -- confirm). Processing the 9 "rows" i = 8..0 with RADIX doublings
+ * between rows therefore needs only 8 * RADIX doublings in total.
+ * Lookups scan every table entry; despite its name, diff evaluates to 1
+ * when d == k and to 0 otherwise.
+ */
+static void
+fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66])
+{
+ int i, j, k, d, diff, is_neg = 0;
+ int8_t rnaf[106] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } };
+ pt_aff_t lut = { { 0 }, { 0 } };
+
+ scalar_rwnaf(rnaf, scalar);
+
+ /* initialize accumulator to inf */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ for (i = 8; i >= 0; i--) {
+ for (j = 0; i != 8 && j < RADIX; j++) /* no doublings before the first row */
+ point_double(&Q, &Q);
+ for (j = 0; j < 13; j++) {
+ if (j * 9 + i > 105) /* past the last digit; depends on loop indices only */
+ continue;
+ d = rnaf[j * 9 + i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* odd |d| -> table index (|d| - 1) / 2 */
+ for (k = 0; k < DRADIX / 2; k++) {
+ diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == k */
+ fiat_secp521r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X);
+ fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y);
+ }
+ /* negate lut point if digit is negative */
+ fiat_secp521r1_carry_opp(out->Y, lut.Y); /* out->Y is scratch here */
+ fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
+ point_add_mixed(&Q, &Q, &lut);
+ }
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even */
+ /* (scalar_rwnaf forced the low bit to 1; the point subtracted is lut_cmb[0][0]) */
+ fe_copy(lut.X, lut_cmb[0][0].X);
+ fiat_secp521r1_carry_opp(lut.Y, lut_cmb[0][0].Y);
+ point_add_mixed(&R, &Q, &lut); /* R := Q - lut_cmb[0][0], computed unconditionally */
+ fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X);
+ fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y);
+ fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ /* here: x = X * Z^-1, y = Y * Z^-1 */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ *
+ * All buffers are 66 bytes. The underlying routine uses the non
+ * constant-time wnaf recoding, so a and b must not be secret.
+ * No validation of (inx, iny) is performed here.
+ */
+void
+point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char a[66],
+ const unsigned char b[66],
+ const unsigned char inx[66],
+ const unsigned char iny[66])
+{
+ pt_aff_t P;
+
+ fiat_secp521r1_from_bytes(P.X, inx);
+ fiat_secp521r1_from_bytes(P.Y, iny);
+ /* simultaneous scalar multiplication */
+ /* P is both input and output: the callee writes its result only at the end */
+ var_smul_wnaf_two(&P, a, b, &P);
+
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering.
+ *
+ * All buffers are 66 bytes.
+ */
+void
+point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char scalar[66])
+{
+ pt_aff_t P;
+
+ /* fixed scmul function */
+ fixed_smul_cmb(&P, scalar);
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering.
+ *
+ * All buffers are 66 bytes. No validation of (inx, iny) is performed here.
+ */
+void
+point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char scalar[66],
+ const unsigned char inx[66],
+ const unsigned char iny[66])
+{
+ pt_aff_t P;
+
+ fiat_secp521r1_from_bytes(P.X, inx);
+ fiat_secp521r1_from_bytes(P.Y, iny);
+ /* var scmul function */
+ /* P is both input and output: the callee consumes P before it writes to out */
+ var_smul_rwnaf(&P, scalar, &P);
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+#else /* __SIZEOF_INT128__ */
+
+#include "ecp_secp521r1.h"
+#include <stdint.h>
+#include <string.h>
+#define LIMB_BITS 32 /* machine word size used for limbs in this branch */
+#define LIMB_CNT 19 /* number of limbs per field element */
+/* Field elements */
+typedef uint32_t fe_t[LIMB_CNT];
+typedef uint32_t limb_t;
+
+/* whole-element copy and clear; both operate on sizeof(fe_t) bytes */
+#define fe_copy(d, s) memcpy(d, s, sizeof(fe_t))
+#define fe_set_zero(d) memset(d, 0, sizeof(fe_t))
+
+/*
+ * Projective points
+ * Three field elements (X, Y, Z).
+ */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+ fe_t Z;
+} pt_prj_t;
+
+/*
+ * Affine points
+ * Two field elements (X, Y); there is no Z coordinate.
+ */
+typedef struct {
+ fe_t X;
+ fe_t Y;
+} pt_aff_t;
+
+/* BEGIN verbatim fiat code https://github.com/mit-plv/fiat-crypto */
+/*-
+ * MIT License
+ *
+ * Copyright (c) 2015-2021 the fiat-crypto authors (see the AUTHORS file).
+ * https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Autogenerated: unsaturated_solinas --static --use-value-barrier secp521r1 32 '(auto)' '2^521 - 1' */
+/* curve description: secp521r1 */
+/* machine_wordsize = 32 (from "32") */
+/* requested operations: (all) */
+/* n = 19 (from "(auto)") */
+/* s-c = 2^521 - [(1, 1)] (from "2^521 - 1") */
+/* tight_bounds_multiplier = 1 (from "") */
+/* */
+/* Computed values: */
+/* carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 0, 1] */
+/* eval z = z[0] + (z[1] << 28) + (z[2] << 55) + (z[3] << 83) + (z[4] << 110) + (z[5] << 138) + (z[6] << 165) + (z[7] << 192) + (z[8] << 220) + (z[9] << 247) + (z[10] << 0x113) + (z[11] << 0x12e) + (z[12] << 0x14a) + (z[13] << 0x165) + (z[14] << 0x180) + (z[15] << 0x19c) + (z[16] << 0x1b7) + (z[17] << 0x1d3) + (z[18] << 0x1ee) */
+/* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208) */
+/* balance = [0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0x1ffffffe, 0xffffffe, 0xffffffe] */
+
+#include <stdint.h>
+typedef unsigned char fiat_secp521r1_uint1; /* carries a single bit: carry, borrow or condition flag */
+typedef signed char fiat_secp521r1_int1; /* signed counterpart, used to build 0 / -1 values */
+#ifdef __GNUC__
+#define FIAT_SECP521R1_FIAT_INLINE __inline__
+#else
+#define FIAT_SECP521R1_FIAT_INLINE /* expands to nothing on other compilers */
+#endif
+
+/* The type fiat_secp521r1_loose_field_element is a field element with loose bounds. */
+/* Bounds: [[0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x30000000], [0x0 ~> 0x18000000], [0x0 ~> 0x18000000]] */
+typedef uint32_t fiat_secp521r1_loose_field_element[19]; /* same storage as the tight type; only the permitted limb ranges differ */
+
+/* The type fiat_secp521r1_tight_field_element is a field element with tight bounds. */
+/* Bounds: [[0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x10000000], [0x0 ~> 0x8000000], [0x0 ~> 0x8000000]] */
+typedef uint32_t fiat_secp521r1_tight_field_element[19]; /* same storage as the loose type; only the permitted limb ranges differ */
+
+#if (-1 & 3) != 3
+#error "This code only works on a two's complement system"
+#endif
+
+/*
+ * Value barrier: returns its argument unchanged.
+ * With GCC/Clang (and FIAT_SECP521R1_NO_ASM not defined) the value is routed
+ * through an empty asm statement that declares it as read and written, so
+ * the compiler cannot assume anything about the result. Presumably this is
+ * meant to keep mask-based selects from being optimized into branches.
+ * On other compilers it degrades to the identity macro.
+ */
+#if !defined(FIAT_SECP521R1_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+static __inline__ uint32_t
+fiat_secp521r1_value_barrier_u32(uint32_t a)
+{
+ __asm__(""
+ : "+r"(a)
+ : /* no inputs */);
+ return a;
+}
+#else
+#define fiat_secp521r1_value_barrier_u32(x) (x)
+#endif
+
+/*
+ * The function fiat_secp521r1_addcarryx_u28 is an addition with carry.
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^28
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^28⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xfffffff]
+ * arg3: [0x0 ~> 0xfffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xfffffff]
+ * out2: [0x0 ~> 0x1]
+ *
+ * Within the input bounds the sum is below 2^29, so it cannot wrap the
+ * 32-bit intermediate.
+ */
+static void
+fiat_secp521r1_addcarryx_u28(uint32_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ uint32_t x1;
+ uint32_t x2;
+ fiat_secp521r1_uint1 x3;
+ x1 = ((arg1 + arg2) + arg3); /* full sum */
+ x2 = (x1 & UINT32_C(0xfffffff)); /* low 28 bits: the limb value */
+ x3 = (fiat_secp521r1_uint1)(x1 >> 28); /* bit 28: the carry out */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_subborrowx_u28 is a subtraction with borrow.
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^28
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^28⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xfffffff]
+ * arg3: [0x0 ~> 0xfffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xfffffff]
+ * out2: [0x0 ~> 0x1]
+ *
+ * The borrow is recovered from the sign of the signed difference, which
+ * relies on right-shifting a negative int32_t being an arithmetic shift.
+ */
+static void
+fiat_secp521r1_subborrowx_u28(uint32_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ int32_t x1;
+ fiat_secp521r1_int1 x2;
+ uint32_t x3;
+ x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); /* signed difference, may be negative */
+ x2 = (fiat_secp521r1_int1)(x1 >> 28); /* 0 when non-negative, -1 when a borrow occurred */
+ x3 = (x1 & UINT32_C(0xfffffff)); /* difference reduced mod 2^28 */
+ *out1 = x3;
+ *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* turn 0 / -1 into a 0 / 1 borrow flag */
+}
+
+/*
+ * The function fiat_secp521r1_addcarryx_u27 is an addition with carry.
+ *
+ * Postconditions:
+ * out1 = (arg1 + arg2 + arg3) mod 2^27
+ * out2 = ⌊(arg1 + arg2 + arg3) / 2^27⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x7ffffff]
+ * arg3: [0x0 ~> 0x7ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x7ffffff]
+ * out2: [0x0 ~> 0x1]
+ *
+ * Within the input bounds the sum is below 2^28, so it cannot wrap the
+ * 32-bit intermediate.
+ */
+static void
+fiat_secp521r1_addcarryx_u27(uint32_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ uint32_t x1;
+ uint32_t x2;
+ fiat_secp521r1_uint1 x3;
+ x1 = ((arg1 + arg2) + arg3); /* full sum */
+ x2 = (x1 & UINT32_C(0x7ffffff)); /* low 27 bits: the limb value */
+ x3 = (fiat_secp521r1_uint1)(x1 >> 27); /* bit 27: the carry out */
+ *out1 = x2;
+ *out2 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_subborrowx_u27 is a subtraction with borrow.
+ *
+ * Postconditions:
+ * out1 = (-arg1 + arg2 + -arg3) mod 2^27
+ * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^27⌋
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0x7ffffff]
+ * arg3: [0x0 ~> 0x7ffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0x7ffffff]
+ * out2: [0x0 ~> 0x1]
+ *
+ * The borrow is recovered from the sign of the signed difference, which
+ * relies on right-shifting a negative int32_t being an arithmetic shift.
+ */
+static void
+fiat_secp521r1_subborrowx_u27(uint32_t *out1,
+ fiat_secp521r1_uint1 *out2,
+ fiat_secp521r1_uint1 arg1,
+ uint32_t arg2, uint32_t arg3)
+{
+ int32_t x1;
+ fiat_secp521r1_int1 x2;
+ uint32_t x3;
+ x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3); /* signed difference, may be negative */
+ x2 = (fiat_secp521r1_int1)(x1 >> 27); /* 0 when non-negative, -1 when a borrow occurred */
+ x3 = (x1 & UINT32_C(0x7ffffff)); /* difference reduced mod 2^27 */
+ *out1 = x3;
+ *out2 = (fiat_secp521r1_uint1)(0x0 - x2); /* turn 0 / -1 into a 0 / 1 borrow flag */
+}
+
+/*
+ * The function fiat_secp521r1_cmovznz_u32 is a single-word conditional move.
+ *
+ * Postconditions:
+ * out1 = (if arg1 = 0 then arg2 else arg3)
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [0x0 ~> 0xffffffff]
+ * arg3: [0x0 ~> 0xffffffff]
+ * Output Bounds:
+ * out1: [0x0 ~> 0xffffffff]
+ *
+ * The selection is done with a bit mask rather than a branch; both mask
+ * halves pass through fiat_secp521r1_value_barrier_u32 before use.
+ */
+static void
+fiat_secp521r1_cmovznz_u32(uint32_t *out1,
+ fiat_secp521r1_uint1 arg1, uint32_t arg2,
+ uint32_t arg3)
+{
+ fiat_secp521r1_uint1 x1;
+ uint32_t x2;
+ uint32_t x3;
+ x1 = (!(!arg1)); /* normalize the condition to exactly 0 or 1 */
+ x2 = ((fiat_secp521r1_int1)(0x0 - x1) & UINT32_C(0xffffffff)); /* 0 -> 0x00000000, 1 -> 0xffffffff */
+ x3 = ((fiat_secp521r1_value_barrier_u32(x2) & arg3) |
+ (fiat_secp521r1_value_barrier_u32((~x2)) & arg2)); /* mask keeps arg3, inverted mask keeps arg2 */
+ *out1 = x3;
+}
+
+/*
+ * The function fiat_secp521r1_carry_mul multiplies two field elements and reduces the result.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * eval arg2) mod m
+ *
+ */
+static void
+fiat_secp521r1_carry_mul(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_loose_field_element arg1,
+ const fiat_secp521r1_loose_field_element arg2)
+{
+ uint64_t x1;
+ uint64_t x2;
+ uint64_t x3;
+ uint64_t x4;
+ uint64_t x5;
+ uint64_t x6;
+ uint64_t x7;
+ uint64_t x8;
+ uint64_t x9;
+ uint64_t x10;
+ uint64_t x11;
+ uint64_t x12;
+ uint64_t x13;
+ uint64_t x14;
+ uint64_t x15;
+ uint64_t x16;
+ uint64_t x17;
+ uint64_t x18;
+ uint64_t x19;
+ uint64_t x20;
+ uint64_t x21;
+ uint64_t x22;
+ uint64_t x23;
+ uint64_t x24;
+ uint64_t x25;
+ uint64_t x26;
+ uint64_t x27;
+ uint64_t x28;
+ uint64_t x29;
+ uint64_t x30;
+ uint64_t x31;
+ uint64_t x32;
+ uint64_t x33;
+ uint64_t x34;
+ uint64_t x35;
+ uint64_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ uint64_t x45;
+ uint64_t x46;
+ uint64_t x47;
+ uint64_t x48;
+ uint64_t x49;
+ uint64_t x50;
+ uint64_t x51;
+ uint64_t x52;
+ uint64_t x53;
+ uint64_t x54;
+ uint64_t x55;
+ uint64_t x56;
+ uint64_t x57;
+ uint64_t x58;
+ uint64_t x59;
+ uint64_t x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint64_t x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint64_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ uint64_t x82;
+ uint64_t x83;
+ uint64_t x84;
+ uint64_t x85;
+ uint64_t x86;
+ uint64_t x87;
+ uint64_t x88;
+ uint64_t x89;
+ uint64_t x90;
+ uint64_t x91;
+ uint64_t x92;
+ uint64_t x93;
+ uint64_t x94;
+ uint64_t x95;
+ uint64_t x96;
+ uint64_t x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ uint64_t x101;
+ uint64_t x102;
+ uint64_t x103;
+ uint64_t x104;
+ uint64_t x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ uint64_t x117;
+ uint64_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ uint64_t x121;
+ uint64_t x122;
+ uint64_t x123;
+ uint64_t x124;
+ uint64_t x125;
+ uint64_t x126;
+ uint64_t x127;
+ uint64_t x128;
+ uint64_t x129;
+ uint64_t x130;
+ uint64_t x131;
+ uint64_t x132;
+ uint64_t x133;
+ uint64_t x134;
+ uint64_t x135;
+ uint64_t x136;
+ uint64_t x137;
+ uint64_t x138;
+ uint64_t x139;
+ uint64_t x140;
+ uint64_t x141;
+ uint64_t x142;
+ uint64_t x143;
+ uint64_t x144;
+ uint64_t x145;
+ uint64_t x146;
+ uint64_t x147;
+ uint64_t x148;
+ uint64_t x149;
+ uint64_t x150;
+ uint64_t x151;
+ uint64_t x152;
+ uint64_t x153;
+ uint64_t x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ uint64_t x159;
+ uint64_t x160;
+ uint64_t x161;
+ uint64_t x162;
+ uint64_t x163;
+ uint64_t x164;
+ uint64_t x165;
+ uint64_t x166;
+ uint64_t x167;
+ uint64_t x168;
+ uint64_t x169;
+ uint64_t x170;
+ uint64_t x171;
+ uint64_t x172;
+ uint64_t x173;
+ uint64_t x174;
+ uint64_t x175;
+ uint64_t x176;
+ uint64_t x177;
+ uint64_t x178;
+ uint64_t x179;
+ uint64_t x180;
+ uint64_t x181;
+ uint64_t x182;
+ uint64_t x183;
+ uint64_t x184;
+ uint64_t x185;
+ uint64_t x186;
+ uint64_t x187;
+ uint64_t x188;
+ uint64_t x189;
+ uint64_t x190;
+ uint64_t x191;
+ uint64_t x192;
+ uint64_t x193;
+ uint64_t x194;
+ uint64_t x195;
+ uint64_t x196;
+ uint64_t x197;
+ uint64_t x198;
+ uint64_t x199;
+ uint64_t x200;
+ uint64_t x201;
+ uint64_t x202;
+ uint64_t x203;
+ uint64_t x204;
+ uint64_t x205;
+ uint64_t x206;
+ uint64_t x207;
+ uint64_t x208;
+ uint64_t x209;
+ uint64_t x210;
+ uint64_t x211;
+ uint64_t x212;
+ uint64_t x213;
+ uint64_t x214;
+ uint64_t x215;
+ uint64_t x216;
+ uint64_t x217;
+ uint64_t x218;
+ uint64_t x219;
+ uint64_t x220;
+ uint64_t x221;
+ uint64_t x222;
+ uint64_t x223;
+ uint64_t x224;
+ uint64_t x225;
+ uint64_t x226;
+ uint64_t x227;
+ uint64_t x228;
+ uint64_t x229;
+ uint64_t x230;
+ uint64_t x231;
+ uint64_t x232;
+ uint64_t x233;
+ uint64_t x234;
+ uint64_t x235;
+ uint64_t x236;
+ uint64_t x237;
+ uint64_t x238;
+ uint64_t x239;
+ uint64_t x240;
+ uint64_t x241;
+ uint64_t x242;
+ uint64_t x243;
+ uint64_t x244;
+ uint64_t x245;
+ uint64_t x246;
+ uint64_t x247;
+ uint64_t x248;
+ uint64_t x249;
+ uint64_t x250;
+ uint64_t x251;
+ uint64_t x252;
+ uint64_t x253;
+ uint64_t x254;
+ uint64_t x255;
+ uint64_t x256;
+ uint64_t x257;
+ uint64_t x258;
+ uint64_t x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint64_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint64_t x265;
+ uint64_t x266;
+ uint64_t x267;
+ uint64_t x268;
+ uint64_t x269;
+ uint64_t x270;
+ uint64_t x271;
+ uint64_t x272;
+ uint64_t x273;
+ uint64_t x274;
+ uint64_t x275;
+ uint64_t x276;
+ uint64_t x277;
+ uint64_t x278;
+ uint64_t x279;
+ uint64_t x280;
+ uint64_t x281;
+ uint64_t x282;
+ uint64_t x283;
+ uint64_t x284;
+ uint64_t x285;
+ uint64_t x286;
+ uint64_t x287;
+ uint64_t x288;
+ uint64_t x289;
+ uint64_t x290;
+ uint64_t x291;
+ uint64_t x292;
+ uint64_t x293;
+ uint64_t x294;
+ uint64_t x295;
+ uint64_t x296;
+ uint64_t x297;
+ uint64_t x298;
+ uint64_t x299;
+ uint64_t x300;
+ uint64_t x301;
+ uint64_t x302;
+ uint64_t x303;
+ uint64_t x304;
+ uint64_t x305;
+ uint64_t x306;
+ uint64_t x307;
+ uint64_t x308;
+ uint64_t x309;
+ uint64_t x310;
+ uint64_t x311;
+ uint64_t x312;
+ uint64_t x313;
+ uint64_t x314;
+ uint64_t x315;
+ uint64_t x316;
+ uint64_t x317;
+ uint64_t x318;
+ uint64_t x319;
+ uint64_t x320;
+ uint64_t x321;
+ uint64_t x322;
+ uint64_t x323;
+ uint64_t x324;
+ uint64_t x325;
+ uint64_t x326;
+ uint64_t x327;
+ uint64_t x328;
+ uint64_t x329;
+ uint64_t x330;
+ uint64_t x331;
+ uint64_t x332;
+ uint64_t x333;
+ uint64_t x334;
+ uint64_t x335;
+ uint64_t x336;
+ uint64_t x337;
+ uint64_t x338;
+ uint64_t x339;
+ uint64_t x340;
+ uint64_t x341;
+ uint64_t x342;
+ uint64_t x343;
+ uint64_t x344;
+ uint64_t x345;
+ uint64_t x346;
+ uint64_t x347;
+ uint64_t x348;
+ uint64_t x349;
+ uint64_t x350;
+ uint64_t x351;
+ uint64_t x352;
+ uint64_t x353;
+ uint64_t x354;
+ uint64_t x355;
+ uint64_t x356;
+ uint64_t x357;
+ uint64_t x358;
+ uint64_t x359;
+ uint64_t x360;
+ uint64_t x361;
+ uint64_t x362;
+ uint64_t x363;
+ uint32_t x364;
+ uint64_t x365;
+ uint64_t x366;
+ uint64_t x367;
+ uint64_t x368;
+ uint64_t x369;
+ uint64_t x370;
+ uint64_t x371;
+ uint64_t x372;
+ uint64_t x373;
+ uint64_t x374;
+ uint64_t x375;
+ uint64_t x376;
+ uint64_t x377;
+ uint64_t x378;
+ uint64_t x379;
+ uint64_t x380;
+ uint64_t x381;
+ uint64_t x382;
+ uint64_t x383;
+ uint64_t x384;
+ uint32_t x385;
+ uint64_t x386;
+ uint64_t x387;
+ uint32_t x388;
+ uint64_t x389;
+ uint64_t x390;
+ uint32_t x391;
+ uint64_t x392;
+ uint64_t x393;
+ uint32_t x394;
+ uint64_t x395;
+ uint64_t x396;
+ uint32_t x397;
+ uint64_t x398;
+ uint64_t x399;
+ uint32_t x400;
+ uint64_t x401;
+ uint64_t x402;
+ uint32_t x403;
+ uint64_t x404;
+ uint64_t x405;
+ uint32_t x406;
+ uint64_t x407;
+ uint64_t x408;
+ uint32_t x409;
+ uint64_t x410;
+ uint64_t x411;
+ uint32_t x412;
+ uint64_t x413;
+ uint64_t x414;
+ uint32_t x415;
+ uint64_t x416;
+ uint64_t x417;
+ uint32_t x418;
+ uint64_t x419;
+ uint64_t x420;
+ uint32_t x421;
+ uint64_t x422;
+ uint64_t x423;
+ uint32_t x424;
+ uint64_t x425;
+ uint64_t x426;
+ uint32_t x427;
+ uint64_t x428;
+ uint64_t x429;
+ uint32_t x430;
+ uint64_t x431;
+ uint64_t x432;
+ uint32_t x433;
+ uint64_t x434;
+ uint64_t x435;
+ uint32_t x436;
+ uint64_t x437;
+ uint32_t x438;
+ uint32_t x439;
+ uint32_t x440;
+ fiat_secp521r1_uint1 x441;
+ uint32_t x442;
+ uint32_t x443;
+ x1 = ((uint64_t)(arg1[18]) * (arg2[18]));
+ x2 = ((uint64_t)(arg1[18]) * ((arg2[17]) * 0x2));
+ x3 = ((uint64_t)(arg1[18]) * (arg2[16]));
+ x4 = ((uint64_t)(arg1[18]) * ((arg2[15]) * 0x2));
+ x5 = ((uint64_t)(arg1[18]) * (arg2[14]));
+ x6 = ((uint64_t)(arg1[18]) * (arg2[13]));
+ x7 = ((uint64_t)(arg1[18]) * ((arg2[12]) * 0x2));
+ x8 = ((uint64_t)(arg1[18]) * (arg2[11]));
+ x9 = ((uint64_t)(arg1[18]) * ((arg2[10]) * 0x2));
+ x10 = ((uint64_t)(arg1[18]) * (arg2[9]));
+ x11 = ((uint64_t)(arg1[18]) * ((arg2[8]) * 0x2));
+ x12 = ((uint64_t)(arg1[18]) * (arg2[7]));
+ x13 = ((uint64_t)(arg1[18]) * (arg2[6]));
+ x14 = ((uint64_t)(arg1[18]) * ((arg2[5]) * 0x2));
+ x15 = ((uint64_t)(arg1[18]) * (arg2[4]));
+ x16 = ((uint64_t)(arg1[18]) * ((arg2[3]) * 0x2));
+ x17 = ((uint64_t)(arg1[18]) * (arg2[2]));
+ x18 = ((uint64_t)(arg1[18]) * ((arg2[1]) * 0x2));
+ x19 = ((uint64_t)(arg1[17]) * ((arg2[18]) * 0x2));
+ x20 = ((uint64_t)(arg1[17]) * ((arg2[17]) * 0x2));
+ x21 = ((uint64_t)(arg1[17]) * ((arg2[16]) * 0x2));
+ x22 = ((uint64_t)(arg1[17]) * ((arg2[15]) * 0x2));
+ x23 = ((uint64_t)(arg1[17]) * (arg2[14]));
+ x24 = ((uint64_t)(arg1[17]) * ((arg2[13]) * 0x2));
+ x25 = ((uint64_t)(arg1[17]) * ((arg2[12]) * 0x2));
+ x26 = ((uint64_t)(arg1[17]) * ((arg2[11]) * 0x2));
+ x27 = ((uint64_t)(arg1[17]) * ((arg2[10]) * 0x2));
+ x28 = ((uint64_t)(arg1[17]) * ((arg2[9]) * 0x2));
+ x29 = ((uint64_t)(arg1[17]) * ((arg2[8]) * 0x2));
+ x30 = ((uint64_t)(arg1[17]) * (arg2[7]));
+ x31 = ((uint64_t)(arg1[17]) * ((arg2[6]) * 0x2));
+ x32 = ((uint64_t)(arg1[17]) * ((arg2[5]) * 0x2));
+ x33 = ((uint64_t)(arg1[17]) * ((arg2[4]) * 0x2));
+ x34 = ((uint64_t)(arg1[17]) * ((arg2[3]) * 0x2));
+ x35 = ((uint64_t)(arg1[17]) * ((arg2[2]) * 0x2));
+ x36 = ((uint64_t)(arg1[16]) * (arg2[18]));
+ x37 = ((uint64_t)(arg1[16]) * ((arg2[17]) * 0x2));
+ x38 = ((uint64_t)(arg1[16]) * (arg2[16]));
+ x39 = ((uint64_t)(arg1[16]) * (arg2[15]));
+ x40 = ((uint64_t)(arg1[16]) * (arg2[14]));
+ x41 = ((uint64_t)(arg1[16]) * (arg2[13]));
+ x42 = ((uint64_t)(arg1[16]) * ((arg2[12]) * 0x2));
+ x43 = ((uint64_t)(arg1[16]) * (arg2[11]));
+ x44 = ((uint64_t)(arg1[16]) * ((arg2[10]) * 0x2));
+ x45 = ((uint64_t)(arg1[16]) * (arg2[9]));
+ x46 = ((uint64_t)(arg1[16]) * (arg2[8]));
+ x47 = ((uint64_t)(arg1[16]) * (arg2[7]));
+ x48 = ((uint64_t)(arg1[16]) * (arg2[6]));
+ x49 = ((uint64_t)(arg1[16]) * ((arg2[5]) * 0x2));
+ x50 = ((uint64_t)(arg1[16]) * (arg2[4]));
+ x51 = ((uint64_t)(arg1[16]) * ((arg2[3]) * 0x2));
+ x52 = ((uint64_t)(arg1[15]) * ((arg2[18]) * 0x2));
+ x53 = ((uint64_t)(arg1[15]) * ((arg2[17]) * 0x2));
+ x54 = ((uint64_t)(arg1[15]) * (arg2[16]));
+ x55 = ((uint64_t)(arg1[15]) * ((arg2[15]) * 0x2));
+ x56 = ((uint64_t)(arg1[15]) * (arg2[14]));
+ x57 = ((uint64_t)(arg1[15]) * ((arg2[13]) * 0x2));
+ x58 = ((uint64_t)(arg1[15]) * ((arg2[12]) * 0x2));
+ x59 = ((uint64_t)(arg1[15]) * ((arg2[11]) * 0x2));
+ x60 = ((uint64_t)(arg1[15]) * ((arg2[10]) * 0x2));
+ x61 = ((uint64_t)(arg1[15]) * (arg2[9]));
+ x62 = ((uint64_t)(arg1[15]) * ((arg2[8]) * 0x2));
+ x63 = ((uint64_t)(arg1[15]) * (arg2[7]));
+ x64 = ((uint64_t)(arg1[15]) * ((arg2[6]) * 0x2));
+ x65 = ((uint64_t)(arg1[15]) * ((arg2[5]) * 0x2));
+ x66 = ((uint64_t)(arg1[15]) * ((arg2[4]) * 0x2));
+ x67 = ((uint64_t)(arg1[14]) * (arg2[18]));
+ x68 = ((uint64_t)(arg1[14]) * (arg2[17]));
+ x69 = ((uint64_t)(arg1[14]) * (arg2[16]));
+ x70 = ((uint64_t)(arg1[14]) * (arg2[15]));
+ x71 = ((uint64_t)(arg1[14]) * (arg2[14]));
+ x72 = ((uint64_t)(arg1[14]) * (arg2[13]));
+ x73 = ((uint64_t)(arg1[14]) * ((arg2[12]) * 0x2));
+ x74 = ((uint64_t)(arg1[14]) * (arg2[11]));
+ x75 = ((uint64_t)(arg1[14]) * (arg2[10]));
+ x76 = ((uint64_t)(arg1[14]) * (arg2[9]));
+ x77 = ((uint64_t)(arg1[14]) * (arg2[8]));
+ x78 = ((uint64_t)(arg1[14]) * (arg2[7]));
+ x79 = ((uint64_t)(arg1[14]) * (arg2[6]));
+ x80 = ((uint64_t)(arg1[14]) * ((arg2[5]) * 0x2));
+ x81 = ((uint64_t)(arg1[13]) * (arg2[18]));
+ x82 = ((uint64_t)(arg1[13]) * ((arg2[17]) * 0x2));
+ x83 = ((uint64_t)(arg1[13]) * (arg2[16]));
+ x84 = ((uint64_t)(arg1[13]) * ((arg2[15]) * 0x2));
+ x85 = ((uint64_t)(arg1[13]) * (arg2[14]));
+ x86 = ((uint64_t)(arg1[13]) * ((arg2[13]) * 0x2));
+ x87 = ((uint64_t)(arg1[13]) * ((arg2[12]) * 0x2));
+ x88 = ((uint64_t)(arg1[13]) * (arg2[11]));
+ x89 = ((uint64_t)(arg1[13]) * ((arg2[10]) * 0x2));
+ x90 = ((uint64_t)(arg1[13]) * (arg2[9]));
+ x91 = ((uint64_t)(arg1[13]) * ((arg2[8]) * 0x2));
+ x92 = ((uint64_t)(arg1[13]) * (arg2[7]));
+ x93 = ((uint64_t)(arg1[13]) * ((arg2[6]) * 0x2));
+ x94 = ((uint64_t)(arg1[12]) * ((arg2[18]) * 0x2));
+ x95 = ((uint64_t)(arg1[12]) * ((arg2[17]) * 0x2));
+ x96 = ((uint64_t)(arg1[12]) * ((arg2[16]) * 0x2));
+ x97 = ((uint64_t)(arg1[12]) * ((arg2[15]) * 0x2));
+ x98 = ((uint64_t)(arg1[12]) * ((arg2[14]) * 0x2));
+ x99 = ((uint64_t)(arg1[12]) * ((arg2[13]) * 0x2));
+ x100 = ((uint64_t)(arg1[12]) * ((arg2[12]) * 0x2));
+ x101 = ((uint64_t)(arg1[12]) * ((arg2[11]) * 0x2));
+ x102 = ((uint64_t)(arg1[12]) * ((arg2[10]) * 0x2));
+ x103 = ((uint64_t)(arg1[12]) * ((arg2[9]) * 0x2));
+ x104 = ((uint64_t)(arg1[12]) * ((arg2[8]) * 0x2));
+ x105 = ((uint64_t)(arg1[12]) * ((arg2[7]) * 0x2));
+ x106 = ((uint64_t)(arg1[11]) * (arg2[18]));
+ x107 = ((uint64_t)(arg1[11]) * ((arg2[17]) * 0x2));
+ x108 = ((uint64_t)(arg1[11]) * (arg2[16]));
+ x109 = ((uint64_t)(arg1[11]) * ((arg2[15]) * 0x2));
+ x110 = ((uint64_t)(arg1[11]) * (arg2[14]));
+ x111 = ((uint64_t)(arg1[11]) * (arg2[13]));
+ x112 = ((uint64_t)(arg1[11]) * ((arg2[12]) * 0x2));
+ x113 = ((uint64_t)(arg1[11]) * (arg2[11]));
+ x114 = ((uint64_t)(arg1[11]) * ((arg2[10]) * 0x2));
+ x115 = ((uint64_t)(arg1[11]) * (arg2[9]));
+ x116 = ((uint64_t)(arg1[11]) * ((arg2[8]) * 0x2));
+ x117 = ((uint64_t)(arg1[10]) * ((arg2[18]) * 0x2));
+ x118 = ((uint64_t)(arg1[10]) * ((arg2[17]) * 0x2));
+ x119 = ((uint64_t)(arg1[10]) * ((arg2[16]) * 0x2));
+ x120 = ((uint64_t)(arg1[10]) * ((arg2[15]) * 0x2));
+ x121 = ((uint64_t)(arg1[10]) * (arg2[14]));
+ x122 = ((uint64_t)(arg1[10]) * ((arg2[13]) * 0x2));
+ x123 = ((uint64_t)(arg1[10]) * ((arg2[12]) * 0x2));
+ x124 = ((uint64_t)(arg1[10]) * ((arg2[11]) * 0x2));
+ x125 = ((uint64_t)(arg1[10]) * ((arg2[10]) * 0x2));
+ x126 = ((uint64_t)(arg1[10]) * ((arg2[9]) * 0x2));
+ x127 = ((uint64_t)(arg1[9]) * (arg2[18]));
+ x128 = ((uint64_t)(arg1[9]) * ((arg2[17]) * 0x2));
+ x129 = ((uint64_t)(arg1[9]) * (arg2[16]));
+ x130 = ((uint64_t)(arg1[9]) * (arg2[15]));
+ x131 = ((uint64_t)(arg1[9]) * (arg2[14]));
+ x132 = ((uint64_t)(arg1[9]) * (arg2[13]));
+ x133 = ((uint64_t)(arg1[9]) * ((arg2[12]) * 0x2));
+ x134 = ((uint64_t)(arg1[9]) * (arg2[11]));
+ x135 = ((uint64_t)(arg1[9]) * ((arg2[10]) * 0x2));
+ x136 = ((uint64_t)(arg1[8]) * ((arg2[18]) * 0x2));
+ x137 = ((uint64_t)(arg1[8]) * ((arg2[17]) * 0x2));
+ x138 = ((uint64_t)(arg1[8]) * (arg2[16]));
+ x139 = ((uint64_t)(arg1[8]) * ((arg2[15]) * 0x2));
+ x140 = ((uint64_t)(arg1[8]) * (arg2[14]));
+ x141 = ((uint64_t)(arg1[8]) * ((arg2[13]) * 0x2));
+ x142 = ((uint64_t)(arg1[8]) * ((arg2[12]) * 0x2));
+ x143 = ((uint64_t)(arg1[8]) * ((arg2[11]) * 0x2));
+ x144 = ((uint64_t)(arg1[7]) * (arg2[18]));
+ x145 = ((uint64_t)(arg1[7]) * (arg2[17]));
+ x146 = ((uint64_t)(arg1[7]) * (arg2[16]));
+ x147 = ((uint64_t)(arg1[7]) * (arg2[15]));
+ x148 = ((uint64_t)(arg1[7]) * (arg2[14]));
+ x149 = ((uint64_t)(arg1[7]) * (arg2[13]));
+ x150 = ((uint64_t)(arg1[7]) * ((arg2[12]) * 0x2));
+ x151 = ((uint64_t)(arg1[6]) * (arg2[18]));
+ x152 = ((uint64_t)(arg1[6]) * ((arg2[17]) * 0x2));
+ x153 = ((uint64_t)(arg1[6]) * (arg2[16]));
+ x154 = ((uint64_t)(arg1[6]) * ((arg2[15]) * 0x2));
+ x155 = ((uint64_t)(arg1[6]) * (arg2[14]));
+ x156 = ((uint64_t)(arg1[6]) * ((arg2[13]) * 0x2));
+ x157 = ((uint64_t)(arg1[5]) * ((arg2[18]) * 0x2));
+ x158 = ((uint64_t)(arg1[5]) * ((arg2[17]) * 0x2));
+ x159 = ((uint64_t)(arg1[5]) * ((arg2[16]) * 0x2));
+ x160 = ((uint64_t)(arg1[5]) * ((arg2[15]) * 0x2));
+ x161 = ((uint64_t)(arg1[5]) * ((arg2[14]) * 0x2));
+ x162 = ((uint64_t)(arg1[4]) * (arg2[18]));
+ x163 = ((uint64_t)(arg1[4]) * ((arg2[17]) * 0x2));
+ x164 = ((uint64_t)(arg1[4]) * (arg2[16]));
+ x165 = ((uint64_t)(arg1[4]) * ((arg2[15]) * 0x2));
+ x166 = ((uint64_t)(arg1[3]) * ((arg2[18]) * 0x2));
+ x167 = ((uint64_t)(arg1[3]) * ((arg2[17]) * 0x2));
+ x168 = ((uint64_t)(arg1[3]) * ((arg2[16]) * 0x2));
+ x169 = ((uint64_t)(arg1[2]) * (arg2[18]));
+ x170 = ((uint64_t)(arg1[2]) * ((arg2[17]) * 0x2));
+ x171 = ((uint64_t)(arg1[1]) * ((arg2[18]) * 0x2));
+ x172 = ((uint64_t)(arg1[18]) * (arg2[0]));
+ x173 = ((uint64_t)(arg1[17]) * ((arg2[1]) * 0x2));
+ x174 = ((uint64_t)(arg1[17]) * (arg2[0]));
+ x175 = ((uint64_t)(arg1[16]) * (arg2[2]));
+ x176 = ((uint64_t)(arg1[16]) * (arg2[1]));
+ x177 = ((uint64_t)(arg1[16]) * (arg2[0]));
+ x178 = ((uint64_t)(arg1[15]) * ((arg2[3]) * 0x2));
+ x179 = ((uint64_t)(arg1[15]) * (arg2[2]));
+ x180 = ((uint64_t)(arg1[15]) * ((arg2[1]) * 0x2));
+ x181 = ((uint64_t)(arg1[15]) * (arg2[0]));
+ x182 = ((uint64_t)(arg1[14]) * (arg2[4]));
+ x183 = ((uint64_t)(arg1[14]) * (arg2[3]));
+ x184 = ((uint64_t)(arg1[14]) * (arg2[2]));
+ x185 = ((uint64_t)(arg1[14]) * (arg2[1]));
+ x186 = ((uint64_t)(arg1[14]) * (arg2[0]));
+ x187 = ((uint64_t)(arg1[13]) * ((arg2[5]) * 0x2));
+ x188 = ((uint64_t)(arg1[13]) * (arg2[4]));
+ x189 = ((uint64_t)(arg1[13]) * ((arg2[3]) * 0x2));
+ x190 = ((uint64_t)(arg1[13]) * (arg2[2]));
+ x191 = ((uint64_t)(arg1[13]) * ((arg2[1]) * 0x2));
+ x192 = ((uint64_t)(arg1[13]) * (arg2[0]));
+ x193 = ((uint64_t)(arg1[12]) * ((arg2[6]) * 0x2));
+ x194 = ((uint64_t)(arg1[12]) * ((arg2[5]) * 0x2));
+ x195 = ((uint64_t)(arg1[12]) * ((arg2[4]) * 0x2));
+ x196 = ((uint64_t)(arg1[12]) * ((arg2[3]) * 0x2));
+ x197 = ((uint64_t)(arg1[12]) * ((arg2[2]) * 0x2));
+ x198 = ((uint64_t)(arg1[12]) * ((arg2[1]) * 0x2));
+ x199 = ((uint64_t)(arg1[12]) * (arg2[0]));
+ x200 = ((uint64_t)(arg1[11]) * (arg2[7]));
+ x201 = ((uint64_t)(arg1[11]) * (arg2[6]));
+ x202 = ((uint64_t)(arg1[11]) * ((arg2[5]) * 0x2));
+ x203 = ((uint64_t)(arg1[11]) * (arg2[4]));
+ x204 = ((uint64_t)(arg1[11]) * ((arg2[3]) * 0x2));
+ x205 = ((uint64_t)(arg1[11]) * (arg2[2]));
+ x206 = ((uint64_t)(arg1[11]) * (arg2[1]));
+ x207 = ((uint64_t)(arg1[11]) * (arg2[0]));
+ x208 = ((uint64_t)(arg1[10]) * ((arg2[8]) * 0x2));
+ x209 = ((uint64_t)(arg1[10]) * (arg2[7]));
+ x210 = ((uint64_t)(arg1[10]) * ((arg2[6]) * 0x2));
+ x211 = ((uint64_t)(arg1[10]) * ((arg2[5]) * 0x2));
+ x212 = ((uint64_t)(arg1[10]) * ((arg2[4]) * 0x2));
+ x213 = ((uint64_t)(arg1[10]) * ((arg2[3]) * 0x2));
+ x214 = ((uint64_t)(arg1[10]) * (arg2[2]));
+ x215 = ((uint64_t)(arg1[10]) * ((arg2[1]) * 0x2));
+ x216 = ((uint64_t)(arg1[10]) * (arg2[0]));
+ x217 = ((uint64_t)(arg1[9]) * (arg2[9]));
+ x218 = ((uint64_t)(arg1[9]) * (arg2[8]));
+ x219 = ((uint64_t)(arg1[9]) * (arg2[7]));
+ x220 = ((uint64_t)(arg1[9]) * (arg2[6]));
+ x221 = ((uint64_t)(arg1[9]) * ((arg2[5]) * 0x2));
+ x222 = ((uint64_t)(arg1[9]) * (arg2[4]));
+ x223 = ((uint64_t)(arg1[9]) * (arg2[3]));
+ x224 = ((uint64_t)(arg1[9]) * (arg2[2]));
+ x225 = ((uint64_t)(arg1[9]) * (arg2[1]));
+ x226 = ((uint64_t)(arg1[9]) * (arg2[0]));
+ x227 = ((uint64_t)(arg1[8]) * ((arg2[10]) * 0x2));
+ x228 = ((uint64_t)(arg1[8]) * (arg2[9]));
+ x229 = ((uint64_t)(arg1[8]) * ((arg2[8]) * 0x2));
+ x230 = ((uint64_t)(arg1[8]) * (arg2[7]));
+ x231 = ((uint64_t)(arg1[8]) * ((arg2[6]) * 0x2));
+ x232 = ((uint64_t)(arg1[8]) * ((arg2[5]) * 0x2));
+ x233 = ((uint64_t)(arg1[8]) * (arg2[4]));
+ x234 = ((uint64_t)(arg1[8]) * ((arg2[3]) * 0x2));
+ x235 = ((uint64_t)(arg1[8]) * (arg2[2]));
+ x236 = ((uint64_t)(arg1[8]) * ((arg2[1]) * 0x2));
+ x237 = ((uint64_t)(arg1[8]) * (arg2[0]));
+ x238 = ((uint64_t)(arg1[7]) * (arg2[11]));
+ x239 = ((uint64_t)(arg1[7]) * (arg2[10]));
+ x240 = ((uint64_t)(arg1[7]) * (arg2[9]));
+ x241 = ((uint64_t)(arg1[7]) * (arg2[8]));
+ x242 = ((uint64_t)(arg1[7]) * (arg2[7]));
+ x243 = ((uint64_t)(arg1[7]) * (arg2[6]));
+ x244 = ((uint64_t)(arg1[7]) * (arg2[5]));
+ x245 = ((uint64_t)(arg1[7]) * (arg2[4]));
+ x246 = ((uint64_t)(arg1[7]) * (arg2[3]));
+ x247 = ((uint64_t)(arg1[7]) * (arg2[2]));
+ x248 = ((uint64_t)(arg1[7]) * (arg2[1]));
+ x249 = ((uint64_t)(arg1[7]) * (arg2[0]));
+ x250 = ((uint64_t)(arg1[6]) * ((arg2[12]) * 0x2));
+ x251 = ((uint64_t)(arg1[6]) * (arg2[11]));
+ x252 = ((uint64_t)(arg1[6]) * ((arg2[10]) * 0x2));
+ x253 = ((uint64_t)(arg1[6]) * (arg2[9]));
+ x254 = ((uint64_t)(arg1[6]) * ((arg2[8]) * 0x2));
+ x255 = ((uint64_t)(arg1[6]) * (arg2[7]));
+ x256 = ((uint64_t)(arg1[6]) * (arg2[6]));
+ x257 = ((uint64_t)(arg1[6]) * ((arg2[5]) * 0x2));
+ x258 = ((uint64_t)(arg1[6]) * (arg2[4]));
+ x259 = ((uint64_t)(arg1[6]) * ((arg2[3]) * 0x2));
+ x260 = ((uint64_t)(arg1[6]) * (arg2[2]));
+ x261 = ((uint64_t)(arg1[6]) * ((arg2[1]) * 0x2));
+ x262 = ((uint64_t)(arg1[6]) * (arg2[0]));
+ x263 = ((uint64_t)(arg1[5]) * ((arg2[13]) * 0x2));
+ x264 = ((uint64_t)(arg1[5]) * ((arg2[12]) * 0x2));
+ x265 = ((uint64_t)(arg1[5]) * ((arg2[11]) * 0x2));
+ x266 = ((uint64_t)(arg1[5]) * ((arg2[10]) * 0x2));
+ x267 = ((uint64_t)(arg1[5]) * ((arg2[9]) * 0x2));
+ x268 = ((uint64_t)(arg1[5]) * ((arg2[8]) * 0x2));
+ x269 = ((uint64_t)(arg1[5]) * (arg2[7]));
+ x270 = ((uint64_t)(arg1[5]) * ((arg2[6]) * 0x2));
+ x271 = ((uint64_t)(arg1[5]) * ((arg2[5]) * 0x2));
+ x272 = ((uint64_t)(arg1[5]) * ((arg2[4]) * 0x2));
+ x273 = ((uint64_t)(arg1[5]) * ((arg2[3]) * 0x2));
+ x274 = ((uint64_t)(arg1[5]) * ((arg2[2]) * 0x2));
+ x275 = ((uint64_t)(arg1[5]) * ((arg2[1]) * 0x2));
+ x276 = ((uint64_t)(arg1[5]) * (arg2[0]));
+ x277 = ((uint64_t)(arg1[4]) * (arg2[14]));
+ x278 = ((uint64_t)(arg1[4]) * (arg2[13]));
+ x279 = ((uint64_t)(arg1[4]) * ((arg2[12]) * 0x2));
+ x280 = ((uint64_t)(arg1[4]) * (arg2[11]));
+ x281 = ((uint64_t)(arg1[4]) * ((arg2[10]) * 0x2));
+ x282 = ((uint64_t)(arg1[4]) * (arg2[9]));
+ x283 = ((uint64_t)(arg1[4]) * (arg2[8]));
+ x284 = ((uint64_t)(arg1[4]) * (arg2[7]));
+ x285 = ((uint64_t)(arg1[4]) * (arg2[6]));
+ x286 = ((uint64_t)(arg1[4]) * ((arg2[5]) * 0x2));
+ x287 = ((uint64_t)(arg1[4]) * (arg2[4]));
+ x288 = ((uint64_t)(arg1[4]) * ((arg2[3]) * 0x2));
+ x289 = ((uint64_t)(arg1[4]) * (arg2[2]));
+ x290 = ((uint64_t)(arg1[4]) * (arg2[1]));
+ x291 = ((uint64_t)(arg1[4]) * (arg2[0]));
+ x292 = ((uint64_t)(arg1[3]) * ((arg2[15]) * 0x2));
+ x293 = ((uint64_t)(arg1[3]) * (arg2[14]));
+ x294 = ((uint64_t)(arg1[3]) * ((arg2[13]) * 0x2));
+ x295 = ((uint64_t)(arg1[3]) * ((arg2[12]) * 0x2));
+ x296 = ((uint64_t)(arg1[3]) * ((arg2[11]) * 0x2));
+ x297 = ((uint64_t)(arg1[3]) * ((arg2[10]) * 0x2));
+ x298 = ((uint64_t)(arg1[3]) * (arg2[9]));
+ x299 = ((uint64_t)(arg1[3]) * ((arg2[8]) * 0x2));
+ x300 = ((uint64_t)(arg1[3]) * (arg2[7]));
+ x301 = ((uint64_t)(arg1[3]) * ((arg2[6]) * 0x2));
+ x302 = ((uint64_t)(arg1[3]) * ((arg2[5]) * 0x2));
+ x303 = ((uint64_t)(arg1[3]) * ((arg2[4]) * 0x2));
+ x304 = ((uint64_t)(arg1[3]) * ((arg2[3]) * 0x2));
+ x305 = ((uint64_t)(arg1[3]) * (arg2[2]));
+ x306 = ((uint64_t)(arg1[3]) * ((arg2[1]) * 0x2));
+ x307 = ((uint64_t)(arg1[3]) * (arg2[0]));
+ x308 = ((uint64_t)(arg1[2]) * (arg2[16]));
+ x309 = ((uint64_t)(arg1[2]) * (arg2[15]));
+ x310 = ((uint64_t)(arg1[2]) * (arg2[14]));
+ x311 = ((uint64_t)(arg1[2]) * (arg2[13]));
+ x312 = ((uint64_t)(arg1[2]) * ((arg2[12]) * 0x2));
+ x313 = ((uint64_t)(arg1[2]) * (arg2[11]));
+ x314 = ((uint64_t)(arg1[2]) * (arg2[10]));
+ x315 = ((uint64_t)(arg1[2]) * (arg2[9]));
+ x316 = ((uint64_t)(arg1[2]) * (arg2[8]));
+ x317 = ((uint64_t)(arg1[2]) * (arg2[7]));
+ x318 = ((uint64_t)(arg1[2]) * (arg2[6]));
+ x319 = ((uint64_t)(arg1[2]) * ((arg2[5]) * 0x2));
+ x320 = ((uint64_t)(arg1[2]) * (arg2[4]));
+ x321 = ((uint64_t)(arg1[2]) * (arg2[3]));
+ x322 = ((uint64_t)(arg1[2]) * (arg2[2]));
+ x323 = ((uint64_t)(arg1[2]) * (arg2[1]));
+ x324 = ((uint64_t)(arg1[2]) * (arg2[0]));
+ x325 = ((uint64_t)(arg1[1]) * ((arg2[17]) * 0x2));
+ x326 = ((uint64_t)(arg1[1]) * (arg2[16]));
+ x327 = ((uint64_t)(arg1[1]) * ((arg2[15]) * 0x2));
+ x328 = ((uint64_t)(arg1[1]) * (arg2[14]));
+ x329 = ((uint64_t)(arg1[1]) * ((arg2[13]) * 0x2));
+ x330 = ((uint64_t)(arg1[1]) * ((arg2[12]) * 0x2));
+ x331 = ((uint64_t)(arg1[1]) * (arg2[11]));
+ x332 = ((uint64_t)(arg1[1]) * ((arg2[10]) * 0x2));
+ x333 = ((uint64_t)(arg1[1]) * (arg2[9]));
+ x334 = ((uint64_t)(arg1[1]) * ((arg2[8]) * 0x2));
+ x335 = ((uint64_t)(arg1[1]) * (arg2[7]));
+ x336 = ((uint64_t)(arg1[1]) * ((arg2[6]) * 0x2));
+ x337 = ((uint64_t)(arg1[1]) * ((arg2[5]) * 0x2));
+ x338 = ((uint64_t)(arg1[1]) * (arg2[4]));
+ x339 = ((uint64_t)(arg1[1]) * ((arg2[3]) * 0x2));
+ x340 = ((uint64_t)(arg1[1]) * (arg2[2]));
+ x341 = ((uint64_t)(arg1[1]) * ((arg2[1]) * 0x2));
+ x342 = ((uint64_t)(arg1[1]) * (arg2[0]));
+ x343 = ((uint64_t)(arg1[0]) * (arg2[18]));
+ x344 = ((uint64_t)(arg1[0]) * (arg2[17]));
+ x345 = ((uint64_t)(arg1[0]) * (arg2[16]));
+ x346 = ((uint64_t)(arg1[0]) * (arg2[15]));
+ x347 = ((uint64_t)(arg1[0]) * (arg2[14]));
+ x348 = ((uint64_t)(arg1[0]) * (arg2[13]));
+ x349 = ((uint64_t)(arg1[0]) * (arg2[12]));
+ x350 = ((uint64_t)(arg1[0]) * (arg2[11]));
+ x351 = ((uint64_t)(arg1[0]) * (arg2[10]));
+ x352 = ((uint64_t)(arg1[0]) * (arg2[9]));
+ x353 = ((uint64_t)(arg1[0]) * (arg2[8]));
+ x354 = ((uint64_t)(arg1[0]) * (arg2[7]));
+ x355 = ((uint64_t)(arg1[0]) * (arg2[6]));
+ x356 = ((uint64_t)(arg1[0]) * (arg2[5]));
+ x357 = ((uint64_t)(arg1[0]) * (arg2[4]));
+ x358 = ((uint64_t)(arg1[0]) * (arg2[3]));
+ x359 = ((uint64_t)(arg1[0]) * (arg2[2]));
+ x360 = ((uint64_t)(arg1[0]) * (arg2[1]));
+ x361 = ((uint64_t)(arg1[0]) * (arg2[0]));
+ x362 =
+ (x361 +
+ (x171 +
+ (x170 +
+ (x168 +
+ (x165 +
+ (x161 +
+ (x156 +
+ (x150 +
+ (x143 +
+ (x135 +
+ (x126 +
+ (x116 +
+ (x105 +
+ (x93 + (x80 + (x66 + (x51 + (x35 + x18))))))))))))))))));
+ x363 = (x362 >> 28);
+ x364 = (uint32_t)(x362 & UINT32_C(0xfffffff));
+ x365 = (x343 +
+ (x325 +
+ (x308 +
+ (x292 +
+ (x277 +
+ (x263 +
+ (x250 +
+ (x238 +
+ (x227 +
+ (x217 +
+ (x208 +
+ (x200 +
+ (x193 +
+ (x187 +
+ (x182 + (x178 + (x175 + (x173 + x172))))))))))))))))));
+ x366 = (x344 +
+ (x326 +
+ (x309 +
+ (x293 +
+ (x278 +
+ (x264 +
+ (x251 +
+ (x239 +
+ (x228 +
+ (x218 +
+ (x209 +
+ (x201 +
+ (x194 +
+ (x188 +
+ (x183 + (x179 + (x176 + (x174 + x1))))))))))))))))));
+ x367 = (x345 +
+ (x327 +
+ (x310 +
+ (x294 +
+ (x279 +
+ (x265 +
+ (x252 +
+ (x240 +
+ (x229 +
+ (x219 +
+ (x210 +
+ (x202 +
+ (x195 +
+ (x189 +
+ (x184 + (x180 + (x177 + (x19 + x2))))))))))))))))));
+ x368 =
+ (x346 +
+ (x328 +
+ (x311 +
+ (x295 +
+ (x280 +
+ (x266 +
+ (x253 +
+ (x241 +
+ (x230 +
+ (x220 +
+ (x211 +
+ (x203 +
+ (x196 +
+ (x190 + (x185 + (x181 + (x36 + (x20 + x3))))))))))))))))));
+ x369 =
+ (x347 +
+ (x329 +
+ (x312 +
+ (x296 +
+ (x281 +
+ (x267 +
+ (x254 +
+ (x242 +
+ (x231 +
+ (x221 +
+ (x212 +
+ (x204 +
+ (x197 +
+ (x191 + (x186 + (x52 + (x37 + (x21 + x4))))))))))))))))));
+ x370 =
+ (x348 +
+ (x330 +
+ (x313 +
+ (x297 +
+ (x282 +
+ (x268 +
+ (x255 +
+ (x243 +
+ (x232 +
+ (x222 +
+ (x213 +
+ (x205 +
+ (x198 +
+ (x192 + (x67 + (x53 + (x38 + (x22 + x5))))))))))))))))));
+ x371 = (x349 +
+ (x331 +
+ (x314 +
+ (x298 +
+ (x283 +
+ (x269 +
+ (x256 +
+ (x244 +
+ (x233 +
+ (x223 +
+ (x214 +
+ (x206 +
+ (x199 +
+ (x81 + (x68 + (x54 + (x39 + (x23 + x6))))))))))))))))));
+ x372 = (x350 +
+ (x332 +
+ (x315 +
+ (x299 +
+ (x284 +
+ (x270 +
+ (x257 +
+ (x245 +
+ (x234 +
+ (x224 +
+ (x215 +
+ (x207 +
+ (x94 +
+ (x82 + (x69 + (x55 + (x40 + (x24 + x7))))))))))))))))));
+ x373 = (x351 +
+ (x333 +
+ (x316 +
+ (x300 +
+ (x285 +
+ (x271 +
+ (x258 +
+ (x246 +
+ (x235 +
+ (x225 +
+ (x216 +
+ (x106 +
+ (x95 +
+ (x83 + (x70 + (x56 + (x41 + (x25 + x8))))))))))))))))));
+ x374 = (x352 +
+ (x334 +
+ (x317 +
+ (x301 +
+ (x286 +
+ (x272 +
+ (x259 +
+ (x247 +
+ (x236 +
+ (x226 +
+ (x117 +
+ (x107 +
+ (x96 +
+ (x84 + (x71 + (x57 + (x42 + (x26 + x9))))))))))))))))));
+ x375 =
+ (x353 +
+ (x335 +
+ (x318 +
+ (x302 +
+ (x287 +
+ (x273 +
+ (x260 +
+ (x248 +
+ (x237 +
+ (x127 +
+ (x118 +
+ (x108 +
+ (x97 +
+ (x85 + (x72 + (x58 + (x43 + (x27 + x10))))))))))))))))));
+ x376 =
+ (x354 +
+ (x336 +
+ (x319 +
+ (x303 +
+ (x288 +
+ (x274 +
+ (x261 +
+ (x249 +
+ (x136 +
+ (x128 +
+ (x119 +
+ (x109 +
+ (x98 +
+ (x86 + (x73 + (x59 + (x44 + (x28 + x11))))))))))))))))));
+ x377 =
+ (x355 +
+ (x337 +
+ (x320 +
+ (x304 +
+ (x289 +
+ (x275 +
+ (x262 +
+ (x144 +
+ (x137 +
+ (x129 +
+ (x120 +
+ (x110 +
+ (x99 +
+ (x87 + (x74 + (x60 + (x45 + (x29 + x12))))))))))))))))));
+ x378 =
+ (x356 +
+ (x338 +
+ (x321 +
+ (x305 +
+ (x290 +
+ (x276 +
+ (x151 +
+ (x145 +
+ (x138 +
+ (x130 +
+ (x121 +
+ (x111 +
+ (x100 +
+ (x88 + (x75 + (x61 + (x46 + (x30 + x13))))))))))))))))));
+ x379 =
+ (x357 +
+ (x339 +
+ (x322 +
+ (x306 +
+ (x291 +
+ (x157 +
+ (x152 +
+ (x146 +
+ (x139 +
+ (x131 +
+ (x122 +
+ (x112 +
+ (x101 +
+ (x89 + (x76 + (x62 + (x47 + (x31 + x14))))))))))))))))));
+ x380 =
+ (x358 +
+ (x340 +
+ (x323 +
+ (x307 +
+ (x162 +
+ (x158 +
+ (x153 +
+ (x147 +
+ (x140 +
+ (x132 +
+ (x123 +
+ (x113 +
+ (x102 +
+ (x90 + (x77 + (x63 + (x48 + (x32 + x15))))))))))))))))));
+ x381 =
+ (x359 +
+ (x341 +
+ (x324 +
+ (x166 +
+ (x163 +
+ (x159 +
+ (x154 +
+ (x148 +
+ (x141 +
+ (x133 +
+ (x124 +
+ (x114 +
+ (x103 +
+ (x91 + (x78 + (x64 + (x49 + (x33 + x16))))))))))))))))));
+ x382 =
+ (x360 +
+ (x342 +
+ (x169 +
+ (x167 +
+ (x164 +
+ (x160 +
+ (x155 +
+ (x149 +
+ (x142 +
+ (x134 +
+ (x125 +
+ (x115 +
+ (x104 +
+ (x92 + (x79 + (x65 + (x50 + (x34 + x17))))))))))))))))));
+ x383 = (x363 + x382);
+ x384 = (x383 >> 27);
+ x385 = (uint32_t)(x383 & UINT32_C(0x7ffffff));
+ x386 = (x384 + x381);
+ x387 = (x386 >> 28);
+ x388 = (uint32_t)(x386 & UINT32_C(0xfffffff));
+ x389 = (x387 + x380);
+ x390 = (x389 >> 27);
+ x391 = (uint32_t)(x389 & UINT32_C(0x7ffffff));
+ x392 = (x390 + x379);
+ x393 = (x392 >> 28);
+ x394 = (uint32_t)(x392 & UINT32_C(0xfffffff));
+ x395 = (x393 + x378);
+ x396 = (x395 >> 27);
+ x397 = (uint32_t)(x395 & UINT32_C(0x7ffffff));
+ x398 = (x396 + x377);
+ x399 = (x398 >> 27);
+ x400 = (uint32_t)(x398 & UINT32_C(0x7ffffff));
+ x401 = (x399 + x376);
+ x402 = (x401 >> 28);
+ x403 = (uint32_t)(x401 & UINT32_C(0xfffffff));
+ x404 = (x402 + x375);
+ x405 = (x404 >> 27);
+ x406 = (uint32_t)(x404 & UINT32_C(0x7ffffff));
+ x407 = (x405 + x374);
+ x408 = (x407 >> 28);
+ x409 = (uint32_t)(x407 & UINT32_C(0xfffffff));
+ x410 = (x408 + x373);
+ x411 = (x410 >> 27);
+ x412 = (uint32_t)(x410 & UINT32_C(0x7ffffff));
+ x413 = (x411 + x372);
+ x414 = (x413 >> 28);
+ x415 = (uint32_t)(x413 & UINT32_C(0xfffffff));
+ x416 = (x414 + x371);
+ x417 = (x416 >> 27);
+ x418 = (uint32_t)(x416 & UINT32_C(0x7ffffff));
+ x419 = (x417 + x370);
+ x420 = (x419 >> 27);
+ x421 = (uint32_t)(x419 & UINT32_C(0x7ffffff));
+ x422 = (x420 + x369);
+ x423 = (x422 >> 28);
+ x424 = (uint32_t)(x422 & UINT32_C(0xfffffff));
+ x425 = (x423 + x368);
+ x426 = (x425 >> 27);
+ x427 = (uint32_t)(x425 & UINT32_C(0x7ffffff));
+ x428 = (x426 + x367);
+ x429 = (x428 >> 28);
+ x430 = (uint32_t)(x428 & UINT32_C(0xfffffff));
+ x431 = (x429 + x366);
+ x432 = (x431 >> 27);
+ x433 = (uint32_t)(x431 & UINT32_C(0x7ffffff));
+ x434 = (x432 + x365);
+ x435 = (x434 >> 27);
+ x436 = (uint32_t)(x434 & UINT32_C(0x7ffffff));
+ x437 = (x364 + x435);
+ x438 = (uint32_t)(x437 >> 28);
+ x439 = (uint32_t)(x437 & UINT32_C(0xfffffff));
+ x440 = (x438 + x385);
+ x441 = (fiat_secp521r1_uint1)(x440 >> 27);
+ x442 = (x440 & UINT32_C(0x7ffffff));
+ x443 = (x441 + x388);
+ out1[0] = x439;
+ out1[1] = x442;
+ out1[2] = x443;
+ out1[3] = x391;
+ out1[4] = x394;
+ out1[5] = x397;
+ out1[6] = x400;
+ out1[7] = x403;
+ out1[8] = x406;
+ out1[9] = x409;
+ out1[10] = x412;
+ out1[11] = x415;
+ out1[12] = x418;
+ out1[13] = x421;
+ out1[14] = x424;
+ out1[15] = x427;
+ out1[16] = x430;
+ out1[17] = x433;
+ out1[18] = x436;
+}
+
+/*
+ * The function fiat_secp521r1_carry_square squares a field element and reduces the result.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 * eval arg1) mod m
+ *
+ */
+static void
+fiat_secp521r1_carry_square(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_loose_field_element arg1)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint64_t x37;
+ uint64_t x38;
+ uint64_t x39;
+ uint64_t x40;
+ uint64_t x41;
+ uint64_t x42;
+ uint64_t x43;
+ uint64_t x44;
+ uint64_t x45;
+ uint64_t x46;
+ uint64_t x47;
+ uint64_t x48;
+ uint64_t x49;
+ uint64_t x50;
+ uint64_t x51;
+ uint64_t x52;
+ uint64_t x53;
+ uint64_t x54;
+ uint64_t x55;
+ uint64_t x56;
+ uint64_t x57;
+ uint64_t x58;
+ uint64_t x59;
+ uint64_t x60;
+ uint64_t x61;
+ uint64_t x62;
+ uint64_t x63;
+ uint64_t x64;
+ uint64_t x65;
+ uint64_t x66;
+ uint64_t x67;
+ uint64_t x68;
+ uint64_t x69;
+ uint64_t x70;
+ uint64_t x71;
+ uint64_t x72;
+ uint64_t x73;
+ uint64_t x74;
+ uint64_t x75;
+ uint64_t x76;
+ uint64_t x77;
+ uint64_t x78;
+ uint64_t x79;
+ uint64_t x80;
+ uint64_t x81;
+ uint64_t x82;
+ uint64_t x83;
+ uint64_t x84;
+ uint64_t x85;
+ uint64_t x86;
+ uint64_t x87;
+ uint64_t x88;
+ uint64_t x89;
+ uint64_t x90;
+ uint64_t x91;
+ uint64_t x92;
+ uint64_t x93;
+ uint64_t x94;
+ uint64_t x95;
+ uint64_t x96;
+ uint64_t x97;
+ uint64_t x98;
+ uint64_t x99;
+ uint64_t x100;
+ uint64_t x101;
+ uint64_t x102;
+ uint64_t x103;
+ uint64_t x104;
+ uint64_t x105;
+ uint64_t x106;
+ uint64_t x107;
+ uint64_t x108;
+ uint64_t x109;
+ uint64_t x110;
+ uint64_t x111;
+ uint64_t x112;
+ uint64_t x113;
+ uint64_t x114;
+ uint64_t x115;
+ uint64_t x116;
+ uint64_t x117;
+ uint64_t x118;
+ uint64_t x119;
+ uint64_t x120;
+ uint64_t x121;
+ uint64_t x122;
+ uint64_t x123;
+ uint64_t x124;
+ uint64_t x125;
+ uint64_t x126;
+ uint64_t x127;
+ uint64_t x128;
+ uint64_t x129;
+ uint64_t x130;
+ uint64_t x131;
+ uint64_t x132;
+ uint64_t x133;
+ uint64_t x134;
+ uint64_t x135;
+ uint64_t x136;
+ uint64_t x137;
+ uint64_t x138;
+ uint64_t x139;
+ uint64_t x140;
+ uint64_t x141;
+ uint64_t x142;
+ uint64_t x143;
+ uint64_t x144;
+ uint64_t x145;
+ uint64_t x146;
+ uint64_t x147;
+ uint64_t x148;
+ uint64_t x149;
+ uint64_t x150;
+ uint64_t x151;
+ uint64_t x152;
+ uint64_t x153;
+ uint64_t x154;
+ uint64_t x155;
+ uint64_t x156;
+ uint64_t x157;
+ uint64_t x158;
+ uint64_t x159;
+ uint64_t x160;
+ uint64_t x161;
+ uint64_t x162;
+ uint64_t x163;
+ uint64_t x164;
+ uint64_t x165;
+ uint64_t x166;
+ uint64_t x167;
+ uint64_t x168;
+ uint64_t x169;
+ uint64_t x170;
+ uint64_t x171;
+ uint64_t x172;
+ uint64_t x173;
+ uint64_t x174;
+ uint64_t x175;
+ uint64_t x176;
+ uint64_t x177;
+ uint64_t x178;
+ uint64_t x179;
+ uint64_t x180;
+ uint64_t x181;
+ uint64_t x182;
+ uint64_t x183;
+ uint64_t x184;
+ uint64_t x185;
+ uint64_t x186;
+ uint64_t x187;
+ uint64_t x188;
+ uint64_t x189;
+ uint64_t x190;
+ uint64_t x191;
+ uint64_t x192;
+ uint64_t x193;
+ uint64_t x194;
+ uint64_t x195;
+ uint64_t x196;
+ uint64_t x197;
+ uint64_t x198;
+ uint64_t x199;
+ uint64_t x200;
+ uint64_t x201;
+ uint64_t x202;
+ uint64_t x203;
+ uint64_t x204;
+ uint64_t x205;
+ uint64_t x206;
+ uint64_t x207;
+ uint64_t x208;
+ uint64_t x209;
+ uint64_t x210;
+ uint64_t x211;
+ uint64_t x212;
+ uint64_t x213;
+ uint64_t x214;
+ uint64_t x215;
+ uint64_t x216;
+ uint64_t x217;
+ uint64_t x218;
+ uint64_t x219;
+ uint64_t x220;
+ uint64_t x221;
+ uint64_t x222;
+ uint64_t x223;
+ uint64_t x224;
+ uint64_t x225;
+ uint64_t x226;
+ uint64_t x227;
+ uint64_t x228;
+ uint32_t x229;
+ uint64_t x230;
+ uint64_t x231;
+ uint64_t x232;
+ uint64_t x233;
+ uint64_t x234;
+ uint64_t x235;
+ uint64_t x236;
+ uint64_t x237;
+ uint64_t x238;
+ uint64_t x239;
+ uint64_t x240;
+ uint64_t x241;
+ uint64_t x242;
+ uint64_t x243;
+ uint64_t x244;
+ uint64_t x245;
+ uint64_t x246;
+ uint64_t x247;
+ uint64_t x248;
+ uint64_t x249;
+ uint32_t x250;
+ uint64_t x251;
+ uint64_t x252;
+ uint32_t x253;
+ uint64_t x254;
+ uint64_t x255;
+ uint32_t x256;
+ uint64_t x257;
+ uint64_t x258;
+ uint32_t x259;
+ uint64_t x260;
+ uint64_t x261;
+ uint32_t x262;
+ uint64_t x263;
+ uint64_t x264;
+ uint32_t x265;
+ uint64_t x266;
+ uint64_t x267;
+ uint32_t x268;
+ uint64_t x269;
+ uint64_t x270;
+ uint32_t x271;
+ uint64_t x272;
+ uint64_t x273;
+ uint32_t x274;
+ uint64_t x275;
+ uint64_t x276;
+ uint32_t x277;
+ uint64_t x278;
+ uint64_t x279;
+ uint32_t x280;
+ uint64_t x281;
+ uint64_t x282;
+ uint32_t x283;
+ uint64_t x284;
+ uint64_t x285;
+ uint32_t x286;
+ uint64_t x287;
+ uint64_t x288;
+ uint32_t x289;
+ uint64_t x290;
+ uint64_t x291;
+ uint32_t x292;
+ uint64_t x293;
+ uint64_t x294;
+ uint32_t x295;
+ uint64_t x296;
+ uint64_t x297;
+ uint32_t x298;
+ uint64_t x299;
+ uint64_t x300;
+ uint32_t x301;
+ uint64_t x302;
+ uint32_t x303;
+ uint32_t x304;
+ uint32_t x305;
+ fiat_secp521r1_uint1 x306;
+ uint32_t x307;
+ uint32_t x308;
+ x1 = (arg1[18]);
+ x2 = (x1 * 0x2);
+ x3 = ((arg1[18]) * 0x2);
+ x4 = (arg1[17]);
+ x5 = (x4 * 0x2);
+ x6 = ((arg1[17]) * 0x2);
+ x7 = (arg1[16]);
+ x8 = (x7 * 0x2);
+ x9 = ((arg1[16]) * 0x2);
+ x10 = (arg1[15]);
+ x11 = (x10 * 0x2);
+ x12 = ((arg1[15]) * 0x2);
+ x13 = (arg1[14]);
+ x14 = (x13 * 0x2);
+ x15 = ((arg1[14]) * 0x2);
+ x16 = (arg1[13]);
+ x17 = (x16 * 0x2);
+ x18 = ((arg1[13]) * 0x2);
+ x19 = (arg1[12]);
+ x20 = (x19 * 0x2);
+ x21 = ((arg1[12]) * 0x2);
+ x22 = (arg1[11]);
+ x23 = (x22 * 0x2);
+ x24 = ((arg1[11]) * 0x2);
+ x25 = (arg1[10]);
+ x26 = (x25 * 0x2);
+ x27 = ((arg1[10]) * 0x2);
+ x28 = ((arg1[9]) * 0x2);
+ x29 = ((arg1[8]) * 0x2);
+ x30 = ((arg1[7]) * 0x2);
+ x31 = ((arg1[6]) * 0x2);
+ x32 = ((arg1[5]) * 0x2);
+ x33 = ((arg1[4]) * 0x2);
+ x34 = ((arg1[3]) * 0x2);
+ x35 = ((arg1[2]) * 0x2);
+ x36 = ((arg1[1]) * 0x2);
+ x37 = ((uint64_t)(arg1[18]) * x1);
+ x38 = ((uint64_t)(arg1[17]) * (x2 * 0x2));
+ x39 = ((uint64_t)(arg1[17]) * (x4 * 0x2));
+ x40 = ((uint64_t)(arg1[16]) * x2);
+ x41 = ((uint64_t)(arg1[16]) * (x5 * 0x2));
+ x42 = ((uint64_t)(arg1[16]) * x7);
+ x43 = ((uint64_t)(arg1[15]) * (x2 * 0x2));
+ x44 = ((uint64_t)(arg1[15]) * (x5 * 0x2));
+ x45 = ((uint64_t)(arg1[15]) * x8);
+ x46 = ((uint64_t)(arg1[15]) * (x10 * 0x2));
+ x47 = ((uint64_t)(arg1[14]) * x2);
+ x48 = ((uint64_t)(arg1[14]) * x5);
+ x49 = ((uint64_t)(arg1[14]) * x8);
+ x50 = ((uint64_t)(arg1[14]) * x11);
+ x51 = ((uint64_t)(arg1[14]) * x13);
+ x52 = ((uint64_t)(arg1[13]) * x2);
+ x53 = ((uint64_t)(arg1[13]) * (x5 * 0x2));
+ x54 = ((uint64_t)(arg1[13]) * x8);
+ x55 = ((uint64_t)(arg1[13]) * (x11 * 0x2));
+ x56 = ((uint64_t)(arg1[13]) * x14);
+ x57 = ((uint64_t)(arg1[13]) * (x16 * 0x2));
+ x58 = ((uint64_t)(arg1[12]) * (x2 * 0x2));
+ x59 = ((uint64_t)(arg1[12]) * (x5 * 0x2));
+ x60 = ((uint64_t)(arg1[12]) * (x8 * 0x2));
+ x61 = ((uint64_t)(arg1[12]) * (x11 * 0x2));
+ x62 = ((uint64_t)(arg1[12]) * (x14 * 0x2));
+ x63 = ((uint64_t)(arg1[12]) * (x17 * 0x2));
+ x64 = ((uint64_t)(arg1[12]) * (x19 * 0x2));
+ x65 = ((uint64_t)(arg1[11]) * x2);
+ x66 = ((uint64_t)(arg1[11]) * (x5 * 0x2));
+ x67 = ((uint64_t)(arg1[11]) * x8);
+ x68 = ((uint64_t)(arg1[11]) * (x11 * 0x2));
+ x69 = ((uint64_t)(arg1[11]) * x14);
+ x70 = ((uint64_t)(arg1[11]) * x17);
+ x71 = ((uint64_t)(arg1[11]) * (x20 * 0x2));
+ x72 = ((uint64_t)(arg1[11]) * x22);
+ x73 = ((uint64_t)(arg1[10]) * (x2 * 0x2));
+ x74 = ((uint64_t)(arg1[10]) * (x5 * 0x2));
+ x75 = ((uint64_t)(arg1[10]) * (x8 * 0x2));
+ x76 = ((uint64_t)(arg1[10]) * (x11 * 0x2));
+ x77 = ((uint64_t)(arg1[10]) * x14);
+ x78 = ((uint64_t)(arg1[10]) * (x17 * 0x2));
+ x79 = ((uint64_t)(arg1[10]) * (x20 * 0x2));
+ x80 = ((uint64_t)(arg1[10]) * (x23 * 0x2));
+ x81 = ((uint64_t)(arg1[10]) * (x25 * 0x2));
+ x82 = ((uint64_t)(arg1[9]) * x2);
+ x83 = ((uint64_t)(arg1[9]) * (x5 * 0x2));
+ x84 = ((uint64_t)(arg1[9]) * x8);
+ x85 = ((uint64_t)(arg1[9]) * x11);
+ x86 = ((uint64_t)(arg1[9]) * x14);
+ x87 = ((uint64_t)(arg1[9]) * x17);
+ x88 = ((uint64_t)(arg1[9]) * (x20 * 0x2));
+ x89 = ((uint64_t)(arg1[9]) * x23);
+ x90 = ((uint64_t)(arg1[9]) * (x26 * 0x2));
+ x91 = ((uint64_t)(arg1[9]) * (arg1[9]));
+ x92 = ((uint64_t)(arg1[8]) * (x2 * 0x2));
+ x93 = ((uint64_t)(arg1[8]) * (x5 * 0x2));
+ x94 = ((uint64_t)(arg1[8]) * x8);
+ x95 = ((uint64_t)(arg1[8]) * (x11 * 0x2));
+ x96 = ((uint64_t)(arg1[8]) * x14);
+ x97 = ((uint64_t)(arg1[8]) * (x17 * 0x2));
+ x98 = ((uint64_t)(arg1[8]) * (x20 * 0x2));
+ x99 = ((uint64_t)(arg1[8]) * (x23 * 0x2));
+ x100 = ((uint64_t)(arg1[8]) * (x27 * 0x2));
+ x101 = ((uint64_t)(arg1[8]) * x28);
+ x102 = ((uint64_t)(arg1[8]) * ((arg1[8]) * 0x2));
+ x103 = ((uint64_t)(arg1[7]) * x2);
+ x104 = ((uint64_t)(arg1[7]) * x5);
+ x105 = ((uint64_t)(arg1[7]) * x8);
+ x106 = ((uint64_t)(arg1[7]) * x11);
+ x107 = ((uint64_t)(arg1[7]) * x14);
+ x108 = ((uint64_t)(arg1[7]) * x17);
+ x109 = ((uint64_t)(arg1[7]) * (x20 * 0x2));
+ x110 = ((uint64_t)(arg1[7]) * x24);
+ x111 = ((uint64_t)(arg1[7]) * x27);
+ x112 = ((uint64_t)(arg1[7]) * x28);
+ x113 = ((uint64_t)(arg1[7]) * x29);
+ x114 = ((uint64_t)(arg1[7]) * (arg1[7]));
+ x115 = ((uint64_t)(arg1[6]) * x2);
+ x116 = ((uint64_t)(arg1[6]) * (x5 * 0x2));
+ x117 = ((uint64_t)(arg1[6]) * x8);
+ x118 = ((uint64_t)(arg1[6]) * (x11 * 0x2));
+ x119 = ((uint64_t)(arg1[6]) * x14);
+ x120 = ((uint64_t)(arg1[6]) * (x17 * 0x2));
+ x121 = ((uint64_t)(arg1[6]) * (x21 * 0x2));
+ x122 = ((uint64_t)(arg1[6]) * x24);
+ x123 = ((uint64_t)(arg1[6]) * (x27 * 0x2));
+ x124 = ((uint64_t)(arg1[6]) * x28);
+ x125 = ((uint64_t)(arg1[6]) * (x29 * 0x2));
+ x126 = ((uint64_t)(arg1[6]) * x30);
+ x127 = ((uint64_t)(arg1[6]) * (arg1[6]));
+ x128 = ((uint64_t)(arg1[5]) * (x2 * 0x2));
+ x129 = ((uint64_t)(arg1[5]) * (x5 * 0x2));
+ x130 = ((uint64_t)(arg1[5]) * (x8 * 0x2));
+ x131 = ((uint64_t)(arg1[5]) * (x11 * 0x2));
+ x132 = ((uint64_t)(arg1[5]) * (x14 * 0x2));
+ x133 = ((uint64_t)(arg1[5]) * (x18 * 0x2));
+ x134 = ((uint64_t)(arg1[5]) * (x21 * 0x2));
+ x135 = ((uint64_t)(arg1[5]) * (x24 * 0x2));
+ x136 = ((uint64_t)(arg1[5]) * (x27 * 0x2));
+ x137 = ((uint64_t)(arg1[5]) * (x28 * 0x2));
+ x138 = ((uint64_t)(arg1[5]) * (x29 * 0x2));
+ x139 = ((uint64_t)(arg1[5]) * x30);
+ x140 = ((uint64_t)(arg1[5]) * (x31 * 0x2));
+ x141 = ((uint64_t)(arg1[5]) * ((arg1[5]) * 0x2));
+ x142 = ((uint64_t)(arg1[4]) * x2);
+ x143 = ((uint64_t)(arg1[4]) * (x5 * 0x2));
+ x144 = ((uint64_t)(arg1[4]) * x8);
+ x145 = ((uint64_t)(arg1[4]) * (x11 * 0x2));
+ x146 = ((uint64_t)(arg1[4]) * x15);
+ x147 = ((uint64_t)(arg1[4]) * x18);
+ x148 = ((uint64_t)(arg1[4]) * (x21 * 0x2));
+ x149 = ((uint64_t)(arg1[4]) * x24);
+ x150 = ((uint64_t)(arg1[4]) * (x27 * 0x2));
+ x151 = ((uint64_t)(arg1[4]) * x28);
+ x152 = ((uint64_t)(arg1[4]) * x29);
+ x153 = ((uint64_t)(arg1[4]) * x30);
+ x154 = ((uint64_t)(arg1[4]) * x31);
+ x155 = ((uint64_t)(arg1[4]) * (x32 * 0x2));
+ x156 = ((uint64_t)(arg1[4]) * (arg1[4]));
+ x157 = ((uint64_t)(arg1[3]) * (x2 * 0x2));
+ x158 = ((uint64_t)(arg1[3]) * (x5 * 0x2));
+ x159 = ((uint64_t)(arg1[3]) * (x8 * 0x2));
+ x160 = ((uint64_t)(arg1[3]) * (x12 * 0x2));
+ x161 = ((uint64_t)(arg1[3]) * x15);
+ x162 = ((uint64_t)(arg1[3]) * (x18 * 0x2));
+ x163 = ((uint64_t)(arg1[3]) * (x21 * 0x2));
+ x164 = ((uint64_t)(arg1[3]) * (x24 * 0x2));
+ x165 = ((uint64_t)(arg1[3]) * (x27 * 0x2));
+ x166 = ((uint64_t)(arg1[3]) * x28);
+ x167 = ((uint64_t)(arg1[3]) * (x29 * 0x2));
+ x168 = ((uint64_t)(arg1[3]) * x30);
+ x169 = ((uint64_t)(arg1[3]) * (x31 * 0x2));
+ x170 = ((uint64_t)(arg1[3]) * (x32 * 0x2));
+ x171 = ((uint64_t)(arg1[3]) * (x33 * 0x2));
+ x172 = ((uint64_t)(arg1[3]) * ((arg1[3]) * 0x2));
+ x173 = ((uint64_t)(arg1[2]) * x2);
+ x174 = ((uint64_t)(arg1[2]) * (x5 * 0x2));
+ x175 = ((uint64_t)(arg1[2]) * x9);
+ x176 = ((uint64_t)(arg1[2]) * x12);
+ x177 = ((uint64_t)(arg1[2]) * x15);
+ x178 = ((uint64_t)(arg1[2]) * x18);
+ x179 = ((uint64_t)(arg1[2]) * (x21 * 0x2));
+ x180 = ((uint64_t)(arg1[2]) * x24);
+ x181 = ((uint64_t)(arg1[2]) * x27);
+ x182 = ((uint64_t)(arg1[2]) * x28);
+ x183 = ((uint64_t)(arg1[2]) * x29);
+ x184 = ((uint64_t)(arg1[2]) * x30);
+ x185 = ((uint64_t)(arg1[2]) * x31);
+ x186 = ((uint64_t)(arg1[2]) * (x32 * 0x2));
+ x187 = ((uint64_t)(arg1[2]) * x33);
+ x188 = ((uint64_t)(arg1[2]) * x34);
+ x189 = ((uint64_t)(arg1[2]) * (arg1[2]));
+ x190 = ((uint64_t)(arg1[1]) * (x2 * 0x2));
+ x191 = ((uint64_t)(arg1[1]) * (x6 * 0x2));
+ x192 = ((uint64_t)(arg1[1]) * x9);
+ x193 = ((uint64_t)(arg1[1]) * (x12 * 0x2));
+ x194 = ((uint64_t)(arg1[1]) * x15);
+ x195 = ((uint64_t)(arg1[1]) * (x18 * 0x2));
+ x196 = ((uint64_t)(arg1[1]) * (x21 * 0x2));
+ x197 = ((uint64_t)(arg1[1]) * x24);
+ x198 = ((uint64_t)(arg1[1]) * (x27 * 0x2));
+ x199 = ((uint64_t)(arg1[1]) * x28);
+ x200 = ((uint64_t)(arg1[1]) * (x29 * 0x2));
+ x201 = ((uint64_t)(arg1[1]) * x30);
+ x202 = ((uint64_t)(arg1[1]) * (x31 * 0x2));
+ x203 = ((uint64_t)(arg1[1]) * (x32 * 0x2));
+ x204 = ((uint64_t)(arg1[1]) * x33);
+ x205 = ((uint64_t)(arg1[1]) * (x34 * 0x2));
+ x206 = ((uint64_t)(arg1[1]) * x35);
+ x207 = ((uint64_t)(arg1[1]) * ((arg1[1]) * 0x2));
+ x208 = ((uint64_t)(arg1[0]) * x3);
+ x209 = ((uint64_t)(arg1[0]) * x6);
+ x210 = ((uint64_t)(arg1[0]) * x9);
+ x211 = ((uint64_t)(arg1[0]) * x12);
+ x212 = ((uint64_t)(arg1[0]) * x15);
+ x213 = ((uint64_t)(arg1[0]) * x18);
+ x214 = ((uint64_t)(arg1[0]) * x21);
+ x215 = ((uint64_t)(arg1[0]) * x24);
+ x216 = ((uint64_t)(arg1[0]) * x27);
+ x217 = ((uint64_t)(arg1[0]) * x28);
+ x218 = ((uint64_t)(arg1[0]) * x29);
+ x219 = ((uint64_t)(arg1[0]) * x30);
+ x220 = ((uint64_t)(arg1[0]) * x31);
+ x221 = ((uint64_t)(arg1[0]) * x32);
+ x222 = ((uint64_t)(arg1[0]) * x33);
+ x223 = ((uint64_t)(arg1[0]) * x34);
+ x224 = ((uint64_t)(arg1[0]) * x35);
+ x225 = ((uint64_t)(arg1[0]) * x36);
+ x226 = ((uint64_t)(arg1[0]) * (arg1[0]));
+ x227 =
+ (x226 +
+ (x190 +
+ (x174 + (x159 + (x145 + (x132 + (x120 + (x109 + (x99 + x90)))))))));
+ x228 = (x227 >> 28);
+ x229 = (uint32_t)(x227 & UINT32_C(0xfffffff));
+ x230 =
+ (x208 +
+ (x191 +
+ (x175 + (x160 + (x146 + (x133 + (x121 + (x110 + (x100 + x91)))))))));
+ x231 =
+ (x209 +
+ (x192 +
+ (x176 + (x161 + (x147 + (x134 + (x122 + (x111 + (x101 + x37)))))))));
+ x232 =
+ (x210 +
+ (x193 +
+ (x177 + (x162 + (x148 + (x135 + (x123 + (x112 + (x102 + x38)))))))));
+ x233 =
+ (x211 +
+ (x194 +
+ (x178 + (x163 + (x149 + (x136 + (x124 + (x113 + (x40 + x39)))))))));
+ x234 =
+ (x212 +
+ (x195 +
+ (x179 + (x164 + (x150 + (x137 + (x125 + (x114 + (x43 + x41)))))))));
+ x235 = (x213 +
+ (x196 +
+ (x180 + (x165 + (x151 + (x138 + (x126 + (x47 + (x44 + x42)))))))));
+ x236 = (x214 +
+ (x197 +
+ (x181 + (x166 + (x152 + (x139 + (x127 + (x52 + (x48 + x45)))))))));
+ x237 = (x215 +
+ (x198 +
+ (x182 + (x167 + (x153 + (x140 + (x58 + (x53 + (x49 + x46)))))))));
+ x238 = (x216 +
+ (x199 +
+ (x183 + (x168 + (x154 + (x141 + (x65 + (x59 + (x54 + x50)))))))));
+ x239 = (x217 +
+ (x200 +
+ (x184 + (x169 + (x155 + (x73 + (x66 + (x60 + (x55 + x51)))))))));
+ x240 = (x218 +
+ (x201 +
+ (x185 + (x170 + (x156 + (x82 + (x74 + (x67 + (x61 + x56)))))))));
+ x241 = (x219 +
+ (x202 +
+ (x186 + (x171 + (x92 + (x83 + (x75 + (x68 + (x62 + x57)))))))));
+ x242 = (x220 +
+ (x203 +
+ (x187 + (x172 + (x103 + (x93 + (x84 + (x76 + (x69 + x63)))))))));
+ x243 = (x221 +
+ (x204 +
+ (x188 + (x115 + (x104 + (x94 + (x85 + (x77 + (x70 + x64)))))))));
+ x244 = (x222 +
+ (x205 +
+ (x189 + (x128 + (x116 + (x105 + (x95 + (x86 + (x78 + x71)))))))));
+ x245 = (x223 +
+ (x206 +
+ (x142 + (x129 + (x117 + (x106 + (x96 + (x87 + (x79 + x72)))))))));
+ x246 = (x224 +
+ (x207 +
+ (x157 + (x143 + (x130 + (x118 + (x107 + (x97 + (x88 + x80)))))))));
+ x247 = (x225 +
+ (x173 +
+ (x158 + (x144 + (x131 + (x119 + (x108 + (x98 + (x89 + x81)))))))));
+ x248 = (x228 + x247);
+ x249 = (x248 >> 27);
+ x250 = (uint32_t)(x248 & UINT32_C(0x7ffffff));
+ x251 = (x249 + x246);
+ x252 = (x251 >> 28);
+ x253 = (uint32_t)(x251 & UINT32_C(0xfffffff));
+ x254 = (x252 + x245);
+ x255 = (x254 >> 27);
+ x256 = (uint32_t)(x254 & UINT32_C(0x7ffffff));
+ x257 = (x255 + x244);
+ x258 = (x257 >> 28);
+ x259 = (uint32_t)(x257 & UINT32_C(0xfffffff));
+ x260 = (x258 + x243);
+ x261 = (x260 >> 27);
+ x262 = (uint32_t)(x260 & UINT32_C(0x7ffffff));
+ x263 = (x261 + x242);
+ x264 = (x263 >> 27);
+ x265 = (uint32_t)(x263 & UINT32_C(0x7ffffff));
+ x266 = (x264 + x241);
+ x267 = (x266 >> 28);
+ x268 = (uint32_t)(x266 & UINT32_C(0xfffffff));
+ x269 = (x267 + x240);
+ x270 = (x269 >> 27);
+ x271 = (uint32_t)(x269 & UINT32_C(0x7ffffff));
+ x272 = (x270 + x239);
+ x273 = (x272 >> 28);
+ x274 = (uint32_t)(x272 & UINT32_C(0xfffffff));
+ x275 = (x273 + x238);
+ x276 = (x275 >> 27);
+ x277 = (uint32_t)(x275 & UINT32_C(0x7ffffff));
+ x278 = (x276 + x237);
+ x279 = (x278 >> 28);
+ x280 = (uint32_t)(x278 & UINT32_C(0xfffffff));
+ x281 = (x279 + x236);
+ x282 = (x281 >> 27);
+ x283 = (uint32_t)(x281 & UINT32_C(0x7ffffff));
+ x284 = (x282 + x235);
+ x285 = (x284 >> 27);
+ x286 = (uint32_t)(x284 & UINT32_C(0x7ffffff));
+ x287 = (x285 + x234);
+ x288 = (x287 >> 28);
+ x289 = (uint32_t)(x287 & UINT32_C(0xfffffff));
+ x290 = (x288 + x233);
+ x291 = (x290 >> 27);
+ x292 = (uint32_t)(x290 & UINT32_C(0x7ffffff));
+ x293 = (x291 + x232);
+ x294 = (x293 >> 28);
+ x295 = (uint32_t)(x293 & UINT32_C(0xfffffff));
+ x296 = (x294 + x231);
+ x297 = (x296 >> 27);
+ x298 = (uint32_t)(x296 & UINT32_C(0x7ffffff));
+ x299 = (x297 + x230);
+ x300 = (x299 >> 27);
+ x301 = (uint32_t)(x299 & UINT32_C(0x7ffffff));
+ x302 = (x229 + x300);
+ x303 = (uint32_t)(x302 >> 28);
+ x304 = (uint32_t)(x302 & UINT32_C(0xfffffff));
+ x305 = (x303 + x250);
+ x306 = (fiat_secp521r1_uint1)(x305 >> 27);
+ x307 = (x305 & UINT32_C(0x7ffffff));
+ x308 = (x306 + x253);
+ out1[0] = x304;
+ out1[1] = x307;
+ out1[2] = x308;
+ out1[3] = x256;
+ out1[4] = x259;
+ out1[5] = x262;
+ out1[6] = x265;
+ out1[7] = x268;
+ out1[8] = x271;
+ out1[9] = x274;
+ out1[10] = x277;
+ out1[11] = x280;
+ out1[12] = x283;
+ out1[13] = x286;
+ out1[14] = x289;
+ out1[15] = x292;
+ out1[16] = x295;
+ out1[17] = x298;
+ out1[18] = x301;
+}
+
+/*
+ * The function fiat_secp521r1_carry_add adds two field elements.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 + eval arg2) mod m
+ *
+ * Implementation notes:
+ * Limb i of arg1 and arg2 are summed together with the bits shifted out
+ * of the previous limb's sum. The shift amounts (28 or 27) follow the
+ * per-limb widths used below, which total 8 * 28 + 11 * 27 = 521 bits.
+ * The bits shifted out of limb 18 are added back into limb 0, and a short
+ * second pass carries from limb 0 into limb 1 and from limb 1 into limb 2.
+ * Limbs 0, 1 and 3..18 are masked to their width before being stored;
+ * limb 2 is stored as its masked value plus the carry from limb 1.
+ * Every read of arg1/arg2 happens before the first store to out1.
+ * NOTE(review): folding the top carry into limb 0 presumably relies on
+ * 2^521 = 1 (mod m); m is defined outside this function -- confirm.
+ *
+ */
+static void
+fiat_secp521r1_carry_add(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1,
+ const fiat_secp521r1_tight_field_element arg2)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint32_t x40;
+ x1 = ((arg1[0]) + (arg2[0])); /* limb 0 sum; bits 28 and up carry onward */
+ x2 = ((x1 >> 28) + ((arg1[1]) + (arg2[1])));
+ x3 = ((x2 >> 27) + ((arg1[2]) + (arg2[2])));
+ x4 = ((x3 >> 28) + ((arg1[3]) + (arg2[3])));
+ x5 = ((x4 >> 27) + ((arg1[4]) + (arg2[4])));
+ x6 = ((x5 >> 28) + ((arg1[5]) + (arg2[5])));
+ x7 = ((x6 >> 27) + ((arg1[6]) + (arg2[6])));
+ x8 = ((x7 >> 27) + ((arg1[7]) + (arg2[7])));
+ x9 = ((x8 >> 28) + ((arg1[8]) + (arg2[8])));
+ x10 = ((x9 >> 27) + ((arg1[9]) + (arg2[9])));
+ x11 = ((x10 >> 28) + ((arg1[10]) + (arg2[10])));
+ x12 = ((x11 >> 27) + ((arg1[11]) + (arg2[11])));
+ x13 = ((x12 >> 28) + ((arg1[12]) + (arg2[12])));
+ x14 = ((x13 >> 27) + ((arg1[13]) + (arg2[13])));
+ x15 = ((x14 >> 27) + ((arg1[14]) + (arg2[14])));
+ x16 = ((x15 >> 28) + ((arg1[15]) + (arg2[15])));
+ x17 = ((x16 >> 27) + ((arg1[16]) + (arg2[16])));
+ x18 = ((x17 >> 28) + ((arg1[17]) + (arg2[17])));
+ x19 = ((x18 >> 27) + ((arg1[18]) + (arg2[18]))); /* top limb sum */
+ x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); /* top carry wraps into limb 0 */
+ x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* second pass: limb 0 -> limb 1 */
+ x22 = (x20 & UINT32_C(0xfffffff));
+ x23 = (x21 & UINT32_C(0x7ffffff));
+ x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* limb 2 takes limb 1's carry; no further mask */
+ x25 = (x4 & UINT32_C(0x7ffffff));
+ x26 = (x5 & UINT32_C(0xfffffff));
+ x27 = (x6 & UINT32_C(0x7ffffff));
+ x28 = (x7 & UINT32_C(0x7ffffff));
+ x29 = (x8 & UINT32_C(0xfffffff));
+ x30 = (x9 & UINT32_C(0x7ffffff));
+ x31 = (x10 & UINT32_C(0xfffffff));
+ x32 = (x11 & UINT32_C(0x7ffffff));
+ x33 = (x12 & UINT32_C(0xfffffff));
+ x34 = (x13 & UINT32_C(0x7ffffff));
+ x35 = (x14 & UINT32_C(0x7ffffff));
+ x36 = (x15 & UINT32_C(0xfffffff));
+ x37 = (x16 & UINT32_C(0x7ffffff));
+ x38 = (x17 & UINT32_C(0xfffffff));
+ x39 = (x18 & UINT32_C(0x7ffffff));
+ x40 = (x19 & UINT32_C(0x7ffffff));
+ out1[0] = x22;
+ out1[1] = x23;
+ out1[2] = x24;
+ out1[3] = x25;
+ out1[4] = x26;
+ out1[5] = x27;
+ out1[6] = x28;
+ out1[7] = x29;
+ out1[8] = x30;
+ out1[9] = x31;
+ out1[10] = x32;
+ out1[11] = x33;
+ out1[12] = x34;
+ out1[13] = x35;
+ out1[14] = x36;
+ out1[15] = x37;
+ out1[16] = x38;
+ out1[17] = x39;
+ out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_carry_sub subtracts two field elements.
+ *
+ * Postconditions:
+ * eval out1 mod m = (eval arg1 - eval arg2) mod m
+ *
+ * Implementation notes:
+ * Before each limb of arg2 is subtracted, a constant is added to the
+ * matching limb of arg1: 0x1ffffffe where the limb is shifted/masked as
+ * 28 bits and 0xffffffe where it is 27 bits. Each constant is twice the
+ * all-ones mask of that width (2 * 0xfffffff and 2 * 0x7ffffff).
+ * The biased differences are then carried exactly as in an addition:
+ * bits above each limb's width move to the next limb, the bits shifted
+ * out of limb 18 are added back into limb 0, and a short second pass
+ * carries from limb 0 into limb 1 and from limb 1 into limb 2.
+ * Every read of arg1/arg2 happens before the first store to out1.
+ * NOTE(review): the bias constants presumably encode 2 * m so that the
+ * uint32_t subtractions cannot wrap for inputs within the tight bounds;
+ * m and those bounds are defined outside this function -- confirm.
+ *
+ */
+static void
+fiat_secp521r1_carry_sub(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1,
+ const fiat_secp521r1_tight_field_element arg2)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint32_t x40;
+ x1 = ((UINT32_C(0x1ffffffe) + (arg1[0])) - (arg2[0])); /* bias, then subtract limb 0 */
+ x2 = ((x1 >> 28) + ((UINT32_C(0xffffffe) + (arg1[1])) - (arg2[1])));
+ x3 = ((x2 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[2])) - (arg2[2])));
+ x4 = ((x3 >> 28) + ((UINT32_C(0xffffffe) + (arg1[3])) - (arg2[3])));
+ x5 = ((x4 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[4])) - (arg2[4])));
+ x6 = ((x5 >> 28) + ((UINT32_C(0xffffffe) + (arg1[5])) - (arg2[5])));
+ x7 = ((x6 >> 27) + ((UINT32_C(0xffffffe) + (arg1[6])) - (arg2[6])));
+ x8 = ((x7 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[7])) - (arg2[7])));
+ x9 = ((x8 >> 28) + ((UINT32_C(0xffffffe) + (arg1[8])) - (arg2[8])));
+ x10 = ((x9 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[9])) - (arg2[9])));
+ x11 = ((x10 >> 28) + ((UINT32_C(0xffffffe) + (arg1[10])) - (arg2[10])));
+ x12 = ((x11 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[11])) - (arg2[11])));
+ x13 = ((x12 >> 28) + ((UINT32_C(0xffffffe) + (arg1[12])) - (arg2[12])));
+ x14 = ((x13 >> 27) + ((UINT32_C(0xffffffe) + (arg1[13])) - (arg2[13])));
+ x15 = ((x14 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[14])) - (arg2[14])));
+ x16 = ((x15 >> 28) + ((UINT32_C(0xffffffe) + (arg1[15])) - (arg2[15])));
+ x17 = ((x16 >> 27) + ((UINT32_C(0x1ffffffe) + (arg1[16])) - (arg2[16])));
+ x18 = ((x17 >> 28) + ((UINT32_C(0xffffffe) + (arg1[17])) - (arg2[17])));
+ x19 = ((x18 >> 27) + ((UINT32_C(0xffffffe) + (arg1[18])) - (arg2[18]))); /* top limb */
+ x20 = ((x1 & UINT32_C(0xfffffff)) + (x19 >> 27)); /* top carry wraps into limb 0 */
+ x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* second pass: limb 0 -> limb 1 */
+ x22 = (x20 & UINT32_C(0xfffffff));
+ x23 = (x21 & UINT32_C(0x7ffffff));
+ x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* limb 2 takes limb 1's carry; no further mask */
+ x25 = (x4 & UINT32_C(0x7ffffff));
+ x26 = (x5 & UINT32_C(0xfffffff));
+ x27 = (x6 & UINT32_C(0x7ffffff));
+ x28 = (x7 & UINT32_C(0x7ffffff));
+ x29 = (x8 & UINT32_C(0xfffffff));
+ x30 = (x9 & UINT32_C(0x7ffffff));
+ x31 = (x10 & UINT32_C(0xfffffff));
+ x32 = (x11 & UINT32_C(0x7ffffff));
+ x33 = (x12 & UINT32_C(0xfffffff));
+ x34 = (x13 & UINT32_C(0x7ffffff));
+ x35 = (x14 & UINT32_C(0x7ffffff));
+ x36 = (x15 & UINT32_C(0xfffffff));
+ x37 = (x16 & UINT32_C(0x7ffffff));
+ x38 = (x17 & UINT32_C(0xfffffff));
+ x39 = (x18 & UINT32_C(0x7ffffff));
+ x40 = (x19 & UINT32_C(0x7ffffff));
+ out1[0] = x22;
+ out1[1] = x23;
+ out1[2] = x24;
+ out1[3] = x25;
+ out1[4] = x26;
+ out1[5] = x27;
+ out1[6] = x28;
+ out1[7] = x29;
+ out1[8] = x30;
+ out1[9] = x31;
+ out1[10] = x32;
+ out1[11] = x33;
+ out1[12] = x34;
+ out1[13] = x35;
+ out1[14] = x36;
+ out1[15] = x37;
+ out1[16] = x38;
+ out1[17] = x39;
+ out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_carry_opp negates a field element.
+ *
+ * Postconditions:
+ * eval out1 mod m = -eval arg1 mod m
+ *
+ * Implementation notes:
+ * Each limb of arg1 is subtracted from a fixed constant: 0x1ffffffe where
+ * the limb is shifted/masked as 28 bits and 0xffffffe where it is 27 bits
+ * (twice the all-ones mask of that width). The carry taken from each
+ * difference is narrowed with a cast to fiat_secp521r1_uint1 before it is
+ * added to the next limb. The carry out of limb 18 is added back into
+ * limb 0, and a short second pass carries from limb 0 into limb 1 and
+ * from limb 1 into limb 2. Limb 2 is stored as its masked value plus that
+ * last carry; all other limbs are masked to their width.
+ * Every read of arg1 happens before the first store to out1.
+ * NOTE(review): the constants presumably encode 2 * m, and the uint1 casts
+ * presumably reflect that each carry fits in one bit for inputs within
+ * the tight bounds; m, the bounds and the uint1 type are all defined
+ * outside this function -- confirm.
+ *
+ */
+static void
+fiat_secp521r1_carry_opp(
+ fiat_secp521r1_tight_field_element out1,
+ const fiat_secp521r1_tight_field_element arg1)
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint32_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint32_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint32_t x40;
+ x1 = (UINT32_C(0x1ffffffe) - (arg1[0])); /* constant minus limb 0 */
+ x2 = ((fiat_secp521r1_uint1)(x1 >> 28) + (UINT32_C(0xffffffe) - (arg1[1])));
+ x3 =
+ ((fiat_secp521r1_uint1)(x2 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[2])));
+ x4 = ((fiat_secp521r1_uint1)(x3 >> 28) + (UINT32_C(0xffffffe) - (arg1[3])));
+ x5 =
+ ((fiat_secp521r1_uint1)(x4 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[4])));
+ x6 = ((fiat_secp521r1_uint1)(x5 >> 28) + (UINT32_C(0xffffffe) - (arg1[5])));
+ x7 = ((fiat_secp521r1_uint1)(x6 >> 27) + (UINT32_C(0xffffffe) - (arg1[6])));
+ x8 =
+ ((fiat_secp521r1_uint1)(x7 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[7])));
+ x9 = ((fiat_secp521r1_uint1)(x8 >> 28) + (UINT32_C(0xffffffe) - (arg1[8])));
+ x10 =
+ ((fiat_secp521r1_uint1)(x9 >> 27) + (UINT32_C(0x1ffffffe) - (arg1[9])));
+ x11 = ((fiat_secp521r1_uint1)(x10 >> 28) +
+ (UINT32_C(0xffffffe) - (arg1[10])));
+ x12 = ((fiat_secp521r1_uint1)(x11 >> 27) +
+ (UINT32_C(0x1ffffffe) - (arg1[11])));
+ x13 = ((fiat_secp521r1_uint1)(x12 >> 28) +
+ (UINT32_C(0xffffffe) - (arg1[12])));
+ x14 = ((fiat_secp521r1_uint1)(x13 >> 27) +
+ (UINT32_C(0xffffffe) - (arg1[13])));
+ x15 = ((fiat_secp521r1_uint1)(x14 >> 27) +
+ (UINT32_C(0x1ffffffe) - (arg1[14])));
+ x16 = ((fiat_secp521r1_uint1)(x15 >> 28) +
+ (UINT32_C(0xffffffe) - (arg1[15])));
+ x17 = ((fiat_secp521r1_uint1)(x16 >> 27) +
+ (UINT32_C(0x1ffffffe) - (arg1[16])));
+ x18 = ((fiat_secp521r1_uint1)(x17 >> 28) +
+ (UINT32_C(0xffffffe) - (arg1[17])));
+ x19 = ((fiat_secp521r1_uint1)(x18 >> 27) +
+ (UINT32_C(0xffffffe) - (arg1[18]))); /* top limb */
+ x20 = ((x1 & UINT32_C(0xfffffff)) +
+ (uint32_t)(fiat_secp521r1_uint1)(x19 >> 27)); /* top carry wraps into limb 0 */
+ x21 = ((fiat_secp521r1_uint1)(x20 >> 28) + (x2 & UINT32_C(0x7ffffff))); /* second pass: limb 0 -> limb 1 */
+ x22 = (x20 & UINT32_C(0xfffffff));
+ x23 = (x21 & UINT32_C(0x7ffffff));
+ x24 = ((fiat_secp521r1_uint1)(x21 >> 27) + (x3 & UINT32_C(0xfffffff))); /* limb 2 takes limb 1's carry; no further mask */
+ x25 = (x4 & UINT32_C(0x7ffffff));
+ x26 = (x5 & UINT32_C(0xfffffff));
+ x27 = (x6 & UINT32_C(0x7ffffff));
+ x28 = (x7 & UINT32_C(0x7ffffff));
+ x29 = (x8 & UINT32_C(0xfffffff));
+ x30 = (x9 & UINT32_C(0x7ffffff));
+ x31 = (x10 & UINT32_C(0xfffffff));
+ x32 = (x11 & UINT32_C(0x7ffffff));
+ x33 = (x12 & UINT32_C(0xfffffff));
+ x34 = (x13 & UINT32_C(0x7ffffff));
+ x35 = (x14 & UINT32_C(0x7ffffff));
+ x36 = (x15 & UINT32_C(0xfffffff));
+ x37 = (x16 & UINT32_C(0x7ffffff));
+ x38 = (x17 & UINT32_C(0xfffffff));
+ x39 = (x18 & UINT32_C(0x7ffffff));
+ x40 = (x19 & UINT32_C(0x7ffffff));
+ out1[0] = x22;
+ out1[1] = x23;
+ out1[2] = x24;
+ out1[3] = x25;
+ out1[4] = x26;
+ out1[5] = x27;
+ out1[6] = x28;
+ out1[7] = x29;
+ out1[8] = x30;
+ out1[9] = x31;
+ out1[10] = x32;
+ out1[11] = x33;
+ out1[12] = x34;
+ out1[13] = x35;
+ out1[14] = x36;
+ out1[15] = x37;
+ out1[16] = x38;
+ out1[17] = x39;
+ out1[18] = x40;
+}
+
+/*
+ * The function fiat_secp521r1_selectznz is a multi-limb conditional select.
+ *
+ * Postconditions:
+ * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+ *
+ * Implementation notes:
+ * Each of the 19 limbs is selected independently by one call to
+ * fiat_secp521r1_cmovznz_u32, with arg1 as the condition and the matching
+ * limbs of arg2 and arg3 as the two candidates. The results are held in
+ * locals and only then stored, so every read of arg2/arg3 happens before
+ * the first write to out1.
+ * NOTE(review): fiat_secp521r1_cmovznz_u32 is defined outside this
+ * function; whether the select is branch-free depends on that helper --
+ * confirm there.
+ *
+ * Input Bounds:
+ * arg1: [0x0 ~> 0x1]
+ * arg2: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * arg3: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff], [0x0 ~> 0xffffffff]]
+ */
+static void
+fiat_secp521r1_selectznz(uint32_t out1[19],
+ fiat_secp521r1_uint1 arg1,
+ const uint32_t arg2[19],
+ const uint32_t arg3[19])
+{
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint32_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint32_t x18;
+ uint32_t x19;
+ /* One conditional move per limb, all driven by the same arg1. */
+ fiat_secp521r1_cmovznz_u32(&x1, arg1, (arg2[0]), (arg3[0]));
+ fiat_secp521r1_cmovznz_u32(&x2, arg1, (arg2[1]), (arg3[1]));
+ fiat_secp521r1_cmovznz_u32(&x3, arg1, (arg2[2]), (arg3[2]));
+ fiat_secp521r1_cmovznz_u32(&x4, arg1, (arg2[3]), (arg3[3]));
+ fiat_secp521r1_cmovznz_u32(&x5, arg1, (arg2[4]), (arg3[4]));
+ fiat_secp521r1_cmovznz_u32(&x6, arg1, (arg2[5]), (arg3[5]));
+ fiat_secp521r1_cmovznz_u32(&x7, arg1, (arg2[6]), (arg3[6]));
+ fiat_secp521r1_cmovznz_u32(&x8, arg1, (arg2[7]), (arg3[7]));
+ fiat_secp521r1_cmovznz_u32(&x9, arg1, (arg2[8]), (arg3[8]));
+ fiat_secp521r1_cmovznz_u32(&x10, arg1, (arg2[9]), (arg3[9]));
+ fiat_secp521r1_cmovznz_u32(&x11, arg1, (arg2[10]), (arg3[10]));
+ fiat_secp521r1_cmovznz_u32(&x12, arg1, (arg2[11]), (arg3[11]));
+ fiat_secp521r1_cmovznz_u32(&x13, arg1, (arg2[12]), (arg3[12]));
+ fiat_secp521r1_cmovznz_u32(&x14, arg1, (arg2[13]), (arg3[13]));
+ fiat_secp521r1_cmovznz_u32(&x15, arg1, (arg2[14]), (arg3[14]));
+ fiat_secp521r1_cmovznz_u32(&x16, arg1, (arg2[15]), (arg3[15]));
+ fiat_secp521r1_cmovznz_u32(&x17, arg1, (arg2[16]), (arg3[16]));
+ fiat_secp521r1_cmovznz_u32(&x18, arg1, (arg2[17]), (arg3[17]));
+ fiat_secp521r1_cmovznz_u32(&x19, arg1, (arg2[18]), (arg3[18]));
+ /* Store the selected limbs only after every input has been read. */
+ out1[0] = x1;
+ out1[1] = x2;
+ out1[2] = x3;
+ out1[3] = x4;
+ out1[4] = x5;
+ out1[5] = x6;
+ out1[6] = x7;
+ out1[7] = x8;
+ out1[8] = x9;
+ out1[9] = x10;
+ out1[10] = x11;
+ out1[11] = x12;
+ out1[12] = x13;
+ out1[13] = x14;
+ out1[14] = x15;
+ out1[15] = x16;
+ out1[16] = x17;
+ out1[17] = x18;
+ out1[18] = x19;
+}
+
+/*
+ * The function fiat_secp521r1_to_bytes serializes a field element to bytes in little-endian order.
+ *
+ * Postconditions:
+ * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65]
+ *
+ * Output Bounds:
+ * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+ */
+static void
+fiat_secp521r1_to_bytes(
+ uint8_t out1[66], const fiat_secp521r1_tight_field_element arg1)
+{
+ uint32_t x1;
+ fiat_secp521r1_uint1 x2;
+ uint32_t x3;
+ fiat_secp521r1_uint1 x4;
+ uint32_t x5;
+ fiat_secp521r1_uint1 x6;
+ uint32_t x7;
+ fiat_secp521r1_uint1 x8;
+ uint32_t x9;
+ fiat_secp521r1_uint1 x10;
+ uint32_t x11;
+ fiat_secp521r1_uint1 x12;
+ uint32_t x13;
+ fiat_secp521r1_uint1 x14;
+ uint32_t x15;
+ fiat_secp521r1_uint1 x16;
+ uint32_t x17;
+ fiat_secp521r1_uint1 x18;
+ uint32_t x19;
+ fiat_secp521r1_uint1 x20;
+ uint32_t x21;
+ fiat_secp521r1_uint1 x22;
+ uint32_t x23;
+ fiat_secp521r1_uint1 x24;
+ uint32_t x25;
+ fiat_secp521r1_uint1 x26;
+ uint32_t x27;
+ fiat_secp521r1_uint1 x28;
+ uint32_t x29;
+ fiat_secp521r1_uint1 x30;
+ uint32_t x31;
+ fiat_secp521r1_uint1 x32;
+ uint32_t x33;
+ fiat_secp521r1_uint1 x34;
+ uint32_t x35;
+ fiat_secp521r1_uint1 x36;
+ uint32_t x37;
+ fiat_secp521r1_uint1 x38;
+ uint32_t x39;
+ uint32_t x40;
+ fiat_secp521r1_uint1 x41;
+ uint32_t x42;
+ fiat_secp521r1_uint1 x43;
+ uint32_t x44;
+ fiat_secp521r1_uint1 x45;
+ uint32_t x46;
+ fiat_secp521r1_uint1 x47;
+ uint32_t x48;
+ fiat_secp521r1_uint1 x49;
+ uint32_t x50;
+ fiat_secp521r1_uint1 x51;
+ uint32_t x52;
+ fiat_secp521r1_uint1 x53;
+ uint32_t x54;
+ fiat_secp521r1_uint1 x55;
+ uint32_t x56;
+ fiat_secp521r1_uint1 x57;
+ uint32_t x58;
+ fiat_secp521r1_uint1 x59;
+ uint32_t x60;
+ fiat_secp521r1_uint1 x61;
+ uint32_t x62;
+ fiat_secp521r1_uint1 x63;
+ uint32_t x64;
+ fiat_secp521r1_uint1 x65;
+ uint32_t x66;
+ fiat_secp521r1_uint1 x67;
+ uint32_t x68;
+ fiat_secp521r1_uint1 x69;
+ uint32_t x70;
+ fiat_secp521r1_uint1 x71;
+ uint32_t x72;
+ fiat_secp521r1_uint1 x73;
+ uint32_t x74;
+ fiat_secp521r1_uint1 x75;
+ uint32_t x76;
+ fiat_secp521r1_uint1 x77;
+ uint64_t x78;
+ uint32_t x79;
+ uint64_t x80;
+ uint32_t x81;
+ uint32_t x82;
+ uint32_t x83;
+ uint64_t x84;
+ uint32_t x85;
+ uint64_t x86;
+ uint32_t x87;
+ uint32_t x88;
+ uint32_t x89;
+ uint64_t x90;
+ uint32_t x91;
+ uint64_t x92;
+ uint32_t x93;
+ uint8_t x94;
+ uint32_t x95;
+ uint8_t x96;
+ uint32_t x97;
+ uint8_t x98;
+ uint8_t x99;
+ uint32_t x100;
+ uint8_t x101;
+ uint32_t x102;
+ uint8_t x103;
+ uint32_t x104;
+ uint8_t x105;
+ uint8_t x106;
+ uint64_t x107;
+ uint8_t x108;
+ uint32_t x109;
+ uint8_t x110;
+ uint32_t x111;
+ uint8_t x112;
+ uint32_t x113;
+ uint8_t x114;
+ uint8_t x115;
+ uint32_t x116;
+ uint8_t x117;
+ uint32_t x118;
+ uint8_t x119;
+ uint32_t x120;
+ uint8_t x121;
+ uint8_t x122;
+ uint64_t x123;
+ uint8_t x124;
+ uint32_t x125;
+ uint8_t x126;
+ uint32_t x127;
+ uint8_t x128;
+ uint32_t x129;
+ uint8_t x130;
+ uint8_t x131;
+ uint32_t x132;
+ uint8_t x133;
+ uint32_t x134;
+ uint8_t x135;
+ uint32_t x136;
+ uint8_t x137;
+ uint8_t x138;
+ uint32_t x139;
+ uint8_t x140;
+ uint32_t x141;
+ uint8_t x142;
+ uint32_t x143;
+ uint8_t x144;
+ uint8_t x145;
+ uint8_t x146;
+ uint32_t x147;
+ uint8_t x148;
+ uint32_t x149;
+ uint8_t x150;
+ uint8_t x151;
+ uint32_t x152;
+ uint8_t x153;
+ uint32_t x154;
+ uint8_t x155;
+ uint32_t x156;
+ uint8_t x157;
+ uint8_t x158;
+ uint64_t x159;
+ uint8_t x160;
+ uint32_t x161;
+ uint8_t x162;
+ uint32_t x163;
+ uint8_t x164;
+ uint32_t x165;
+ uint8_t x166;
+ uint8_t x167;
+ uint32_t x168;
+ uint8_t x169;
+ uint32_t x170;
+ uint8_t x171;
+ uint32_t x172;
+ uint8_t x173;
+ uint8_t x174;
+ uint64_t x175;
+ uint8_t x176;
+ uint32_t x177;
+ uint8_t x178;
+ uint32_t x179;
+ uint8_t x180;
+ uint32_t x181;
+ uint8_t x182;
+ uint8_t x183;
+ uint32_t x184;
+ uint8_t x185;
+ uint32_t x186;
+ uint8_t x187;
+ uint32_t x188;
+ uint8_t x189;
+ uint8_t x190;
+ uint32_t x191;
+ uint8_t x192;
+ uint32_t x193;
+ uint8_t x194;
+ uint32_t x195;
+ uint8_t x196;
+ uint8_t x197;
+ uint8_t x198;
+ uint32_t x199;
+ uint8_t x200;
+ uint32_t x201;
+ uint8_t x202;
+ uint8_t x203;
+ uint32_t x204;
+ uint8_t x205;
+ uint32_t x206;
+ uint8_t x207;
+ uint32_t x208;
+ uint8_t x209;
+ uint8_t x210;
+ uint64_t x211;
+ uint8_t x212;
+ uint32_t x213;
+ uint8_t x214;
+ uint32_t x215;
+ uint8_t x216;
+ uint32_t x217;
+ uint8_t x218;
+ uint8_t x219;
+ uint32_t x220;
+ uint8_t x221;
+ uint32_t x222;
+ uint8_t x223;
+ uint32_t x224;
+ uint8_t x225;
+ uint8_t x226;
+ uint64_t x227;
+ uint8_t x228;
+ uint32_t x229;
+ uint8_t x230;
+ uint32_t x231;
+ uint8_t x232;
+ uint32_t x233;
+ uint8_t x234;
+ fiat_secp521r1_uint1 x235;
+ fiat_secp521r1_subborrowx_u28(&x1, &x2, 0x0, (arg1[0]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x3, &x4, x2, (arg1[1]), UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x5, &x6, x4, (arg1[2]), UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x7, &x8, x6, (arg1[3]), UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x9, &x10, x8, (arg1[4]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x11, &x12, x10, (arg1[5]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u27(&x13, &x14, x12, (arg1[6]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x15, &x16, x14, (arg1[7]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x17, &x18, x16, (arg1[8]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x19, &x20, x18, (arg1[9]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x21, &x22, x20, (arg1[10]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x23, &x24, x22, (arg1[11]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x25, &x26, x24, (arg1[12]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u27(&x27, &x28, x26, (arg1[13]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x29, &x30, x28, (arg1[14]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x31, &x32, x30, (arg1[15]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u28(&x33, &x34, x32, (arg1[16]),
+ UINT32_C(0xfffffff));
+ fiat_secp521r1_subborrowx_u27(&x35, &x36, x34, (arg1[17]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_subborrowx_u27(&x37, &x38, x36, (arg1[18]),
+ UINT32_C(0x7ffffff));
+ fiat_secp521r1_cmovznz_u32(&x39, x38, 0x0, UINT32_C(0xffffffff));
+ fiat_secp521r1_addcarryx_u28(&x40, &x41, 0x0, x1,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x42, &x43, x41, x3,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x44, &x45, x43, x5,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x46, &x47, x45, x7,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x48, &x49, x47, x9,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x50, &x51, x49, x11,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u27(&x52, &x53, x51, x13,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x54, &x55, x53, x15,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x56, &x57, x55, x17,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x58, &x59, x57, x19,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x60, &x61, x59, x21,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x62, &x63, x61, x23,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x64, &x65, x63, x25,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u27(&x66, &x67, x65, x27,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x68, &x69, x67, x29,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x70, &x71, x69, x31,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u28(&x72, &x73, x71, x33,
+ (x39 & UINT32_C(0xfffffff)));
+ fiat_secp521r1_addcarryx_u27(&x74, &x75, x73, x35,
+ (x39 & UINT32_C(0x7ffffff)));
+ fiat_secp521r1_addcarryx_u27(&x76, &x77, x75, x37,
+ (x39 & UINT32_C(0x7ffffff)));
+ x78 = ((uint64_t)x76 << 6);
+ x79 = (x74 << 3);
+ x80 = ((uint64_t)x72 << 7);
+ x81 = (x70 << 4);
+ x82 = (x66 << 5);
+ x83 = (x64 << 2);
+ x84 = ((uint64_t)x62 << 6);
+ x85 = (x60 << 3);
+ x86 = ((uint64_t)x58 << 7);
+ x87 = (x56 << 4);
+ x88 = (x52 << 5);
+ x89 = (x50 << 2);
+ x90 = ((uint64_t)x48 << 6);
+ x91 = (x46 << 3);
+ x92 = ((uint64_t)x44 << 7);
+ x93 = (x42 << 4);
+ x94 = (uint8_t)(x40 & UINT8_C(0xff));
+ x95 = (x40 >> 8);
+ x96 = (uint8_t)(x95 & UINT8_C(0xff));
+ x97 = (x95 >> 8);
+ x98 = (uint8_t)(x97 & UINT8_C(0xff));
+ x99 = (uint8_t)(x97 >> 8);
+ x100 = (x93 + (uint32_t)x99);
+ x101 = (uint8_t)(x100 & UINT8_C(0xff));
+ x102 = (x100 >> 8);
+ x103 = (uint8_t)(x102 & UINT8_C(0xff));
+ x104 = (x102 >> 8);
+ x105 = (uint8_t)(x104 & UINT8_C(0xff));
+ x106 = (uint8_t)(x104 >> 8);
+ x107 = (x92 + (uint64_t)x106);
+ x108 = (uint8_t)(x107 & UINT8_C(0xff));
+ x109 = (uint32_t)(x107 >> 8);
+ x110 = (uint8_t)(x109 & UINT8_C(0xff));
+ x111 = (x109 >> 8);
+ x112 = (uint8_t)(x111 & UINT8_C(0xff));
+ x113 = (x111 >> 8);
+ x114 = (uint8_t)(x113 & UINT8_C(0xff));
+ x115 = (uint8_t)(x113 >> 8);
+ x116 = (x91 + (uint32_t)x115);
+ x117 = (uint8_t)(x116 & UINT8_C(0xff));
+ x118 = (x116 >> 8);
+ x119 = (uint8_t)(x118 & UINT8_C(0xff));
+ x120 = (x118 >> 8);
+ x121 = (uint8_t)(x120 & UINT8_C(0xff));
+ x122 = (uint8_t)(x120 >> 8);
+ x123 = (x90 + (uint64_t)x122);
+ x124 = (uint8_t)(x123 & UINT8_C(0xff));
+ x125 = (uint32_t)(x123 >> 8);
+ x126 = (uint8_t)(x125 & UINT8_C(0xff));
+ x127 = (x125 >> 8);
+ x128 = (uint8_t)(x127 & UINT8_C(0xff));
+ x129 = (x127 >> 8);
+ x130 = (uint8_t)(x129 & UINT8_C(0xff));
+ x131 = (uint8_t)(x129 >> 8);
+ x132 = (x89 + (uint32_t)x131);
+ x133 = (uint8_t)(x132 & UINT8_C(0xff));
+ x134 = (x132 >> 8);
+ x135 = (uint8_t)(x134 & UINT8_C(0xff));
+ x136 = (x134 >> 8);
+ x137 = (uint8_t)(x136 & UINT8_C(0xff));
+ x138 = (uint8_t)(x136 >> 8);
+ x139 = (x88 + (uint32_t)x138);
+ x140 = (uint8_t)(x139 & UINT8_C(0xff));
+ x141 = (x139 >> 8);
+ x142 = (uint8_t)(x141 & UINT8_C(0xff));
+ x143 = (x141 >> 8);
+ x144 = (uint8_t)(x143 & UINT8_C(0xff));
+ x145 = (uint8_t)(x143 >> 8);
+ x146 = (uint8_t)(x54 & UINT8_C(0xff));
+ x147 = (x54 >> 8);
+ x148 = (uint8_t)(x147 & UINT8_C(0xff));
+ x149 = (x147 >> 8);
+ x150 = (uint8_t)(x149 & UINT8_C(0xff));
+ x151 = (uint8_t)(x149 >> 8);
+ x152 = (x87 + (uint32_t)x151);
+ x153 = (uint8_t)(x152 & UINT8_C(0xff));
+ x154 = (x152 >> 8);
+ x155 = (uint8_t)(x154 & UINT8_C(0xff));
+ x156 = (x154 >> 8);
+ x157 = (uint8_t)(x156 & UINT8_C(0xff));
+ x158 = (uint8_t)(x156 >> 8);
+ x159 = (x86 + (uint64_t)x158);
+ x160 = (uint8_t)(x159 & UINT8_C(0xff));
+ x161 = (uint32_t)(x159 >> 8);
+ x162 = (uint8_t)(x161 & UINT8_C(0xff));
+ x163 = (x161 >> 8);
+ x164 = (uint8_t)(x163 & UINT8_C(0xff));
+ x165 = (x163 >> 8);
+ x166 = (uint8_t)(x165 & UINT8_C(0xff));
+ x167 = (uint8_t)(x165 >> 8);
+ x168 = (x85 + (uint32_t)x167);
+ x169 = (uint8_t)(x168 & UINT8_C(0xff));
+ x170 = (x168 >> 8);
+ x171 = (uint8_t)(x170 & UINT8_C(0xff));
+ x172 = (x170 >> 8);
+ x173 = (uint8_t)(x172 & UINT8_C(0xff));
+ x174 = (uint8_t)(x172 >> 8);
+ x175 = (x84 + (uint64_t)x174);
+ x176 = (uint8_t)(x175 & UINT8_C(0xff));
+ x177 = (uint32_t)(x175 >> 8);
+ x178 = (uint8_t)(x177 & UINT8_C(0xff));
+ x179 = (x177 >> 8);
+ x180 = (uint8_t)(x179 & UINT8_C(0xff));
+ x181 = (x179 >> 8);
+ x182 = (uint8_t)(x181 & UINT8_C(0xff));
+ x183 = (uint8_t)(x181 >> 8);
+ x184 = (x83 + (uint32_t)x183);
+ x185 = (uint8_t)(x184 & UINT8_C(0xff));
+ x186 = (x184 >> 8);
+ x187 = (uint8_t)(x186 & UINT8_C(0xff));
+ x188 = (x186 >> 8);
+ x189 = (uint8_t)(x188 & UINT8_C(0xff));
+ x190 = (uint8_t)(x188 >> 8);
+ x191 = (x82 + (uint32_t)x190);
+ x192 = (uint8_t)(x191 & UINT8_C(0xff));
+ x193 = (x191 >> 8);
+ x194 = (uint8_t)(x193 & UINT8_C(0xff));
+ x195 = (x193 >> 8);
+ x196 = (uint8_t)(x195 & UINT8_C(0xff));
+ x197 = (uint8_t)(x195 >> 8);
+ x198 = (uint8_t)(x68 & UINT8_C(0xff));
+ x199 = (x68 >> 8);
+ x200 = (uint8_t)(x199 & UINT8_C(0xff));
+ x201 = (x199 >> 8);
+ x202 = (uint8_t)(x201 & UINT8_C(0xff));
+ x203 = (uint8_t)(x201 >> 8);
+ x204 = (x81 + (uint32_t)x203);
+ x205 = (uint8_t)(x204 & UINT8_C(0xff));
+ x206 = (x204 >> 8);
+ x207 = (uint8_t)(x206 & UINT8_C(0xff));
+ x208 = (x206 >> 8);
+ x209 = (uint8_t)(x208 & UINT8_C(0xff));
+ x210 = (uint8_t)(x208 >> 8);
+ x211 = (x80 + (uint64_t)x210);
+ x212 = (uint8_t)(x211 & UINT8_C(0xff));
+ x213 = (uint32_t)(x211 >> 8);
+ x214 = (uint8_t)(x213 & UINT8_C(0xff));
+ x215 = (x213 >> 8);
+ x216 = (uint8_t)(x215 & UINT8_C(0xff));
+ x217 = (x215 >> 8);
+ x218 = (uint8_t)(x217 & UINT8_C(0xff));
+ x219 = (uint8_t)(x217 >> 8);
+ x220 = (x79 + (uint32_t)x219);
+ x221 = (uint8_t)(x220 & UINT8_C(0xff));
+ x222 = (x220 >> 8);
+ x223 = (uint8_t)(x222 & UINT8_C(0xff));
+ x224 = (x222 >> 8);
+ x225 = (uint8_t)(x224 & UINT8_C(0xff));
+ x226 = (uint8_t)(x224 >> 8);
+ x227 = (x78 + (uint64_t)x226);
+ x228 = (uint8_t)(x227 & UINT8_C(0xff));
+ x229 = (uint32_t)(x227 >> 8);
+ x230 = (uint8_t)(x229 & UINT8_C(0xff));
+ x231 = (x229 >> 8);
+ x232 = (uint8_t)(x231 & UINT8_C(0xff));
+ x233 = (x231 >> 8);
+ x234 = (uint8_t)(x233 & UINT8_C(0xff));
+ x235 = (fiat_secp521r1_uint1)(x233 >> 8);
+ out1[0] = x94;
+ out1[1] = x96;
+ out1[2] = x98;
+ out1[3] = x101;
+ out1[4] = x103;
+ out1[5] = x105;
+ out1[6] = x108;
+ out1[7] = x110;
+ out1[8] = x112;
+ out1[9] = x114;
+ out1[10] = x117;
+ out1[11] = x119;
+ out1[12] = x121;
+ out1[13] = x124;
+ out1[14] = x126;
+ out1[15] = x128;
+ out1[16] = x130;
+ out1[17] = x133;
+ out1[18] = x135;
+ out1[19] = x137;
+ out1[20] = x140;
+ out1[21] = x142;
+ out1[22] = x144;
+ out1[23] = x145;
+ out1[24] = x146;
+ out1[25] = x148;
+ out1[26] = x150;
+ out1[27] = x153;
+ out1[28] = x155;
+ out1[29] = x157;
+ out1[30] = x160;
+ out1[31] = x162;
+ out1[32] = x164;
+ out1[33] = x166;
+ out1[34] = x169;
+ out1[35] = x171;
+ out1[36] = x173;
+ out1[37] = x176;
+ out1[38] = x178;
+ out1[39] = x180;
+ out1[40] = x182;
+ out1[41] = x185;
+ out1[42] = x187;
+ out1[43] = x189;
+ out1[44] = x192;
+ out1[45] = x194;
+ out1[46] = x196;
+ out1[47] = x197;
+ out1[48] = x198;
+ out1[49] = x200;
+ out1[50] = x202;
+ out1[51] = x205;
+ out1[52] = x207;
+ out1[53] = x209;
+ out1[54] = x212;
+ out1[55] = x214;
+ out1[56] = x216;
+ out1[57] = x218;
+ out1[58] = x221;
+ out1[59] = x223;
+ out1[60] = x225;
+ out1[61] = x228;
+ out1[62] = x230;
+ out1[63] = x232;
+ out1[64] = x234;
+ out1[65] = x235;
+}
+
+/*
+ * The function fiat_secp521r1_from_bytes deserializes a field element from bytes in little-endian order.
+ *
+ * Postconditions:
+ * eval out1 mod m = bytes_eval arg1 mod m
+ *
+ * Input Bounds:
+ * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+ */
+static void
+fiat_secp521r1_from_bytes(fiat_secp521r1_tight_field_element out1,
+ const uint8_t arg1[66])
+{ /* Straight-line unpacking of the 66 input bytes into out1[0..18]: x1..x66 are the input bytes moved to their bit offset inside a limb, x67..x161 are running sums, masked limb values and carries. */
+ uint32_t x1;
+ uint32_t x2;
+ uint32_t x3;
+ uint32_t x4;
+ uint32_t x5;
+ uint32_t x6;
+ uint32_t x7;
+ uint64_t x8;
+ uint32_t x9;
+ uint32_t x10;
+ uint32_t x11;
+ uint32_t x12;
+ uint32_t x13;
+ uint32_t x14;
+ uint32_t x15;
+ uint32_t x16;
+ uint32_t x17;
+ uint8_t x18;
+ uint32_t x19;
+ uint32_t x20;
+ uint32_t x21;
+ uint32_t x22;
+ uint32_t x23;
+ uint32_t x24;
+ uint64_t x25;
+ uint32_t x26;
+ uint32_t x27;
+ uint32_t x28;
+ uint32_t x29;
+ uint32_t x30;
+ uint32_t x31;
+ uint64_t x32;
+ uint32_t x33;
+ uint32_t x34;
+ uint32_t x35;
+ uint32_t x36;
+ uint32_t x37;
+ uint32_t x38;
+ uint32_t x39;
+ uint32_t x40;
+ uint32_t x41;
+ uint8_t x42;
+ uint32_t x43;
+ uint32_t x44;
+ uint32_t x45;
+ uint32_t x46;
+ uint32_t x47;
+ uint32_t x48;
+ uint64_t x49;
+ uint32_t x50;
+ uint32_t x51;
+ uint32_t x52;
+ uint32_t x53;
+ uint32_t x54;
+ uint32_t x55;
+ uint64_t x56;
+ uint32_t x57;
+ uint32_t x58;
+ uint32_t x59;
+ uint32_t x60;
+ uint32_t x61;
+ uint32_t x62;
+ uint32_t x63;
+ uint32_t x64;
+ uint32_t x65;
+ uint8_t x66;
+ uint32_t x67;
+ uint32_t x68;
+ uint32_t x69;
+ uint32_t x70;
+ uint8_t x71;
+ uint32_t x72;
+ uint32_t x73;
+ uint32_t x74;
+ uint32_t x75;
+ fiat_secp521r1_uint1 x76;
+ uint32_t x77;
+ uint32_t x78;
+ uint32_t x79;
+ uint64_t x80;
+ uint32_t x81;
+ uint8_t x82;
+ uint32_t x83;
+ uint32_t x84;
+ uint32_t x85;
+ uint32_t x86;
+ uint8_t x87;
+ uint32_t x88;
+ uint32_t x89;
+ uint32_t x90;
+ uint64_t x91;
+ uint32_t x92;
+ uint8_t x93;
+ uint32_t x94;
+ uint32_t x95;
+ uint32_t x96;
+ uint32_t x97;
+ uint8_t x98;
+ uint32_t x99;
+ uint32_t x100;
+ uint32_t x101;
+ uint32_t x102;
+ uint32_t x103;
+ uint32_t x104;
+ uint32_t x105;
+ uint8_t x106;
+ uint32_t x107;
+ uint32_t x108;
+ uint32_t x109;
+ uint32_t x110;
+ fiat_secp521r1_uint1 x111;
+ uint32_t x112;
+ uint32_t x113;
+ uint32_t x114;
+ uint64_t x115;
+ uint32_t x116;
+ uint8_t x117;
+ uint32_t x118;
+ uint32_t x119;
+ uint32_t x120;
+ uint32_t x121;
+ uint8_t x122;
+ uint32_t x123;
+ uint32_t x124;
+ uint32_t x125;
+ uint64_t x126;
+ uint32_t x127;
+ uint8_t x128;
+ uint32_t x129;
+ uint32_t x130;
+ uint32_t x131;
+ uint32_t x132;
+ uint8_t x133;
+ uint32_t x134;
+ uint32_t x135;
+ uint32_t x136;
+ uint32_t x137;
+ uint32_t x138;
+ uint32_t x139;
+ uint32_t x140;
+ uint8_t x141;
+ uint32_t x142;
+ uint32_t x143;
+ uint32_t x144;
+ uint32_t x145;
+ fiat_secp521r1_uint1 x146;
+ uint32_t x147;
+ uint32_t x148;
+ uint32_t x149;
+ uint64_t x150;
+ uint32_t x151;
+ uint8_t x152;
+ uint32_t x153;
+ uint32_t x154;
+ uint32_t x155;
+ uint32_t x156;
+ uint8_t x157;
+ uint32_t x158;
+ uint32_t x159;
+ uint32_t x160;
+ uint32_t x161;
+ x1 = ((uint32_t)(fiat_secp521r1_uint1)(arg1[65]) << 26); /* last byte is cast to fiat_secp521r1_uint1 first; its documented input bound is [0x0 ~> 0x1] */
+ x2 = ((uint32_t)(arg1[64]) << 18);
+ x3 = ((uint32_t)(arg1[63]) << 10);
+ x4 = ((uint32_t)(arg1[62]) << 2);
+ x5 = ((uint32_t)(arg1[61]) << 21);
+ x6 = ((uint32_t)(arg1[60]) << 13);
+ x7 = ((uint32_t)(arg1[59]) << 5);
+ x8 = ((uint64_t)(arg1[58]) << 25); /* 8-bit value shifted by 25 can occupy 33 bits, hence the 64-bit temporary */
+ x9 = ((uint32_t)(arg1[57]) << 17);
+ x10 = ((uint32_t)(arg1[56]) << 9);
+ x11 = ((uint32_t)(arg1[55]) * 0x2); /* multiplication by 2 moves the byte up one bit position */
+ x12 = ((uint32_t)(arg1[54]) << 20);
+ x13 = ((uint32_t)(arg1[53]) << 12);
+ x14 = ((uint32_t)(arg1[52]) << 4);
+ x15 = ((uint32_t)(arg1[51]) << 24);
+ x16 = ((uint32_t)(arg1[50]) << 16);
+ x17 = ((uint32_t)(arg1[49]) << 8);
+ x18 = (arg1[48]); /* used unshifted: lowest byte of the sum that becomes out1[14] */
+ x19 = ((uint32_t)(arg1[47]) << 19);
+ x20 = ((uint32_t)(arg1[46]) << 11);
+ x21 = ((uint32_t)(arg1[45]) << 3);
+ x22 = ((uint32_t)(arg1[44]) << 22);
+ x23 = ((uint32_t)(arg1[43]) << 14);
+ x24 = ((uint32_t)(arg1[42]) << 6);
+ x25 = ((uint64_t)(arg1[41]) << 26); /* 8-bit value shifted by 26 can occupy 34 bits, hence the 64-bit temporary */
+ x26 = ((uint32_t)(arg1[40]) << 18);
+ x27 = ((uint32_t)(arg1[39]) << 10);
+ x28 = ((uint32_t)(arg1[38]) << 2);
+ x29 = ((uint32_t)(arg1[37]) << 21);
+ x30 = ((uint32_t)(arg1[36]) << 13);
+ x31 = ((uint32_t)(arg1[35]) << 5);
+ x32 = ((uint64_t)(arg1[34]) << 25); /* can occupy 33 bits, hence the 64-bit temporary */
+ x33 = ((uint32_t)(arg1[33]) << 17);
+ x34 = ((uint32_t)(arg1[32]) << 9);
+ x35 = ((uint32_t)(arg1[31]) * 0x2); /* multiplication by 2 moves the byte up one bit position */
+ x36 = ((uint32_t)(arg1[30]) << 20);
+ x37 = ((uint32_t)(arg1[29]) << 12);
+ x38 = ((uint32_t)(arg1[28]) << 4);
+ x39 = ((uint32_t)(arg1[27]) << 24);
+ x40 = ((uint32_t)(arg1[26]) << 16);
+ x41 = ((uint32_t)(arg1[25]) << 8);
+ x42 = (arg1[24]); /* used unshifted: lowest byte of the sum that becomes out1[7] */
+ x43 = ((uint32_t)(arg1[23]) << 19);
+ x44 = ((uint32_t)(arg1[22]) << 11);
+ x45 = ((uint32_t)(arg1[21]) << 3);
+ x46 = ((uint32_t)(arg1[20]) << 22);
+ x47 = ((uint32_t)(arg1[19]) << 14);
+ x48 = ((uint32_t)(arg1[18]) << 6);
+ x49 = ((uint64_t)(arg1[17]) << 26); /* can occupy 34 bits, hence the 64-bit temporary */
+ x50 = ((uint32_t)(arg1[16]) << 18);
+ x51 = ((uint32_t)(arg1[15]) << 10);
+ x52 = ((uint32_t)(arg1[14]) << 2);
+ x53 = ((uint32_t)(arg1[13]) << 21);
+ x54 = ((uint32_t)(arg1[12]) << 13);
+ x55 = ((uint32_t)(arg1[11]) << 5);
+ x56 = ((uint64_t)(arg1[10]) << 25); /* can occupy 33 bits, hence the 64-bit temporary */
+ x57 = ((uint32_t)(arg1[9]) << 17);
+ x58 = ((uint32_t)(arg1[8]) << 9);
+ x59 = ((uint32_t)(arg1[7]) * 0x2); /* multiplication by 2 moves the byte up one bit position */
+ x60 = ((uint32_t)(arg1[6]) << 20);
+ x61 = ((uint32_t)(arg1[5]) << 12);
+ x62 = ((uint32_t)(arg1[4]) << 4);
+ x63 = ((uint32_t)(arg1[3]) << 24);
+ x64 = ((uint32_t)(arg1[2]) << 16);
+ x65 = ((uint32_t)(arg1[1]) << 8);
+ x66 = (arg1[0]); /* used unshifted: lowest byte of the sum that becomes out1[0] */
+ x67 = (x65 + (uint32_t)x66); /* accumulate bytes 0..3 into one 32-bit word */
+ x68 = (x64 + x67);
+ x69 = (x63 + x68);
+ x70 = (x69 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[0] */
+ x71 = (uint8_t)(x69 >> 28); /* bits above the low 28 are carried into the next limb's sum */
+ x72 = (x62 + (uint32_t)x71);
+ x73 = (x61 + x72);
+ x74 = (x60 + x73);
+ x75 = (x74 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[1] */
+ x76 = (fiat_secp521r1_uint1)(x74 >> 27); /* carry into the next limb's sum */
+ x77 = (x59 + (uint32_t)x76);
+ x78 = (x58 + x77);
+ x79 = (x57 + x78);
+ x80 = (x56 + x79); /* 64-bit sum because x56 is a 64-bit temporary */
+ x81 = (uint32_t)(x80 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[2] */
+ x82 = (uint8_t)(x80 >> 28); /* carry into the next limb's sum */
+ x83 = (x55 + (uint32_t)x82);
+ x84 = (x54 + x83);
+ x85 = (x53 + x84);
+ x86 = (x85 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[3] */
+ x87 = (uint8_t)(x85 >> 27); /* carry into the next limb's sum */
+ x88 = (x52 + (uint32_t)x87);
+ x89 = (x51 + x88);
+ x90 = (x50 + x89);
+ x91 = (x49 + x90); /* 64-bit sum because x49 is a 64-bit temporary */
+ x92 = (uint32_t)(x91 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[4] */
+ x93 = (uint8_t)(x91 >> 28); /* carry into the next limb's sum */
+ x94 = (x48 + (uint32_t)x93);
+ x95 = (x47 + x94);
+ x96 = (x46 + x95);
+ x97 = (x96 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[5] */
+ x98 = (uint8_t)(x96 >> 27); /* carry into the next limb's sum */
+ x99 = (x45 + (uint32_t)x98);
+ x100 = (x44 + x99);
+ x101 = (x43 + x100); /* stored unmasked as out1[6]; no carry is taken from it */
+ x102 = (x41 + (uint32_t)x42); /* a fresh sum starts at byte 24, with no carry-in from x101 */
+ x103 = (x40 + x102);
+ x104 = (x39 + x103);
+ x105 = (x104 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[7] */
+ x106 = (uint8_t)(x104 >> 28); /* carry into the next limb's sum */
+ x107 = (x38 + (uint32_t)x106);
+ x108 = (x37 + x107);
+ x109 = (x36 + x108);
+ x110 = (x109 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[8] */
+ x111 = (fiat_secp521r1_uint1)(x109 >> 27); /* carry into the next limb's sum */
+ x112 = (x35 + (uint32_t)x111);
+ x113 = (x34 + x112);
+ x114 = (x33 + x113);
+ x115 = (x32 + x114); /* 64-bit sum because x32 is a 64-bit temporary */
+ x116 = (uint32_t)(x115 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[9] */
+ x117 = (uint8_t)(x115 >> 28); /* carry into the next limb's sum */
+ x118 = (x31 + (uint32_t)x117);
+ x119 = (x30 + x118);
+ x120 = (x29 + x119);
+ x121 = (x120 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[10] */
+ x122 = (uint8_t)(x120 >> 27); /* carry into the next limb's sum */
+ x123 = (x28 + (uint32_t)x122);
+ x124 = (x27 + x123);
+ x125 = (x26 + x124);
+ x126 = (x25 + x125); /* 64-bit sum because x25 is a 64-bit temporary */
+ x127 = (uint32_t)(x126 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[11] */
+ x128 = (uint8_t)(x126 >> 28); /* carry into the next limb's sum */
+ x129 = (x24 + (uint32_t)x128);
+ x130 = (x23 + x129);
+ x131 = (x22 + x130);
+ x132 = (x131 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[12] */
+ x133 = (uint8_t)(x131 >> 27); /* carry into the next limb's sum */
+ x134 = (x21 + (uint32_t)x133);
+ x135 = (x20 + x134);
+ x136 = (x19 + x135); /* stored unmasked as out1[13]; no carry is taken from it */
+ x137 = (x17 + (uint32_t)x18); /* a fresh sum starts at byte 48, with no carry-in from x136 */
+ x138 = (x16 + x137);
+ x139 = (x15 + x138);
+ x140 = (x139 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[14] */
+ x141 = (uint8_t)(x139 >> 28); /* carry into the next limb's sum */
+ x142 = (x14 + (uint32_t)x141);
+ x143 = (x13 + x142);
+ x144 = (x12 + x143);
+ x145 = (x144 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[15] */
+ x146 = (fiat_secp521r1_uint1)(x144 >> 27); /* carry into the next limb's sum */
+ x147 = (x11 + (uint32_t)x146);
+ x148 = (x10 + x147);
+ x149 = (x9 + x148);
+ x150 = (x8 + x149); /* 64-bit sum because x8 is a 64-bit temporary */
+ x151 = (uint32_t)(x150 & UINT32_C(0xfffffff)); /* low 28 bits -> out1[16] */
+ x152 = (uint8_t)(x150 >> 28); /* carry into the next limb's sum */
+ x153 = (x7 + (uint32_t)x152);
+ x154 = (x6 + x153);
+ x155 = (x5 + x154);
+ x156 = (x155 & UINT32_C(0x7ffffff)); /* low 27 bits -> out1[17] */
+ x157 = (uint8_t)(x155 >> 27); /* carry into the final limb's sum */
+ x158 = (x4 + (uint32_t)x157);
+ x159 = (x3 + x158);
+ x160 = (x2 + x159);
+ x161 = (x1 + x160); /* stored unmasked as out1[18], the last limb */
+ out1[0] = x70; /* all 19 limbs are written only after every temporary has been computed */
+ out1[1] = x75;
+ out1[2] = x81;
+ out1[3] = x86;
+ out1[4] = x92;
+ out1[5] = x97;
+ out1[6] = x101;
+ out1[7] = x105;
+ out1[8] = x110;
+ out1[9] = x116;
+ out1[10] = x121;
+ out1[11] = x127;
+ out1[12] = x132;
+ out1[13] = x136;
+ out1[14] = x140;
+ out1[15] = x145;
+ out1[16] = x151;
+ out1[17] = x156;
+ out1[18] = x161;
+}
+
+/* END verbatim fiat code */
+
+/* curve-related constants */
+
+static const limb_t const_one[19] = { /* 19 limbs: element 0 is 1, the other 18 are 0; presumably the field element one, least-significant limb first - confirm against limb_t users */
+ UINT32_C(0x00000001), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000), UINT32_C(0x00000000), UINT32_C(0x00000000),
+ UINT32_C(0x00000000)
+};
+
+static const limb_t const_b[19] = { /* 19 precomputed limbs; presumably the curve coefficient b, least-significant limb first, in the same limb layout as the field elements above - confirm against the curve parameters */
+ UINT32_C(0x0B503F00), UINT32_C(0x0451FD46), UINT32_C(0x0869E3DE),
+ UINT32_C(0x03F107A5), UINT32_C(0x0C1CD5CF), UINT32_C(0x074EEC6F),
+ UINT32_C(0x00B29605), UINT32_C(0x0C7E937B), UINT32_C(0x0193951E),
+ UINT32_C(0x0213C2AC), UINT32_C(0x013231DE), UINT32_C(0x07CEE2D2),
+ UINT32_C(0x06E66CC5), UINT32_C(0x0516D392), UINT32_C(0x068540EE),
+ UINT32_C(0x01A21A0B), UINT32_C(0x09343F25), UINT32_C(0x072C31C3),
+ UINT32_C(0x014654FA)
+};
+
+/* LUT for scalar multiplication by comb interleaving */
+static const pt_aff_t lut_cmb[13][16] = {
+ {
+ { { UINT32_C(0x02E5BD66), UINT32_C(0x07E7E31C), UINT32_C(0x048537F2),
+ UINT32_C(0x067830AD), UINT32_C(0x0378CD22), UINT32_C(0x01E8BFEA),
+ UINT32_C(0x07F0EE09), UINT32_C(0x0FE75928), UINT32_C(0x04B5E77E),
+ UINT32_C(0x0A7B7542), UINT32_C(0x05EC0D69), UINT32_C(0x0487E0A2),
+ UINT32_C(0x06414FED), UINT32_C(0x04E32409), UINT32_C(0x0395B442),
+ UINT32_C(0x03ECB662), UINT32_C(0x09D39B3C), UINT32_C(0x00D6E080),
+ UINT32_C(0x031A1638) },
+ { UINT32_C(0x0FD16650), UINT32_C(0x03E94769), UINT32_C(0x05848111),
+ UINT32_C(0x0610D44E), UINT32_C(0x0D84D4F1), UINT32_C(0x004FEB41),
+ UINT32_C(0x062A85C8), UINT32_C(0x0EF42640), UINT32_C(0x06E72995),
+ UINT32_C(0x0CCC592F), UINT32_C(0x07A2E4E7), UINT32_C(0x01A05EBE),
+ UINT32_C(0x0255E6D1), UINT32_C(0x04C7AA22), UINT32_C(0x0C7D1BD9),
+ UINT32_C(0x00A5FB42), UINT32_C(0x078008B9), UINT32_C(0x054F1347),
+ UINT32_C(0x0460E4A5) } },
+ { { UINT32_C(0x0E37AD7D), UINT32_C(0x0119D2ED), UINT32_C(0x05D40B4B),
+ UINT32_C(0x0210C586), UINT32_C(0x086EBAD2), UINT32_C(0x05AD67F8),
+ UINT32_C(0x00ED35E8), UINT32_C(0x0A483205), UINT32_C(0x03F164A3),
+ UINT32_C(0x051BA35A), UINT32_C(0x074225AF), UINT32_C(0x0AE796B5),
+ UINT32_C(0x06C48F66), UINT32_C(0x05A95372), UINT32_C(0x05959479),
+ UINT32_C(0x01D6A64B), UINT32_C(0x0232BBB2), UINT32_C(0x04887BC5),
+ UINT32_C(0x069CF4D4) },
+ { UINT32_C(0x0E86C0E5), UINT32_C(0x0588CA1E), UINT32_C(0x0B2084BE),
+ UINT32_C(0x01379274), UINT32_C(0x0C33C417), UINT32_C(0x0477B0F1),
+ UINT32_C(0x016AD676), UINT32_C(0x0DC575B0), UINT32_C(0x02DD4CF8),
+ UINT32_C(0x0B9DD85C), UINT32_C(0x0563F46A), UINT32_C(0x0C5F4BE2),
+ UINT32_C(0x020AA740), UINT32_C(0x078AABFD), UINT32_C(0x0AB814F2),
+ UINT32_C(0x01F86C6C), UINT32_C(0x05BBB32F), UINT32_C(0x072FBF4C),
+ UINT32_C(0x04FA6C0E) } },
+ { { UINT32_C(0x0C8F3078), UINT32_C(0x02B5096E), UINT32_C(0x062E71AB),
+ UINT32_C(0x043CDB12), UINT32_C(0x068CA75F), UINT32_C(0x03C4DF9E),
+ UINT32_C(0x038897F5), UINT32_C(0x0E301423), UINT32_C(0x03C0C6D5),
+ UINT32_C(0x0F59C870), UINT32_C(0x03571E2E), UINT32_C(0x04933C0F),
+ UINT32_C(0x076D4FC3), UINT32_C(0x03D2CB77), UINT32_C(0x004EB0BF),
+ UINT32_C(0x03C3391C), UINT32_C(0x08658E7B), UINT32_C(0x00A524F4),
+ UINT32_C(0x0194AFCF) },
+ { UINT32_C(0x0EB090CB), UINT32_C(0x03CC3E8D), UINT32_C(0x09EFF02E),
+ UINT32_C(0x00E4AE6A), UINT32_C(0x0DE747C0), UINT32_C(0x00473D7F),
+ UINT32_C(0x0188AA01), UINT32_C(0x072CF374), UINT32_C(0x06897C90),
+ UINT32_C(0x08E10F76), UINT32_C(0x02F93406), UINT32_C(0x0147B760),
+ UINT32_C(0x03A1CB80), UINT32_C(0x00E6C7F4), UINT32_C(0x0A811291),
+ UINT32_C(0x02B73114), UINT32_C(0x03ADD914), UINT32_C(0x037BACC0),
+ UINT32_C(0x056F9BBC) } },
+ { { UINT32_C(0x0816ECD4), UINT32_C(0x04EAD882), UINT32_C(0x04C33403),
+ UINT32_C(0x07EA1FB8), UINT32_C(0x0F11BE54), UINT32_C(0x043738EE),
+ UINT32_C(0x064D36F9), UINT32_C(0x0FC698D8), UINT32_C(0x0308D0AB),
+ UINT32_C(0x0298BB18), UINT32_C(0x02585EE2), UINT32_C(0x08A3C063),
+ UINT32_C(0x023D520C), UINT32_C(0x02F91707), UINT32_C(0x0B073A0C),
+ UINT32_C(0x0365FDA0), UINT32_C(0x0EC68DDD), UINT32_C(0x0333AB6F),
+ UINT32_C(0x015B5747) },
+ { UINT32_C(0x0525251B), UINT32_C(0x06B8BC90), UINT32_C(0x0DF8F6B8),
+ UINT32_C(0x06254BBB), UINT32_C(0x097E79D9), UINT32_C(0x01647386),
+ UINT32_C(0x04A91D1A), UINT32_C(0x0DEC9E2B), UINT32_C(0x050F293C),
+ UINT32_C(0x07BCAAD7), UINT32_C(0x033144D9), UINT32_C(0x0375C76F),
+ UINT32_C(0x040A093C), UINT32_C(0x05AE2C16), UINT32_C(0x09D68478),
+ UINT32_C(0x058317A3), UINT32_C(0x054221A3), UINT32_C(0x07B37554),
+ UINT32_C(0x00F4B46D) } },
+ { { UINT32_C(0x07CBE207), UINT32_C(0x04562796), UINT32_C(0x0A50CC3E),
+ UINT32_C(0x0757B0B9), UINT32_C(0x063D3D42), UINT32_C(0x07DC968C),
+ UINT32_C(0x079E2AB6), UINT32_C(0x0134DA35), UINT32_C(0x029E1396),
+ UINT32_C(0x0D6CCAE8), UINT32_C(0x0628B718), UINT32_C(0x0A64B12A),
+ UINT32_C(0x06E621D1), UINT32_C(0x0769A2A0), UINT32_C(0x0156D488),
+ UINT32_C(0x075BF157), UINT32_C(0x04304D45), UINT32_C(0x046B3C3C),
+ UINT32_C(0x05614E27) },
+ { UINT32_C(0x09AD2A4E), UINT32_C(0x020EA86B), UINT32_C(0x001E6875),
+ UINT32_C(0x055D2511), UINT32_C(0x01F5CDB0), UINT32_C(0x03D2AFF6),
+ UINT32_C(0x007FAB76), UINT32_C(0x0057AC84), UINT32_C(0x069E5756),
+ UINT32_C(0x0688DC1A), UINT32_C(0x0744C7BB), UINT32_C(0x0EDB2096),
+ UINT32_C(0x053B873A), UINT32_C(0x01844532), UINT32_C(0x07AE938E),
+ UINT32_C(0x055557A2), UINT32_C(0x0BE73E16), UINT32_C(0x0193515D),
+ UINT32_C(0x00A8B986) } },
+ { { UINT32_C(0x0A0CDB9A), UINT32_C(0x040E02DD), UINT32_C(0x035205D9),
+ UINT32_C(0x0049F499), UINT32_C(0x02140570), UINT32_C(0x02F8C644),
+ UINT32_C(0x068CD8D7), UINT32_C(0x0663DA1B), UINT32_C(0x05BC5332),
+ UINT32_C(0x022CA5E7), UINT32_C(0x058A9E53), UINT32_C(0x02550FBC),
+ UINT32_C(0x035F05E1), UINT32_C(0x076AEE3F), UINT32_C(0x0B4315CF),
+ UINT32_C(0x01A39573), UINT32_C(0x0BFEA8DE), UINT32_C(0x024B3FBD),
+ UINT32_C(0x0229D610) },
+ { UINT32_C(0x0E48C808), UINT32_C(0x0074F92C), UINT32_C(0x0336BAB1),
+ UINT32_C(0x001C7E90), UINT32_C(0x0CDB72B2), UINT32_C(0x06452A54),
+ UINT32_C(0x01C49198), UINT32_C(0x0B42A4AB), UINT32_C(0x048A90E8),
+ UINT32_C(0x03705637), UINT32_C(0x02BA9C17), UINT32_C(0x024FB4BA),
+ UINT32_C(0x00842F41), UINT32_C(0x01D6EAB3), UINT32_C(0x054FB229),
+ UINT32_C(0x00CA8770), UINT32_C(0x0253093A), UINT32_C(0x07F97744),
+ UINT32_C(0x025BECC0) } },
+ { { UINT32_C(0x02FBCDA7), UINT32_C(0x007848D3), UINT32_C(0x01DFF031),
+ UINT32_C(0x07601567), UINT32_C(0x0BA52FB0), UINT32_C(0x01E6AE23),
+ UINT32_C(0x01AA852F), UINT32_C(0x003C996A), UINT32_C(0x0445908E),
+ UINT32_C(0x070CC265), UINT32_C(0x0257D5EB), UINT32_C(0x08E13BB7),
+ UINT32_C(0x03786D30), UINT32_C(0x049FB9B6), UINT32_C(0x0924861A),
+ UINT32_C(0x0065D2B4), UINT32_C(0x0D5B39AF), UINT32_C(0x07309872),
+ UINT32_C(0x01F8FA63) },
+ { UINT32_C(0x022A71C9), UINT32_C(0x01A01FB0), UINT32_C(0x0FD3EE52),
+ UINT32_C(0x0555F222), UINT32_C(0x0F0D8667), UINT32_C(0x05472FEE),
+ UINT32_C(0x0136FEE9), UINT32_C(0x08BC763F), UINT32_C(0x03D5D583),
+ UINT32_C(0x0C425583), UINT32_C(0x04F5CB83), UINT32_C(0x071A71E9),
+ UINT32_C(0x061B5508), UINT32_C(0x0676A851), UINT32_C(0x03ED5A08),
+ UINT32_C(0x01926DAA), UINT32_C(0x0FDB5234), UINT32_C(0x056DAF03),
+ UINT32_C(0x0423B963) } },
+ { { UINT32_C(0x0CB8DB55), UINT32_C(0x02FE337B), UINT32_C(0x0F257BD3),
+ UINT32_C(0x02D303C7), UINT32_C(0x0C766E36), UINT32_C(0x0723F00C),
+ UINT32_C(0x03C3ADE8), UINT32_C(0x0BD00FFE), UINT32_C(0x01CCE27D),
+ UINT32_C(0x051C2372), UINT32_C(0x06A65BE2), UINT32_C(0x014B5A5E),
+ UINT32_C(0x042D0282), UINT32_C(0x05C7DE61), UINT32_C(0x06D4300F),
+ UINT32_C(0x0558FC54), UINT32_C(0x08CBE082), UINT32_C(0x03579724),
+ UINT32_C(0x01ADAB62) },
+ { UINT32_C(0x01475465), UINT32_C(0x0343480A), UINT32_C(0x057BB2AC),
+ UINT32_C(0x0219888D), UINT32_C(0x06491BF6), UINT32_C(0x00CB25B2),
+ UINT32_C(0x010A4711), UINT32_C(0x09470A80), UINT32_C(0x01062C89),
+ UINT32_C(0x00BDAAFD), UINT32_C(0x020D32E9), UINT32_C(0x02E92D88),
+ UINT32_C(0x026EB483), UINT32_C(0x06F824B5), UINT32_C(0x03EDBF63),
+ UINT32_C(0x0664D233), UINT32_C(0x023AD4F9), UINT32_C(0x04E2AE27),
+ UINT32_C(0x06D1A368) } },
+ { { UINT32_C(0x03110AE0), UINT32_C(0x07817A85), UINT32_C(0x034820ED),
+ UINT32_C(0x00855E1A), UINT32_C(0x003FE30C), UINT32_C(0x06D5A04E),
+ UINT32_C(0x06FA73CC), UINT32_C(0x04FE0287), UINT32_C(0x00A69E67),
+ UINT32_C(0x0A10B0EC), UINT32_C(0x049E4D24), UINT32_C(0x0ED35994),
+ UINT32_C(0x01A7E8AC), UINT32_C(0x04CF74F1), UINT32_C(0x0923906A),
+ UINT32_C(0x03874645), UINT32_C(0x0DB42741), UINT32_C(0x060FE261),
+ UINT32_C(0x06C0376D) },
+ { UINT32_C(0x00E64647), UINT32_C(0x039CB7C7), UINT32_C(0x0EABEA6B),
+ UINT32_C(0x02B29856), UINT32_C(0x00839A41), UINT32_C(0x07C5AB7D),
+ UINT32_C(0x0697B3AB), UINT32_C(0x06DD0BF0), UINT32_C(0x05A564EF),
+ UINT32_C(0x02647BF3), UINT32_C(0x05856454), UINT32_C(0x02A635A2),
+ UINT32_C(0x033DA644), UINT32_C(0x05BCCA9A), UINT32_C(0x0EDDD106),
+ UINT32_C(0x011D4E4A), UINT32_C(0x0AEDB782), UINT32_C(0x03AFB62C),
+ UINT32_C(0x0215A0FC) } },
+ { { UINT32_C(0x08D6A19B), UINT32_C(0x07F0B241), UINT32_C(0x077BC8F1),
+ UINT32_C(0x0063CE4B), UINT32_C(0x0C37FB3D), UINT32_C(0x075E9165),
+ UINT32_C(0x049192AB), UINT32_C(0x06266967), UINT32_C(0x03B30963),
+ UINT32_C(0x01CFE3F4), UINT32_C(0x059B66F2), UINT32_C(0x01FBFFC2),
+ UINT32_C(0x01D577D5), UINT32_C(0x022DBBF0), UINT32_C(0x05A1A072),
+ UINT32_C(0x07948C2D), UINT32_C(0x08690F81), UINT32_C(0x0490C833),
+ UINT32_C(0x02663733) },
+ { UINT32_C(0x0BFD0575), UINT32_C(0x0091A695), UINT32_C(0x07FC8952),
+ UINT32_C(0x0313D53F), UINT32_C(0x0DDFD693), UINT32_C(0x06458C70),
+ UINT32_C(0x058761CC), UINT32_C(0x02EB8CF9), UINT32_C(0x02D963FF),
+ UINT32_C(0x0AEE4EE7), UINT32_C(0x05DC6CA8), UINT32_C(0x0D2B3143),
+ UINT32_C(0x038ADEF3), UINT32_C(0x033E9457), UINT32_C(0x035B245D),
+ UINT32_C(0x01424975), UINT32_C(0x03DAB987), UINT32_C(0x00C4D404),
+ UINT32_C(0x04DF5768) } },
+ { { UINT32_C(0x03C8C9ED), UINT32_C(0x06F39969), UINT32_C(0x08DA5A85),
+ UINT32_C(0x02407274), UINT32_C(0x0D6CDEB2), UINT32_C(0x03B609F5),
+ UINT32_C(0x06CA4BF5), UINT32_C(0x0D62A309), UINT32_C(0x0257EAE4),
+ UINT32_C(0x0CFF528C), UINT32_C(0x07CEB388), UINT32_C(0x0A606548),
+ UINT32_C(0x030BB457), UINT32_C(0x01345DCC), UINT32_C(0x09ED3B10),
+ UINT32_C(0x04855085), UINT32_C(0x07A5F679), UINT32_C(0x00234E85),
+ UINT32_C(0x06872ECB) },
+ { UINT32_C(0x0CBA4DF5), UINT32_C(0x00BC43C9), UINT32_C(0x0996C3CC),
+ UINT32_C(0x01E2EC93), UINT32_C(0x0B15F26C), UINT32_C(0x05CB18FB),
+ UINT32_C(0x05F5A1D1), UINT32_C(0x0A483295), UINT32_C(0x0741A53D),
+ UINT32_C(0x0F4FEFBE), UINT32_C(0x052DED75), UINT32_C(0x09B06028),
+ UINT32_C(0x0671464F), UINT32_C(0x0741E002), UINT32_C(0x0E40CE62),
+ UINT32_C(0x012DA7C5), UINT32_C(0x067A9058), UINT32_C(0x07A9F1DD),
+ UINT32_C(0x04688275) } },
+ { { UINT32_C(0x02AF535C), UINT32_C(0x046A5ECE), UINT32_C(0x0CB00D43),
+ UINT32_C(0x063584D5), UINT32_C(0x0F881F87), UINT32_C(0x02697B14),
+ UINT32_C(0x074F1FC7), UINT32_C(0x0AF5B0AF), UINT32_C(0x06F83FC9),
+ UINT32_C(0x0A8A203E), UINT32_C(0x0469A19B), UINT32_C(0x0A092434),
+ UINT32_C(0x069E17EC), UINT32_C(0x0773D1CD), UINT32_C(0x0F547B8E),
+ UINT32_C(0x01CACEC5), UINT32_C(0x0B26EDB6), UINT32_C(0x03AE5202),
+ UINT32_C(0x06B82C9D) },
+ { UINT32_C(0x0FA0D000), UINT32_C(0x015C3536), UINT32_C(0x0470ADB0),
+ UINT32_C(0x008A151A), UINT32_C(0x030884ED), UINT32_C(0x06EC1F74),
+ UINT32_C(0x01E13D93), UINT32_C(0x0E97FCF4), UINT32_C(0x0043361E),
+ UINT32_C(0x05B81C21), UINT32_C(0x048F0898), UINT32_C(0x00CAD0C5),
+ UINT32_C(0x06243416), UINT32_C(0x03EBACFF), UINT32_C(0x0068471C),
+ UINT32_C(0x022858FC), UINT32_C(0x0A700CD1), UINT32_C(0x004BCA70),
+ UINT32_C(0x03CB25EA) } },
+ { { UINT32_C(0x0F70ACE0), UINT32_C(0x00C2460B), UINT32_C(0x0A7F627F),
+ UINT32_C(0x01D6384B), UINT32_C(0x0C9F9078), UINT32_C(0x02A9923F),
+ UINT32_C(0x02B743F1), UINT32_C(0x0C36EE4D), UINT32_C(0x01856917),
+ UINT32_C(0x03329552), UINT32_C(0x05918A93), UINT32_C(0x0EC471DC),
+ UINT32_C(0x01946C41), UINT32_C(0x00039881), UINT32_C(0x05DFF9D2),
+ UINT32_C(0x05874A6F), UINT32_C(0x04306946), UINT32_C(0x05AB8B53),
+ UINT32_C(0x0553A131) },
+ { UINT32_C(0x04C78230), UINT32_C(0x025BCE40), UINT32_C(0x0CD6DA86),
+ UINT32_C(0x054A8CE5), UINT32_C(0x0BD7BB78), UINT32_C(0x029A965C),
+ UINT32_C(0x068F11B8), UINT32_C(0x02FBC1A0), UINT32_C(0x06354357),
+ UINT32_C(0x0CCD4DBD), UINT32_C(0x051102A2), UINT32_C(0x031FD9B0),
+ UINT32_C(0x02C008A8), UINT32_C(0x00AD491F), UINT32_C(0x0BB60D3F),
+ UINT32_C(0x02A28F80), UINT32_C(0x008E75C4), UINT32_C(0x0522E322),
+ UINT32_C(0x03343F73) } },
+ { { UINT32_C(0x0002D68B), UINT32_C(0x07643017), UINT32_C(0x088AD06A),
+ UINT32_C(0x0408925D), UINT32_C(0x08F2C855), UINT32_C(0x036834C5),
+ UINT32_C(0x0289A9D7), UINT32_C(0x0719D483), UINT32_C(0x032123DA),
+ UINT32_C(0x0B0A9B01), UINT32_C(0x0230FC26), UINT32_C(0x08B0CFCD),
+ UINT32_C(0x074393E1), UINT32_C(0x0439CA9A), UINT32_C(0x089E646F),
+ UINT32_C(0x024D4EB8), UINT32_C(0x036D4EC5), UINT32_C(0x03F0431F),
+ UINT32_C(0x0580DCFB) },
+ { UINT32_C(0x0D90B740), UINT32_C(0x066AECA5), UINT32_C(0x0B5967E7),
+ UINT32_C(0x07CE13A8), UINT32_C(0x0CB918FF), UINT32_C(0x052A2ED5),
+ UINT32_C(0x009DC3A7), UINT32_C(0x092EBC54), UINT32_C(0x07A491ED),
+ UINT32_C(0x0644023D), UINT32_C(0x06F1C343), UINT32_C(0x0EED295B),
+ UINT32_C(0x0173D4B0), UINT32_C(0x04FE8C9E), UINT32_C(0x0C06A3FA),
+ UINT32_C(0x0028401A), UINT32_C(0x0FC38BCB), UINT32_C(0x020029B9),
+ UINT32_C(0x03C565C1) } },
+ { { UINT32_C(0x0EDA25DC), UINT32_C(0x03927618), UINT32_C(0x0EDB2C58),
+ UINT32_C(0x00B2BAA3), UINT32_C(0x0E7BCCF6), UINT32_C(0x03A11FFE),
+ UINT32_C(0x02001D5C), UINT32_C(0x076D7291), UINT32_C(0x029BC068),
+ UINT32_C(0x094260B9), UINT32_C(0x0671EECC), UINT32_C(0x07B0A2FB),
+ UINT32_C(0x047A1899), UINT32_C(0x07CFA289), UINT32_C(0x065A085F),
+ UINT32_C(0x041FBFCB), UINT32_C(0x0050FB67), UINT32_C(0x02D9296D),
+ UINT32_C(0x05D31913) },
+ { UINT32_C(0x021A0C30), UINT32_C(0x07BBBC48), UINT32_C(0x077F7A30),
+ UINT32_C(0x024F84DD), UINT32_C(0x00FC19E6), UINT32_C(0x035C1B4C),
+ UINT32_C(0x02861399), UINT32_C(0x0CE0D90B), UINT32_C(0x00E21952),
+ UINT32_C(0x0A696F7C), UINT32_C(0x03D6F2B5), UINT32_C(0x07F2D73D),
+ UINT32_C(0x03F2D910), UINT32_C(0x00119F7C), UINT32_C(0x01B7B782),
+ UINT32_C(0x02CC95B4), UINT32_C(0x033CD00B), UINT32_C(0x005F0FE8),
+ UINT32_C(0x046BCE9F) } },
+ { { UINT32_C(0x016A8803), UINT32_C(0x057D0E0C), UINT32_C(0x04902444),
+ UINT32_C(0x06BC911C), UINT32_C(0x0C88373E), UINT32_C(0x0302735A),
+ UINT32_C(0x07E0A60D), UINT32_C(0x04C9D429), UINT32_C(0x05543A90),
+ UINT32_C(0x0EE4D9AC), UINT32_C(0x050794BC), UINT32_C(0x0985C982),
+ UINT32_C(0x0595F0A9), UINT32_C(0x05ABA2C4), UINT32_C(0x07307B7D),
+ UINT32_C(0x06A58CDB), UINT32_C(0x08CC2A00), UINT32_C(0x019E61E1),
+ UINT32_C(0x0363A648) },
+ { UINT32_C(0x09792D19), UINT32_C(0x04677C73), UINT32_C(0x08631594),
+ UINT32_C(0x032F8F6A), UINT32_C(0x098EA86F), UINT32_C(0x032B9330),
+ UINT32_C(0x009CD434), UINT32_C(0x04D14790), UINT32_C(0x06B8C324),
+ UINT32_C(0x035461EE), UINT32_C(0x06E597DA), UINT32_C(0x00182BBE),
+ UINT32_C(0x04A3C432), UINT32_C(0x045AA031), UINT32_C(0x014A30EC),
+ UINT32_C(0x009C13A2), UINT32_C(0x0C730FBE), UINT32_C(0x06A8A94C),
+ UINT32_C(0x049EC08E) } },
+ },
+ {
+ { { UINT32_C(0x043C6A8B), UINT32_C(0x069E114E), UINT32_C(0x02D17119),
+ UINT32_C(0x07161008), UINT32_C(0x04253BA7), UINT32_C(0x06D7E9D1),
+ UINT32_C(0x07AFFFEA), UINT32_C(0x0C20088E), UINT32_C(0x009D84CD),
+ UINT32_C(0x094B5A8B), UINT32_C(0x070C9B19), UINT32_C(0x0A140336),
+ UINT32_C(0x059D32DC), UINT32_C(0x07D5C770), UINT32_C(0x0B702098),
+ UINT32_C(0x0646FC6A), UINT32_C(0x06312DAB), UINT32_C(0x05DEF39B),
+ UINT32_C(0x07B32BAC) },
+ { UINT32_C(0x06B04438), UINT32_C(0x0086BBC2), UINT32_C(0x0CE331EB),
+ UINT32_C(0x07A1DB2A), UINT32_C(0x04798584), UINT32_C(0x0632A66E),
+ UINT32_C(0x03A4F5AE), UINT32_C(0x03B41996), UINT32_C(0x061944D5),
+ UINT32_C(0x0E8ECAB0), UINT32_C(0x00E38A9B), UINT32_C(0x0BBF7088),
+ UINT32_C(0x022E1052), UINT32_C(0x00FB1445), UINT32_C(0x0FF1C5EA),
+ UINT32_C(0x034DB2F7), UINT32_C(0x04C560D6), UINT32_C(0x050E7FEA),
+ UINT32_C(0x00B97B7C) } },
+ { { UINT32_C(0x004ED5E3), UINT32_C(0x012DA268), UINT32_C(0x08C92EF3),
+ UINT32_C(0x06F60BF9), UINT32_C(0x0656B119), UINT32_C(0x014823AF),
+ UINT32_C(0x058D04AC), UINT32_C(0x099D3419), UINT32_C(0x00CFAE71),
+ UINT32_C(0x0B423A38), UINT32_C(0x05EA80E2), UINT32_C(0x06C1F218),
+ UINT32_C(0x03E72AD5), UINT32_C(0x0691F49A), UINT32_C(0x04310FAB),
+ UINT32_C(0x05D250AD), UINT32_C(0x084D7BFA), UINT32_C(0x070595DE),
+ UINT32_C(0x017825D9) },
+ { UINT32_C(0x0A7D5B37), UINT32_C(0x00B0A7A2), UINT32_C(0x0ED3BDEF),
+ UINT32_C(0x02B29FDB), UINT32_C(0x085BCC71), UINT32_C(0x0455FDD9),
+ UINT32_C(0x0595CF1F), UINT32_C(0x0040CCA6), UINT32_C(0x04FA2F23),
+ UINT32_C(0x04A05DD3), UINT32_C(0x07E18B4E), UINT32_C(0x045A2A46),
+ UINT32_C(0x058F2043), UINT32_C(0x038FC52D), UINT32_C(0x0A7666DC),
+ UINT32_C(0x0701CE42), UINT32_C(0x04B38B92), UINT32_C(0x01AD842D),
+ UINT32_C(0x07A0B6A0) } },
+ { { UINT32_C(0x029D2024), UINT32_C(0x0728395A), UINT32_C(0x04DB516D),
+ UINT32_C(0x0504C2CE), UINT32_C(0x03C5DEB1), UINT32_C(0x041CFF48),
+ UINT32_C(0x014AE223), UINT32_C(0x0856531F), UINT32_C(0x02EC3F65),
+ UINT32_C(0x0A46F536), UINT32_C(0x04ECB2AA), UINT32_C(0x0FB7289E),
+ UINT32_C(0x03DE9EFF), UINT32_C(0x0724BAA3), UINT32_C(0x0508D541),
+ UINT32_C(0x051B73BA), UINT32_C(0x0B38749E), UINT32_C(0x044097DF),
+ UINT32_C(0x00E5AC8E) },
+ { UINT32_C(0x0DDD93A9), UINT32_C(0x04295052), UINT32_C(0x0E03B84C),
+ UINT32_C(0x00B38799), UINT32_C(0x037F6A48), UINT32_C(0x07614753),
+ UINT32_C(0x05765258), UINT32_C(0x0E0CA450), UINT32_C(0x07CFB537),
+ UINT32_C(0x07342BEF), UINT32_C(0x05C319BB), UINT32_C(0x04F3A1F5),
+ UINT32_C(0x04762545), UINT32_C(0x0589360C), UINT32_C(0x0E5A46C8),
+ UINT32_C(0x02744137), UINT32_C(0x05E9E991), UINT32_C(0x01523BC2),
+ UINT32_C(0x062CDAB6) } },
+ { { UINT32_C(0x090E92D6), UINT32_C(0x00FA75A5), UINT32_C(0x040D6969),
+ UINT32_C(0x011D7DDB), UINT32_C(0x0B02AC62), UINT32_C(0x07679C7F),
+ UINT32_C(0x07FD8A06), UINT32_C(0x0A623D2A), UINT32_C(0x034C8ED2),
+ UINT32_C(0x07FB351F), UINT32_C(0x008857BA), UINT32_C(0x09AD9171),
+ UINT32_C(0x03CB7A5B), UINT32_C(0x01A56DB4), UINT32_C(0x09225D29),
+ UINT32_C(0x07819EC5), UINT32_C(0x0645D37A), UINT32_C(0x0618AED1),
+ UINT32_C(0x053A82A2) },
+ { UINT32_C(0x0662F537), UINT32_C(0x00AB8407), UINT32_C(0x0FF98DF8),
+ UINT32_C(0x03C0F116), UINT32_C(0x0C87DD6F), UINT32_C(0x00995A87),
+ UINT32_C(0x036E7BF1), UINT32_C(0x0318B15E), UINT32_C(0x01116415),
+ UINT32_C(0x00A53CD8), UINT32_C(0x0237AEF5), UINT32_C(0x065DCC5D),
+ UINT32_C(0x048F2118), UINT32_C(0x011F3E13), UINT32_C(0x0AD27061),
+ UINT32_C(0x02B7B666), UINT32_C(0x01CB618D), UINT32_C(0x02EC555A),
+ UINT32_C(0x058DF8C5) } },
+ { { UINT32_C(0x0B9839DA), UINT32_C(0x0047D336), UINT32_C(0x09E93377),
+ UINT32_C(0x00074C09), UINT32_C(0x08B5F722), UINT32_C(0x06A0986D),
+ UINT32_C(0x03ABD41C), UINT32_C(0x057C1CAA), UINT32_C(0x02B2ACCA),
+ UINT32_C(0x0FC9B996), UINT32_C(0x05488187), UINT32_C(0x07861011),
+ UINT32_C(0x0163907B), UINT32_C(0x07F6DAF7), UINT32_C(0x0363BC0E),
+ UINT32_C(0x058EF00F), UINT32_C(0x05446B66), UINT32_C(0x0514AA79),
+ UINT32_C(0x04A03953) },
+ { UINT32_C(0x0C1962CE), UINT32_C(0x06493BB1), UINT32_C(0x086D6126),
+ UINT32_C(0x00FCE569), UINT32_C(0x0DC92336), UINT32_C(0x015B8163),
+ UINT32_C(0x0432A31C), UINT32_C(0x0133A6EE), UINT32_C(0x0578D7AF),
+ UINT32_C(0x0840A2D3), UINT32_C(0x064C1FC2), UINT32_C(0x085837C8),
+ UINT32_C(0x0641237D), UINT32_C(0x054AF205), UINT32_C(0x0657C4E2),
+ UINT32_C(0x04B8B1E0), UINT32_C(0x00272237), UINT32_C(0x05B53E59),
+ UINT32_C(0x001FEA03) } },
+ { { UINT32_C(0x0D2BF9A7), UINT32_C(0x01A65815), UINT32_C(0x06FC3341),
+ UINT32_C(0x065823F4), UINT32_C(0x01599DE7), UINT32_C(0x070CA981),
+ UINT32_C(0x067E13C8), UINT32_C(0x009A9A6A), UINT32_C(0x0229B72F),
+ UINT32_C(0x09B1BC4A), UINT32_C(0x06BCE69A), UINT32_C(0x0FA69B0D),
+ UINT32_C(0x078B83C0), UINT32_C(0x06E62A5C), UINT32_C(0x021D206C),
+ UINT32_C(0x04E0CE16), UINT32_C(0x0F728EF3), UINT32_C(0x0453D52E),
+ UINT32_C(0x01844B54) },
+ { UINT32_C(0x020C30CB), UINT32_C(0x04E85BEE), UINT32_C(0x095E4EAF),
+ UINT32_C(0x075E0168), UINT32_C(0x039C14AF), UINT32_C(0x0370EA5A),
+ UINT32_C(0x05B0F157), UINT32_C(0x02E11B96), UINT32_C(0x042E3824),
+ UINT32_C(0x0D5DC5BB), UINT32_C(0x00451C96), UINT32_C(0x0E911392),
+ UINT32_C(0x0724269B), UINT32_C(0x04003692), UINT32_C(0x076FEA68),
+ UINT32_C(0x033CBDE1), UINT32_C(0x0417AF7D), UINT32_C(0x00B9592D),
+ UINT32_C(0x027FA0B4) } },
+ { { UINT32_C(0x0B2E6D92), UINT32_C(0x06E8F69A), UINT32_C(0x0DCD1AA5),
+ UINT32_C(0x01FB27B9), UINT32_C(0x04974F21), UINT32_C(0x027768BA),
+ UINT32_C(0x02769E05), UINT32_C(0x08C4A5CC), UINT32_C(0x047AF64B),
+ UINT32_C(0x08B89BB2), UINT32_C(0x02ED5662), UINT32_C(0x03939461),
+ UINT32_C(0x01F7401B), UINT32_C(0x06FDF357), UINT32_C(0x019C98D9),
+ UINT32_C(0x07B1E9DD), UINT32_C(0x075DC034), UINT32_C(0x01E0054F),
+ UINT32_C(0x02A2F727) },
+ { UINT32_C(0x0EB71C5F), UINT32_C(0x023BF702), UINT32_C(0x02236711),
+ UINT32_C(0x012F6D73), UINT32_C(0x0CA22E0A), UINT32_C(0x02359757),
+ UINT32_C(0x0157DA08), UINT32_C(0x05CB0525), UINT32_C(0x0102CBFE),
+ UINT32_C(0x0854B694), UINT32_C(0x07F9F306), UINT32_C(0x0A6E3855),
+ UINT32_C(0x024CCD83), UINT32_C(0x0220CC0E), UINT32_C(0x0AAD6848),
+ UINT32_C(0x0783A366), UINT32_C(0x0B9AD104), UINT32_C(0x02844B14),
+ UINT32_C(0x07B5BC13) } },
+ { { UINT32_C(0x01490429), UINT32_C(0x07C3B47C), UINT32_C(0x0DB7A58B),
+ UINT32_C(0x04D10D93), UINT32_C(0x08CA405B), UINT32_C(0x07FD087B),
+ UINT32_C(0x07C88AC9), UINT32_C(0x07D54451), UINT32_C(0x07010F32),
+ UINT32_C(0x06D62976), UINT32_C(0x03752EE7), UINT32_C(0x0A2326FD),
+ UINT32_C(0x00445040), UINT32_C(0x03605DB9), UINT32_C(0x03194920),
+ UINT32_C(0x01F8F0DF), UINT32_C(0x0F321EF5), UINT32_C(0x0297EC47),
+ UINT32_C(0x05C97D9A) },
+ { UINT32_C(0x087CA374), UINT32_C(0x04D9BD85), UINT32_C(0x09E4C1E2),
+ UINT32_C(0x05C6B60F), UINT32_C(0x03338BE0), UINT32_C(0x06C38E9F),
+ UINT32_C(0x030527CA), UINT32_C(0x0F28850A), UINT32_C(0x039421C7),
+ UINT32_C(0x02DE48C5), UINT32_C(0x0652719F), UINT32_C(0x097E2E6B),
+ UINT32_C(0x0758DD1C), UINT32_C(0x06788A64), UINT32_C(0x01CDEC4A),
+ UINT32_C(0x0314A216), UINT32_C(0x022EE734), UINT32_C(0x023BD455),
+ UINT32_C(0x05EC7716) } },
+ { { UINT32_C(0x03ACF0F9), UINT32_C(0x0203D95A), UINT32_C(0x0286435B),
+ UINT32_C(0x01818DC4), UINT32_C(0x02821B92), UINT32_C(0x06AE5102),
+ UINT32_C(0x07066934), UINT32_C(0x07BC9150), UINT32_C(0x07BA5607),
+ UINT32_C(0x0EC5981C), UINT32_C(0x04C69569), UINT32_C(0x03CC0C2A),
+ UINT32_C(0x07DA94A0), UINT32_C(0x07E65511), UINT32_C(0x086234FB),
+ UINT32_C(0x05407465), UINT32_C(0x0F825CD7), UINT32_C(0x03F370CC),
+ UINT32_C(0x00DC963A) },
+ { UINT32_C(0x09436D81), UINT32_C(0x04465793), UINT32_C(0x041DBE76),
+ UINT32_C(0x0384C090), UINT32_C(0x005C5350), UINT32_C(0x07296D6A),
+ UINT32_C(0x04712C6D), UINT32_C(0x0B8974CF), UINT32_C(0x07A230E5),
+ UINT32_C(0x0CBF52A8), UINT32_C(0x016C1814), UINT32_C(0x06EDC3F7),
+ UINT32_C(0x0627F679), UINT32_C(0x0750029A), UINT32_C(0x06E2AA55),
+ UINT32_C(0x0245FF68), UINT32_C(0x0F8F41C6), UINT32_C(0x00A2BB27),
+ UINT32_C(0x052BDC1F) } },
+ { { UINT32_C(0x06C8D427), UINT32_C(0x0648C043), UINT32_C(0x045E9C01),
+ UINT32_C(0x042CC909), UINT32_C(0x089A90AA), UINT32_C(0x007114E3),
+ UINT32_C(0x0085B7C3), UINT32_C(0x0B9DE134), UINT32_C(0x06B0A9E9),
+ UINT32_C(0x0AAAEBCC), UINT32_C(0x0092A52A), UINT32_C(0x0D6E2713),
+ UINT32_C(0x05857362), UINT32_C(0x0118376C), UINT32_C(0x000A08F8),
+ UINT32_C(0x003DE32F), UINT32_C(0x0E3FE6ED), UINT32_C(0x06CFB412),
+ UINT32_C(0x043D1662) },
+ { UINT32_C(0x0D400463), UINT32_C(0x0448C05A), UINT32_C(0x0AE67E6E),
+ UINT32_C(0x059369CB), UINT32_C(0x0A23C77C), UINT32_C(0x06E7F666),
+ UINT32_C(0x05BB8233), UINT32_C(0x095E95B6), UINT32_C(0x0284C07C),
+ UINT32_C(0x0F6C7097), UINT32_C(0x0443F5D5), UINT32_C(0x0301FE7F),
+ UINT32_C(0x023010C9), UINT32_C(0x009D2363), UINT32_C(0x07BD65C2),
+ UINT32_C(0x07E297A0), UINT32_C(0x034DDA50), UINT32_C(0x07ADC7E7),
+ UINT32_C(0x03060E2B) } },
+ { { UINT32_C(0x0924C15F), UINT32_C(0x04E07505), UINT32_C(0x08D0DCCF),
+ UINT32_C(0x01D04769), UINT32_C(0x02E2E204), UINT32_C(0x0713097A),
+ UINT32_C(0x07E9B59C), UINT32_C(0x07FDCF7A), UINT32_C(0x03E60E03),
+ UINT32_C(0x0423C6CD), UINT32_C(0x06A163F7), UINT32_C(0x07C0FA8B),
+ UINT32_C(0x01341D2B), UINT32_C(0x06745C51), UINT32_C(0x03C9DE3A),
+ UINT32_C(0x06D6D6F5), UINT32_C(0x0F5AF83F), UINT32_C(0x02698DEF),
+ UINT32_C(0x06091F29) },
+ { UINT32_C(0x0DBEEE78), UINT32_C(0x060A02B3), UINT32_C(0x0558AE6B),
+ UINT32_C(0x07100333), UINT32_C(0x0A312381), UINT32_C(0x02FA9A13),
+ UINT32_C(0x06D1C0C3), UINT32_C(0x0C625336), UINT32_C(0x03B853CF),
+ UINT32_C(0x08B3BE37), UINT32_C(0x0104E5D9), UINT32_C(0x053B9B53),
+ UINT32_C(0x02A2D06C), UINT32_C(0x01CDC864), UINT32_C(0x0F04A867),
+ UINT32_C(0x07663226), UINT32_C(0x0FD6C54B), UINT32_C(0x040943C5),
+ UINT32_C(0x03C04D10) } },
+ { { UINT32_C(0x090F8C80), UINT32_C(0x0582A686), UINT32_C(0x0BA42ED6),
+ UINT32_C(0x070A8F1E), UINT32_C(0x0AB02D12), UINT32_C(0x01EB5C3D),
+ UINT32_C(0x07479B29), UINT32_C(0x04D72C41), UINT32_C(0x0362562E),
+ UINT32_C(0x06FAF4FC), UINT32_C(0x033FED54), UINT32_C(0x0229578C),
+ UINT32_C(0x005B4CFB), UINT32_C(0x03BA05BF), UINT32_C(0x0B4A3FBC),
+ UINT32_C(0x07DBD5D5), UINT32_C(0x05E8639D), UINT32_C(0x07D5867F),
+ UINT32_C(0x027FE947) },
+ { UINT32_C(0x01982847), UINT32_C(0x008A8D79), UINT32_C(0x0B215B64),
+ UINT32_C(0x06EDECCB), UINT32_C(0x045309BE), UINT32_C(0x055465DE),
+ UINT32_C(0x0426ED2E), UINT32_C(0x0D49D672), UINT32_C(0x01000B74),
+ UINT32_C(0x01206E3C), UINT32_C(0x061A0CA8), UINT32_C(0x020BEC03),
+ UINT32_C(0x02104AC7), UINT32_C(0x03FB64AC), UINT32_C(0x097C06BE),
+ UINT32_C(0x05DF7C1D), UINT32_C(0x0EFD23AB), UINT32_C(0x042BC8D8),
+ UINT32_C(0x02A649D7) } },
+ { { UINT32_C(0x0643409F), UINT32_C(0x06A50E0A), UINT32_C(0x00C269C2),
+ UINT32_C(0x0130B8C0), UINT32_C(0x0B25EAD2), UINT32_C(0x07A4A516),
+ UINT32_C(0x0375B082), UINT32_C(0x0E197F8C), UINT32_C(0x0546B686),
+ UINT32_C(0x0B8287C5), UINT32_C(0x04A367C1), UINT32_C(0x07DF58A1),
+ UINT32_C(0x05B7DD15), UINT32_C(0x061763FD), UINT32_C(0x0E2DF8E8),
+ UINT32_C(0x05ABFC51), UINT32_C(0x087018C8), UINT32_C(0x05935143),
+ UINT32_C(0x05E9EFA4) },
+ { UINT32_C(0x0AF2F29D), UINT32_C(0x0063F9B1), UINT32_C(0x0FB11A34),
+ UINT32_C(0x02D7C22E), UINT32_C(0x08AF67E7), UINT32_C(0x005AC16C),
+ UINT32_C(0x047EE080), UINT32_C(0x0B7677A2), UINT32_C(0x04500DDC),
+ UINT32_C(0x0137CD80), UINT32_C(0x01CF2369), UINT32_C(0x0DE177B8),
+ UINT32_C(0x018122DE), UINT32_C(0x00EDFC0C), UINT32_C(0x0048B9ED),
+ UINT32_C(0x043633B7), UINT32_C(0x0666D33E), UINT32_C(0x00317E10),
+ UINT32_C(0x066100C3) } },
+ { { UINT32_C(0x037B93A2), UINT32_C(0x07917621), UINT32_C(0x048F411C),
+ UINT32_C(0x04EF1E2A), UINT32_C(0x0FC8F91F), UINT32_C(0x04090E1D),
+ UINT32_C(0x066F78F2), UINT32_C(0x0C2C0207), UINT32_C(0x065E2513),
+ UINT32_C(0x0F03BADB), UINT32_C(0x03689AF4), UINT32_C(0x0FE959E2),
+ UINT32_C(0x028B6A5E), UINT32_C(0x0101C577), UINT32_C(0x0C3A5192),
+ UINT32_C(0x03042F53), UINT32_C(0x0E2A6A29), UINT32_C(0x0231095D),
+ UINT32_C(0x06E29445) },
+ { UINT32_C(0x07A00331), UINT32_C(0x041D85F7), UINT32_C(0x0D189E24),
+ UINT32_C(0x0294578C), UINT32_C(0x04A9E7A3), UINT32_C(0x037F260A),
+ UINT32_C(0x060D62BB), UINT32_C(0x07AED3DE), UINT32_C(0x0727FEAB),
+ UINT32_C(0x0283C99C), UINT32_C(0x05A11B56), UINT32_C(0x08953348),
+ UINT32_C(0x01A388E1), UINT32_C(0x028932F2), UINT32_C(0x0AFFD5A7),
+ UINT32_C(0x042CF6C6), UINT32_C(0x072339BA), UINT32_C(0x06344724),
+ UINT32_C(0x0395F757) } },
+ { { UINT32_C(0x01328CE4), UINT32_C(0x01D69A89), UINT32_C(0x03D3B2E3),
+ UINT32_C(0x0780829F), UINT32_C(0x0848A488), UINT32_C(0x057B85BD),
+ UINT32_C(0x02051385), UINT32_C(0x06706AD6), UINT32_C(0x02D6482A),
+ UINT32_C(0x0A8717D0), UINT32_C(0x05383AC5), UINT32_C(0x03250B87),
+ UINT32_C(0x05C77D8D), UINT32_C(0x05198B6D), UINT32_C(0x03FACF90),
+ UINT32_C(0x062058A1), UINT32_C(0x008F96B1), UINT32_C(0x01F29CAF),
+ UINT32_C(0x00358EC7) },
+ { UINT32_C(0x0B620A88), UINT32_C(0x06288694), UINT32_C(0x05B21FAC),
+ UINT32_C(0x03F64B44), UINT32_C(0x0DBD251D), UINT32_C(0x06B0D130),
+ UINT32_C(0x04314394), UINT32_C(0x02479C97), UINT32_C(0x003417DF),
+ UINT32_C(0x0318B1D4), UINT32_C(0x0762DFD7), UINT32_C(0x0DDA6BF1),
+ UINT32_C(0x0214A508), UINT32_C(0x0231DEBD), UINT32_C(0x0D8733B2),
+ UINT32_C(0x02ACA66C), UINT32_C(0x05C275E4), UINT32_C(0x07A8A625),
+ UINT32_C(0x001D2426) } },
+ { { UINT32_C(0x0C95FF29), UINT32_C(0x0608C2C5), UINT32_C(0x0404108F),
+ UINT32_C(0x03383226), UINT32_C(0x07F8CE0C), UINT32_C(0x0600859C),
+ UINT32_C(0x04899A96), UINT32_C(0x00CCD8EA), UINT32_C(0x02796E7C),
+ UINT32_C(0x0FB706CC), UINT32_C(0x0111E6FC), UINT32_C(0x027E2706),
+ UINT32_C(0x03EBDDF3), UINT32_C(0x02838065), UINT32_C(0x0585FBC0),
+ UINT32_C(0x07572ED5), UINT32_C(0x0907E1E4), UINT32_C(0x017E67B8),
+ UINT32_C(0x041786F0) },
+ { UINT32_C(0x04519732), UINT32_C(0x073D0689), UINT32_C(0x0DF32FF7),
+ UINT32_C(0x01246800), UINT32_C(0x068478E9), UINT32_C(0x031DEA3C),
+ UINT32_C(0x03E71E8F), UINT32_C(0x08C6C89E), UINT32_C(0x012CDD96),
+ UINT32_C(0x0AEEE8F4), UINT32_C(0x0121A9C4), UINT32_C(0x01F73DAA),
+ UINT32_C(0x033160E0), UINT32_C(0x062B3F6E), UINT32_C(0x081E3B9C),
+ UINT32_C(0x029ED0A5), UINT32_C(0x05F0DBFB), UINT32_C(0x0765E7EB),
+ UINT32_C(0x06026E18) } },
+ },
+ {
+ { { UINT32_C(0x0ED2EB86), UINT32_C(0x073B24CD), UINT32_C(0x01308B7E),
+ UINT32_C(0x001667D5), UINT32_C(0x06D840A4), UINT32_C(0x01CE15F3),
+ UINT32_C(0x00EC4628), UINT32_C(0x0BE255D7), UINT32_C(0x039A76B9),
+ UINT32_C(0x0CA76752), UINT32_C(0x02EA45FE), UINT32_C(0x0CB0A354),
+ UINT32_C(0x019D90B7), UINT32_C(0x036C0B82), UINT32_C(0x07E353B2),
+ UINT32_C(0x00B45E15), UINT32_C(0x0E1E3229), UINT32_C(0x06EED669),
+ UINT32_C(0x07975597) },
+ { UINT32_C(0x04B5DE1E), UINT32_C(0x05185A2C), UINT32_C(0x0F1C1594),
+ UINT32_C(0x01D7FD5B), UINT32_C(0x0CD949EB), UINT32_C(0x02E191E5),
+ UINT32_C(0x03295CCA), UINT32_C(0x02F97A05), UINT32_C(0x052209AD),
+ UINT32_C(0x0C0AF1C4), UINT32_C(0x07F93AD2), UINT32_C(0x060F26C1),
+ UINT32_C(0x0274993E), UINT32_C(0x023CDD4A), UINT32_C(0x08D9B938),
+ UINT32_C(0x00D32B5E), UINT32_C(0x04FE5190), UINT32_C(0x01AB014D),
+ UINT32_C(0x05DD64A0) } },
+ { { UINT32_C(0x0C2CA70B), UINT32_C(0x0346AE90), UINT32_C(0x0F8387AC),
+ UINT32_C(0x03ABE62A), UINT32_C(0x029DA053), UINT32_C(0x0041F61B),
+ UINT32_C(0x02CBC0BF), UINT32_C(0x05243AE9), UINT32_C(0x0360C16B),
+ UINT32_C(0x0C28A299), UINT32_C(0x0795D938), UINT32_C(0x02AC475A),
+ UINT32_C(0x0113BEAF), UINT32_C(0x05A671E6), UINT32_C(0x05C8C591),
+ UINT32_C(0x06924739), UINT32_C(0x02A54EEF), UINT32_C(0x02F274E3),
+ UINT32_C(0x0049A1CD) },
+ { UINT32_C(0x0426994D), UINT32_C(0x07F97B31), UINT32_C(0x0DA0C788),
+ UINT32_C(0x04B6F8C8), UINT32_C(0x05463D1A), UINT32_C(0x07C155D5),
+ UINT32_C(0x00BA793E), UINT32_C(0x0AB08953), UINT32_C(0x042C3976),
+ UINT32_C(0x069C681F), UINT32_C(0x02ABCC5A), UINT32_C(0x024C8F72),
+ UINT32_C(0x067DF148), UINT32_C(0x0180DD65), UINT32_C(0x042A4819),
+ UINT32_C(0x01AFAAD4), UINT32_C(0x0334701F), UINT32_C(0x031ADC33),
+ UINT32_C(0x03AA0140) } },
+ { { UINT32_C(0x0BEE1F7B), UINT32_C(0x07EA5E6A), UINT32_C(0x06C716A1),
+ UINT32_C(0x01C6DCD9), UINT32_C(0x00C62805), UINT32_C(0x06E99086),
+ UINT32_C(0x047E4182), UINT32_C(0x04E699EA), UINT32_C(0x017F98AF),
+ UINT32_C(0x0C64E476), UINT32_C(0x0464A2AE), UINT32_C(0x0AF646E7),
+ UINT32_C(0x0734C8DA), UINT32_C(0x069B3D13), UINT32_C(0x0BD58EFB),
+ UINT32_C(0x0572D3C4), UINT32_C(0x0889BAF4), UINT32_C(0x049A880A),
+ UINT32_C(0x01790356) },
+ { UINT32_C(0x0D71A4FA), UINT32_C(0x017475C1), UINT32_C(0x0B53C845),
+ UINT32_C(0x00ED5EC3), UINT32_C(0x072B9DBC), UINT32_C(0x032C8366),
+ UINT32_C(0x02B3D21C), UINT32_C(0x0E8E8016), UINT32_C(0x04B6FF58),
+ UINT32_C(0x017276EC), UINT32_C(0x069855EF), UINT32_C(0x0342CFC2),
+ UINT32_C(0x00D109A0), UINT32_C(0x07614A72), UINT32_C(0x09DC301B),
+ UINT32_C(0x036B57F5), UINT32_C(0x06CB91C2), UINT32_C(0x03E8DF1A),
+ UINT32_C(0x070FD727) } },
+ { { UINT32_C(0x032574BE), UINT32_C(0x04115A04), UINT32_C(0x0F98172F),
+ UINT32_C(0x04AEDED0), UINT32_C(0x02519CD4), UINT32_C(0x05A01A73),
+ UINT32_C(0x06EEA282), UINT32_C(0x0BBAAC38), UINT32_C(0x02CC4028),
+ UINT32_C(0x03AACD20), UINT32_C(0x01A067DD), UINT32_C(0x0AFED584),
+ UINT32_C(0x06846B34), UINT32_C(0x01F4D8B2), UINT32_C(0x00AB5080),
+ UINT32_C(0x02EFB0FB), UINT32_C(0x09F1C68E), UINT32_C(0x01829F05),
+ UINT32_C(0x008F3C67) },
+ { UINT32_C(0x062EC0F0), UINT32_C(0x04CAAFE4), UINT32_C(0x08147733),
+ UINT32_C(0x038A422E), UINT32_C(0x0085656E), UINT32_C(0x02D1FFD4),
+ UINT32_C(0x0731016E), UINT32_C(0x022AA6C1), UINT32_C(0x04385C24),
+ UINT32_C(0x06B4D30A), UINT32_C(0x04FF86E3), UINT32_C(0x0540E9AE),
+ UINT32_C(0x039185FE), UINT32_C(0x0278D41E), UINT32_C(0x05EEE86F),
+ UINT32_C(0x05D399FD), UINT32_C(0x07D5B982), UINT32_C(0x0364A589),
+ UINT32_C(0x07E1654F) } },
+ { { UINT32_C(0x0D8CB3CC), UINT32_C(0x06C254BF), UINT32_C(0x0FBC2C5D),
+ UINT32_C(0x07F746F2), UINT32_C(0x07E4259D), UINT32_C(0x022B49C4),
+ UINT32_C(0x04CE0ECE), UINT32_C(0x095F3130), UINT32_C(0x064022C7),
+ UINT32_C(0x076A7307), UINT32_C(0x074FEA23), UINT32_C(0x09CDD626),
+ UINT32_C(0x0612A401), UINT32_C(0x0562E226), UINT32_C(0x027BA2E0),
+ UINT32_C(0x01D98EB5), UINT32_C(0x0A54B2FF), UINT32_C(0x0345BAFC),
+ UINT32_C(0x05CE5083) },
+ { UINT32_C(0x082FB619), UINT32_C(0x01E59C7B), UINT32_C(0x07C56C18),
+ UINT32_C(0x0594E677), UINT32_C(0x0EBA4C47), UINT32_C(0x01F1C6FF),
+ UINT32_C(0x016B9F48), UINT32_C(0x0443B057), UINT32_C(0x017930FC),
+ UINT32_C(0x0D94B0A6), UINT32_C(0x0501D4ED), UINT32_C(0x0EB5EA2F),
+ UINT32_C(0x03F2D8D0), UINT32_C(0x04A1DA92), UINT32_C(0x0A702231),
+ UINT32_C(0x063C2830), UINT32_C(0x06F5E127), UINT32_C(0x06BE79CE),
+ UINT32_C(0x06600B2F) } },
+ { { UINT32_C(0x0F26ECDA), UINT32_C(0x0052168B), UINT32_C(0x0CBDB9E3),
+ UINT32_C(0x052FFD0A), UINT32_C(0x02FDCD7B), UINT32_C(0x05791EA2),
+ UINT32_C(0x03DF5472), UINT32_C(0x0544715D), UINT32_C(0x032F4FBD),
+ UINT32_C(0x05DA4E99), UINT32_C(0x000977D5), UINT32_C(0x0AEE5E82),
+ UINT32_C(0x07B5A2B7), UINT32_C(0x02494676), UINT32_C(0x0B416152),
+ UINT32_C(0x03AC76C7), UINT32_C(0x0B21FDC6), UINT32_C(0x04ECC50E),
+ UINT32_C(0x02A4E6AB) },
+ { UINT32_C(0x031E0BB4), UINT32_C(0x05FC9964), UINT32_C(0x014AC466),
+ UINT32_C(0x038F82D0), UINT32_C(0x0C0B56B8), UINT32_C(0x0217513C),
+ UINT32_C(0x0498C923), UINT32_C(0x076EEC28), UINT32_C(0x03824F59),
+ UINT32_C(0x0B7B1382), UINT32_C(0x056FE399), UINT32_C(0x00794841),
+ UINT32_C(0x076FEEC8), UINT32_C(0x0219F413), UINT32_C(0x04ABDD19),
+ UINT32_C(0x04CE2F28), UINT32_C(0x0F2E86F7), UINT32_C(0x02F472AF),
+ UINT32_C(0x06774781) } },
+ { { UINT32_C(0x0CEBC7BE), UINT32_C(0x00221686), UINT32_C(0x04E2E2B5),
+ UINT32_C(0x02865641), UINT32_C(0x0400F945), UINT32_C(0x01CF69C4),
+ UINT32_C(0x002D7B22), UINT32_C(0x04D5A98C), UINT32_C(0x075AA74B),
+ UINT32_C(0x0926F727), UINT32_C(0x0318AD6B), UINT32_C(0x009AE911),
+ UINT32_C(0x00216BA5), UINT32_C(0x0794C1D5), UINT32_C(0x047BB387),
+ UINT32_C(0x05890517), UINT32_C(0x0C438287), UINT32_C(0x04D6AF1C),
+ UINT32_C(0x010C34E7) },
+ { UINT32_C(0x02E3859D), UINT32_C(0x06690EFE), UINT32_C(0x0F063DCD),
+ UINT32_C(0x068C490B), UINT32_C(0x06DE5321), UINT32_C(0x0225E5EC),
+ UINT32_C(0x0573AFDE), UINT32_C(0x0C5AD59A), UINT32_C(0x064D175A),
+ UINT32_C(0x09D71327), UINT32_C(0x03D7526B), UINT32_C(0x04C7D696),
+ UINT32_C(0x05C7C0BF), UINT32_C(0x04314949), UINT32_C(0x064EA7B0),
+ UINT32_C(0x008652D7), UINT32_C(0x0EA31279), UINT32_C(0x0668F188),
+ UINT32_C(0x035A0886) } },
+ { { UINT32_C(0x02EB8133), UINT32_C(0x03EC558C), UINT32_C(0x088B2CEF),
+ UINT32_C(0x008352FC), UINT32_C(0x0ECF2FB1), UINT32_C(0x01F0E6BB),
+ UINT32_C(0x023E4A68), UINT32_C(0x0B9CC299), UINT32_C(0x02937BC1),
+ UINT32_C(0x0A4FE033), UINT32_C(0x03BAB078), UINT32_C(0x078C8608),
+ UINT32_C(0x000D53E7), UINT32_C(0x06DA1D39), UINT32_C(0x05E14C61),
+ UINT32_C(0x035624BE), UINT32_C(0x06669427), UINT32_C(0x079FAB65),
+ UINT32_C(0x0663AC20) },
+ { UINT32_C(0x06835A15), UINT32_C(0x013B136D), UINT32_C(0x08DB323F),
+ UINT32_C(0x068809A4), UINT32_C(0x02A3957E), UINT32_C(0x0081A010),
+ UINT32_C(0x06B7C838), UINT32_C(0x074F156F), UINT32_C(0x00F3A4DB),
+ UINT32_C(0x07ADF165), UINT32_C(0x05A07A0A), UINT32_C(0x0585D310),
+ UINT32_C(0x02A4FAF9), UINT32_C(0x03A5C451), UINT32_C(0x00426908),
+ UINT32_C(0x03C76306), UINT32_C(0x0D3289C2), UINT32_C(0x04FD8A7B),
+ UINT32_C(0x03974EFE) } },
+ { { UINT32_C(0x01D85118), UINT32_C(0x03F039A9), UINT32_C(0x0A744F66),
+ UINT32_C(0x00B874D3), UINT32_C(0x0AD31A3A), UINT32_C(0x07A3C5F8),
+ UINT32_C(0x045FFFF5), UINT32_C(0x023754A5), UINT32_C(0x02E38CB8),
+ UINT32_C(0x05910E6C), UINT32_C(0x01773ED0), UINT32_C(0x0835A72A),
+ UINT32_C(0x01BE848A), UINT32_C(0x07BD444B), UINT32_C(0x0B4AFA36),
+ UINT32_C(0x03B51CEC), UINT32_C(0x076A82F4), UINT32_C(0x049B5424),
+ UINT32_C(0x01EDBBC3) },
+ { UINT32_C(0x0D472029), UINT32_C(0x07322E8C), UINT32_C(0x0891E31F),
+ UINT32_C(0x0598F9A4), UINT32_C(0x0B8A6C89), UINT32_C(0x065A918E),
+ UINT32_C(0x01B36F21), UINT32_C(0x05650472), UINT32_C(0x053A7D69),
+ UINT32_C(0x05F09FDE), UINT32_C(0x03CE6055), UINT32_C(0x017487DC),
+ UINT32_C(0x01B03227), UINT32_C(0x013D4913), UINT32_C(0x096CA6AE),
+ UINT32_C(0x000E46D4), UINT32_C(0x07F35B2C), UINT32_C(0x06FDC86A),
+ UINT32_C(0x0191F319) } },
+ { { UINT32_C(0x0CE12393), UINT32_C(0x015F4FB3), UINT32_C(0x0C3E8E50),
+ UINT32_C(0x06CE6B2D), UINT32_C(0x0B3C1693), UINT32_C(0x045162F6),
+ UINT32_C(0x0407EFF6), UINT32_C(0x00A9135E), UINT32_C(0x047CF46F),
+ UINT32_C(0x04E91DC4), UINT32_C(0x036B9A3C), UINT32_C(0x0134193D),
+ UINT32_C(0x003E5C05), UINT32_C(0x00082BD9), UINT32_C(0x067D8D47),
+ UINT32_C(0x02764530), UINT32_C(0x01E6C320), UINT32_C(0x04A28C2A),
+ UINT32_C(0x048FBA5C) },
+ { UINT32_C(0x0CE5DBF5), UINT32_C(0x0385772C), UINT32_C(0x019E313F),
+ UINT32_C(0x073071A7), UINT32_C(0x0F5FC824), UINT32_C(0x02D63EF3),
+ UINT32_C(0x02B70267), UINT32_C(0x0A6BE174), UINT32_C(0x076EA84E),
+ UINT32_C(0x0FA0EBFC), UINT32_C(0x06D310F3), UINT32_C(0x01962AC7),
+ UINT32_C(0x0209883D), UINT32_C(0x03B86C97), UINT32_C(0x00441CDD),
+ UINT32_C(0x0066501C), UINT32_C(0x03267C1F), UINT32_C(0x03EAC5C9),
+ UINT32_C(0x00069F5A) } },
+ { { UINT32_C(0x01D1EEDB), UINT32_C(0x0706D366), UINT32_C(0x04DB59F7),
+ UINT32_C(0x03130058), UINT32_C(0x0FBF1E90), UINT32_C(0x02990341),
+ UINT32_C(0x052D42D0), UINT32_C(0x0D9F883C), UINT32_C(0x01C3CC5F),
+ UINT32_C(0x0602F8E0), UINT32_C(0x0719E908), UINT32_C(0x0152A103),
+ UINT32_C(0x05A33891), UINT32_C(0x0095E49C), UINT32_C(0x07DC00AE),
+ UINT32_C(0x00D04AA8), UINT32_C(0x034051A0), UINT32_C(0x01C589DC),
+ UINT32_C(0x044769AA) },
+ { UINT32_C(0x05A4238D), UINT32_C(0x038BBADC), UINT32_C(0x024C6D7A),
+ UINT32_C(0x058D2A82), UINT32_C(0x0BE67DEB), UINT32_C(0x057F5E80),
+ UINT32_C(0x055D31EA), UINT32_C(0x0DB49C5A), UINT32_C(0x070BEC2C),
+ UINT32_C(0x0F3322C2), UINT32_C(0x06C3108C), UINT32_C(0x0A1130EB),
+ UINT32_C(0x01DE1843), UINT32_C(0x002476B9), UINT32_C(0x0C1602A0),
+ UINT32_C(0x020FD705), UINT32_C(0x0E87B144), UINT32_C(0x00271FD2),
+ UINT32_C(0x02A1E7C8) } },
+ { { UINT32_C(0x0BB71E17), UINT32_C(0x00B697E6), UINT32_C(0x027C50D2),
+ UINT32_C(0x02FF8F72), UINT32_C(0x052B77CA), UINT32_C(0x02997C16),
+ UINT32_C(0x013C0178), UINT32_C(0x0F7FCEE6), UINT32_C(0x040B66E5),
+ UINT32_C(0x03A69C37), UINT32_C(0x02E55D76), UINT32_C(0x00F908D4),
+ UINT32_C(0x052718AB), UINT32_C(0x0076528F), UINT32_C(0x0306D84E),
+ UINT32_C(0x07EBCA7C), UINT32_C(0x01165F7E), UINT32_C(0x01DB45A9),
+ UINT32_C(0x067FCC94) },
+ { UINT32_C(0x0791633D), UINT32_C(0x047BD9A1), UINT32_C(0x0A26D9CC),
+ UINT32_C(0x000BE536), UINT32_C(0x0F022B81), UINT32_C(0x064B6F3C),
+ UINT32_C(0x03B7DA09), UINT32_C(0x0F632491), UINT32_C(0x02A9B2EF),
+ UINT32_C(0x029A6C74), UINT32_C(0x039178C1), UINT32_C(0x06C1B980),
+ UINT32_C(0x025426C4), UINT32_C(0x00AC18E2), UINT32_C(0x0854C009),
+ UINT32_C(0x07A990A9), UINT32_C(0x0BA40528), UINT32_C(0x05C4D8A8),
+ UINT32_C(0x0628B343) } },
+ { { UINT32_C(0x07812A25), UINT32_C(0x0179F4F9), UINT32_C(0x09DE2C08),
+ UINT32_C(0x02F4F1F9), UINT32_C(0x04F48E6A), UINT32_C(0x07549212),
+ UINT32_C(0x016DCA05), UINT32_C(0x07A3A534), UINT32_C(0x0359AADF),
+ UINT32_C(0x0E969384), UINT32_C(0x061DBB0C), UINT32_C(0x0E368BE3),
+ UINT32_C(0x07060163), UINT32_C(0x07CA82E3), UINT32_C(0x07332717),
+ UINT32_C(0x0002DFB2), UINT32_C(0x03AD0A18), UINT32_C(0x0417995E),
+ UINT32_C(0x0326668F) },
+ { UINT32_C(0x09EF75E3), UINT32_C(0x07B04772), UINT32_C(0x0852DCD8),
+ UINT32_C(0x06097708), UINT32_C(0x0B957C2C), UINT32_C(0x038B98A1),
+ UINT32_C(0x02B82598), UINT32_C(0x0F132C73), UINT32_C(0x04CE431B),
+ UINT32_C(0x07D4CBE1), UINT32_C(0x049BA972), UINT32_C(0x00D3788D),
+ UINT32_C(0x07EDE5A2), UINT32_C(0x0635F8BD), UINT32_C(0x0EB9AB1A),
+ UINT32_C(0x02C621B4), UINT32_C(0x0BCBFF41), UINT32_C(0x0439D1F9),
+ UINT32_C(0x003044A8) } },
+ { { UINT32_C(0x0CF8D334), UINT32_C(0x037C1C48), UINT32_C(0x05CD52D5),
+ UINT32_C(0x047578F0), UINT32_C(0x0BE7BC07), UINT32_C(0x06E68827),
+ UINT32_C(0x076445CB), UINT32_C(0x0FEBF611), UINT32_C(0x00142073),
+ UINT32_C(0x029F031E), UINT32_C(0x076C6434), UINT32_C(0x0F98F9D0),
+ UINT32_C(0x034E14D3), UINT32_C(0x038E0268), UINT32_C(0x0191305B),
+ UINT32_C(0x032A0200), UINT32_C(0x05EF4C75), UINT32_C(0x02826331),
+ UINT32_C(0x04D82A88) },
+ { UINT32_C(0x0D51E170), UINT32_C(0x00D3F07F), UINT32_C(0x08365D15),
+ UINT32_C(0x0781A3A1), UINT32_C(0x0D4BE663), UINT32_C(0x00175259),
+ UINT32_C(0x000C1FA1), UINT32_C(0x0F00FCE0), UINT32_C(0x00299B52),
+ UINT32_C(0x0C7D7E01), UINT32_C(0x052A3C59), UINT32_C(0x07C9CF44),
+ UINT32_C(0x05E7EE2B), UINT32_C(0x035E7031), UINT32_C(0x0FE2CB7C),
+ UINT32_C(0x0403D2B4), UINT32_C(0x0FC9A748), UINT32_C(0x07D461AF),
+ UINT32_C(0x006E35B5) } },
+ { { UINT32_C(0x0594D02E), UINT32_C(0x075E6F14), UINT32_C(0x03360822),
+ UINT32_C(0x03E7DDDB), UINT32_C(0x0F1C6110), UINT32_C(0x072483CF),
+ UINT32_C(0x03ECF221), UINT32_C(0x0D658C87), UINT32_C(0x060AC74F),
+ UINT32_C(0x0F51CC4C), UINT32_C(0x03EB69F7), UINT32_C(0x07B2F64B),
+ UINT32_C(0x0242F07B), UINT32_C(0x058E5984), UINT32_C(0x03A0B7A4),
+ UINT32_C(0x03CE806B), UINT32_C(0x06139B85), UINT32_C(0x01DAAFE3),
+ UINT32_C(0x0130F7E5) },
+ { UINT32_C(0x020891BB), UINT32_C(0x077E28D4), UINT32_C(0x0AAEAA8D),
+ UINT32_C(0x00B2D799), UINT32_C(0x0E10388A), UINT32_C(0x001DFD31),
+ UINT32_C(0x059F85F1), UINT32_C(0x00BC7E55), UINT32_C(0x05309429),
+ UINT32_C(0x0FEDF8A8), UINT32_C(0x06B52B0D), UINT32_C(0x0E3F8A44),
+ UINT32_C(0x07A8E2A2), UINT32_C(0x07D5866C), UINT32_C(0x02DBCD7C),
+ UINT32_C(0x02895FBE), UINT32_C(0x0F66BDAD), UINT32_C(0x048C3CAD),
+ UINT32_C(0x078587AD) } },
+ { { UINT32_C(0x0B1B7656), UINT32_C(0x02A1E440), UINT32_C(0x04EF5EA7),
+ UINT32_C(0x059FA6A2), UINT32_C(0x0C68CD6D), UINT32_C(0x005E8043),
+ UINT32_C(0x01AE592B), UINT32_C(0x00DD5F88), UINT32_C(0x0559B430),
+ UINT32_C(0x0BF3DF59), UINT32_C(0x011CBD52), UINT32_C(0x0DDDE17B),
+ UINT32_C(0x031D26D8), UINT32_C(0x0148FB57), UINT32_C(0x04EDBF2D),
+ UINT32_C(0x07220D0D), UINT32_C(0x0F7B0807), UINT32_C(0x076B1F6E),
+ UINT32_C(0x0306320E) },
+ { UINT32_C(0x07EEE80E), UINT32_C(0x0754C15A), UINT32_C(0x093487F6),
+ UINT32_C(0x023D5CA0), UINT32_C(0x00BD77C2), UINT32_C(0x0271EF5D),
+ UINT32_C(0x04FAEAB7), UINT32_C(0x07EBA560), UINT32_C(0x015A18D8),
+ UINT32_C(0x039861D4), UINT32_C(0x041FD3C8), UINT32_C(0x0D5863CB),
+ UINT32_C(0x066C5F53), UINT32_C(0x06380D15), UINT32_C(0x0E825C9F),
+ UINT32_C(0x00BA76BE), UINT32_C(0x0BC4E3B8), UINT32_C(0x06216B12),
+ UINT32_C(0x03B4F0D4) } },
+ },
+ {
+ { { UINT32_C(0x0201C48B), UINT32_C(0x073C85A8), UINT32_C(0x095DC61E),
+ UINT32_C(0x05F14993), UINT32_C(0x0123BD40), UINT32_C(0x05907610),
+ UINT32_C(0x046FBB4C), UINT32_C(0x0A0F3B82), UINT32_C(0x078A34BB),
+ UINT32_C(0x003DB127), UINT32_C(0x052D9AD5), UINT32_C(0x05103EE9),
+ UINT32_C(0x0465988A), UINT32_C(0x005F3641), UINT32_C(0x085495F9),
+ UINT32_C(0x069A8F20), UINT32_C(0x064AA21B), UINT32_C(0x007CCB01),
+ UINT32_C(0x04384B61) },
+ { UINT32_C(0x051DE678), UINT32_C(0x07820FBE), UINT32_C(0x063426A0),
+ UINT32_C(0x01B262F0), UINT32_C(0x0B0B9013), UINT32_C(0x045C8465),
+ UINT32_C(0x0240C64E), UINT32_C(0x0DDA697F), UINT32_C(0x0201A64C),
+ UINT32_C(0x016B17DF), UINT32_C(0x065E1757), UINT32_C(0x0F6B7334),
+ UINT32_C(0x07ED2866), UINT32_C(0x028D6370), UINT32_C(0x0E25340A),
+ UINT32_C(0x002693F4), UINT32_C(0x07D889A8), UINT32_C(0x06B215F7),
+ UINT32_C(0x062B5959) } },
+ { { UINT32_C(0x0D9C3B89), UINT32_C(0x077CC1DC), UINT32_C(0x013DDAA7),
+ UINT32_C(0x0111C6F8), UINT32_C(0x0577407F), UINT32_C(0x01FF52EA),
+ UINT32_C(0x06D56CA6), UINT32_C(0x06331227), UINT32_C(0x03AB576F),
+ UINT32_C(0x0CD7FD4F), UINT32_C(0x06AF74C0), UINT32_C(0x0AD52465),
+ UINT32_C(0x041865E8), UINT32_C(0x0546A928), UINT32_C(0x00FE8F9D),
+ UINT32_C(0x07C2CDD8), UINT32_C(0x0C0D3434), UINT32_C(0x030F8525),
+ UINT32_C(0x05B51E81) },
+ { UINT32_C(0x08A5170B), UINT32_C(0x074FC061), UINT32_C(0x0060E606),
+ UINT32_C(0x017D8D1E), UINT32_C(0x0A8E0395), UINT32_C(0x0428DCF1),
+ UINT32_C(0x046F46B8), UINT32_C(0x05E254D7), UINT32_C(0x05D05211),
+ UINT32_C(0x0B46AD84), UINT32_C(0x03446BA1), UINT32_C(0x00CA5FED),
+ UINT32_C(0x02A8C267), UINT32_C(0x0570EC98), UINT32_C(0x0750367D),
+ UINT32_C(0x0362D78B), UINT32_C(0x0C84DA94), UINT32_C(0x07AF8D8F),
+ UINT32_C(0x0583AA8B) } },
+ { { UINT32_C(0x09126FAC), UINT32_C(0x06B05898), UINT32_C(0x0872DF85),
+ UINT32_C(0x048C3352), UINT32_C(0x0331E5B3), UINT32_C(0x076671FB),
+ UINT32_C(0x02076524), UINT32_C(0x0492A4A3), UINT32_C(0x06D57C7C),
+ UINT32_C(0x052A5C41), UINT32_C(0x052CA0DF), UINT32_C(0x0E7D0224),
+ UINT32_C(0x07241BC6), UINT32_C(0x0234848A), UINT32_C(0x048CE05E),
+ UINT32_C(0x01B286B5), UINT32_C(0x0B054813), UINT32_C(0x02F6EDFC),
+ UINT32_C(0x0250A4D8) },
+ { UINT32_C(0x0831CD9D), UINT32_C(0x04B04313), UINT32_C(0x0F484946),
+ UINT32_C(0x03B996C8), UINT32_C(0x00F547BB), UINT32_C(0x007A0AA7),
+ UINT32_C(0x065BBAA5), UINT32_C(0x014C49BC), UINT32_C(0x03D6CABB),
+ UINT32_C(0x01EF46B3), UINT32_C(0x05A5D159), UINT32_C(0x0EDE3DB4),
+ UINT32_C(0x00D1B3A0), UINT32_C(0x02F97DFA), UINT32_C(0x0D68EB87),
+ UINT32_C(0x06CE81C0), UINT32_C(0x00D73B27), UINT32_C(0x0342609A),
+ UINT32_C(0x019C049C) } },
+ { { UINT32_C(0x08BC45E5), UINT32_C(0x015B0C25), UINT32_C(0x0B2A43B0),
+ UINT32_C(0x00067BBC), UINT32_C(0x07B24685), UINT32_C(0x0046140C),
+ UINT32_C(0x0157806B), UINT32_C(0x049AE2AD), UINT32_C(0x0113F8DF),
+ UINT32_C(0x06BBA162), UINT32_C(0x0534E07B), UINT32_C(0x086988E1),
+ UINT32_C(0x00E2C213), UINT32_C(0x0513FA95), UINT32_C(0x0EC2A78F),
+ UINT32_C(0x02E28447), UINT32_C(0x011B9FFF), UINT32_C(0x01506FAF),
+ UINT32_C(0x07B4C5A9) },
+ { UINT32_C(0x0AE71753), UINT32_C(0x0151FA30), UINT32_C(0x091691B4),
+ UINT32_C(0x02ACCC22), UINT32_C(0x0BA74B18), UINT32_C(0x0073B635),
+ UINT32_C(0x02F0EB55), UINT32_C(0x0CC9DF51), UINT32_C(0x0784FCDA),
+ UINT32_C(0x0BFAD098), UINT32_C(0x03F5BFD6), UINT32_C(0x006AD5C5),
+ UINT32_C(0x014F12F5), UINT32_C(0x0745527A), UINT32_C(0x03A6506B),
+ UINT32_C(0x015CF2C8), UINT32_C(0x039A3185), UINT32_C(0x077CD12B),
+ UINT32_C(0x02A9BAF3) } },
+ { { UINT32_C(0x00D9229F), UINT32_C(0x039D37CD), UINT32_C(0x0948ECC6),
+ UINT32_C(0x0072BCB0), UINT32_C(0x0A458017), UINT32_C(0x038A159B),
+ UINT32_C(0x0368034D), UINT32_C(0x0B0315FA), UINT32_C(0x01756900),
+ UINT32_C(0x04149285), UINT32_C(0x03FFBD8A), UINT32_C(0x0079E774),
+ UINT32_C(0x0702A2CF), UINT32_C(0x0641C3A8), UINT32_C(0x0F3751BA),
+ UINT32_C(0x028EDF14), UINT32_C(0x090F681A), UINT32_C(0x012CF177),
+ UINT32_C(0x04614034) },
+ { UINT32_C(0x04E4C072), UINT32_C(0x07E207E1), UINT32_C(0x02D8F8F8),
+ UINT32_C(0x013BFA68), UINT32_C(0x0CC798F9), UINT32_C(0x014BAAD6),
+ UINT32_C(0x023BD550), UINT32_C(0x0919F8D1), UINT32_C(0x03C00ADA),
+ UINT32_C(0x0758236E), UINT32_C(0x058602C2), UINT32_C(0x0FA0FE24),
+ UINT32_C(0x01A8C5A6), UINT32_C(0x0026B4C4), UINT32_C(0x0534F014),
+ UINT32_C(0x02CF2A7F), UINT32_C(0x00192714), UINT32_C(0x04B51417),
+ UINT32_C(0x0168C607) } },
+ { { UINT32_C(0x019403A6), UINT32_C(0x04E6BA92), UINT32_C(0x0065202D),
+ UINT32_C(0x06FDAE5F), UINT32_C(0x0AD1C130), UINT32_C(0x05C03BED),
+ UINT32_C(0x00D7CFCE), UINT32_C(0x02B63E74), UINT32_C(0x06CD8D97),
+ UINT32_C(0x00E7608A), UINT32_C(0x05009FCD), UINT32_C(0x01026095),
+ UINT32_C(0x058890EC), UINT32_C(0x0662F635), UINT32_C(0x0F16F3A2),
+ UINT32_C(0x06B88A1B), UINT32_C(0x000D681A), UINT32_C(0x05689B12),
+ UINT32_C(0x0620658C) },
+ { UINT32_C(0x0B48EFBA), UINT32_C(0x01574FA6), UINT32_C(0x0FC77D17),
+ UINT32_C(0x06CDF2A2), UINT32_C(0x0DCEA8A9), UINT32_C(0x00B1DE26),
+ UINT32_C(0x009A7C7A), UINT32_C(0x0435CC54), UINT32_C(0x06E8AF2E),
+ UINT32_C(0x09AFC5BC), UINT32_C(0x05124055), UINT32_C(0x045BF6E2),
+ UINT32_C(0x0536C8AD), UINT32_C(0x073FE4CD), UINT32_C(0x0A467A40),
+ UINT32_C(0x03EB6B38), UINT32_C(0x05F039C6), UINT32_C(0x00622055),
+ UINT32_C(0x045DF262) } },
+ { { UINT32_C(0x0C5E165D), UINT32_C(0x00A8610A), UINT32_C(0x062AF616),
+ UINT32_C(0x055190B9), UINT32_C(0x0F988454), UINT32_C(0x0395472A),
+ UINT32_C(0x036DCD3E), UINT32_C(0x0FDA6187), UINT32_C(0x036EC91D),
+ UINT32_C(0x0E66FCFC), UINT32_C(0x077BBD1F), UINT32_C(0x0DF3E1C9),
+ UINT32_C(0x040454AC), UINT32_C(0x03004F37), UINT32_C(0x0CBDED62),
+ UINT32_C(0x03DD5570), UINT32_C(0x05724DFF), UINT32_C(0x07B6002A),
+ UINT32_C(0x00B93C70) },
+ { UINT32_C(0x06C8A9BC), UINT32_C(0x032D8B60), UINT32_C(0x0C0850D6),
+ UINT32_C(0x06C94F36), UINT32_C(0x0649CD3A), UINT32_C(0x000C0E51),
+ UINT32_C(0x07B40760), UINT32_C(0x0BFA6092), UINT32_C(0x019FB910),
+ UINT32_C(0x092A27FF), UINT32_C(0x02D6F975), UINT32_C(0x0E910EDA),
+ UINT32_C(0x01FFB3D4), UINT32_C(0x01814FFF), UINT32_C(0x0985A6F8),
+ UINT32_C(0x06787CA6), UINT32_C(0x0B7B7FC6), UINT32_C(0x01532265),
+ UINT32_C(0x06228702) } },
+ { { UINT32_C(0x0391B195), UINT32_C(0x01F1A68F), UINT32_C(0x0AB9DD28),
+ UINT32_C(0x000B690E), UINT32_C(0x0C4FD58F), UINT32_C(0x05292C46),
+ UINT32_C(0x0017D075), UINT32_C(0x010E0044), UINT32_C(0x0709FE41),
+ UINT32_C(0x02F0CD13), UINT32_C(0x003D99BE), UINT32_C(0x0E6F68D8),
+ UINT32_C(0x04608708), UINT32_C(0x05B1F159), UINT32_C(0x0A4CFC70),
+ UINT32_C(0x02FB2946), UINT32_C(0x076D32E5), UINT32_C(0x0482F0ED),
+ UINT32_C(0x06ED3305) },
+ { UINT32_C(0x05C4416F), UINT32_C(0x02270E15), UINT32_C(0x073143E0),
+ UINT32_C(0x02F4151F), UINT32_C(0x099069A7), UINT32_C(0x05437AEB),
+ UINT32_C(0x027A90CA), UINT32_C(0x0A75E48C), UINT32_C(0x013FC627),
+ UINT32_C(0x0300361B), UINT32_C(0x072745C2), UINT32_C(0x0C9DD555),
+ UINT32_C(0x05D86308), UINT32_C(0x03713AF4), UINT32_C(0x01AF9EBC),
+ UINT32_C(0x0157F18F), UINT32_C(0x0E008EAF), UINT32_C(0x0409010B),
+ UINT32_C(0x074F85AA) } },
+ { { UINT32_C(0x045C5FF5), UINT32_C(0x046845EE), UINT32_C(0x074B8893),
+ UINT32_C(0x036C56E2), UINT32_C(0x0CC7B43B), UINT32_C(0x030C1789),
+ UINT32_C(0x05916A34), UINT32_C(0x0F2AFB7C), UINT32_C(0x0154EDEB),
+ UINT32_C(0x0407BF3E), UINT32_C(0x05362D80), UINT32_C(0x0CCA97B1),
+ UINT32_C(0x041BFF6D), UINT32_C(0x05DAE466), UINT32_C(0x07D9D691),
+ UINT32_C(0x023DBF89), UINT32_C(0x05162F52), UINT32_C(0x000CBF57),
+ UINT32_C(0x0154EDFD) },
+ { UINT32_C(0x08BF712A), UINT32_C(0x06009B91), UINT32_C(0x0AFFBD38),
+ UINT32_C(0x03FD6332), UINT32_C(0x06CD1DC8), UINT32_C(0x06C678BF),
+ UINT32_C(0x0040E5CE), UINT32_C(0x02743457), UINT32_C(0x060DF50E),
+ UINT32_C(0x0691C947), UINT32_C(0x0746D675), UINT32_C(0x0D68B325),
+ UINT32_C(0x0290D55C), UINT32_C(0x015B144C), UINT32_C(0x05A0332F),
+ UINT32_C(0x0563DB53), UINT32_C(0x04CED890), UINT32_C(0x04AC67C8),
+ UINT32_C(0x04387D35) } },
+ { { UINT32_C(0x0A66FBB8), UINT32_C(0x05FDBF97), UINT32_C(0x0A47124E),
+ UINT32_C(0x03FED0AF), UINT32_C(0x082B44B9), UINT32_C(0x0244ADCE),
+ UINT32_C(0x05980D8A), UINT32_C(0x0687D615), UINT32_C(0x07E4662D),
+ UINT32_C(0x03F2180A), UINT32_C(0x04BA4DB6), UINT32_C(0x03FE8141),
+ UINT32_C(0x04B2BC20), UINT32_C(0x006DF40A), UINT32_C(0x0AB2698D),
+ UINT32_C(0x0365D173), UINT32_C(0x08DE4017), UINT32_C(0x079E6BA2),
+ UINT32_C(0x02C7A033) },
+ { UINT32_C(0x075570A1), UINT32_C(0x06A48901), UINT32_C(0x0492AC74),
+ UINT32_C(0x077D2844), UINT32_C(0x0DB87BFD), UINT32_C(0x01D218B2),
+ UINT32_C(0x0522DA69), UINT32_C(0x0B4F7CF4), UINT32_C(0x00841BC4),
+ UINT32_C(0x0E420155), UINT32_C(0x00BDBB35), UINT32_C(0x0BB5E945),
+ UINT32_C(0x06FE4123), UINT32_C(0x0435B025), UINT32_C(0x0ACCEA16),
+ UINT32_C(0x00BE381C), UINT32_C(0x0C3F4D0D), UINT32_C(0x03862E1B),
+ UINT32_C(0x04A46652) } },
+ { { UINT32_C(0x009B3F23), UINT32_C(0x00CFBD75), UINT32_C(0x069BE715),
+ UINT32_C(0x009C9678), UINT32_C(0x013F2EB4), UINT32_C(0x04EE1278),
+ UINT32_C(0x06387FDD), UINT32_C(0x0329F9F1), UINT32_C(0x048E212F),
+ UINT32_C(0x0F24F073), UINT32_C(0x008F0FD5), UINT32_C(0x02F3DAFE),
+ UINT32_C(0x039C6160), UINT32_C(0x018F4D1D), UINT32_C(0x0E9D0F18),
+ UINT32_C(0x066F0916), UINT32_C(0x09931852), UINT32_C(0x040EEBEA),
+ UINT32_C(0x032448BB) },
+ { UINT32_C(0x0C226E2C), UINT32_C(0x07706840), UINT32_C(0x0D3C1C34),
+ UINT32_C(0x07E4BA61), UINT32_C(0x0A51E4A1), UINT32_C(0x038E00FB),
+ UINT32_C(0x06E25F2A), UINT32_C(0x0C263EC1), UINT32_C(0x078D29D8),
+ UINT32_C(0x07C7272D), UINT32_C(0x0572E10B), UINT32_C(0x0B83C0DC),
+ UINT32_C(0x02179CDB), UINT32_C(0x066C84E3), UINT32_C(0x07675170),
+ UINT32_C(0x00BDF2F6), UINT32_C(0x0F52477D), UINT32_C(0x00FE3151),
+ UINT32_C(0x05460029) } },
+ { { UINT32_C(0x0DA35EBF), UINT32_C(0x066B421E), UINT32_C(0x07116B3C),
+ UINT32_C(0x077330D7), UINT32_C(0x0CE4D316), UINT32_C(0x027318E8),
+ UINT32_C(0x04CA0B0C), UINT32_C(0x06EFBBCB), UINT32_C(0x027FF80D),
+ UINT32_C(0x07B56250), UINT32_C(0x03FBF443), UINT32_C(0x0E5E86E3),
+ UINT32_C(0x01050837), UINT32_C(0x027F8C63), UINT32_C(0x0040889F),
+ UINT32_C(0x0233D7DC), UINT32_C(0x085C1EB3), UINT32_C(0x0190948B),
+ UINT32_C(0x02A42839) },
+ { UINT32_C(0x046020F0), UINT32_C(0x04A9DB75), UINT32_C(0x0C1F003A),
+ UINT32_C(0x05C091F8), UINT32_C(0x069D2F26), UINT32_C(0x05CBE28A),
+ UINT32_C(0x00B98CA0), UINT32_C(0x0C44F77C), UINT32_C(0x06591FB2),
+ UINT32_C(0x0336AA95), UINT32_C(0x05A28AC0), UINT32_C(0x0A8AC670),
+ UINT32_C(0x0735C3E5), UINT32_C(0x049911B7), UINT32_C(0x04F28112),
+ UINT32_C(0x0532B634), UINT32_C(0x00A3E84F), UINT32_C(0x06EA385D),
+ UINT32_C(0x01F2A03A) } },
+ { { UINT32_C(0x06A09384), UINT32_C(0x0260C3CA), UINT32_C(0x092529A6),
+ UINT32_C(0x016D77CF), UINT32_C(0x0B8E2D9A), UINT32_C(0x01055E02),
+ UINT32_C(0x055BC4FD), UINT32_C(0x0CA2C0AF), UINT32_C(0x03A4ABF9),
+ UINT32_C(0x0290D54C), UINT32_C(0x07B6E3EE), UINT32_C(0x07074346),
+ UINT32_C(0x047E1F90), UINT32_C(0x06D2B228), UINT32_C(0x064225A4),
+ UINT32_C(0x06F125F2), UINT32_C(0x0D66264B), UINT32_C(0x01B0F052),
+ UINT32_C(0x070B7573) },
+ { UINT32_C(0x0B2264B8), UINT32_C(0x04D4A619), UINT32_C(0x0AC1F517),
+ UINT32_C(0x049FE3F8), UINT32_C(0x08BEDBF0), UINT32_C(0x01EB5F66),
+ UINT32_C(0x0145535A), UINT32_C(0x042D102F), UINT32_C(0x04447303),
+ UINT32_C(0x067B60A3), UINT32_C(0x043A9645), UINT32_C(0x0D502303),
+ UINT32_C(0x0669CEC4), UINT32_C(0x052699E3), UINT32_C(0x0E740F66),
+ UINT32_C(0x011DF90D), UINT32_C(0x006017A2), UINT32_C(0x03C99A89),
+ UINT32_C(0x069500E3) } },
+ { { UINT32_C(0x0184B415), UINT32_C(0x06F26FDD), UINT32_C(0x01E5007E),
+ UINT32_C(0x038A2542), UINT32_C(0x0DA8A807), UINT32_C(0x078F5424),
+ UINT32_C(0x04D3FA96), UINT32_C(0x0A456FBD), UINT32_C(0x062853C6),
+ UINT32_C(0x017211A6), UINT32_C(0x049854E5), UINT32_C(0x0A8F3585),
+ UINT32_C(0x079A3009), UINT32_C(0x07AFB481), UINT32_C(0x081AFE37),
+ UINT32_C(0x031A410E), UINT32_C(0x0EADF215), UINT32_C(0x02649FCC),
+ UINT32_C(0x00A68E58) },
+ { UINT32_C(0x0A87B468), UINT32_C(0x0744629E), UINT32_C(0x010788AE),
+ UINT32_C(0x00DA10EC), UINT32_C(0x07BD591B), UINT32_C(0x07BC474E),
+ UINT32_C(0x02AE7E4E), UINT32_C(0x074ED106), UINT32_C(0x059550A8),
+ UINT32_C(0x0C2FBDF7), UINT32_C(0x078A0AB0), UINT32_C(0x019D9F46),
+ UINT32_C(0x030FE4BE), UINT32_C(0x00DF9F6A), UINT32_C(0x04D2A38F),
+ UINT32_C(0x052B1469), UINT32_C(0x005AE2E6), UINT32_C(0x07E6C02D),
+ UINT32_C(0x0283843A) } },
+ { { UINT32_C(0x0784F95B), UINT32_C(0x01616DEF), UINT32_C(0x056C696A),
+ UINT32_C(0x03B98963), UINT32_C(0x085F2426), UINT32_C(0x07BDAC89),
+ UINT32_C(0x05EAFBF9), UINT32_C(0x09A4C8CC), UINT32_C(0x0558AA78),
+ UINT32_C(0x0D041BCD), UINT32_C(0x04BDD0B5), UINT32_C(0x037216D5),
+ UINT32_C(0x06BD4C93), UINT32_C(0x0042A72A), UINT32_C(0x0B4A6F17),
+ UINT32_C(0x0177EE47), UINT32_C(0x028752B7), UINT32_C(0x0750D182),
+ UINT32_C(0x04BE36EA) },
+ { UINT32_C(0x01DCCF70), UINT32_C(0x05249FC9), UINT32_C(0x063EE812),
+ UINT32_C(0x0362E5A3), UINT32_C(0x017DB2F0), UINT32_C(0x05508041),
+ UINT32_C(0x078C050C), UINT32_C(0x0C161A22), UINT32_C(0x078E338A),
+ UINT32_C(0x0BB9EF36), UINT32_C(0x001185AB), UINT32_C(0x09058EAD),
+ UINT32_C(0x00D3AF42), UINT32_C(0x02FBEDA9), UINT32_C(0x0996A3FA),
+ UINT32_C(0x02E0B934), UINT32_C(0x08F57F1A), UINT32_C(0x025EB5CE),
+ UINT32_C(0x0254456F) } },
+ { { UINT32_C(0x08F9B528), UINT32_C(0x04174130), UINT32_C(0x013E12B3),
+ UINT32_C(0x022B697D), UINT32_C(0x0B0CEF11), UINT32_C(0x03A2E8E2),
+ UINT32_C(0x00D96F4F), UINT32_C(0x0B4B7DF9), UINT32_C(0x0056458A),
+ UINT32_C(0x083BA433), UINT32_C(0x068A2473), UINT32_C(0x0D586B52),
+ UINT32_C(0x00ACD634), UINT32_C(0x01D1EAD7), UINT32_C(0x03036203),
+ UINT32_C(0x000C0094), UINT32_C(0x047A01B9), UINT32_C(0x0212F1A6),
+ UINT32_C(0x04D19921) },
+ { UINT32_C(0x0837554E), UINT32_C(0x02ECC2C4), UINT32_C(0x0B80FBFE),
+ UINT32_C(0x07A5E03B), UINT32_C(0x041C1C48), UINT32_C(0x043DD0D4),
+ UINT32_C(0x04C36416), UINT32_C(0x0869B643), UINT32_C(0x028DC568),
+ UINT32_C(0x0F15A5D2), UINT32_C(0x00D7FC36), UINT32_C(0x04D7306E),
+ UINT32_C(0x0306A221), UINT32_C(0x04950B4A), UINT32_C(0x06DC4FCA),
+ UINT32_C(0x048D5878), UINT32_C(0x0032B7DE), UINT32_C(0x000E5973),
+ UINT32_C(0x04FFCD15) } },
+ },
+ {
+ { { UINT32_C(0x051368EE), UINT32_C(0x03C182D8), UINT32_C(0x0233E580),
+ UINT32_C(0x0467AAF9), UINT32_C(0x038EEE52), UINT32_C(0x01F8CCEB),
+ UINT32_C(0x04E7863B), UINT32_C(0x0974DE7F), UINT32_C(0x07C7D47D),
+ UINT32_C(0x01F4B806), UINT32_C(0x0059F163), UINT32_C(0x07DFA5B8),
+ UINT32_C(0x0449B3CD), UINT32_C(0x0378D1F4), UINT32_C(0x03486C59),
+ UINT32_C(0x02FFDC03), UINT32_C(0x0854568F), UINT32_C(0x017FDD91),
+ UINT32_C(0x0384B0DC) },
+ { UINT32_C(0x08A3F84B), UINT32_C(0x065DE2C1), UINT32_C(0x085945B9),
+ UINT32_C(0x04E5C55A), UINT32_C(0x06CB12ED), UINT32_C(0x07B741CC),
+ UINT32_C(0x05B2C0EB), UINT32_C(0x0809AC7E), UINT32_C(0x04A46CA2),
+ UINT32_C(0x061FF16D), UINT32_C(0x03744313), UINT32_C(0x0C777A3B),
+ UINT32_C(0x0207FD18), UINT32_C(0x0539771F), UINT32_C(0x01004BCB),
+ UINT32_C(0x04A8FC6F), UINT32_C(0x0F0A63E8), UINT32_C(0x02373910),
+ UINT32_C(0x072840F7) } },
+ { { UINT32_C(0x0E024391), UINT32_C(0x02781D5D), UINT32_C(0x05026331),
+ UINT32_C(0x025635CD), UINT32_C(0x0492939D), UINT32_C(0x00222466),
+ UINT32_C(0x0456BF4C), UINT32_C(0x07C8DEE7), UINT32_C(0x000178A5),
+ UINT32_C(0x051D50AE), UINT32_C(0x02CE451F), UINT32_C(0x01814C6B),
+ UINT32_C(0x0265AE7F), UINT32_C(0x0312E044), UINT32_C(0x0848FF64),
+ UINT32_C(0x013BB3DA), UINT32_C(0x0C153136), UINT32_C(0x019DF825),
+ UINT32_C(0x0462A6B6) },
+ { UINT32_C(0x0E9AB68C), UINT32_C(0x04B05DA9), UINT32_C(0x04C2481A),
+ UINT32_C(0x076E7298), UINT32_C(0x09F0C636), UINT32_C(0x01F7D7D4),
+ UINT32_C(0x00F9BB8A), UINT32_C(0x0F077B4D), UINT32_C(0x0259165A),
+ UINT32_C(0x0592DC29), UINT32_C(0x02303769), UINT32_C(0x0EDF23B9),
+ UINT32_C(0x06E3C4F3), UINT32_C(0x026481C0), UINT32_C(0x033547D1),
+ UINT32_C(0x04349C82), UINT32_C(0x0FB49FD0), UINT32_C(0x03D48B1E),
+ UINT32_C(0x00EDD6A9) } },
+ { { UINT32_C(0x09496A3E), UINT32_C(0x0779CC41), UINT32_C(0x0F31204C),
+ UINT32_C(0x01DD9727), UINT32_C(0x0B88711D), UINT32_C(0x0531C3F2),
+ UINT32_C(0x04294797), UINT32_C(0x043683B3), UINT32_C(0x05DBB4CC),
+ UINT32_C(0x06B27F93), UINT32_C(0x04CEFE76), UINT32_C(0x02EF8CFB),
+ UINT32_C(0x065C5182), UINT32_C(0x051D70E4), UINT32_C(0x0B92D89E),
+ UINT32_C(0x015A48BA), UINT32_C(0x00689714), UINT32_C(0x02F0F899),
+ UINT32_C(0x03A05527) },
+ { UINT32_C(0x04B88B67), UINT32_C(0x0337896D), UINT32_C(0x0AC27DF4),
+ UINT32_C(0x02CFE168), UINT32_C(0x003AC24A), UINT32_C(0x0287B4A1),
+ UINT32_C(0x04C9337D), UINT32_C(0x0480FCAA), UINT32_C(0x0385E818),
+ UINT32_C(0x0698332E), UINT32_C(0x00B177F0), UINT32_C(0x088F3F24),
+ UINT32_C(0x056A2745), UINT32_C(0x06A53116), UINT32_C(0x0101CC1F),
+ UINT32_C(0x013E9DBA), UINT32_C(0x06227F55), UINT32_C(0x03D027B4),
+ UINT32_C(0x02CD8668) } },
+ { { UINT32_C(0x0076683D), UINT32_C(0x076BEE0D), UINT32_C(0x0D7D7B4C),
+ UINT32_C(0x0108643A), UINT32_C(0x0F993C30), UINT32_C(0x07B71D95),
+ UINT32_C(0x029E4008), UINT32_C(0x034C59B6), UINT32_C(0x00E01922),
+ UINT32_C(0x062750BC), UINT32_C(0x00DA23D4), UINT32_C(0x0BF7FFAF),
+ UINT32_C(0x016F2E12), UINT32_C(0x0546677C), UINT32_C(0x038327C5),
+ UINT32_C(0x07930C31), UINT32_C(0x03297791), UINT32_C(0x06E93707),
+ UINT32_C(0x0731AA7A) },
+ { UINT32_C(0x0B99594F), UINT32_C(0x0300795B), UINT32_C(0x0C5F3D55),
+ UINT32_C(0x01C1DE37), UINT32_C(0x02FD7C9F), UINT32_C(0x001493C6),
+ UINT32_C(0x07BB523B), UINT32_C(0x08D81CF9), UINT32_C(0x000974EA),
+ UINT32_C(0x04B4CFBC), UINT32_C(0x04354B41), UINT32_C(0x0644AB94),
+ UINT32_C(0x0251A61B), UINT32_C(0x0555FAF5), UINT32_C(0x03713B98),
+ UINT32_C(0x0597947C), UINT32_C(0x061DDC4D), UINT32_C(0x01C1E655),
+ UINT32_C(0x05DDAC10) } },
+ { { UINT32_C(0x02662A6A), UINT32_C(0x0721BA5B), UINT32_C(0x08BFB362),
+ UINT32_C(0x02A23D78), UINT32_C(0x04F666A1), UINT32_C(0x060FB317),
+ UINT32_C(0x0729C7ED), UINT32_C(0x09B1B389), UINT32_C(0x031F8856),
+ UINT32_C(0x06913D9E), UINT32_C(0x0779217C), UINT32_C(0x0A3634CD),
+ UINT32_C(0x06292B3F), UINT32_C(0x01E6FDE6), UINT32_C(0x0F97C1F0),
+ UINT32_C(0x0698999D), UINT32_C(0x0D773548), UINT32_C(0x01ED7CE9),
+ UINT32_C(0x00FFC55A) },
+ { UINT32_C(0x0D76A58E), UINT32_C(0x0195519C), UINT32_C(0x02C2F7AB),
+ UINT32_C(0x061D1820), UINT32_C(0x09A1252D), UINT32_C(0x07772B8E),
+ UINT32_C(0x05554A30), UINT32_C(0x0687BCF0), UINT32_C(0x06CE8978),
+ UINT32_C(0x0961AAB6), UINT32_C(0x0611194A), UINT32_C(0x097F9E4C),
+ UINT32_C(0x07E8543A), UINT32_C(0x076F7FC5), UINT32_C(0x039F7F09),
+ UINT32_C(0x074DF751), UINT32_C(0x000B4239), UINT32_C(0x010D59A8),
+ UINT32_C(0x03F90438) } },
+ { { UINT32_C(0x0DA09D54), UINT32_C(0x06AF7630), UINT32_C(0x02BF95A8),
+ UINT32_C(0x055D4226), UINT32_C(0x059FD1D0), UINT32_C(0x06B060C9),
+ UINT32_C(0x07D177E4), UINT32_C(0x03F4F180), UINT32_C(0x021C92CF),
+ UINT32_C(0x02D3DD59), UINT32_C(0x048EB409), UINT32_C(0x07E17E45),
+ UINT32_C(0x05EEE57B), UINT32_C(0x01B0CED0), UINT32_C(0x0E7E68AB),
+ UINT32_C(0x043C0C09), UINT32_C(0x0A766549), UINT32_C(0x0006D7E3),
+ UINT32_C(0x06CB262D) },
+ { UINT32_C(0x045007F6), UINT32_C(0x077C78B0), UINT32_C(0x006040A8),
+ UINT32_C(0x06713C8D), UINT32_C(0x09341EBC), UINT32_C(0x0236E27C),
+ UINT32_C(0x055A82B4), UINT32_C(0x06F7750F), UINT32_C(0x0669305F),
+ UINT32_C(0x017EE81A), UINT32_C(0x01216750), UINT32_C(0x0ED65974),
+ UINT32_C(0x03FEF768), UINT32_C(0x01F1588F), UINT32_C(0x0E26B74A),
+ UINT32_C(0x078B116C), UINT32_C(0x0B1F0885), UINT32_C(0x05EF5659),
+ UINT32_C(0x02E63355) } },
+ { { UINT32_C(0x0FB0D3ED), UINT32_C(0x003E5A50), UINT32_C(0x0C55AAAF),
+ UINT32_C(0x0289AC3D), UINT32_C(0x05EF5174), UINT32_C(0x0719E0EE),
+ UINT32_C(0x01A9C3D8), UINT32_C(0x0DE06CD1), UINT32_C(0x07ED918A),
+ UINT32_C(0x0BF6A107), UINT32_C(0x06149FAB), UINT32_C(0x0880197B),
+ UINT32_C(0x060CCF4B), UINT32_C(0x015F00A0), UINT32_C(0x026084C4),
+ UINT32_C(0x06C15B05), UINT32_C(0x04E4098B), UINT32_C(0x063ED2C8),
+ UINT32_C(0x058C6384) },
+ { UINT32_C(0x040FA002), UINT32_C(0x01B4B412), UINT32_C(0x08A0A8F3),
+ UINT32_C(0x015D5274), UINT32_C(0x0B3D6C31), UINT32_C(0x0241F67E),
+ UINT32_C(0x0383A0C0), UINT32_C(0x0D2CCE25), UINT32_C(0x07A721DD),
+ UINT32_C(0x0FD7994F), UINT32_C(0x04852FC2), UINT32_C(0x0EEB0BC8),
+ UINT32_C(0x05CF0812), UINT32_C(0x06594895), UINT32_C(0x0F6294B1),
+ UINT32_C(0x047E9685), UINT32_C(0x03C1ADBF), UINT32_C(0x00B567D9),
+ UINT32_C(0x005C4AB1) } },
+ { { UINT32_C(0x0696BA83), UINT32_C(0x06603D4F), UINT32_C(0x0885A978),
+ UINT32_C(0x011657F3), UINT32_C(0x0774554D), UINT32_C(0x01806495),
+ UINT32_C(0x01B33254), UINT32_C(0x0A1BB9D6), UINT32_C(0x03A6DF67),
+ UINT32_C(0x03AB9C8C), UINT32_C(0x0737480A), UINT32_C(0x00203D86),
+ UINT32_C(0x04CE906D), UINT32_C(0x0751DBBB), UINT32_C(0x01AB53E1),
+ UINT32_C(0x01405C83), UINT32_C(0x0894C75D), UINT32_C(0x02ACD3EC),
+ UINT32_C(0x02926ACF) },
+ { UINT32_C(0x0E8C01EF), UINT32_C(0x043477F5), UINT32_C(0x068FA361),
+ UINT32_C(0x07FC59F7), UINT32_C(0x04967BAC), UINT32_C(0x0236FCA8),
+ UINT32_C(0x053E4F2C), UINT32_C(0x02BA3E65), UINT32_C(0x05F9F6F0),
+ UINT32_C(0x064247B4), UINT32_C(0x021B5084), UINT32_C(0x0894325C),
+ UINT32_C(0x04EFE79C), UINT32_C(0x0188ED3F), UINT32_C(0x0D4FE809),
+ UINT32_C(0x044BAE94), UINT32_C(0x0C8112AE), UINT32_C(0x05C68229),
+ UINT32_C(0x07D43896) } },
+ { { UINT32_C(0x046C1FB6), UINT32_C(0x077D8036), UINT32_C(0x0295DD8C),
+ UINT32_C(0x04452F28), UINT32_C(0x0B23C464), UINT32_C(0x0644D5BA),
+ UINT32_C(0x05069E01), UINT32_C(0x090DF002), UINT32_C(0x03B40591),
+ UINT32_C(0x01F28172), UINT32_C(0x06905D57), UINT32_C(0x0DF1C74E),
+ UINT32_C(0x05CE4958), UINT32_C(0x079BDE8E), UINT32_C(0x0D3F2F1A),
+ UINT32_C(0x04E07C5F), UINT32_C(0x088FF1FA), UINT32_C(0x05C72030),
+ UINT32_C(0x03BE09B6) },
+ { UINT32_C(0x0A78B572), UINT32_C(0x052D6B4B), UINT32_C(0x091101F1),
+ UINT32_C(0x01EB64B1), UINT32_C(0x0AA87947), UINT32_C(0x01ECBA5D),
+ UINT32_C(0x03E02CC6), UINT32_C(0x0FDA4839), UINT32_C(0x02FF59B8),
+ UINT32_C(0x0CA6ED0F), UINT32_C(0x06C0BD08), UINT32_C(0x0948203F),
+ UINT32_C(0x00417563), UINT32_C(0x03ED5E44), UINT32_C(0x09D9F1D1),
+ UINT32_C(0x043138E9), UINT32_C(0x087C76A9), UINT32_C(0x0436C464),
+ UINT32_C(0x065BC41C) } },
+ { { UINT32_C(0x0878503F), UINT32_C(0x02F87D12), UINT32_C(0x02476646),
+ UINT32_C(0x0245CC6E), UINT32_C(0x0D4C90B6), UINT32_C(0x03F5323B),
+ UINT32_C(0x05B608C2), UINT32_C(0x0E11AA7B), UINT32_C(0x03BBF4CC),
+ UINT32_C(0x0E62F0E5), UINT32_C(0x03FDD83B), UINT32_C(0x01FAF12E),
+ UINT32_C(0x00E02D6E), UINT32_C(0x0404666D), UINT32_C(0x0A39480C),
+ UINT32_C(0x05904EE4), UINT32_C(0x0D422EC7), UINT32_C(0x009272AF),
+ UINT32_C(0x065E518B) },
+ { UINT32_C(0x0947A480), UINT32_C(0x0638CCA2), UINT32_C(0x0B86EFCD),
+ UINT32_C(0x04C5912B), UINT32_C(0x0416F142), UINT32_C(0x066CD9A8),
+ UINT32_C(0x0062F342), UINT32_C(0x030CBA20), UINT32_C(0x0675D320),
+ UINT32_C(0x02C4F492), UINT32_C(0x04263BD8), UINT32_C(0x0B10ED23),
+ UINT32_C(0x00458FD7), UINT32_C(0x064D3804), UINT32_C(0x030CE729),
+ UINT32_C(0x055F1902), UINT32_C(0x005C9288), UINT32_C(0x05B65212),
+ UINT32_C(0x03463ED7) } },
+ { { UINT32_C(0x0002FA40), UINT32_C(0x019C27F1), UINT32_C(0x00CBB750),
+ UINT32_C(0x03DB3435), UINT32_C(0x07286E98), UINT32_C(0x0279AAFF),
+ UINT32_C(0x06D46384), UINT32_C(0x0A49DB6A), UINT32_C(0x0137478E),
+ UINT32_C(0x07036ADC), UINT32_C(0x0156A020), UINT32_C(0x03444CA2),
+ UINT32_C(0x014A059C), UINT32_C(0x062920C4), UINT32_C(0x05340D48),
+ UINT32_C(0x07AB2B40), UINT32_C(0x060E1CBF), UINT32_C(0x06DBC3C7),
+ UINT32_C(0x02A6E451) },
+ { UINT32_C(0x02203C97), UINT32_C(0x0318811D), UINT32_C(0x02528A1B),
+ UINT32_C(0x04016192), UINT32_C(0x002C3086), UINT32_C(0x031D212C),
+ UINT32_C(0x03FC1DA6), UINT32_C(0x0E3A234E), UINT32_C(0x048A2B44),
+ UINT32_C(0x046AB91A), UINT32_C(0x03F8806B), UINT32_C(0x073943DE),
+ UINT32_C(0x02B12570), UINT32_C(0x024DEAC9), UINT32_C(0x08C3B2AA),
+ UINT32_C(0x06910619), UINT32_C(0x01EBE0ED), UINT32_C(0x04FB5E82),
+ UINT32_C(0x068938E9) } },
+ { { UINT32_C(0x06A8409F), UINT32_C(0x03819FA0), UINT32_C(0x04EBCC7D),
+ UINT32_C(0x05295667), UINT32_C(0x00BD47C4), UINT32_C(0x02F397A5),
+ UINT32_C(0x00B133A1), UINT32_C(0x073E4AFA), UINT32_C(0x0760D526),
+ UINT32_C(0x0D372CAA), UINT32_C(0x0068759A), UINT32_C(0x09A7813F),
+ UINT32_C(0x000A0F4E), UINT32_C(0x01EAF02F), UINT32_C(0x09F88085),
+ UINT32_C(0x0117D84A), UINT32_C(0x0B583330), UINT32_C(0x07FFDDE3),
+ UINT32_C(0x00C0B54F) },
+ { UINT32_C(0x0593BC03), UINT32_C(0x05294489), UINT32_C(0x0C95575C),
+ UINT32_C(0x06A16930), UINT32_C(0x07E57953), UINT32_C(0x04258C35),
+ UINT32_C(0x027EF886), UINT32_C(0x09A129B5), UINT32_C(0x034A8854),
+ UINT32_C(0x0BB5AF8F), UINT32_C(0x0469C5BA), UINT32_C(0x000C4849),
+ UINT32_C(0x00CE9665), UINT32_C(0x02759E17), UINT32_C(0x087D763E),
+ UINT32_C(0x03FB717D), UINT32_C(0x0F3FD635), UINT32_C(0x007CA5FC),
+ UINT32_C(0x01D3A8B2) } },
+ { { UINT32_C(0x068172DA), UINT32_C(0x05B9F788), UINT32_C(0x0612E973),
+ UINT32_C(0x0052E050), UINT32_C(0x099B39D0), UINT32_C(0x061F5F0F),
+ UINT32_C(0x0799AF1A), UINT32_C(0x0466C10B), UINT32_C(0x0680E8D3),
+ UINT32_C(0x04361EC0), UINT32_C(0x05210B2E), UINT32_C(0x0DF23AB3),
+ UINT32_C(0x02B3A0B2), UINT32_C(0x0380194E), UINT32_C(0x09D77AFB),
+ UINT32_C(0x06BCE4AB), UINT32_C(0x05EAD2E7), UINT32_C(0x02DD9B74),
+ UINT32_C(0x033D66F2) },
+ { UINT32_C(0x0BF1C993), UINT32_C(0x04E38933), UINT32_C(0x02FC4FAF),
+ UINT32_C(0x0461AE62), UINT32_C(0x0F6D1B38), UINT32_C(0x021B47B4),
+ UINT32_C(0x01F061C9), UINT32_C(0x051CC234), UINT32_C(0x01C8E186),
+ UINT32_C(0x001C7EF9), UINT32_C(0x0664E0E2), UINT32_C(0x048E8CC7),
+ UINT32_C(0x015C9670), UINT32_C(0x0481B87A), UINT32_C(0x05BCAD05),
+ UINT32_C(0x003B38E6), UINT32_C(0x00886CA1), UINT32_C(0x00B0D706),
+ UINT32_C(0x026557A5) } },
+ { { UINT32_C(0x05F0E5DA), UINT32_C(0x03682274), UINT32_C(0x0F4E352F),
+ UINT32_C(0x0105AE83), UINT32_C(0x0A820E71), UINT32_C(0x022C5CEC),
+ UINT32_C(0x03DD2CFC), UINT32_C(0x0298E61A), UINT32_C(0x00120917),
+ UINT32_C(0x0B0B64DF), UINT32_C(0x03C1333E), UINT32_C(0x03C5D41B),
+ UINT32_C(0x04B5D215), UINT32_C(0x0187971D), UINT32_C(0x0389EAD7),
+ UINT32_C(0x03CFCCE2), UINT32_C(0x063F13FF), UINT32_C(0x0652C165),
+ UINT32_C(0x07742EFC) },
+ { UINT32_C(0x0931C0F0), UINT32_C(0x018F45E5), UINT32_C(0x0C4C756D),
+ UINT32_C(0x0537A469), UINT32_C(0x0433FB52), UINT32_C(0x0754DECC),
+ UINT32_C(0x04D896F7), UINT32_C(0x04335219), UINT32_C(0x073BBC0E),
+ UINT32_C(0x083BA2C0), UINT32_C(0x012D3B9E), UINT32_C(0x023EABD5),
+ UINT32_C(0x04475CF9), UINT32_C(0x07A0DA39), UINT32_C(0x088DDF48),
+ UINT32_C(0x002FFFDF), UINT32_C(0x0D8B7000), UINT32_C(0x06504250),
+ UINT32_C(0x00F1A818) } },
+ { { UINT32_C(0x052228CC), UINT32_C(0x06FA4348), UINT32_C(0x0F049E30),
+ UINT32_C(0x0713CA99), UINT32_C(0x0E5D39FE), UINT32_C(0x0057B8DA),
+ UINT32_C(0x003125E1), UINT32_C(0x0CC15492), UINT32_C(0x07700BE8),
+ UINT32_C(0x08CFE785), UINT32_C(0x00CEB57F), UINT32_C(0x0F478327),
+ UINT32_C(0x05A00945), UINT32_C(0x0490F14E), UINT32_C(0x025BA378),
+ UINT32_C(0x060ED998), UINT32_C(0x01B249B5), UINT32_C(0x0023BC4C),
+ UINT32_C(0x04DEDEC8) },
+ { UINT32_C(0x0BA1E090), UINT32_C(0x027EBAC8), UINT32_C(0x0DD6FE71),
+ UINT32_C(0x01F0ADDC), UINT32_C(0x0549F634), UINT32_C(0x06BE8416),
+ UINT32_C(0x02F156E2), UINT32_C(0x0A531A53), UINT32_C(0x00AFBE73),
+ UINT32_C(0x0FFF18EB), UINT32_C(0x0020C1DC), UINT32_C(0x0F409F61),
+ UINT32_C(0x04E3859C), UINT32_C(0x015D5ECF), UINT32_C(0x03B3F268),
+ UINT32_C(0x0288B503), UINT32_C(0x03A276BD), UINT32_C(0x0286EE9C),
+ UINT32_C(0x03166F91) } },
+ { { UINT32_C(0x0F1CAC2C), UINT32_C(0x035777A8), UINT32_C(0x0AF34113),
+ UINT32_C(0x050DD855), UINT32_C(0x0B6BC9C1), UINT32_C(0x07010D91),
+ UINT32_C(0x0452008D), UINT32_C(0x0471A3DA), UINT32_C(0x05830FDC),
+ UINT32_C(0x0F222BBE), UINT32_C(0x04848384), UINT32_C(0x049CFD4D),
+ UINT32_C(0x01817D66), UINT32_C(0x0724627E), UINT32_C(0x082270B8),
+ UINT32_C(0x07ED5A0F), UINT32_C(0x0EEA015A), UINT32_C(0x0700F77E),
+ UINT32_C(0x007E36E1) },
+ { UINT32_C(0x09244F78), UINT32_C(0x049DAC0A), UINT32_C(0x0573D581),
+ UINT32_C(0x001D1B4C), UINT32_C(0x0F0116EB), UINT32_C(0x03CFFD42),
+ UINT32_C(0x043FFF66), UINT32_C(0x048523A0), UINT32_C(0x0671CEF3),
+ UINT32_C(0x0EC2D7AF), UINT32_C(0x0049EBD0), UINT32_C(0x0F4034B6),
+ UINT32_C(0x05C34B54), UINT32_C(0x025E680B), UINT32_C(0x0D2C5BEA),
+ UINT32_C(0x06F544F6), UINT32_C(0x0B0CFA5A), UINT32_C(0x018276AE),
+ UINT32_C(0x077D6B16) } },
+ },
+ {
+ { { UINT32_C(0x00E10587), UINT32_C(0x01885D11), UINT32_C(0x00A74863),
+ UINT32_C(0x02F34C13), UINT32_C(0x0BD4B6A2), UINT32_C(0x00E26C23),
+ UINT32_C(0x07F483FF), UINT32_C(0x0A97D9DC), UINT32_C(0x02338A61),
+ UINT32_C(0x07F72547), UINT32_C(0x03535AFC), UINT32_C(0x0B8E96B4),
+ UINT32_C(0x001E804D), UINT32_C(0x03BD1DFE), UINT32_C(0x0A6ED29A),
+ UINT32_C(0x0634588A), UINT32_C(0x0F0F6D32), UINT32_C(0x0117DDE8),
+ UINT32_C(0x037107C5) },
+ { UINT32_C(0x0BF698BD), UINT32_C(0x0671195E), UINT32_C(0x0E9DC570),
+ UINT32_C(0x052CBC52), UINT32_C(0x0C08C8ED), UINT32_C(0x04213081),
+ UINT32_C(0x00A08E33), UINT32_C(0x0A4BC1ED), UINT32_C(0x00B396EB),
+ UINT32_C(0x0FF34D08), UINT32_C(0x04A4BDD9), UINT32_C(0x0A6F615E),
+ UINT32_C(0x0534B5A0), UINT32_C(0x0057D6A7), UINT32_C(0x0F6CE02C),
+ UINT32_C(0x06F6315B), UINT32_C(0x0D666709), UINT32_C(0x050AF998),
+ UINT32_C(0x006F0E3F) } },
+ { { UINT32_C(0x06965640), UINT32_C(0x0081356B), UINT32_C(0x0F41E038),
+ UINT32_C(0x06713218), UINT32_C(0x0FB9E806), UINT32_C(0x0121D001),
+ UINT32_C(0x07B97EDD), UINT32_C(0x0CDDEFA2), UINT32_C(0x0585D94D),
+ UINT32_C(0x065F4CD7), UINT32_C(0x03CFC91B), UINT32_C(0x06B603EF),
+ UINT32_C(0x07128C67), UINT32_C(0x030595F0), UINT32_C(0x0E51FB71),
+ UINT32_C(0x06217FBE), UINT32_C(0x0B730732), UINT32_C(0x06277C1D),
+ UINT32_C(0x04AE17C6) },
+ { UINT32_C(0x0CFB1D0D), UINT32_C(0x053AA14E), UINT32_C(0x0442F9BE),
+ UINT32_C(0x0786EEC1), UINT32_C(0x0EF775DF), UINT32_C(0x07A66D5B),
+ UINT32_C(0x032CDF98), UINT32_C(0x0CA3E106), UINT32_C(0x07042EBA),
+ UINT32_C(0x00FD51A1), UINT32_C(0x02B743F2), UINT32_C(0x0D214308),
+ UINT32_C(0x03293BD7), UINT32_C(0x0635DC49), UINT32_C(0x0EB86870),
+ UINT32_C(0x03EB73BF), UINT32_C(0x07F02587), UINT32_C(0x0017A824),
+ UINT32_C(0x01F012DD) } },
+ { { UINT32_C(0x0E0BF039), UINT32_C(0x003B2CD3), UINT32_C(0x0C2C0F48),
+ UINT32_C(0x039AED35), UINT32_C(0x044C7CCC), UINT32_C(0x0364D078),
+ UINT32_C(0x02C04409), UINT32_C(0x0CAEF9C4), UINT32_C(0x05C37F4A),
+ UINT32_C(0x0D99EE77), UINT32_C(0x0200140A), UINT32_C(0x0A3BBBDE),
+ UINT32_C(0x041E7C9A), UINT32_C(0x0371B744), UINT32_C(0x05A165FF),
+ UINT32_C(0x05A7216A), UINT32_C(0x0A9CE444), UINT32_C(0x03DD4951),
+ UINT32_C(0x031EC3D2) },
+ { UINT32_C(0x08EAF6EB), UINT32_C(0x0703CD67), UINT32_C(0x0DEBC6FB),
+ UINT32_C(0x079F8F47), UINT32_C(0x090D3A5B), UINT32_C(0x05FF4EFE),
+ UINT32_C(0x05A2BC42), UINT32_C(0x006C3961), UINT32_C(0x00795219),
+ UINT32_C(0x0FF8315E), UINT32_C(0x05BD4244), UINT32_C(0x02EEA381),
+ UINT32_C(0x02022F89), UINT32_C(0x07878373), UINT32_C(0x084B3FA1),
+ UINT32_C(0x0715713B), UINT32_C(0x0EF55815), UINT32_C(0x0748BA61),
+ UINT32_C(0x0445AEE6) } },
+ { { UINT32_C(0x0DCBF5E2), UINT32_C(0x03557A9E), UINT32_C(0x063D2A67),
+ UINT32_C(0x00EFE9F6), UINT32_C(0x09FA350B), UINT32_C(0x03896396),
+ UINT32_C(0x01F8036E), UINT32_C(0x0DC0F10D), UINT32_C(0x02B56329),
+ UINT32_C(0x02504A0F), UINT32_C(0x063A7100), UINT32_C(0x0FA5A9E7),
+ UINT32_C(0x07665FD9), UINT32_C(0x05DE4FB8), UINT32_C(0x00484D0C),
+ UINT32_C(0x03AEE4FB), UINT32_C(0x046B10E6), UINT32_C(0x04D5E0D6),
+ UINT32_C(0x01F835F4) },
+ { UINT32_C(0x047D2B4B), UINT32_C(0x05847634), UINT32_C(0x0C0A675C),
+ UINT32_C(0x00120157), UINT32_C(0x07AF8F0E), UINT32_C(0x0251A99B),
+ UINT32_C(0x00CEE4D0), UINT32_C(0x07351889), UINT32_C(0x0621596F),
+ UINT32_C(0x00C5618B), UINT32_C(0x066E65D2), UINT32_C(0x049D9FBE),
+ UINT32_C(0x01E37BCF), UINT32_C(0x01C629C9), UINT32_C(0x0EC1F561),
+ UINT32_C(0x02AFE546), UINT32_C(0x0005751E), UINT32_C(0x018C42B2),
+ UINT32_C(0x01EAA03C) } },
+ { { UINT32_C(0x0D959BD9), UINT32_C(0x038EEBBB), UINT32_C(0x08419A01),
+ UINT32_C(0x05F1CCBE), UINT32_C(0x03171501), UINT32_C(0x07C18C55),
+ UINT32_C(0x035306D9), UINT32_C(0x011DBDEA), UINT32_C(0x036E5963),
+ UINT32_C(0x090BCEBA), UINT32_C(0x01350854), UINT32_C(0x0BB28AF5),
+ UINT32_C(0x04F74928), UINT32_C(0x0330FF01), UINT32_C(0x095BA009),
+ UINT32_C(0x0578BFB6), UINT32_C(0x0FCF0801), UINT32_C(0x03302535),
+ UINT32_C(0x06BFF304) },
+ { UINT32_C(0x0384E611), UINT32_C(0x00AD5348), UINT32_C(0x0E493BE6),
+ UINT32_C(0x03CA4CDB), UINT32_C(0x0C4D1BD5), UINT32_C(0x027B8CE4),
+ UINT32_C(0x02E5B4CB), UINT32_C(0x0707AF6D), UINT32_C(0x06A39971),
+ UINT32_C(0x0BA42E4C), UINT32_C(0x0755E74C), UINT32_C(0x04AD6360),
+ UINT32_C(0x068A6F0D), UINT32_C(0x023144DE), UINT32_C(0x07375993),
+ UINT32_C(0x02780B3A), UINT32_C(0x0E492027), UINT32_C(0x05808694),
+ UINT32_C(0x07431A53) } },
+ { { UINT32_C(0x010FBD04), UINT32_C(0x019723AA), UINT32_C(0x025CF109),
+ UINT32_C(0x03F3A3A7), UINT32_C(0x0D9D8E3F), UINT32_C(0x02F7C4B0),
+ UINT32_C(0x03DF7DF6), UINT32_C(0x0B60F06D), UINT32_C(0x02A5D26D),
+ UINT32_C(0x0C5F86A4), UINT32_C(0x06E7FCD9), UINT32_C(0x0DEF388F),
+ UINT32_C(0x05AC83A6), UINT32_C(0x0217A751), UINT32_C(0x00401D85),
+ UINT32_C(0x075A320E), UINT32_C(0x01AE8195), UINT32_C(0x06F4F327),
+ UINT32_C(0x04C77D2F) },
+ { UINT32_C(0x09493BE8), UINT32_C(0x00A14C7B), UINT32_C(0x091C8FF9),
+ UINT32_C(0x01DEAA22), UINT32_C(0x0AB4BA27), UINT32_C(0x0562E012),
+ UINT32_C(0x07519BAB), UINT32_C(0x062D9AAA), UINT32_C(0x058B7863),
+ UINT32_C(0x08A2419C), UINT32_C(0x035D8277), UINT32_C(0x0F5C3CF3),
+ UINT32_C(0x03527C6B), UINT32_C(0x00F3B9E0), UINT32_C(0x0EF25B4A),
+ UINT32_C(0x0127A8B4), UINT32_C(0x0CE17BD2), UINT32_C(0x0195E53E),
+ UINT32_C(0x071B9B4C) } },
+ { { UINT32_C(0x0DAA2FB7), UINT32_C(0x021B0EB2), UINT32_C(0x0B55E936),
+ UINT32_C(0x057A20CC), UINT32_C(0x01398941), UINT32_C(0x06E0BA5C),
+ UINT32_C(0x07DEDA3A), UINT32_C(0x00B1377E), UINT32_C(0x008093F5),
+ UINT32_C(0x00F8C281), UINT32_C(0x05D4332E), UINT32_C(0x0CF54E5F),
+ UINT32_C(0x039D7F62), UINT32_C(0x0699AB5B), UINT32_C(0x05FE8914),
+ UINT32_C(0x01C38070), UINT32_C(0x0685A0AC), UINT32_C(0x0104BEEE),
+ UINT32_C(0x06E340C1) },
+ { UINT32_C(0x0FDAA949), UINT32_C(0x02A92433), UINT32_C(0x04E882FB),
+ UINT32_C(0x0435EA3D), UINT32_C(0x0CFC4BD1), UINT32_C(0x065698D5),
+ UINT32_C(0x02B61BEC), UINT32_C(0x0A7025E9), UINT32_C(0x06C77C84),
+ UINT32_C(0x066340BA), UINT32_C(0x07C0B02F), UINT32_C(0x0F9B4BCA),
+ UINT32_C(0x0207D1CA), UINT32_C(0x061D80D9), UINT32_C(0x061524CC),
+ UINT32_C(0x03F6A9F8), UINT32_C(0x094B6D53), UINT32_C(0x017C53E1),
+ UINT32_C(0x00BC771D) } },
+ { { UINT32_C(0x0C8D6167), UINT32_C(0x0171F9BD), UINT32_C(0x05943DEC),
+ UINT32_C(0x01837B9B), UINT32_C(0x06E46FBD), UINT32_C(0x050C893D),
+ UINT32_C(0x0034F50C), UINT32_C(0x0E98EEDA), UINT32_C(0x06603ADA),
+ UINT32_C(0x0FF3362D), UINT32_C(0x023406A4), UINT32_C(0x03DC7095),
+ UINT32_C(0x03BCCC93), UINT32_C(0x033BDFE7), UINT32_C(0x0AA65D81),
+ UINT32_C(0x0739E2AF), UINT32_C(0x03455112), UINT32_C(0x06643DC0),
+ UINT32_C(0x020DF18F) },
+ { UINT32_C(0x084BF04E), UINT32_C(0x024B7756), UINT32_C(0x059E51F9),
+ UINT32_C(0x05998215), UINT32_C(0x03684ACA), UINT32_C(0x065BD6DC),
+ UINT32_C(0x03075ACB), UINT32_C(0x01AD9C9A), UINT32_C(0x07375334),
+ UINT32_C(0x01731A12), UINT32_C(0x000384D3), UINT32_C(0x02632FF6),
+ UINT32_C(0x0023BB3A), UINT32_C(0x0348AF93), UINT32_C(0x088B02BB),
+ UINT32_C(0x02C7DE6E), UINT32_C(0x0933F326), UINT32_C(0x00B1B61E),
+ UINT32_C(0x076AC60E) } },
+ { { UINT32_C(0x0757C756), UINT32_C(0x05545A21), UINT32_C(0x018FFA93),
+ UINT32_C(0x06C9A78F), UINT32_C(0x02C61841), UINT32_C(0x040A1739),
+ UINT32_C(0x04441B1D), UINT32_C(0x052E0E81), UINT32_C(0x07E14C4D),
+ UINT32_C(0x0FFFC0D5), UINT32_C(0x03072E2E), UINT32_C(0x007584A9),
+ UINT32_C(0x01259E6D), UINT32_C(0x002D25F5), UINT32_C(0x0C519B94),
+ UINT32_C(0x01BB1C14), UINT32_C(0x02CEB824), UINT32_C(0x02BBBEA4),
+ UINT32_C(0x035E112A) },
+ { UINT32_C(0x0288CF7B), UINT32_C(0x0045C5C7), UINT32_C(0x002D8D8C),
+ UINT32_C(0x03BE5B42), UINT32_C(0x0A81E4C6), UINT32_C(0x0141578F),
+ UINT32_C(0x033F7AC2), UINT32_C(0x0EE71541), UINT32_C(0x067EAD7B),
+ UINT32_C(0x07E75F23), UINT32_C(0x011AF108), UINT32_C(0x047CA170),
+ UINT32_C(0x05308227), UINT32_C(0x054879D4), UINT32_C(0x0A37B132),
+ UINT32_C(0x00E6D1CA), UINT32_C(0x0629367A), UINT32_C(0x03276C5F),
+ UINT32_C(0x004CBC63) } },
+ { { UINT32_C(0x00CF69E7), UINT32_C(0x0584FC9D), UINT32_C(0x06952F73),
+ UINT32_C(0x0281D51C), UINT32_C(0x037663C6), UINT32_C(0x0537F046),
+ UINT32_C(0x0725FFD4), UINT32_C(0x0C66B9FC), UINT32_C(0x049A3EDF),
+ UINT32_C(0x0F4FB830), UINT32_C(0x06728E50), UINT32_C(0x07B188F6),
+ UINT32_C(0x021C067A), UINT32_C(0x06F06BE8), UINT32_C(0x00AA347B),
+ UINT32_C(0x031AABF8), UINT32_C(0x03347446), UINT32_C(0x04B62373),
+ UINT32_C(0x043D128D) },
+ { UINT32_C(0x02AE7427), UINT32_C(0x00F73AC9), UINT32_C(0x0095D833),
+ UINT32_C(0x00E6005C), UINT32_C(0x007FD8B7), UINT32_C(0x074C2204),
+ UINT32_C(0x00283649), UINT32_C(0x084EDD51), UINT32_C(0x05AC7321),
+ UINT32_C(0x08C40328), UINT32_C(0x04BFB5EF), UINT32_C(0x0A555FE0),
+ UINT32_C(0x04C70C7C), UINT32_C(0x076D0055), UINT32_C(0x0425B2E6),
+ UINT32_C(0x029D910F), UINT32_C(0x0B0A51DB), UINT32_C(0x04B38F9B),
+ UINT32_C(0x01028D80) } },
+ { { UINT32_C(0x0F3DE4D2), UINT32_C(0x06047E27), UINT32_C(0x03505298),
+ UINT32_C(0x062523ED), UINT32_C(0x0F0D4A9F), UINT32_C(0x0150EF42),
+ UINT32_C(0x056CBCAD), UINT32_C(0x0B36A628), UINT32_C(0x071A352A),
+ UINT32_C(0x0D7A2CB8), UINT32_C(0x050FEDFC), UINT32_C(0x02BAC823),
+ UINT32_C(0x010EDF77), UINT32_C(0x0459668A), UINT32_C(0x04041659),
+ UINT32_C(0x07432BB7), UINT32_C(0x0F9651D8), UINT32_C(0x01999DE2),
+ UINT32_C(0x00CBECA1) },
+ { UINT32_C(0x06A2607F), UINT32_C(0x06DC83E9), UINT32_C(0x005B1A08),
+ UINT32_C(0x05B9405C), UINT32_C(0x091E04D3), UINT32_C(0x0546E232),
+ UINT32_C(0x0566FE22), UINT32_C(0x0695BB9A), UINT32_C(0x0074A612),
+ UINT32_C(0x0E9787A0), UINT32_C(0x077B1860), UINT32_C(0x05404661),
+ UINT32_C(0x00184991), UINT32_C(0x02A1C038), UINT32_C(0x0A57F0B8),
+ UINT32_C(0x0382A987), UINT32_C(0x0691AC01), UINT32_C(0x02D8A8A9),
+ UINT32_C(0x05A19B11) } },
+ { { UINT32_C(0x081DC2A6), UINT32_C(0x017A4663), UINT32_C(0x0209D21F),
+ UINT32_C(0x06A6AA7F), UINT32_C(0x051CC44C), UINT32_C(0x000D763F),
+ UINT32_C(0x034EFD90), UINT32_C(0x0DEE4042), UINT32_C(0x07CBAFFB),
+ UINT32_C(0x082C34D9), UINT32_C(0x02EB3FE5), UINT32_C(0x0BF15295),
+ UINT32_C(0x027D4089), UINT32_C(0x056DBCC8), UINT32_C(0x024595A7),
+ UINT32_C(0x03EC08BE), UINT32_C(0x057085E2), UINT32_C(0x017E7356),
+ UINT32_C(0x049CE745) },
+ { UINT32_C(0x0123BA29), UINT32_C(0x0045804E), UINT32_C(0x08DEDF0E),
+ UINT32_C(0x00CB57D1), UINT32_C(0x0F61E577), UINT32_C(0x06EB6B79),
+ UINT32_C(0x05E3EED1), UINT32_C(0x09CB4DCD), UINT32_C(0x05DAE17F),
+ UINT32_C(0x034F393E), UINT32_C(0x03F5164C), UINT32_C(0x05F3C4A2),
+ UINT32_C(0x0708CC05), UINT32_C(0x04F2CAC7), UINT32_C(0x0798DD7C),
+ UINT32_C(0x0513331D), UINT32_C(0x004B3A41), UINT32_C(0x00801443),
+ UINT32_C(0x0196B762) } },
+ { { UINT32_C(0x0356B52C), UINT32_C(0x03557744), UINT32_C(0x050104FE),
+ UINT32_C(0x069B4687), UINT32_C(0x0337937D), UINT32_C(0x018C3F4F),
+ UINT32_C(0x00568175), UINT32_C(0x01EE408E), UINT32_C(0x04092DE8),
+ UINT32_C(0x05E59E83), UINT32_C(0x0299816F), UINT32_C(0x05556DCC),
+ UINT32_C(0x038621D8), UINT32_C(0x0278A753), UINT32_C(0x05BC9211),
+ UINT32_C(0x009E162C), UINT32_C(0x0A3409DC), UINT32_C(0x04076EA9),
+ UINT32_C(0x0464CEC0) },
+ { UINT32_C(0x0A659158), UINT32_C(0x022396D5), UINT32_C(0x08424377),
+ UINT32_C(0x0054703B), UINT32_C(0x0D2722F5), UINT32_C(0x03BAEB8A),
+ UINT32_C(0x04B65383), UINT32_C(0x07997DDA), UINT32_C(0x07F6A3B2),
+ UINT32_C(0x0BAFF348), UINT32_C(0x0299F9D9), UINT32_C(0x0B97AA04),
+ UINT32_C(0x02BA4DB8), UINT32_C(0x0696475F), UINT32_C(0x0B68D089),
+ UINT32_C(0x0472CB9F), UINT32_C(0x08CACFAE), UINT32_C(0x028807A6),
+ UINT32_C(0x009288EF) } },
+ { { UINT32_C(0x0ED9CDF5), UINT32_C(0x00B31C4E), UINT32_C(0x0C549857),
+ UINT32_C(0x02D7F964), UINT32_C(0x074F9F98), UINT32_C(0x0792DF5F),
+ UINT32_C(0x020ED722), UINT32_C(0x0AA8C982), UINT32_C(0x02A2408C),
+ UINT32_C(0x053CDF30), UINT32_C(0x01CF47E5), UINT32_C(0x08E3FF2F),
+ UINT32_C(0x0333087A), UINT32_C(0x028090D6), UINT32_C(0x032F6CA0),
+ UINT32_C(0x02CF642E), UINT32_C(0x0DAB4498), UINT32_C(0x04A66B66),
+ UINT32_C(0x07248BCE) },
+ { UINT32_C(0x092B1FE6), UINT32_C(0x02AD6EEE), UINT32_C(0x0EB5963E),
+ UINT32_C(0x0621B6BD), UINT32_C(0x04A1A8EF), UINT32_C(0x0374D40D),
+ UINT32_C(0x0573791F), UINT32_C(0x0DED8513), UINT32_C(0x03AEE0F5),
+ UINT32_C(0x03420B85), UINT32_C(0x04366099), UINT32_C(0x087C7CA7),
+ UINT32_C(0x00B9ADB9), UINT32_C(0x056E8EBA), UINT32_C(0x0E532676),
+ UINT32_C(0x05D27A22), UINT32_C(0x0554F4E5), UINT32_C(0x0474B581),
+ UINT32_C(0x02A6694F) } },
+ { { UINT32_C(0x080DE633), UINT32_C(0x0639306E), UINT32_C(0x0CA4F76E),
+ UINT32_C(0x05BB3DCB), UINT32_C(0x06DA081A), UINT32_C(0x052EA9E2),
+ UINT32_C(0x017AF437), UINT32_C(0x07D25D54), UINT32_C(0x0772DE75),
+ UINT32_C(0x05670178), UINT32_C(0x06E81696), UINT32_C(0x0D28F3A1),
+ UINT32_C(0x07AF022A), UINT32_C(0x07B0D67B), UINT32_C(0x04C17950),
+ UINT32_C(0x001B706E), UINT32_C(0x04CE5637), UINT32_C(0x04CE1F2F),
+ UINT32_C(0x0211C385) },
+ { UINT32_C(0x0E5D0D74), UINT32_C(0x0411D39E), UINT32_C(0x06137F67),
+ UINT32_C(0x00487846), UINT32_C(0x01B15D1C), UINT32_C(0x02B65C31),
+ UINT32_C(0x06027C03), UINT32_C(0x01F15577), UINT32_C(0x011F0564),
+ UINT32_C(0x066BA415), UINT32_C(0x00520E15), UINT32_C(0x01F82222),
+ UINT32_C(0x07F8C048), UINT32_C(0x05A09F41), UINT32_C(0x0BBA92E8),
+ UINT32_C(0x017E3648), UINT32_C(0x0861CC16), UINT32_C(0x07A9DAF6),
+ UINT32_C(0x05F2C6E5) } },
+ { { UINT32_C(0x04DA7708), UINT32_C(0x057D4066), UINT32_C(0x01F6A8A0),
+ UINT32_C(0x00EE18FE), UINT32_C(0x05BB3FCD), UINT32_C(0x071CB79F),
+ UINT32_C(0x038BBCE0), UINT32_C(0x0AAFE87E), UINT32_C(0x0245536B),
+ UINT32_C(0x0D0401C6), UINT32_C(0x027984FD), UINT32_C(0x0064D51F),
+ UINT32_C(0x04DCF2A2), UINT32_C(0x037E99AD), UINT32_C(0x03487C33),
+ UINT32_C(0x068353F1), UINT32_C(0x0BA863FC), UINT32_C(0x00721339),
+ UINT32_C(0x0754D195) },
+ { UINT32_C(0x09031706), UINT32_C(0x0327DD4E), UINT32_C(0x05DDA163),
+ UINT32_C(0x03F893AE), UINT32_C(0x0F1F3959), UINT32_C(0x02EC658A),
+ UINT32_C(0x05A438AD), UINT32_C(0x0AE93F30), UINT32_C(0x01D8B56B),
+ UINT32_C(0x09592309), UINT32_C(0x0189BB66), UINT32_C(0x050E8D52),
+ UINT32_C(0x0526168D), UINT32_C(0x07FD307D), UINT32_C(0x08A4C7BC),
+ UINT32_C(0x03B12944), UINT32_C(0x08329BC8), UINT32_C(0x02A4A1CE),
+ UINT32_C(0x0087B284) } },
+ },
+ {
+ { { UINT32_C(0x01C86157), UINT32_C(0x0017ED5F), UINT32_C(0x079948D2),
+ UINT32_C(0x02FD6755), UINT32_C(0x0A5E2B5C), UINT32_C(0x00395EB0),
+ UINT32_C(0x070A6ECC), UINT32_C(0x031E307B), UINT32_C(0x070DA4B9),
+ UINT32_C(0x0166FB85), UINT32_C(0x02AF3210), UINT32_C(0x079379FF),
+ UINT32_C(0x010504D3), UINT32_C(0x022DFB7B), UINT32_C(0x0C019CF3),
+ UINT32_C(0x05E0727A), UINT32_C(0x0CE73CB9), UINT32_C(0x005CF0C7),
+ UINT32_C(0x039AD397) },
+ { UINT32_C(0x08E15F36), UINT32_C(0x04E08562), UINT32_C(0x0EC12012),
+ UINT32_C(0x009F68C4), UINT32_C(0x0733E4B1), UINT32_C(0x014872C8),
+ UINT32_C(0x0490CCCC), UINT32_C(0x0E53957D), UINT32_C(0x05CD4F2D),
+ UINT32_C(0x082FD79D), UINT32_C(0x05F2B6D8), UINT32_C(0x0C7600B1),
+ UINT32_C(0x02D81D79), UINT32_C(0x007520D1), UINT32_C(0x09EEC681),
+ UINT32_C(0x04D6FB1B), UINT32_C(0x0641B032), UINT32_C(0x0283E5C0),
+ UINT32_C(0x072A39F3) } },
+ { { UINT32_C(0x01C9C2EC), UINT32_C(0x03A87BAF), UINT32_C(0x056E06F3),
+ UINT32_C(0x02AA4CD5), UINT32_C(0x0D64394D), UINT32_C(0x044B2642),
+ UINT32_C(0x018E8ECB), UINT32_C(0x02C6B29E), UINT32_C(0x00B5D0E1),
+ UINT32_C(0x0795603C), UINT32_C(0x027FEAC7), UINT32_C(0x07400535),
+ UINT32_C(0x04BD90C2), UINT32_C(0x0212CC37), UINT32_C(0x018B9D6C),
+ UINT32_C(0x05FC9D53), UINT32_C(0x03C7248E), UINT32_C(0x038A1FEB),
+ UINT32_C(0x06C809CE) },
+ { UINT32_C(0x06F1CACC), UINT32_C(0x0758DFC1), UINT32_C(0x019C0D17),
+ UINT32_C(0x0749CD61), UINT32_C(0x00C0724E), UINT32_C(0x0667F861),
+ UINT32_C(0x03CDAF01), UINT32_C(0x0DE66325), UINT32_C(0x0767BD47),
+ UINT32_C(0x0A1FDF93), UINT32_C(0x04E66E27), UINT32_C(0x004977BC),
+ UINT32_C(0x05EE6515), UINT32_C(0x018DEC59), UINT32_C(0x03B99628),
+ UINT32_C(0x02B69F3F), UINT32_C(0x019CC516), UINT32_C(0x07CB4623),
+ UINT32_C(0x0353C229) } },
+ { { UINT32_C(0x05A2D6F0), UINT32_C(0x04982642), UINT32_C(0x088CE54F),
+ UINT32_C(0x06602A66), UINT32_C(0x0A17C84E), UINT32_C(0x02BE4DCE),
+ UINT32_C(0x0718C264), UINT32_C(0x0FDCB2D1), UINT32_C(0x01F7AC59),
+ UINT32_C(0x0E4C2C6C), UINT32_C(0x01B5B9D3), UINT32_C(0x0CCEB9E5),
+ UINT32_C(0x04C7FB08), UINT32_C(0x04600748), UINT32_C(0x09F19FD9),
+ UINT32_C(0x011C0141), UINT32_C(0x0A08392D), UINT32_C(0x07099321),
+ UINT32_C(0x075F26A3) },
+ { UINT32_C(0x0AF35FA1), UINT32_C(0x01CA261B), UINT32_C(0x0FF7838D),
+ UINT32_C(0x00432E0D), UINT32_C(0x08296922), UINT32_C(0x077D0499),
+ UINT32_C(0x06A4988A), UINT32_C(0x0D91BD7B), UINT32_C(0x007D4895),
+ UINT32_C(0x01A77EB2), UINT32_C(0x0491B2C9), UINT32_C(0x07D6BB4E),
+ UINT32_C(0x065BB828), UINT32_C(0x05D28C77), UINT32_C(0x034C1831),
+ UINT32_C(0x03111000), UINT32_C(0x048A3F8F), UINT32_C(0x007D19EE),
+ UINT32_C(0x006FAC9D) } },
+ { { UINT32_C(0x0719C87C), UINT32_C(0x07385BC9), UINT32_C(0x01F42502),
+ UINT32_C(0x074D4561), UINT32_C(0x02CA79B8), UINT32_C(0x01BE905A),
+ UINT32_C(0x044E03DC), UINT32_C(0x05034A1A), UINT32_C(0x012B4964),
+ UINT32_C(0x0BF284CE), UINT32_C(0x0080C91A), UINT32_C(0x0B4EE205),
+ UINT32_C(0x0121E876), UINT32_C(0x04C7D981), UINT32_C(0x09D6F0D5),
+ UINT32_C(0x011438CC), UINT32_C(0x0906A777), UINT32_C(0x05FD89D1),
+ UINT32_C(0x01D7C3AC) },
+ { UINT32_C(0x0392D834), UINT32_C(0x0199066B), UINT32_C(0x0E53AECD),
+ UINT32_C(0x0279A7E5), UINT32_C(0x0E8B313A), UINT32_C(0x04F8A2AF),
+ UINT32_C(0x062A274F), UINT32_C(0x0869ED62), UINT32_C(0x01C4081F),
+ UINT32_C(0x0DD27618), UINT32_C(0x0093ED89), UINT32_C(0x053869B6),
+ UINT32_C(0x07CB8D0C), UINT32_C(0x00D79FE6), UINT32_C(0x04A20332),
+ UINT32_C(0x03366324), UINT32_C(0x0C0B74C3), UINT32_C(0x070C316E),
+ UINT32_C(0x066AD76F) } },
+ { { UINT32_C(0x011FA55B), UINT32_C(0x0775F5E8), UINT32_C(0x0C7BF6F4),
+ UINT32_C(0x07FCBE6F), UINT32_C(0x021BE3C2), UINT32_C(0x0017D919),
+ UINT32_C(0x01644455), UINT32_C(0x0AEE3FD7), UINT32_C(0x0259DD5E),
+ UINT32_C(0x002EC22F), UINT32_C(0x00D308F5), UINT32_C(0x038F6CBC),
+ UINT32_C(0x04FDED85), UINT32_C(0x001A53FA), UINT32_C(0x03E09FE9),
+ UINT32_C(0x0312E74F), UINT32_C(0x09B20907), UINT32_C(0x078CC1DB),
+ UINT32_C(0x066D9E8D) },
+ { UINT32_C(0x08C7A5B7), UINT32_C(0x038B0D82), UINT32_C(0x063E4030),
+ UINT32_C(0x06CE3A75), UINT32_C(0x0488AD55), UINT32_C(0x0054AAAA),
+ UINT32_C(0x044F068C), UINT32_C(0x0CCE69AA), UINT32_C(0x014EF6E0),
+ UINT32_C(0x068C0346), UINT32_C(0x01443327), UINT32_C(0x0A416B3D),
+ UINT32_C(0x04EB25A7), UINT32_C(0x00B6E80F), UINT32_C(0x0819D7FD),
+ UINT32_C(0x061AFFF1), UINT32_C(0x070E8C81), UINT32_C(0x061C5530),
+ UINT32_C(0x0473CB02) } },
+ { { UINT32_C(0x08D8BE36), UINT32_C(0x057DE7D1), UINT32_C(0x06025FA9),
+ UINT32_C(0x0039A5D5), UINT32_C(0x00FD02EF), UINT32_C(0x02EE7913),
+ UINT32_C(0x04E5E224), UINT32_C(0x052DC251), UINT32_C(0x04138D66),
+ UINT32_C(0x09FAF17A), UINT32_C(0x030D57A1), UINT32_C(0x08B8F06A),
+ UINT32_C(0x01D015A2), UINT32_C(0x0153FCA9), UINT32_C(0x0C54D5DF),
+ UINT32_C(0x00BAAE4A), UINT32_C(0x0940A0FA), UINT32_C(0x038292EA),
+ UINT32_C(0x02C97BC9) },
+ { UINT32_C(0x024BFA00), UINT32_C(0x057378C3), UINT32_C(0x0A92C578),
+ UINT32_C(0x07A6310B), UINT32_C(0x0F28F901), UINT32_C(0x04ED3F57),
+ UINT32_C(0x037C7D8A), UINT32_C(0x00B71701), UINT32_C(0x0173A01A),
+ UINT32_C(0x0A9B43A3), UINT32_C(0x0196E612), UINT32_C(0x07111189),
+ UINT32_C(0x03F5BC1D), UINT32_C(0x05154B49), UINT32_C(0x0DD68D97),
+ UINT32_C(0x0220CC1D), UINT32_C(0x0895DF59), UINT32_C(0x0014717C),
+ UINT32_C(0x0384CEF8) } },
+ { { UINT32_C(0x05F8022D), UINT32_C(0x07431A94), UINT32_C(0x0A7A9097),
+ UINT32_C(0x06FC555D), UINT32_C(0x0578029C), UINT32_C(0x00758DC8),
+ UINT32_C(0x00FDAF66), UINT32_C(0x0AE902D1), UINT32_C(0x06FDDF4D),
+ UINT32_C(0x056FCD2A), UINT32_C(0x0393CA27), UINT32_C(0x083EDDB9),
+ UINT32_C(0x071C8D5E), UINT32_C(0x02DA7EE1), UINT32_C(0x091B7578),
+ UINT32_C(0x022CF2B8), UINT32_C(0x08F559AF), UINT32_C(0x00F551D9),
+ UINT32_C(0x04CE7872) },
+ { UINT32_C(0x0450FD39), UINT32_C(0x05325A33), UINT32_C(0x06D04EAD),
+ UINT32_C(0x0111017F), UINT32_C(0x04B7D043), UINT32_C(0x009CD030),
+ UINT32_C(0x02760D24), UINT32_C(0x0B333C83), UINT32_C(0x0178F799),
+ UINT32_C(0x06E56E99), UINT32_C(0x06AC4002), UINT32_C(0x06C6F55C),
+ UINT32_C(0x04212C69), UINT32_C(0x0776C549), UINT32_C(0x05AD10F2),
+ UINT32_C(0x07D4C443), UINT32_C(0x093443A3), UINT32_C(0x01E4DAC4),
+ UINT32_C(0x062304F4) } },
+ { { UINT32_C(0x09FFF942), UINT32_C(0x039E7FBF), UINT32_C(0x0E4E0544),
+ UINT32_C(0x01C8EF03), UINT32_C(0x015953E4), UINT32_C(0x0641511A),
+ UINT32_C(0x0340D7DD), UINT32_C(0x04FBA207), UINT32_C(0x04DCD411),
+ UINT32_C(0x0CE5C435), UINT32_C(0x06C85A54), UINT32_C(0x0596F209),
+ UINT32_C(0x006C47CF), UINT32_C(0x039823F7), UINT32_C(0x01721D4C),
+ UINT32_C(0x03FE86B7), UINT32_C(0x044008FA), UINT32_C(0x05E107EC),
+ UINT32_C(0x0146DF75) },
+ { UINT32_C(0x03BF30CF), UINT32_C(0x034E0D17), UINT32_C(0x0C6EB8E1),
+ UINT32_C(0x016786DE), UINT32_C(0x0B4F8D94), UINT32_C(0x01E54C18),
+ UINT32_C(0x0409537F), UINT32_C(0x0AD69F59), UINT32_C(0x04423A96),
+ UINT32_C(0x01427559), UINT32_C(0x0517F981), UINT32_C(0x0C655FF1),
+ UINT32_C(0x072A4662), UINT32_C(0x014DB58F), UINT32_C(0x09979D6E),
+ UINT32_C(0x05396DDB), UINT32_C(0x03E46CF7), UINT32_C(0x062B9D62),
+ UINT32_C(0x0334D070) } },
+ { { UINT32_C(0x0C8B2AF6), UINT32_C(0x04C4030A), UINT32_C(0x03F4EA61),
+ UINT32_C(0x06B51CFD), UINT32_C(0x08530E96), UINT32_C(0x035106EB),
+ UINT32_C(0x07ACB7C9), UINT32_C(0x003FAA6D), UINT32_C(0x005AFE21),
+ UINT32_C(0x09C9266C), UINT32_C(0x02684731), UINT32_C(0x0745AC29),
+ UINT32_C(0x06162CD8), UINT32_C(0x069A0B95), UINT32_C(0x090B8391),
+ UINT32_C(0x0570D83A), UINT32_C(0x09AE0D06), UINT32_C(0x054A95B8),
+ UINT32_C(0x02CB380B) },
+ { UINT32_C(0x02779E4D), UINT32_C(0x04B32E43), UINT32_C(0x0C0582B0),
+ UINT32_C(0x03521F35), UINT32_C(0x089A8F39), UINT32_C(0x03BF1933),
+ UINT32_C(0x027659AD), UINT32_C(0x0607CE4F), UINT32_C(0x072A97A4),
+ UINT32_C(0x0F6C2DAD), UINT32_C(0x0648C496), UINT32_C(0x02D0AF23),
+ UINT32_C(0x036927AF), UINT32_C(0x032E9075), UINT32_C(0x01C0AD79),
+ UINT32_C(0x02044936), UINT32_C(0x0DBCFEA2), UINT32_C(0x07DADFF1),
+ UINT32_C(0x06EDBCF7) } },
+ { { UINT32_C(0x0209B80C), UINT32_C(0x01E54056), UINT32_C(0x0E397930),
+ UINT32_C(0x01AD9D0C), UINT32_C(0x0908F895), UINT32_C(0x02A9A26E),
+ UINT32_C(0x00744EB0), UINT32_C(0x0B2D7673), UINT32_C(0x00736623),
+ UINT32_C(0x0F9EEB98), UINT32_C(0x07E8C693), UINT32_C(0x05615D70),
+ UINT32_C(0x077E9858), UINT32_C(0x045C88B2), UINT32_C(0x06BA3291),
+ UINT32_C(0x02089363), UINT32_C(0x0D1148CA), UINT32_C(0x026B1CE4),
+ UINT32_C(0x0267E39A) },
+ { UINT32_C(0x0E9F76E1), UINT32_C(0x0700247A), UINT32_C(0x02F5C013),
+ UINT32_C(0x045D6B0B), UINT32_C(0x02398752), UINT32_C(0x011414B8),
+ UINT32_C(0x0189B0D8), UINT32_C(0x065621BE), UINT32_C(0x07214CB5),
+ UINT32_C(0x0C72745E), UINT32_C(0x026E830D), UINT32_C(0x0BB5064F),
+ UINT32_C(0x03BD6991), UINT32_C(0x067AABA6), UINT32_C(0x03AAD9C4),
+ UINT32_C(0x01C748B3), UINT32_C(0x0F2AD6A8), UINT32_C(0x07B1AAD0),
+ UINT32_C(0x0515A45B) } },
+ { { UINT32_C(0x0D45283F), UINT32_C(0x033F0C2B), UINT32_C(0x0EF7ECBA),
+ UINT32_C(0x03F31217), UINT32_C(0x0BF2BDDB), UINT32_C(0x05AE5F1D),
+ UINT32_C(0x015A33AE), UINT32_C(0x0B1D94AB), UINT32_C(0x00BB377A),
+ UINT32_C(0x077D4679), UINT32_C(0x056AF89C), UINT32_C(0x07165F99),
+ UINT32_C(0x046A17A3), UINT32_C(0x04CF6178), UINT32_C(0x00269B9B),
+ UINT32_C(0x03F1B9F6), UINT32_C(0x07453C34), UINT32_C(0x07253011),
+ UINT32_C(0x074559A2) },
+ { UINT32_C(0x08D82B0E), UINT32_C(0x00D12F5F), UINT32_C(0x01FD52F5),
+ UINT32_C(0x03C4069B), UINT32_C(0x0B01B2FE), UINT32_C(0x05E81250),
+ UINT32_C(0x035DC621), UINT32_C(0x034EA726), UINT32_C(0x04613127),
+ UINT32_C(0x0B36D680), UINT32_C(0x06F52BC5), UINT32_C(0x04B16171),
+ UINT32_C(0x02156292), UINT32_C(0x0180583E), UINT32_C(0x0C8D5B19),
+ UINT32_C(0x043B9BE2), UINT32_C(0x097EF032), UINT32_C(0x0307A273),
+ UINT32_C(0x02ECC50D) } },
+ { { UINT32_C(0x0613AC50), UINT32_C(0x01BBB9CD), UINT32_C(0x032CF181),
+ UINT32_C(0x04565F80), UINT32_C(0x09B00E52), UINT32_C(0x011EC5E2),
+ UINT32_C(0x05E7561C), UINT32_C(0x05B6572C), UINT32_C(0x072FBF3A),
+ UINT32_C(0x04311E38), UINT32_C(0x0350633E), UINT32_C(0x0C27E7E9),
+ UINT32_C(0x02DC82FC), UINT32_C(0x01DE746D), UINT32_C(0x078E3236),
+ UINT32_C(0x0712B6B0), UINT32_C(0x000A7E83), UINT32_C(0x0115CB1B),
+ UINT32_C(0x04C1103F) },
+ { UINT32_C(0x0359ED2E), UINT32_C(0x065ADF64), UINT32_C(0x025E3238),
+ UINT32_C(0x076BEAFD), UINT32_C(0x072427F7), UINT32_C(0x05DBCD55),
+ UINT32_C(0x07AB37FF), UINT32_C(0x0865BFD5), UINT32_C(0x04382D44),
+ UINT32_C(0x0F1D5580), UINT32_C(0x06D00533), UINT32_C(0x08D6A784),
+ UINT32_C(0x05BB29BF), UINT32_C(0x005CEC3F), UINT32_C(0x06575E68),
+ UINT32_C(0x053585D5), UINT32_C(0x0403BCB0), UINT32_C(0x02F77540),
+ UINT32_C(0x02470C7F) } },
+ { { UINT32_C(0x02C087ED), UINT32_C(0x07961B4B), UINT32_C(0x0F657FC0),
+ UINT32_C(0x00B16431), UINT32_C(0x01885C19), UINT32_C(0x029A3FB7),
+ UINT32_C(0x0721535D), UINT32_C(0x02FAD79C), UINT32_C(0x0596E385),
+ UINT32_C(0x02412161), UINT32_C(0x0289A97A), UINT32_C(0x01B54107),
+ UINT32_C(0x0271E7BB), UINT32_C(0x02E3D256), UINT32_C(0x07E3B820),
+ UINT32_C(0x07F5A8EE), UINT32_C(0x0C3BD541), UINT32_C(0x01BBC84D),
+ UINT32_C(0x02D55A46) },
+ { UINT32_C(0x006E7D53), UINT32_C(0x07982C04), UINT32_C(0x09C948A0),
+ UINT32_C(0x00A62A93), UINT32_C(0x047CD945), UINT32_C(0x060F1A2B),
+ UINT32_C(0x05764587), UINT32_C(0x02111992), UINT32_C(0x03CD3492),
+ UINT32_C(0x0E5873CA), UINT32_C(0x04871D26), UINT32_C(0x0EBDD263),
+ UINT32_C(0x07899288), UINT32_C(0x00105962), UINT32_C(0x07975B25),
+ UINT32_C(0x00D6A34D), UINT32_C(0x02DF3799), UINT32_C(0x02807307),
+ UINT32_C(0x06FCAC54) } },
+ { { UINT32_C(0x0302E505), UINT32_C(0x02CAC37A), UINT32_C(0x01A79721),
+ UINT32_C(0x03B2E74F), UINT32_C(0x0BE5B627), UINT32_C(0x019F58EA),
+ UINT32_C(0x03B18976), UINT32_C(0x0663CE37), UINT32_C(0x04C1003E),
+ UINT32_C(0x086DCC91), UINT32_C(0x0566BE13), UINT32_C(0x0A0C94D1),
+ UINT32_C(0x04A0F522), UINT32_C(0x01CBC165), UINT32_C(0x03D621C1),
+ UINT32_C(0x03F68C3D), UINT32_C(0x04156E0A), UINT32_C(0x04C1C807),
+ UINT32_C(0x002BF853) },
+ { UINT32_C(0x073938D8), UINT32_C(0x076E66F8), UINT32_C(0x0251205F),
+ UINT32_C(0x01B82A4E), UINT32_C(0x0C9EAC88), UINT32_C(0x0736DBEE),
+ UINT32_C(0x028732CD), UINT32_C(0x03522855), UINT32_C(0x0343EE5A),
+ UINT32_C(0x053E49A4), UINT32_C(0x025D55C0), UINT32_C(0x0D4096DF),
+ UINT32_C(0x01108518), UINT32_C(0x02AE724F), UINT32_C(0x07514106),
+ UINT32_C(0x0301EB15), UINT32_C(0x0D82C2DE), UINT32_C(0x05E3A585),
+ UINT32_C(0x036F14AF) } },
+ { { UINT32_C(0x07452267), UINT32_C(0x01E0D6D7), UINT32_C(0x04A4A896),
+ UINT32_C(0x06D1C7B5), UINT32_C(0x03C983EF), UINT32_C(0x017B4C4A),
+ UINT32_C(0x07C8F2FB), UINT32_C(0x078C2CCC), UINT32_C(0x0676C9A3),
+ UINT32_C(0x09CD585C), UINT32_C(0x0529FFB0), UINT32_C(0x020720BD),
+ UINT32_C(0x07B793B3), UINT32_C(0x07E65DA3), UINT32_C(0x0C89EDD5),
+ UINT32_C(0x04009C8D), UINT32_C(0x0EDC15A4), UINT32_C(0x077C8AC3),
+ UINT32_C(0x074868C1) },
+ { UINT32_C(0x0DBC2674), UINT32_C(0x07B6C41F), UINT32_C(0x0B10636B),
+ UINT32_C(0x0607B000), UINT32_C(0x01B2C3EF), UINT32_C(0x014283CF),
+ UINT32_C(0x07BD944A), UINT32_C(0x016DA691), UINT32_C(0x0147454E),
+ UINT32_C(0x052DE117), UINT32_C(0x06E5CDC4), UINT32_C(0x0C7BE891),
+ UINT32_C(0x03BD94DE), UINT32_C(0x00362FA3), UINT32_C(0x0608B5DA),
+ UINT32_C(0x000C28A8), UINT32_C(0x06CFAD2C), UINT32_C(0x0502E5EB),
+ UINT32_C(0x0081DDC6) } },
+ { { UINT32_C(0x0A2FCC67), UINT32_C(0x050EED2A), UINT32_C(0x0EAC3925),
+ UINT32_C(0x03CCFE3E), UINT32_C(0x0DC1F4E8), UINT32_C(0x012FD64C),
+ UINT32_C(0x02CFA2B3), UINT32_C(0x07921E80), UINT32_C(0x04F76E6D),
+ UINT32_C(0x090CBEA8), UINT32_C(0x00304ECF), UINT32_C(0x0933B9C8),
+ UINT32_C(0x01E92879), UINT32_C(0x062A922A), UINT32_C(0x03BEBB40),
+ UINT32_C(0x0475B5A4), UINT32_C(0x0AB9D3C2), UINT32_C(0x02845E4B),
+ UINT32_C(0x073D2AD6) },
+ { UINT32_C(0x026C197B), UINT32_C(0x060C44B9), UINT32_C(0x07D6B2DD),
+ UINT32_C(0x06E7D188), UINT32_C(0x03B672A1), UINT32_C(0x0277F32F),
+ UINT32_C(0x011D4198), UINT32_C(0x07C178F6), UINT32_C(0x02E95A84),
+ UINT32_C(0x005619C7), UINT32_C(0x029B73FC), UINT32_C(0x03CAC5E3),
+ UINT32_C(0x068A3B5E), UINT32_C(0x07C2DFA8), UINT32_C(0x00EC9903),
+ UINT32_C(0x07AEED34), UINT32_C(0x08C0A0D0), UINT32_C(0x02A2FF79),
+ UINT32_C(0x06DBE6B8) } },
+ },
+ {
+ { { UINT32_C(0x0C3D1383), UINT32_C(0x04E126EE), UINT32_C(0x0B631DA3),
+ UINT32_C(0x03014900), UINT32_C(0x0D3831FE), UINT32_C(0x01BF06C7),
+ UINT32_C(0x032CA284), UINT32_C(0x092E0CA0), UINT32_C(0x01703AE0),
+ UINT32_C(0x0DCB8158), UINT32_C(0x06FF316B), UINT32_C(0x0ED60D31),
+ UINT32_C(0x05DB467E), UINT32_C(0x01F3917A), UINT32_C(0x06770BD1),
+ UINT32_C(0x00A944AF), UINT32_C(0x08E2035D), UINT32_C(0x020A054F),
+ UINT32_C(0x035F8744) },
+ { UINT32_C(0x0A303000), UINT32_C(0x0029FD2C), UINT32_C(0x0A5D9AC4),
+ UINT32_C(0x06593596), UINT32_C(0x0288D9B1), UINT32_C(0x02B32376),
+ UINT32_C(0x067C4E0D), UINT32_C(0x0D1B984D), UINT32_C(0x04235BF5),
+ UINT32_C(0x001AA52B), UINT32_C(0x0221BA35), UINT32_C(0x0B74D0D3),
+ UINT32_C(0x03DDFA56), UINT32_C(0x004A6854), UINT32_C(0x01203660),
+ UINT32_C(0x0090027D), UINT32_C(0x02356607), UINT32_C(0x064E652F),
+ UINT32_C(0x01D4CBEB) } },
+ { { UINT32_C(0x05CFE5E0), UINT32_C(0x04C8937C), UINT32_C(0x084C1BC9),
+ UINT32_C(0x0651FCA6), UINT32_C(0x0BDAC076), UINT32_C(0x079DB07C),
+ UINT32_C(0x01988893), UINT32_C(0x0D8E1644), UINT32_C(0x04F7CFCD),
+ UINT32_C(0x05727E1E), UINT32_C(0x073F0B5C), UINT32_C(0x0D975E23),
+ UINT32_C(0x06001F51), UINT32_C(0x07B2218F), UINT32_C(0x07159FF4),
+ UINT32_C(0x02D8AF28), UINT32_C(0x0F0AFF67), UINT32_C(0x0464C014),
+ UINT32_C(0x005A1007) },
+ { UINT32_C(0x078A8DB5), UINT32_C(0x035A301E), UINT32_C(0x0E9F9693),
+ UINT32_C(0x07A8969A), UINT32_C(0x096A5ECF), UINT32_C(0x03467DDF),
+ UINT32_C(0x07AF13AA), UINT32_C(0x0BF17A6B), UINT32_C(0x00FBC9C7),
+ UINT32_C(0x002F3F21), UINT32_C(0x01610D30), UINT32_C(0x0A6FEF92),
+ UINT32_C(0x00334A31), UINT32_C(0x0619D424), UINT32_C(0x011832DC),
+ UINT32_C(0x04A2EBED), UINT32_C(0x092C4F4E), UINT32_C(0x03E72AFA),
+ UINT32_C(0x04555CAD) } },
+ { { UINT32_C(0x0E8401D3), UINT32_C(0x031A9337), UINT32_C(0x0A68B915),
+ UINT32_C(0x006E6E9B), UINT32_C(0x0B1B6E29), UINT32_C(0x01B7F14B),
+ UINT32_C(0x047E0BD8), UINT32_C(0x0A8CBD43), UINT32_C(0x024528C3),
+ UINT32_C(0x08CA88A7), UINT32_C(0x000A1FEE), UINT32_C(0x0F21E47C),
+ UINT32_C(0x07D1A248), UINT32_C(0x04BE0AD5), UINT32_C(0x071E2CED),
+ UINT32_C(0x025521CD), UINT32_C(0x0F41E897), UINT32_C(0x0398886C),
+ UINT32_C(0x04779FFD) },
+ { UINT32_C(0x0A828FA8), UINT32_C(0x017C8B2C), UINT32_C(0x0910B047),
+ UINT32_C(0x06160B77), UINT32_C(0x0B98B463), UINT32_C(0x07DF3373),
+ UINT32_C(0x0455763C), UINT32_C(0x0F1284BE), UINT32_C(0x00906AAE),
+ UINT32_C(0x01A75E0B), UINT32_C(0x07A6DA7C), UINT32_C(0x0FFCAFF1),
+ UINT32_C(0x050D6EE5), UINT32_C(0x024BD0BA), UINT32_C(0x08383A01),
+ UINT32_C(0x070AE8EA), UINT32_C(0x0CAA2B64), UINT32_C(0x06171B63),
+ UINT32_C(0x020CE9FD) } },
+ { { UINT32_C(0x0147F509), UINT32_C(0x0074A121), UINT32_C(0x0B1C1B8D),
+ UINT32_C(0x00A39076), UINT32_C(0x0E542208), UINT32_C(0x01A08FA4),
+ UINT32_C(0x012AA998), UINT32_C(0x0954BE0E), UINT32_C(0x05751A97),
+ UINT32_C(0x09EFE174), UINT32_C(0x05C09E0D), UINT32_C(0x0DEE1815),
+ UINT32_C(0x000B0415), UINT32_C(0x06D82BE5), UINT32_C(0x000E24A9),
+ UINT32_C(0x042F7FD4), UINT32_C(0x0698791D), UINT32_C(0x05A5F79E),
+ UINT32_C(0x0334C8D5) },
+ { UINT32_C(0x0BB690A0), UINT32_C(0x01835514), UINT32_C(0x031B4F26),
+ UINT32_C(0x023AC44F), UINT32_C(0x012CDCD1), UINT32_C(0x059AE369),
+ UINT32_C(0x0123A551), UINT32_C(0x0AEBA693), UINT32_C(0x07D984CD),
+ UINT32_C(0x0DAD9128), UINT32_C(0x0765643E), UINT32_C(0x0910F0F8),
+ UINT32_C(0x03FB31E2), UINT32_C(0x01BD811A), UINT32_C(0x059F6B39),
+ UINT32_C(0x049E6619), UINT32_C(0x06B63C96), UINT32_C(0x075166F7),
+ UINT32_C(0x025CA72B) } },
+ { { UINT32_C(0x055F34E4), UINT32_C(0x00BF08BF), UINT32_C(0x03730236),
+ UINT32_C(0x039543BD), UINT32_C(0x05C17F94), UINT32_C(0x00A5C65D),
+ UINT32_C(0x06121DA8), UINT32_C(0x099AC777), UINT32_C(0x02DCC3D6),
+ UINT32_C(0x09002059), UINT32_C(0x0460BBB3), UINT32_C(0x07A202D8),
+ UINT32_C(0x04C44EB5), UINT32_C(0x049D001E), UINT32_C(0x0E783DED),
+ UINT32_C(0x0120D789), UINT32_C(0x086FA177), UINT32_C(0x065D19BF),
+ UINT32_C(0x042CA8B7) },
+ { UINT32_C(0x02860379), UINT32_C(0x06375711), UINT32_C(0x078E9829),
+ UINT32_C(0x04F20A43), UINT32_C(0x0ADA67C4), UINT32_C(0x054101F4),
+ UINT32_C(0x0602943F), UINT32_C(0x03FD9150), UINT32_C(0x06B8D61B),
+ UINT32_C(0x06F5ADD6), UINT32_C(0x06EB2BAC), UINT32_C(0x0A07906A),
+ UINT32_C(0x0147EDC1), UINT32_C(0x0477D372), UINT32_C(0x0025B1CE),
+ UINT32_C(0x071B32CF), UINT32_C(0x0F40C9C6), UINT32_C(0x02483D0B),
+ UINT32_C(0x07A56FCD) } },
+ { { UINT32_C(0x0B1B724E), UINT32_C(0x0100B5C8), UINT32_C(0x081380B3),
+ UINT32_C(0x048D8711), UINT32_C(0x0E363740), UINT32_C(0x029ED59F),
+ UINT32_C(0x05E7819F), UINT32_C(0x02898DC3), UINT32_C(0x03621527),
+ UINT32_C(0x0F99DD5D), UINT32_C(0x01DF449E), UINT32_C(0x022C0763),
+ UINT32_C(0x04490568), UINT32_C(0x051A6A61), UINT32_C(0x0EE682C8),
+ UINT32_C(0x0315AB2B), UINT32_C(0x08BF8EC0), UINT32_C(0x0221F0BD),
+ UINT32_C(0x0034A2F5) },
+ { UINT32_C(0x0505A0E7), UINT32_C(0x031C759D), UINT32_C(0x006AE380),
+ UINT32_C(0x04AD9B4F), UINT32_C(0x0F850346), UINT32_C(0x0053B140),
+ UINT32_C(0x060AB23A), UINT32_C(0x021E3C52), UINT32_C(0x002B9A66),
+ UINT32_C(0x01646B7A), UINT32_C(0x03977D69), UINT32_C(0x02418634),
+ UINT32_C(0x05E2030C), UINT32_C(0x06F8DED9), UINT32_C(0x064302A0),
+ UINT32_C(0x0553D4B6), UINT32_C(0x0956D92B), UINT32_C(0x0537BD35),
+ UINT32_C(0x07AFABE7) } },
+ { { UINT32_C(0x04CB8040), UINT32_C(0x016D2E6C), UINT32_C(0x0DDE4688),
+ UINT32_C(0x00DF2559), UINT32_C(0x0A980125), UINT32_C(0x066A1AC7),
+ UINT32_C(0x07DF5C4B), UINT32_C(0x0FD3C659), UINT32_C(0x00481C65),
+ UINT32_C(0x0AE5A70F), UINT32_C(0x029F751C), UINT32_C(0x00B4A3D4),
+ UINT32_C(0x075575BC), UINT32_C(0x045CF25E), UINT32_C(0x06867A07),
+ UINT32_C(0x076D7354), UINT32_C(0x0861487C), UINT32_C(0x017CEA2E),
+ UINT32_C(0x03228414) },
+ { UINT32_C(0x026AE111), UINT32_C(0x038FA015), UINT32_C(0x060716CA),
+ UINT32_C(0x04976285), UINT32_C(0x059BC9DE), UINT32_C(0x043BF937),
+ UINT32_C(0x035F13A1), UINT32_C(0x0F8D8888), UINT32_C(0x06D5E9F8),
+ UINT32_C(0x08616DB1), UINT32_C(0x032C0CBB), UINT32_C(0x0AA3299C),
+ UINT32_C(0x03F194B4), UINT32_C(0x00D0F72D), UINT32_C(0x0B3FCCBD),
+ UINT32_C(0x02803044), UINT32_C(0x0A08E3C3), UINT32_C(0x037A0997),
+ UINT32_C(0x05DC3B19) } },
+ { { UINT32_C(0x085193F0), UINT32_C(0x019978F4), UINT32_C(0x0BF0C234),
+ UINT32_C(0x04F7BBC1), UINT32_C(0x0722B6D6), UINT32_C(0x013DCEE7),
+ UINT32_C(0x05D575CD), UINT32_C(0x0779F809), UINT32_C(0x06335183),
+ UINT32_C(0x0DCC718C), UINT32_C(0x02D1E7DB), UINT32_C(0x0F6A6D57),
+ UINT32_C(0x065A96BF), UINT32_C(0x065930E7), UINT32_C(0x039B793F),
+ UINT32_C(0x06A9BA2E), UINT32_C(0x0C033596), UINT32_C(0x01BE1126),
+ UINT32_C(0x03EA93B8) },
+ { UINT32_C(0x03161177), UINT32_C(0x002665D5), UINT32_C(0x017B69C9),
+ UINT32_C(0x07892DD4), UINT32_C(0x0F6F8ECB), UINT32_C(0x0576AF37),
+ UINT32_C(0x03C1E515), UINT32_C(0x05A60E50), UINT32_C(0x02549873),
+ UINT32_C(0x09B3D920), UINT32_C(0x029DA082), UINT32_C(0x009DAE44),
+ UINT32_C(0x0197C8E7), UINT32_C(0x0154A33B), UINT32_C(0x097B3971),
+ UINT32_C(0x023C0423), UINT32_C(0x02B8C68C), UINT32_C(0x04DCA653),
+ UINT32_C(0x00079A0F) } },
+ { { UINT32_C(0x063E2975), UINT32_C(0x06BEC9ED), UINT32_C(0x0B38790C),
+ UINT32_C(0x022D87D1), UINT32_C(0x0EA228A4), UINT32_C(0x010DBA9F),
+ UINT32_C(0x015868D8), UINT32_C(0x080C5E0D), UINT32_C(0x075196CF),
+ UINT32_C(0x0A3AFD7E), UINT32_C(0x031A6E14), UINT32_C(0x0E7A5374),
+ UINT32_C(0x067A8FE5), UINT32_C(0x06ECEB0D), UINT32_C(0x0B84F9C7),
+ UINT32_C(0x0680604D), UINT32_C(0x072314F9), UINT32_C(0x03A2F4B2),
+ UINT32_C(0x06C5081F) },
+ { UINT32_C(0x0B981980), UINT32_C(0x0349CBF0), UINT32_C(0x072972B5),
+ UINT32_C(0x02885527), UINT32_C(0x0150CDBD), UINT32_C(0x07F178E3),
+ UINT32_C(0x032B4111), UINT32_C(0x0B2B4EF6), UINT32_C(0x000F21B3),
+ UINT32_C(0x039D39FF), UINT32_C(0x07E2383D), UINT32_C(0x0F91A9DF),
+ UINT32_C(0x000BF2A4), UINT32_C(0x003EA686), UINT32_C(0x06E3C109),
+ UINT32_C(0x05D771D7), UINT32_C(0x03336F2A), UINT32_C(0x00A9A15C),
+ UINT32_C(0x0310BC8B) } },
+ { { UINT32_C(0x082B5AA4), UINT32_C(0x04A7240C), UINT32_C(0x00ABF375),
+ UINT32_C(0x07E33DEB), UINT32_C(0x01BD8789), UINT32_C(0x06BA83A6),
+ UINT32_C(0x05A6491B), UINT32_C(0x04DB69BD), UINT32_C(0x010D6A55),
+ UINT32_C(0x0D5DAFA1), UINT32_C(0x06C7F999), UINT32_C(0x0185AD3E),
+ UINT32_C(0x027EAEB5), UINT32_C(0x006644C8), UINT32_C(0x0B9709E1),
+ UINT32_C(0x07676CF0), UINT32_C(0x0508273E), UINT32_C(0x054D3FBB),
+ UINT32_C(0x063EFA4A) },
+ { UINT32_C(0x010AA767), UINT32_C(0x01CC5A04), UINT32_C(0x0BE5B1B3),
+ UINT32_C(0x06950FCE), UINT32_C(0x0E94E6DB), UINT32_C(0x0497BB17),
+ UINT32_C(0x00CC06B4), UINT32_C(0x08846F32), UINT32_C(0x0314DC3B),
+ UINT32_C(0x0BA27736), UINT32_C(0x0432450D), UINT32_C(0x04925C53),
+ UINT32_C(0x03119EE1), UINT32_C(0x04A66669), UINT32_C(0x05FBA305),
+ UINT32_C(0x033D4900), UINT32_C(0x0FE789AF), UINT32_C(0x0671EF4B),
+ UINT32_C(0x0259D6DF) } },
+ { { UINT32_C(0x05C529C4), UINT32_C(0x04097FDD), UINT32_C(0x0296486E),
+ UINT32_C(0x05D5E29C), UINT32_C(0x0B3FABA2), UINT32_C(0x0695126C),
+ UINT32_C(0x0312362F), UINT32_C(0x08DC4B4B), UINT32_C(0x0413884F),
+ UINT32_C(0x067DDD33), UINT32_C(0x055DBD8F), UINT32_C(0x07D0B9CB),
+ UINT32_C(0x01BE7C35), UINT32_C(0x043BC43D), UINT32_C(0x00E5A19E),
+ UINT32_C(0x017725FC), UINT32_C(0x006A669F), UINT32_C(0x063FD379),
+ UINT32_C(0x0682F5E5) },
+ { UINT32_C(0x0035FA1B), UINT32_C(0x0302079C), UINT32_C(0x0A397CF2),
+ UINT32_C(0x02A9E0EB), UINT32_C(0x0183E8FA), UINT32_C(0x00950C41),
+ UINT32_C(0x05ACFED2), UINT32_C(0x0B8DC827), UINT32_C(0x0004B05C),
+ UINT32_C(0x0ECD486A), UINT32_C(0x04FBAB30), UINT32_C(0x0A2FE908),
+ UINT32_C(0x05C95F6D), UINT32_C(0x06B30876), UINT32_C(0x0F3D7A8A),
+ UINT32_C(0x0734E57D), UINT32_C(0x0410C523), UINT32_C(0x057AD388),
+ UINT32_C(0x073AF161) } },
+ { { UINT32_C(0x033E8718), UINT32_C(0x05E156C6), UINT32_C(0x0188F2D0),
+ UINT32_C(0x07B490F4), UINT32_C(0x0D1D9936), UINT32_C(0x045ACF91),
+ UINT32_C(0x05EADE92), UINT32_C(0x09204996), UINT32_C(0x03FB05AD),
+ UINT32_C(0x0952B30E), UINT32_C(0x066E8B73), UINT32_C(0x02E38706),
+ UINT32_C(0x06AD215A), UINT32_C(0x05770FF2), UINT32_C(0x0CCC64AA),
+ UINT32_C(0x00A77560), UINT32_C(0x084A4A57), UINT32_C(0x07428950),
+ UINT32_C(0x007783FF) },
+ { UINT32_C(0x07864A53), UINT32_C(0x02B0B04D), UINT32_C(0x0CE9B903),
+ UINT32_C(0x032C4DB9), UINT32_C(0x0ED34B7B), UINT32_C(0x02B9BB80),
+ UINT32_C(0x0107A7A1), UINT32_C(0x0133502C), UINT32_C(0x06939D9B),
+ UINT32_C(0x07AE6A42), UINT32_C(0x01C55CB0), UINT32_C(0x0A087059),
+ UINT32_C(0x011E8069), UINT32_C(0x02AC5D81), UINT32_C(0x0FF470E4),
+ UINT32_C(0x068D4B88), UINT32_C(0x03B934D1), UINT32_C(0x01E86F4D),
+ UINT32_C(0x00286D40) } },
+ { { UINT32_C(0x0A097CC4), UINT32_C(0x07C93D92), UINT32_C(0x03638A82),
+ UINT32_C(0x05D44662), UINT32_C(0x034F8801), UINT32_C(0x01E1B0E9),
+ UINT32_C(0x03132ED7), UINT32_C(0x0D61A771), UINT32_C(0x0777FA2F),
+ UINT32_C(0x0E4D4244), UINT32_C(0x02CDDCA4), UINT32_C(0x01988721),
+ UINT32_C(0x0694972F), UINT32_C(0x02AA2585), UINT32_C(0x06A552DD),
+ UINT32_C(0x02719251), UINT32_C(0x0C4FD604), UINT32_C(0x033FC4DD),
+ UINT32_C(0x02A49BC5) },
+ { UINT32_C(0x0ECC32F4), UINT32_C(0x03998CBA), UINT32_C(0x0E555140),
+ UINT32_C(0x06BE70C6), UINT32_C(0x02ECE0DB), UINT32_C(0x07D7EE62),
+ UINT32_C(0x006B8450), UINT32_C(0x0C677BF6), UINT32_C(0x0065EEBA),
+ UINT32_C(0x0C8F791B), UINT32_C(0x05880489), UINT32_C(0x07724E1B),
+ UINT32_C(0x00C43815), UINT32_C(0x079C7129), UINT32_C(0x0AC7BD8B),
+ UINT32_C(0x00B35A76), UINT32_C(0x0E62F127), UINT32_C(0x06892912),
+ UINT32_C(0x069DE730) } },
+ { { UINT32_C(0x0D176E2E), UINT32_C(0x04BD43B7), UINT32_C(0x0843A348),
+ UINT32_C(0x0749D5C1), UINT32_C(0x0ED9CC05), UINT32_C(0x00305C32),
+ UINT32_C(0x037CC7F4), UINT32_C(0x03DF22FB), UINT32_C(0x05799B29),
+ UINT32_C(0x0BAA8556), UINT32_C(0x01B9550B), UINT32_C(0x0B71D97D),
+ UINT32_C(0x071866D2), UINT32_C(0x042A76ED), UINT32_C(0x0CF558E6),
+ UINT32_C(0x05C52446), UINT32_C(0x0E80A5C3), UINT32_C(0x0732DC8B),
+ UINT32_C(0x05430293) },
+ { UINT32_C(0x08A05AA1), UINT32_C(0x060E94EA), UINT32_C(0x0495DB83),
+ UINT32_C(0x07F23E7E), UINT32_C(0x09BABC6A), UINT32_C(0x07B134F3),
+ UINT32_C(0x02C60301), UINT32_C(0x0C76C75A), UINT32_C(0x0496E91D),
+ UINT32_C(0x0354A538), UINT32_C(0x03F832DB), UINT32_C(0x03139812),
+ UINT32_C(0x028BB56E), UINT32_C(0x06BC315A), UINT32_C(0x08F87E08),
+ UINT32_C(0x04EB9933), UINT32_C(0x0D94A083), UINT32_C(0x00F1E782),
+ UINT32_C(0x00039DA7) } },
+ { { UINT32_C(0x0F46E9D5), UINT32_C(0x04AFDE7F), UINT32_C(0x02DD9156),
+ UINT32_C(0x03A43A4A), UINT32_C(0x0334CF91), UINT32_C(0x06B820D5),
+ UINT32_C(0x02AB098A), UINT32_C(0x010407F3), UINT32_C(0x06E15825),
+ UINT32_C(0x0DE19BBC), UINT32_C(0x05C155A7), UINT32_C(0x098AB480),
+ UINT32_C(0x027F0A26), UINT32_C(0x001E493A), UINT32_C(0x0D3BF154),
+ UINT32_C(0x0022BB7B), UINT32_C(0x092F7F8A), UINT32_C(0x025E06B0),
+ UINT32_C(0x0214EC84) },
+ { UINT32_C(0x0E367447), UINT32_C(0x07A76C60), UINT32_C(0x0E7F25B2),
+ UINT32_C(0x061DC274), UINT32_C(0x08037471), UINT32_C(0x0601CC83),
+ UINT32_C(0x077C01C1), UINT32_C(0x0BD797B8), UINT32_C(0x07A2D854),
+ UINT32_C(0x0F539925), UINT32_C(0x00056A50), UINT32_C(0x0F52ABBB),
+ UINT32_C(0x01C407C4), UINT32_C(0x046E3EC8), UINT32_C(0x08C6B255),
+ UINT32_C(0x06BB4D5F), UINT32_C(0x09336DFF), UINT32_C(0x00D914F1),
+ UINT32_C(0x01F9DBAA) } },
+ { { UINT32_C(0x0D831A04), UINT32_C(0x05A97D33), UINT32_C(0x0906D401),
+ UINT32_C(0x01E543D5), UINT32_C(0x063B64A7), UINT32_C(0x01DF1F04),
+ UINT32_C(0x07BEAE26), UINT32_C(0x0C4C51CE), UINT32_C(0x071253E1),
+ UINT32_C(0x07C5C1BC), UINT32_C(0x0686EDD8), UINT32_C(0x0EADB491),
+ UINT32_C(0x06FCC7E8), UINT32_C(0x04DC895B), UINT32_C(0x0DA99CB1),
+ UINT32_C(0x07538043), UINT32_C(0x0DCCD221), UINT32_C(0x05338542),
+ UINT32_C(0x0263F3E2) },
+ { UINT32_C(0x049B2FC3), UINT32_C(0x00D9571D), UINT32_C(0x09A6B74E),
+ UINT32_C(0x013E9069), UINT32_C(0x0C142061), UINT32_C(0x0661D5AE),
+ UINT32_C(0x078F1467), UINT32_C(0x0568D3A9), UINT32_C(0x02729AA5),
+ UINT32_C(0x0749905F), UINT32_C(0x02491337), UINT32_C(0x0A8EED74),
+ UINT32_C(0x070FB80C), UINT32_C(0x066BA15B), UINT32_C(0x087A7668),
+ UINT32_C(0x03342CBD), UINT32_C(0x0FCD50D2), UINT32_C(0x017CF7F9),
+ UINT32_C(0x05DA6EDD) } },
+ },
+ {
+ { { UINT32_C(0x08ECE594), UINT32_C(0x02E6D7AF), UINT32_C(0x0160833B),
+ UINT32_C(0x05E9199C), UINT32_C(0x05C1EB44), UINT32_C(0x01F9CDD2),
+ UINT32_C(0x04ECBF7E), UINT32_C(0x011F5E2E), UINT32_C(0x00B16683),
+ UINT32_C(0x082C80F7), UINT32_C(0x04F6D76E), UINT32_C(0x0A9035A2),
+ UINT32_C(0x02A6F996), UINT32_C(0x07CF51EF), UINT32_C(0x011C78A5),
+ UINT32_C(0x03E6811A), UINT32_C(0x0DCCBD54), UINT32_C(0x029CA158),
+ UINT32_C(0x0188556B) },
+ { UINT32_C(0x0EBBFAD7), UINT32_C(0x036D4FEF), UINT32_C(0x0DAD8CB2),
+ UINT32_C(0x024C5461), UINT32_C(0x09F7253C), UINT32_C(0x052C8206),
+ UINT32_C(0x03009FD7), UINT32_C(0x05A4E883), UINT32_C(0x04FFDBF8),
+ UINT32_C(0x07B5A2D0), UINT32_C(0x0487033B), UINT32_C(0x003EABFC),
+ UINT32_C(0x0107E479), UINT32_C(0x0479A422), UINT32_C(0x0ECEA707),
+ UINT32_C(0x05D06F61), UINT32_C(0x05BD0428), UINT32_C(0x01301D97),
+ UINT32_C(0x0137ADE9) } },
+ { { UINT32_C(0x008164D4), UINT32_C(0x02998A00), UINT32_C(0x0E9FE1D5),
+ UINT32_C(0x05B9A827), UINT32_C(0x0AA45754), UINT32_C(0x06793FDD),
+ UINT32_C(0x01D8C060), UINT32_C(0x030ECBF4), UINT32_C(0x01FDC34C),
+ UINT32_C(0x0FA8650F), UINT32_C(0x0739AA31), UINT32_C(0x0905FB0D),
+ UINT32_C(0x04B98585), UINT32_C(0x04528DD9), UINT32_C(0x0582E0E8),
+ UINT32_C(0x0685885D), UINT32_C(0x008F4125), UINT32_C(0x02A15C01),
+ UINT32_C(0x023D540D) },
+ { UINT32_C(0x039B003C), UINT32_C(0x074C5CC0), UINT32_C(0x029B2FBB),
+ UINT32_C(0x07F27890), UINT32_C(0x0C083234), UINT32_C(0x054081D7),
+ UINT32_C(0x0109E54D), UINT32_C(0x08920F8E), UINT32_C(0x07D87B98),
+ UINT32_C(0x07E36E68), UINT32_C(0x023912DB), UINT32_C(0x071A5BBC),
+ UINT32_C(0x0733E49F), UINT32_C(0x058495D0), UINT32_C(0x0068F694),
+ UINT32_C(0x012DCC7D), UINT32_C(0x0DC88ED4), UINT32_C(0x06D1A2D4),
+ UINT32_C(0x02BBA636) } },
+ { { UINT32_C(0x0B78796F), UINT32_C(0x0335FA8E), UINT32_C(0x0243FD16),
+ UINT32_C(0x03C6B319), UINT32_C(0x01CD8CA8), UINT32_C(0x0704FAEE),
+ UINT32_C(0x04540F1E), UINT32_C(0x092AC9A2), UINT32_C(0x020A1CA3),
+ UINT32_C(0x023FC6DD), UINT32_C(0x01EFAF42), UINT32_C(0x00BC4AB2),
+ UINT32_C(0x0206DD26), UINT32_C(0x07400CF2), UINT32_C(0x072BD012),
+ UINT32_C(0x00840AB3), UINT32_C(0x016D752E), UINT32_C(0x00CEF006),
+ UINT32_C(0x0647C23D) },
+ { UINT32_C(0x0F6CA70B), UINT32_C(0x05AFF85F), UINT32_C(0x031691E3),
+ UINT32_C(0x01063899), UINT32_C(0x02420E8C), UINT32_C(0x03D2D13C),
+ UINT32_C(0x059E8A01), UINT32_C(0x0FC5FC43), UINT32_C(0x042A852F),
+ UINT32_C(0x06446FD4), UINT32_C(0x0341CB5B), UINT32_C(0x044193ED),
+ UINT32_C(0x073BE475), UINT32_C(0x051FCBEA), UINT32_C(0x00D6D405),
+ UINT32_C(0x00A0026F), UINT32_C(0x09A09555), UINT32_C(0x0037DFDB),
+ UINT32_C(0x0186A76D) } },
+ { { UINT32_C(0x06762E69), UINT32_C(0x05E586F2), UINT32_C(0x08A5D295),
+ UINT32_C(0x021AEB8A), UINT32_C(0x0D8E9356), UINT32_C(0x05E8F45E),
+ UINT32_C(0x04336CB6), UINT32_C(0x04373909), UINT32_C(0x020299B5),
+ UINT32_C(0x013EB290), UINT32_C(0x061E0E31), UINT32_C(0x07167125),
+ UINT32_C(0x01291CE5), UINT32_C(0x05F204F5), UINT32_C(0x060A0EA2),
+ UINT32_C(0x0414B179), UINT32_C(0x064F6F43), UINT32_C(0x0114060E),
+ UINT32_C(0x040928CF) },
+ { UINT32_C(0x0B54A6C6), UINT32_C(0x010FE7C2), UINT32_C(0x0FDA19CB),
+ UINT32_C(0x056B791E), UINT32_C(0x049ED286), UINT32_C(0x02401472),
+ UINT32_C(0x048F8CD1), UINT32_C(0x0EAC2400), UINT32_C(0x075D6078),
+ UINT32_C(0x0EAAD7B3), UINT32_C(0x051EDE19), UINT32_C(0x0D7E6F09),
+ UINT32_C(0x001044A9), UINT32_C(0x0411E3BA), UINT32_C(0x0D3647C4),
+ UINT32_C(0x00168497), UINT32_C(0x08BA1235), UINT32_C(0x01C93676),
+ UINT32_C(0x01411BDC) } },
+ { { UINT32_C(0x07F5FEA0), UINT32_C(0x068F1494), UINT32_C(0x0CF3659A),
+ UINT32_C(0x034F4CD5), UINT32_C(0x08840E07), UINT32_C(0x01463227),
+ UINT32_C(0x02CE4099), UINT32_C(0x00306A1A), UINT32_C(0x043276DA),
+ UINT32_C(0x0C0A79A8), UINT32_C(0x045485DA), UINT32_C(0x0D43B7E5),
+ UINT32_C(0x0245D30D), UINT32_C(0x07040ECA), UINT32_C(0x0F0944E2),
+ UINT32_C(0x02FAB448), UINT32_C(0x0A3418D6), UINT32_C(0x00AEEE32),
+ UINT32_C(0x054B0477) },
+ { UINT32_C(0x002E1A49), UINT32_C(0x02417738), UINT32_C(0x003FC230),
+ UINT32_C(0x057B81BC), UINT32_C(0x09252F9B), UINT32_C(0x071E923E),
+ UINT32_C(0x07556FE9), UINT32_C(0x0405C043), UINT32_C(0x05F4A479),
+ UINT32_C(0x00AE6EBC), UINT32_C(0x0470CEA9), UINT32_C(0x043EFE7F),
+ UINT32_C(0x032F779B), UINT32_C(0x05D5E4C1), UINT32_C(0x0F412FF3),
+ UINT32_C(0x029E0A95), UINT32_C(0x027FF900), UINT32_C(0x0639C4FE),
+ UINT32_C(0x05496FF2) } },
+ { { UINT32_C(0x093A81E5), UINT32_C(0x06552EA0), UINT32_C(0x076C940F),
+ UINT32_C(0x04D9EBF4), UINT32_C(0x07435E68), UINT32_C(0x00026B20),
+ UINT32_C(0x022F07A1), UINT32_C(0x0D1152A6), UINT32_C(0x01605EB4),
+ UINT32_C(0x021ED2B3), UINT32_C(0x0416BC52), UINT32_C(0x0F03BB25),
+ UINT32_C(0x032FD879), UINT32_C(0x0224E24D), UINT32_C(0x0227BC06),
+ UINT32_C(0x07E18BB7), UINT32_C(0x0846E10C), UINT32_C(0x025383D2),
+ UINT32_C(0x0716FE98) },
+ { UINT32_C(0x048353E7), UINT32_C(0x06A51D17), UINT32_C(0x0602B7B4),
+ UINT32_C(0x00A3A912), UINT32_C(0x00D41798), UINT32_C(0x009BAAA2),
+ UINT32_C(0x014F6863), UINT32_C(0x0B8C9E0C), UINT32_C(0x004E89E7),
+ UINT32_C(0x01EA2B4D), UINT32_C(0x069FE41B), UINT32_C(0x0E23CD44),
+ UINT32_C(0x0284C3F8), UINT32_C(0x0709633E), UINT32_C(0x00EC122E),
+ UINT32_C(0x054C3546), UINT32_C(0x0274CE48), UINT32_C(0x0562858C),
+ UINT32_C(0x00845131) } },
+ { { UINT32_C(0x093C77DA), UINT32_C(0x01D351AD), UINT32_C(0x023A3C02),
+ UINT32_C(0x050A84F5), UINT32_C(0x0D2278BA), UINT32_C(0x0166F47B),
+ UINT32_C(0x010E24C3), UINT32_C(0x0171F355), UINT32_C(0x070D70CC),
+ UINT32_C(0x0F04C14A), UINT32_C(0x0675CE80), UINT32_C(0x03C92277),
+ UINT32_C(0x027C5314), UINT32_C(0x0475432E), UINT32_C(0x0A42C984),
+ UINT32_C(0x021A86BA), UINT32_C(0x09667047), UINT32_C(0x0162D620),
+ UINT32_C(0x05CE1F5E) },
+ { UINT32_C(0x0541016D), UINT32_C(0x04AA27AD), UINT32_C(0x024272A0),
+ UINT32_C(0x0124A937), UINT32_C(0x04022798), UINT32_C(0x04C4908F),
+ UINT32_C(0x078D2755), UINT32_C(0x05FC4690), UINT32_C(0x03D49867),
+ UINT32_C(0x0D0542ED), UINT32_C(0x014AC0C6), UINT32_C(0x0444F4AA),
+ UINT32_C(0x0527B53A), UINT32_C(0x04E463E4), UINT32_C(0x084795B9),
+ UINT32_C(0x06190D53), UINT32_C(0x01F0982A), UINT32_C(0x06C19AFA),
+ UINT32_C(0x02B40A43) } },
+ { { UINT32_C(0x0D526DD9), UINT32_C(0x02D2A436), UINT32_C(0x06CBC632),
+ UINT32_C(0x06A016EB), UINT32_C(0x0229215C), UINT32_C(0x063A186E),
+ UINT32_C(0x056A2652), UINT32_C(0x0982D8F0), UINT32_C(0x04950B55),
+ UINT32_C(0x0C34A068), UINT32_C(0x036F958C), UINT32_C(0x0EC7C304),
+ UINT32_C(0x00685912), UINT32_C(0x00521605), UINT32_C(0x074386C5),
+ UINT32_C(0x06C5C880), UINT32_C(0x01D5C0E0), UINT32_C(0x0321B5FC),
+ UINT32_C(0x031F89D8) },
+ { UINT32_C(0x0E4F4EFB), UINT32_C(0x042EF02C), UINT32_C(0x0747294D),
+ UINT32_C(0x06315147), UINT32_C(0x09826B36), UINT32_C(0x044F7A99),
+ UINT32_C(0x00DA6A3B), UINT32_C(0x0B192C6C), UINT32_C(0x017D9CD6),
+ UINT32_C(0x07D0FC8D), UINT32_C(0x00306186), UINT32_C(0x0DA5FD2C),
+ UINT32_C(0x048EA8B6), UINT32_C(0x041BED38), UINT32_C(0x028A7681),
+ UINT32_C(0x0444E09E), UINT32_C(0x07A1C182), UINT32_C(0x06CEB6B8),
+ UINT32_C(0x0402E972) } },
+ { { UINT32_C(0x0A37CD61), UINT32_C(0x07A90498), UINT32_C(0x03236B70),
+ UINT32_C(0x010D1CA8), UINT32_C(0x0C8EE94C), UINT32_C(0x01332402),
+ UINT32_C(0x00D01671), UINT32_C(0x0D20BD0A), UINT32_C(0x04F8905D),
+ UINT32_C(0x0CB75503), UINT32_C(0x07C71184), UINT32_C(0x04D224FF),
+ UINT32_C(0x05EF5D3B), UINT32_C(0x02D2D84B), UINT32_C(0x0776D6B8),
+ UINT32_C(0x01B04C47), UINT32_C(0x0C6883AD), UINT32_C(0x041BC984),
+ UINT32_C(0x0738830F) },
+ { UINT32_C(0x008A7408), UINT32_C(0x01833053), UINT32_C(0x0DCDED77),
+ UINT32_C(0x0660E3CD), UINT32_C(0x003541F4), UINT32_C(0x06650324),
+ UINT32_C(0x056D1103), UINT32_C(0x012DDC16), UINT32_C(0x04858446),
+ UINT32_C(0x031BD98F), UINT32_C(0x07EA97C0), UINT32_C(0x033EA10E),
+ UINT32_C(0x07E40598), UINT32_C(0x03935067), UINT32_C(0x06BD3C58),
+ UINT32_C(0x0709A382), UINT32_C(0x0FFD62B5), UINT32_C(0x03ACA64E),
+ UINT32_C(0x02BDB05C) } },
+ { { UINT32_C(0x019DDB66), UINT32_C(0x0151276D), UINT32_C(0x0D169D42),
+ UINT32_C(0x07424F74), UINT32_C(0x0073574B), UINT32_C(0x029D6033),
+ UINT32_C(0x04805B63), UINT32_C(0x0FF3CCB8), UINT32_C(0x0657BEB9),
+ UINT32_C(0x06710C8D), UINT32_C(0x076A0EFE), UINT32_C(0x05FFC38A),
+ UINT32_C(0x039B2127), UINT32_C(0x04A7D60B), UINT32_C(0x0D352201),
+ UINT32_C(0x0459932F), UINT32_C(0x0A56306E), UINT32_C(0x05D63C8E),
+ UINT32_C(0x01727D3E) },
+ { UINT32_C(0x0A228C02), UINT32_C(0x0454E2FD), UINT32_C(0x0C5CF406),
+ UINT32_C(0x072A6748), UINT32_C(0x09478B3C), UINT32_C(0x01C032C4),
+ UINT32_C(0x024B1CF3), UINT32_C(0x07BCB89A), UINT32_C(0x017F8136),
+ UINT32_C(0x03BFA207), UINT32_C(0x0032CE35), UINT32_C(0x01301C08),
+ UINT32_C(0x01F1D68E), UINT32_C(0x024447E0), UINT32_C(0x00655D3F),
+ UINT32_C(0x04B5B6DB), UINT32_C(0x08F50A61), UINT32_C(0x07FE19DA),
+ UINT32_C(0x01906979) } },
+ { { UINT32_C(0x04E80EB1), UINT32_C(0x052DB749), UINT32_C(0x0FA876FF),
+ UINT32_C(0x014D563E), UINT32_C(0x0DD8DCB4), UINT32_C(0x06D08CF5),
+ UINT32_C(0x0088B6C9), UINT32_C(0x099DAF2C), UINT32_C(0x06ADE3E9),
+ UINT32_C(0x05F27F40), UINT32_C(0x076292C5), UINT32_C(0x02149C44),
+ UINT32_C(0x04ECED26), UINT32_C(0x04016166), UINT32_C(0x0E8DD0F0),
+ UINT32_C(0x02703366), UINT32_C(0x09A4D3F8), UINT32_C(0x000C4924),
+ UINT32_C(0x066F3B89) },
+ { UINT32_C(0x00F92986), UINT32_C(0x001B8CB3), UINT32_C(0x0C27E556),
+ UINT32_C(0x05EAB0C7), UINT32_C(0x0A95BBEF), UINT32_C(0x011331B7),
+ UINT32_C(0x03245504), UINT32_C(0x0B108EBA), UINT32_C(0x0704FE66),
+ UINT32_C(0x0AEECF39), UINT32_C(0x0485E096), UINT32_C(0x0D5B3E1E),
+ UINT32_C(0x02DB3A00), UINT32_C(0x06FBA80E), UINT32_C(0x0AEE0EA5),
+ UINT32_C(0x064273CE), UINT32_C(0x0CD775D3), UINT32_C(0x00232462),
+ UINT32_C(0x0347DCE7) } },
+ { { UINT32_C(0x029AE558), UINT32_C(0x07BED198), UINT32_C(0x073802BF),
+ UINT32_C(0x0528429C), UINT32_C(0x02A79F18), UINT32_C(0x045BFA11),
+ UINT32_C(0x07B77865), UINT32_C(0x065D4D35), UINT32_C(0x03701A97),
+ UINT32_C(0x03C87FB5), UINT32_C(0x07338AED), UINT32_C(0x0260F0C6),
+ UINT32_C(0x032E371B), UINT32_C(0x048EAB15), UINT32_C(0x06488CED),
+ UINT32_C(0x04349BDC), UINT32_C(0x09FF872F), UINT32_C(0x01EBC954),
+ UINT32_C(0x02644425) },
+ { UINT32_C(0x0AAD22D1), UINT32_C(0x04DA634D), UINT32_C(0x0931B0A2),
+ UINT32_C(0x0366BA6D), UINT32_C(0x0A03F852), UINT32_C(0x003C4DA2),
+ UINT32_C(0x07BDDE59), UINT32_C(0x00543C06), UINT32_C(0x05EA4710),
+ UINT32_C(0x0622BACC), UINT32_C(0x03C86D6F), UINT32_C(0x0810EAB1),
+ UINT32_C(0x0128E64D), UINT32_C(0x02C5B6EF), UINT32_C(0x0F37432C),
+ UINT32_C(0x0391A4CD), UINT32_C(0x09344B8B), UINT32_C(0x007DDA34),
+ UINT32_C(0x02408EDC) } },
+ { { UINT32_C(0x0EB8B398), UINT32_C(0x068DF986), UINT32_C(0x0BCADF8A),
+ UINT32_C(0x01829A9B), UINT32_C(0x017C9B77), UINT32_C(0x0446621A),
+ UINT32_C(0x026EE0C4), UINT32_C(0x0E0FE9B2), UINT32_C(0x0528FE1C),
+ UINT32_C(0x08E6DD5A), UINT32_C(0x018FB2E0), UINT32_C(0x0FD2A7AB),
+ UINT32_C(0x002E71A2), UINT32_C(0x069C2EFB), UINT32_C(0x0156F759),
+ UINT32_C(0x04F3A78E), UINT32_C(0x022C4533), UINT32_C(0x069A2816),
+ UINT32_C(0x03C034B1) },
+ { UINT32_C(0x0D05FF6A), UINT32_C(0x07761186), UINT32_C(0x0D73ABC6),
+ UINT32_C(0x06AC086B), UINT32_C(0x0BF965A1), UINT32_C(0x05F6546D),
+ UINT32_C(0x07767397), UINT32_C(0x005C4608), UINT32_C(0x005803C4),
+ UINT32_C(0x024EE133), UINT32_C(0x05FC51BD), UINT32_C(0x099F0D97),
+ UINT32_C(0x00437C0C), UINT32_C(0x0553A827), UINT32_C(0x0FB0EB60),
+ UINT32_C(0x06A7AEC5), UINT32_C(0x07C31264), UINT32_C(0x020D4B32),
+ UINT32_C(0x045F6381) } },
+ { { UINT32_C(0x04D9F1F8), UINT32_C(0x05315A15), UINT32_C(0x01990B25),
+ UINT32_C(0x01A6DE98), UINT32_C(0x036D854A), UINT32_C(0x03D25F0D),
+ UINT32_C(0x06673F83), UINT32_C(0x04C56936), UINT32_C(0x019ACD66),
+ UINT32_C(0x0C1F1C47), UINT32_C(0x04AD0FD3), UINT32_C(0x0148F4FA),
+ UINT32_C(0x07BC3A93), UINT32_C(0x02F86E22), UINT32_C(0x0291F62B),
+ UINT32_C(0x01F87233), UINT32_C(0x0F616501), UINT32_C(0x06C1B9E5),
+ UINT32_C(0x05FB6CAA) },
+ { UINT32_C(0x0DAF0C41), UINT32_C(0x050BE47B), UINT32_C(0x0DD799BF),
+ UINT32_C(0x00BB8754), UINT32_C(0x07221726), UINT32_C(0x00F26A35),
+ UINT32_C(0x0474A809), UINT32_C(0x0250B288), UINT32_C(0x0680A8C1),
+ UINT32_C(0x09FDC598), UINT32_C(0x00424EA2), UINT32_C(0x09CADE7E),
+ UINT32_C(0x0092845D), UINT32_C(0x0301B24F), UINT32_C(0x0CF7BF3E),
+ UINT32_C(0x0747B26E), UINT32_C(0x04110EBF), UINT32_C(0x002FC650),
+ UINT32_C(0x066AF8B8) } },
+ { { UINT32_C(0x06DBC74A), UINT32_C(0x02C31098), UINT32_C(0x069497D4),
+ UINT32_C(0x048864EC), UINT32_C(0x01E12C96), UINT32_C(0x03EE9F03),
+ UINT32_C(0x05400CB4), UINT32_C(0x00B9E174), UINT32_C(0x04923BC3),
+ UINT32_C(0x0B5B54EA), UINT32_C(0x04A635C8), UINT32_C(0x0039A770),
+ UINT32_C(0x079340D3), UINT32_C(0x02B053A6), UINT32_C(0x0AA8C800),
+ UINT32_C(0x073E66A4), UINT32_C(0x0304ED5B), UINT32_C(0x007ACB50),
+ UINT32_C(0x069EBA57) },
+ { UINT32_C(0x04FA3D53), UINT32_C(0x050EF28C), UINT32_C(0x09A3C2CF),
+ UINT32_C(0x03DE9C58), UINT32_C(0x085E0F9C), UINT32_C(0x069D187C),
+ UINT32_C(0x04624402), UINT32_C(0x0C81F8BF), UINT32_C(0x02E444D9),
+ UINT32_C(0x0D776F3C), UINT32_C(0x02B966E8), UINT32_C(0x017A5803),
+ UINT32_C(0x005E79FE), UINT32_C(0x017FF63B), UINT32_C(0x05B01559),
+ UINT32_C(0x03097D34), UINT32_C(0x0F3A10BA), UINT32_C(0x0712D05A),
+ UINT32_C(0x03904282) } },
+ { { UINT32_C(0x0727DDB2), UINT32_C(0x0322FBEE), UINT32_C(0x006E2FCD),
+ UINT32_C(0x07EA06FF), UINT32_C(0x0BA09E24), UINT32_C(0x00F733F8),
+ UINT32_C(0x03D6DCAE), UINT32_C(0x049125D5), UINT32_C(0x077E1A66),
+ UINT32_C(0x0D68AE84), UINT32_C(0x04F77FA6), UINT32_C(0x0964F229),
+ UINT32_C(0x011AD49C), UINT32_C(0x05CC02E9), UINT32_C(0x03E1CD67),
+ UINT32_C(0x06E9B6EE), UINT32_C(0x02ABE8BE), UINT32_C(0x056C7601),
+ UINT32_C(0x050C554C) },
+ { UINT32_C(0x01B068CF), UINT32_C(0x012F41C1), UINT32_C(0x0CD31293),
+ UINT32_C(0x056F1C35), UINT32_C(0x0716CA13), UINT32_C(0x0544293E),
+ UINT32_C(0x06007211), UINT32_C(0x04F726E6), UINT32_C(0x007D49EF),
+ UINT32_C(0x0E336972), UINT32_C(0x031C46EF), UINT32_C(0x025A6106),
+ UINT32_C(0x05AA92B9), UINT32_C(0x011700B0), UINT32_C(0x011058CF),
+ UINT32_C(0x00395DAC), UINT32_C(0x02BBCCE0), UINT32_C(0x029EAC52),
+ UINT32_C(0x028A26A5) } },
+ },
+ {
+ { { UINT32_C(0x0FFE4858), UINT32_C(0x044AC143), UINT32_C(0x06252D69),
+ UINT32_C(0x03691755), UINT32_C(0x0DE0F670), UINT32_C(0x0295E478),
+ UINT32_C(0x05945AF8), UINT32_C(0x0A5D32CA), UINT32_C(0x0234DE82),
+ UINT32_C(0x0F67E075), UINT32_C(0x06115CED), UINT32_C(0x00AE3A40),
+ UINT32_C(0x04F21740), UINT32_C(0x05BA53F6), UINT32_C(0x05840CD3),
+ UINT32_C(0x02246AB6), UINT32_C(0x0A7E5891), UINT32_C(0x00E30EE3),
+ UINT32_C(0x06E32125) },
+ { UINT32_C(0x028DA023), UINT32_C(0x0757D14A), UINT32_C(0x0F1F2367),
+ UINT32_C(0x071B23A0), UINT32_C(0x09FF6F22), UINT32_C(0x06AE99FC),
+ UINT32_C(0x07D2FAD3), UINT32_C(0x0C60DF70), UINT32_C(0x008ADC3F),
+ UINT32_C(0x090D9E92), UINT32_C(0x027C0C30), UINT32_C(0x01553F37),
+ UINT32_C(0x047ACF16), UINT32_C(0x017392AB), UINT32_C(0x05D9DD01),
+ UINT32_C(0x07D1EF5C), UINT32_C(0x039F6FB5), UINT32_C(0x029DC337),
+ UINT32_C(0x04960195) } },
+ { { UINT32_C(0x0994A7B1), UINT32_C(0x00E9A7BA), UINT32_C(0x03544C1B),
+ UINT32_C(0x0606BDF6), UINT32_C(0x01F3406A), UINT32_C(0x0635C178),
+ UINT32_C(0x04CA0BE9), UINT32_C(0x09B74F10), UINT32_C(0x046E4155),
+ UINT32_C(0x0655718B), UINT32_C(0x06B58CFD), UINT32_C(0x00E2656C),
+ UINT32_C(0x0426833D), UINT32_C(0x063C550C), UINT32_C(0x049DDCA9),
+ UINT32_C(0x04F6A9FC), UINT32_C(0x0676F8FD), UINT32_C(0x07BCA38C),
+ UINT32_C(0x059BDCBC) },
+ { UINT32_C(0x096F6D73), UINT32_C(0x0378FAEB), UINT32_C(0x0AA2949D),
+ UINT32_C(0x02979AD2), UINT32_C(0x0FD54FA0), UINT32_C(0x0358AB66),
+ UINT32_C(0x012D1C2E), UINT32_C(0x0A3E9433), UINT32_C(0x012502DC),
+ UINT32_C(0x0BF42C60), UINT32_C(0x02403252), UINT32_C(0x0B59A13D),
+ UINT32_C(0x07CE87D8), UINT32_C(0x06EFA510), UINT32_C(0x0F316813),
+ UINT32_C(0x048C6131), UINT32_C(0x0ABB4F2B), UINT32_C(0x00135CF6),
+ UINT32_C(0x019B839C) } },
+ { { UINT32_C(0x0CDE12CD), UINT32_C(0x01F2EE46), UINT32_C(0x096668FC),
+ UINT32_C(0x06800020), UINT32_C(0x0D8D4DC3), UINT32_C(0x01F9D872),
+ UINT32_C(0x0074B363), UINT32_C(0x08E353D0), UINT32_C(0x06B87B06),
+ UINT32_C(0x05F1A3E4), UINT32_C(0x03D67702), UINT32_C(0x0AD5ACE9),
+ UINT32_C(0x024E9994), UINT32_C(0x03C2A440), UINT32_C(0x05A6C55C),
+ UINT32_C(0x045CAA47), UINT32_C(0x0AC34E77), UINT32_C(0x068E05E3),
+ UINT32_C(0x0598564E) },
+ { UINT32_C(0x0366B021), UINT32_C(0x017935A2), UINT32_C(0x04F773DB),
+ UINT32_C(0x04629F66), UINT32_C(0x096AE2DC), UINT32_C(0x00DB3EE0),
+ UINT32_C(0x05684F63), UINT32_C(0x00391BA5), UINT32_C(0x07270BBB),
+ UINT32_C(0x0E28A705), UINT32_C(0x02BB0A4B), UINT32_C(0x097DCA61),
+ UINT32_C(0x04E133F5), UINT32_C(0x04899B3E), UINT32_C(0x00637ACF),
+ UINT32_C(0x02D4E63D), UINT32_C(0x09635CB7), UINT32_C(0x02DEDDE2),
+ UINT32_C(0x02229A95) } },
+ { { UINT32_C(0x0CD34315), UINT32_C(0x02E1C8DC), UINT32_C(0x067A6FB7),
+ UINT32_C(0x03DB6FAE), UINT32_C(0x07281C55), UINT32_C(0x046AC647),
+ UINT32_C(0x002E790C), UINT32_C(0x0F3D1BC4), UINT32_C(0x0533A625),
+ UINT32_C(0x06417AC2), UINT32_C(0x018ACECE), UINT32_C(0x0B7019D6),
+ UINT32_C(0x06EDA9DA), UINT32_C(0x01938AF8), UINT32_C(0x029911BB),
+ UINT32_C(0x03E2995B), UINT32_C(0x0C0E3FBA), UINT32_C(0x011596D1),
+ UINT32_C(0x00271C3C) },
+ { UINT32_C(0x0356A25A), UINT32_C(0x072A1ED9), UINT32_C(0x0EAF77B0),
+ UINT32_C(0x02B4B853), UINT32_C(0x0C759255), UINT32_C(0x02FB6C3D),
+ UINT32_C(0x0704DFA8), UINT32_C(0x0D59777F), UINT32_C(0x078F4FA8),
+ UINT32_C(0x03C11635), UINT32_C(0x02E52765), UINT32_C(0x02ACB74C),
+ UINT32_C(0x007731B9), UINT32_C(0x0137AD56), UINT32_C(0x063A4E6E),
+ UINT32_C(0x06744404), UINT32_C(0x09B78353), UINT32_C(0x04631A57),
+ UINT32_C(0x018C7F7E) } },
+ { { UINT32_C(0x0EAD4FF9), UINT32_C(0x05871450), UINT32_C(0x07F9BF26),
+ UINT32_C(0x02BC1D4E), UINT32_C(0x00CD4484), UINT32_C(0x04EBA4AB),
+ UINT32_C(0x01DEDBB8), UINT32_C(0x0E25B38D), UINT32_C(0x049D1268),
+ UINT32_C(0x0D04AABB), UINT32_C(0x01AEF51D), UINT32_C(0x00829E43),
+ UINT32_C(0x05402C62), UINT32_C(0x0368D70D), UINT32_C(0x03775E01),
+ UINT32_C(0x04503803), UINT32_C(0x02B6C48D), UINT32_C(0x01FD101D),
+ UINT32_C(0x0025FF9E) },
+ { UINT32_C(0x0B8B195A), UINT32_C(0x02323FFC), UINT32_C(0x00557FA3),
+ UINT32_C(0x073ED365), UINT32_C(0x0A376D54), UINT32_C(0x023A3994),
+ UINT32_C(0x00F1CC64), UINT32_C(0x080DCBBA), UINT32_C(0x01BB869C),
+ UINT32_C(0x084DE7DF), UINT32_C(0x03102B44), UINT32_C(0x0559CF4A),
+ UINT32_C(0x0385604A), UINT32_C(0x05CB3A44), UINT32_C(0x022C8F10),
+ UINT32_C(0x00AC8251), UINT32_C(0x0D40C893), UINT32_C(0x00107891),
+ UINT32_C(0x06795987) } },
+ { { UINT32_C(0x06920A2A), UINT32_C(0x051ED07D), UINT32_C(0x0D40A6DB),
+ UINT32_C(0x004D5082), UINT32_C(0x0BB2B0B9), UINT32_C(0x046EEDFC),
+ UINT32_C(0x077C4F4D), UINT32_C(0x0025B307), UINT32_C(0x00CCCEED),
+ UINT32_C(0x05AD182A), UINT32_C(0x0734F059), UINT32_C(0x0B480EE5),
+ UINT32_C(0x0170F1CB), UINT32_C(0x0417A672), UINT32_C(0x05B933B3),
+ UINT32_C(0x0279BB07), UINT32_C(0x0341E8CB), UINT32_C(0x071F7EBF),
+ UINT32_C(0x0231AF93) },
+ { UINT32_C(0x01CA3CCC), UINT32_C(0x042A30AF), UINT32_C(0x0E1E55F1),
+ UINT32_C(0x07A6A1AC), UINT32_C(0x0D95EC2F), UINT32_C(0x029E2CCD),
+ UINT32_C(0x00847505), UINT32_C(0x0184F443), UINT32_C(0x04B6D717),
+ UINT32_C(0x03764831), UINT32_C(0x043E0649), UINT32_C(0x0378A536),
+ UINT32_C(0x0430CAB4), UINT32_C(0x05B08C42), UINT32_C(0x0B147E31),
+ UINT32_C(0x0270B565), UINT32_C(0x056846E1), UINT32_C(0x0393806E),
+ UINT32_C(0x0102687E) } },
+ { { UINT32_C(0x0EB5DCD3), UINT32_C(0x0185FC5D), UINT32_C(0x03181617),
+ UINT32_C(0x01479862), UINT32_C(0x0D1E00A3), UINT32_C(0x000E2351),
+ UINT32_C(0x041EA413), UINT32_C(0x0EC09039), UINT32_C(0x00213EFE),
+ UINT32_C(0x02085A51), UINT32_C(0x027B7641), UINT32_C(0x0EE239C0),
+ UINT32_C(0x06D0F7BB), UINT32_C(0x0267C803), UINT32_C(0x0B79A7EE),
+ UINT32_C(0x0681FFDF), UINT32_C(0x08DFF64B), UINT32_C(0x0688C37C),
+ UINT32_C(0x03D1AE9F) },
+ { UINT32_C(0x03B68E6C), UINT32_C(0x07F04BE5), UINT32_C(0x060E4D0D),
+ UINT32_C(0x0534899D), UINT32_C(0x0FA52B9C), UINT32_C(0x001C4752),
+ UINT32_C(0x00BCA60E), UINT32_C(0x041ED165), UINT32_C(0x01DBEB9D),
+ UINT32_C(0x04BEFD90), UINT32_C(0x05B1A36F), UINT32_C(0x0C6DA7CD),
+ UINT32_C(0x025F29BF), UINT32_C(0x0143D052), UINT32_C(0x099FCD3B),
+ UINT32_C(0x04934EE0), UINT32_C(0x00F9287C), UINT32_C(0x06BF2174),
+ UINT32_C(0x05D3AAEB) } },
+ { { UINT32_C(0x0B07B1BF), UINT32_C(0x008B8614), UINT32_C(0x00E21485),
+ UINT32_C(0x07064A8F), UINT32_C(0x04328BCA), UINT32_C(0x0126ADF3),
+ UINT32_C(0x07D9CEFE), UINT32_C(0x0B5FE8D9), UINT32_C(0x03B144E7),
+ UINT32_C(0x0FF1E126), UINT32_C(0x06AF8F59), UINT32_C(0x07A6CE02),
+ UINT32_C(0x07F9BE52), UINT32_C(0x003588EF), UINT32_C(0x0EFF3D3A),
+ UINT32_C(0x052C77D2), UINT32_C(0x010CACE8), UINT32_C(0x05B1B51F),
+ UINT32_C(0x06F19D06) },
+ { UINT32_C(0x042166D8), UINT32_C(0x04CD028C), UINT32_C(0x039C24AE),
+ UINT32_C(0x02C03F19), UINT32_C(0x067F4B98), UINT32_C(0x020FC733),
+ UINT32_C(0x01DAB42C), UINT32_C(0x02FF3B82), UINT32_C(0x048BCF28),
+ UINT32_C(0x019BFE25), UINT32_C(0x05777D5F), UINT32_C(0x06871AF8),
+ UINT32_C(0x04139F9E), UINT32_C(0x07211D99), UINT32_C(0x0AD09893),
+ UINT32_C(0x01E0FD46), UINT32_C(0x02906E37), UINT32_C(0x028275DB),
+ UINT32_C(0x046A1575) } },
+ { { UINT32_C(0x08AA3834), UINT32_C(0x06C07864), UINT32_C(0x0E044947),
+ UINT32_C(0x03335EFD), UINT32_C(0x067B5E62), UINT32_C(0x034C6315),
+ UINT32_C(0x07572306), UINT32_C(0x07CFC444), UINT32_C(0x01B85C68),
+ UINT32_C(0x04AE9317), UINT32_C(0x004244BB), UINT32_C(0x02B9387A),
+ UINT32_C(0x07EC501D), UINT32_C(0x030A85A4), UINT32_C(0x035462ED),
+ UINT32_C(0x0713AD0C), UINT32_C(0x053851AC), UINT32_C(0x02FE3E5B),
+ UINT32_C(0x06B40EB3) },
+ { UINT32_C(0x053E08C6), UINT32_C(0x05772205), UINT32_C(0x030BB610),
+ UINT32_C(0x008EE615), UINT32_C(0x0B7E6CE7), UINT32_C(0x00783E50),
+ UINT32_C(0x0096806A), UINT32_C(0x066126FD), UINT32_C(0x051C1C80),
+ UINT32_C(0x0ECBCD5E), UINT32_C(0x03A28DED), UINT32_C(0x08FD6395),
+ UINT32_C(0x022A192F), UINT32_C(0x0736A4A0), UINT32_C(0x01369C64),
+ UINT32_C(0x02AB6ECE), UINT32_C(0x06E0E541), UINT32_C(0x03248146),
+ UINT32_C(0x00948603) } },
+ { { UINT32_C(0x069B34EA), UINT32_C(0x0336603F), UINT32_C(0x06DBFFB7),
+ UINT32_C(0x0300F54C), UINT32_C(0x03402123), UINT32_C(0x04E1356D),
+ UINT32_C(0x04422E8C), UINT32_C(0x0C555F86), UINT32_C(0x065AB272),
+ UINT32_C(0x053F830F), UINT32_C(0x0579A41E), UINT32_C(0x0FEFEF91),
+ UINT32_C(0x004E0795), UINT32_C(0x016107F9), UINT32_C(0x08D654BD),
+ UINT32_C(0x04ABFECE), UINT32_C(0x06C9D84D), UINT32_C(0x03813525),
+ UINT32_C(0x07CB6F50) },
+ { UINT32_C(0x09047156), UINT32_C(0x010B8EB7), UINT32_C(0x0CC6FC83),
+ UINT32_C(0x0431B14F), UINT32_C(0x03572502), UINT32_C(0x076096FF),
+ UINT32_C(0x0028C298), UINT32_C(0x066F3BBA), UINT32_C(0x00B06491),
+ UINT32_C(0x0665164A), UINT32_C(0x04A5A55D), UINT32_C(0x02DAC096),
+ UINT32_C(0x03E71E1C), UINT32_C(0x0256A93B), UINT32_C(0x04C0530A),
+ UINT32_C(0x062EDF21), UINT32_C(0x0F59E8F8), UINT32_C(0x019409ED),
+ UINT32_C(0x07A2F4BF) } },
+ { { UINT32_C(0x0665B1CF), UINT32_C(0x0034F110), UINT32_C(0x0E6E0C55),
+ UINT32_C(0x05548084), UINT32_C(0x0CB9C817), UINT32_C(0x010A8F87),
+ UINT32_C(0x012A9C49), UINT32_C(0x0982F57E), UINT32_C(0x00D5BB56),
+ UINT32_C(0x0649D707), UINT32_C(0x00C86A10), UINT32_C(0x0C3ED33B),
+ UINT32_C(0x065AEDD0), UINT32_C(0x061D08CC), UINT32_C(0x010AAD5D),
+ UINT32_C(0x015E11C5), UINT32_C(0x0CE68252), UINT32_C(0x03DCA282),
+ UINT32_C(0x023E7D61) },
+ { UINT32_C(0x094CC511), UINT32_C(0x053544CA), UINT32_C(0x067DDC2E),
+ UINT32_C(0x022C5BA7), UINT32_C(0x0E503DBC), UINT32_C(0x06CD2E73),
+ UINT32_C(0x058CE06F), UINT32_C(0x072AA3E8), UINT32_C(0x06DB1977),
+ UINT32_C(0x04494EBF), UINT32_C(0x00968BBC), UINT32_C(0x02E8F607),
+ UINT32_C(0x06F93369), UINT32_C(0x00836553), UINT32_C(0x05A73753),
+ UINT32_C(0x03A8B586), UINT32_C(0x00A046AC), UINT32_C(0x0211F089),
+ UINT32_C(0x0389954D) } },
+ { { UINT32_C(0x0BB13D25), UINT32_C(0x023A4F60), UINT32_C(0x05B894C3),
+ UINT32_C(0x01F6CF6C), UINT32_C(0x0F316A82), UINT32_C(0x07269483),
+ UINT32_C(0x0724D1FF), UINT32_C(0x081060C2), UINT32_C(0x07213116),
+ UINT32_C(0x0B65307F), UINT32_C(0x06CB9993), UINT32_C(0x04580D3B),
+ UINT32_C(0x064521E7), UINT32_C(0x07FA9810), UINT32_C(0x00B180DF),
+ UINT32_C(0x058701A7), UINT32_C(0x08BFB845), UINT32_C(0x0175BF68),
+ UINT32_C(0x02BF1464) },
+ { UINT32_C(0x04B66F01), UINT32_C(0x059EAFDA), UINT32_C(0x02EB7B38),
+ UINT32_C(0x0382ED4B), UINT32_C(0x0D3E8A47), UINT32_C(0x061E1C44),
+ UINT32_C(0x06369F05), UINT32_C(0x0221CD6C), UINT32_C(0x033836B4),
+ UINT32_C(0x0580C2E2), UINT32_C(0x071C3002), UINT32_C(0x0C51E97D),
+ UINT32_C(0x06D684C3), UINT32_C(0x074D62F1), UINT32_C(0x0851439A),
+ UINT32_C(0x038AB710), UINT32_C(0x0300D39E), UINT32_C(0x0390C464),
+ UINT32_C(0x04D98E09) } },
+ { { UINT32_C(0x0140A004), UINT32_C(0x00D68C0B), UINT32_C(0x080890B3),
+ UINT32_C(0x07D532CC), UINT32_C(0x05EC2C5B), UINT32_C(0x065415DB),
+ UINT32_C(0x021CBEF3), UINT32_C(0x0C92C4C7), UINT32_C(0x002C11E2),
+ UINT32_C(0x087FFDBE), UINT32_C(0x00BBD5AB), UINT32_C(0x0D3147C6),
+ UINT32_C(0x027322CF), UINT32_C(0x048AE30E), UINT32_C(0x0A78BD27),
+ UINT32_C(0x06E52637), UINT32_C(0x0F79BB43), UINT32_C(0x05C2CDD9),
+ UINT32_C(0x03AEDAB1) },
+ { UINT32_C(0x01F8F797), UINT32_C(0x05E078E8), UINT32_C(0x0A430953),
+ UINT32_C(0x079FE860), UINT32_C(0x098B3236), UINT32_C(0x00A0033B),
+ UINT32_C(0x0311C26A), UINT32_C(0x02325326), UINT32_C(0x021CEBBC),
+ UINT32_C(0x01C498E4), UINT32_C(0x02365440), UINT32_C(0x091FBA94),
+ UINT32_C(0x017487BB), UINT32_C(0x0321A8D5), UINT32_C(0x071AEF9F),
+ UINT32_C(0x047D457D), UINT32_C(0x01BCFB0E), UINT32_C(0x0071F7BC),
+ UINT32_C(0x075AEFAA) } },
+ { { UINT32_C(0x0C98DFAE), UINT32_C(0x01C5257A), UINT32_C(0x06506435),
+ UINT32_C(0x00916D1A), UINT32_C(0x0D65B633), UINT32_C(0x06BAC13A),
+ UINT32_C(0x013D2F72), UINT32_C(0x0B8C7FD1), UINT32_C(0x0068E619),
+ UINT32_C(0x0C30A25B), UINT32_C(0x016EBDF8), UINT32_C(0x0D8A2E42),
+ UINT32_C(0x01E2AB8D), UINT32_C(0x07855AFB), UINT32_C(0x01F15FBB),
+ UINT32_C(0x01DA4917), UINT32_C(0x074DB277), UINT32_C(0x030BAC3C),
+ UINT32_C(0x01B1B048) },
+ { UINT32_C(0x00C92FB5), UINT32_C(0x00781A5F), UINT32_C(0x0B53EE11),
+ UINT32_C(0x04366DE3), UINT32_C(0x0D7AFCA1), UINT32_C(0x04C3CAB8),
+ UINT32_C(0x031EB35F), UINT32_C(0x00CDDA16), UINT32_C(0x05DB2AA4),
+ UINT32_C(0x0EEC79C5), UINT32_C(0x0123CDB1), UINT32_C(0x0A41DC06),
+ UINT32_C(0x06880096), UINT32_C(0x069843C8), UINT32_C(0x0CF78DBD),
+ UINT32_C(0x0751C797), UINT32_C(0x0381D873), UINT32_C(0x055DD420),
+ UINT32_C(0x011ED33F) } },
+ { { UINT32_C(0x0629DD22), UINT32_C(0x0329136A), UINT32_C(0x0F4C3A86),
+ UINT32_C(0x02DF1D68), UINT32_C(0x0629460E), UINT32_C(0x04615D04),
+ UINT32_C(0x06370A73), UINT32_C(0x0FF4CD28), UINT32_C(0x031AD006),
+ UINT32_C(0x08F7AAC2), UINT32_C(0x05792159), UINT32_C(0x0680FF31),
+ UINT32_C(0x04E1BAE8), UINT32_C(0x02E9B2B2), UINT32_C(0x0033BF36),
+ UINT32_C(0x07DA8F9E), UINT32_C(0x0C93AB40), UINT32_C(0x01D743F3),
+ UINT32_C(0x07644D30) },
+ { UINT32_C(0x075200EB), UINT32_C(0x07C0784F), UINT32_C(0x0BE5A2EF),
+ UINT32_C(0x002C4071), UINT32_C(0x0BB7DD65), UINT32_C(0x004ADBD2),
+ UINT32_C(0x040D6568), UINT32_C(0x0F9A3BB6), UINT32_C(0x003E18E7),
+ UINT32_C(0x0B2FA6B5), UINT32_C(0x04ED429F), UINT32_C(0x06091338),
+ UINT32_C(0x01D161FD), UINT32_C(0x00454AAD), UINT32_C(0x0CAE06AA),
+ UINT32_C(0x04E95021), UINT32_C(0x04523C5D), UINT32_C(0x041594F0),
+ UINT32_C(0x065084CD) } },
+ { { UINT32_C(0x002145D7), UINT32_C(0x047D8374), UINT32_C(0x0467ABA3),
+ UINT32_C(0x051CC3F5), UINT32_C(0x0483BB69), UINT32_C(0x05CC8B8E),
+ UINT32_C(0x00E452BD), UINT32_C(0x04333A28), UINT32_C(0x04F1A76A),
+ UINT32_C(0x0CC64EC5), UINT32_C(0x05D9332C), UINT32_C(0x0E975BFD),
+ UINT32_C(0x036AEA82), UINT32_C(0x03B66BE1), UINT32_C(0x0C8D0897),
+ UINT32_C(0x00F4E2EA), UINT32_C(0x0E84A7FD), UINT32_C(0x04F8C351),
+ UINT32_C(0x03B65097) },
+ { UINT32_C(0x0DDB406F), UINT32_C(0x00890ADF), UINT32_C(0x03BBC60E),
+ UINT32_C(0x01C0CA21), UINT32_C(0x0A76C2EF), UINT32_C(0x01695DF8),
+ UINT32_C(0x07073F32), UINT32_C(0x0EED6813), UINT32_C(0x014D6ADC),
+ UINT32_C(0x0AD30E57), UINT32_C(0x0080597C), UINT32_C(0x051E8314),
+ UINT32_C(0x02334D30), UINT32_C(0x01C9AC19), UINT32_C(0x0D628FAA),
+ UINT32_C(0x03467107), UINT32_C(0x027B5A2C), UINT32_C(0x07FE2414),
+ UINT32_C(0x06D835AF) } },
+ },
+ {
+ { { UINT32_C(0x0EF34144), UINT32_C(0x030D91DC), UINT32_C(0x05517757),
+ UINT32_C(0x007F4856), UINT32_C(0x07EAF164), UINT32_C(0x058E3931),
+ UINT32_C(0x0713CF7A), UINT32_C(0x0D5B04EB), UINT32_C(0x0416E9E6),
+ UINT32_C(0x02479D66), UINT32_C(0x03230F77), UINT32_C(0x0E9111E0),
+ UINT32_C(0x004A4528), UINT32_C(0x02C7F7D1), UINT32_C(0x02C19F36),
+ UINT32_C(0x0456B2EE), UINT32_C(0x083CA160), UINT32_C(0x04377D25),
+ UINT32_C(0x02CC5D8D) },
+ { UINT32_C(0x024FDE34), UINT32_C(0x056A1AF8), UINT32_C(0x04A1F978),
+ UINT32_C(0x07F66131), UINT32_C(0x09CCCEFE), UINT32_C(0x056AE73E),
+ UINT32_C(0x0373907A), UINT32_C(0x08E4DFA2), UINT32_C(0x06104B90),
+ UINT32_C(0x0CB65FE3), UINT32_C(0x0157AEF0), UINT32_C(0x0346E5AE),
+ UINT32_C(0x06A8D9D0), UINT32_C(0x034F592B), UINT32_C(0x06A50F43),
+ UINT32_C(0x03B946D2), UINT32_C(0x0B23CFAE), UINT32_C(0x01428E19),
+ UINT32_C(0x01E96239) } },
+ { { UINT32_C(0x0FF5FDD9), UINT32_C(0x06FD0B27), UINT32_C(0x0E5375B8),
+ UINT32_C(0x02903F56), UINT32_C(0x0A0998F1), UINT32_C(0x04C7F7A7),
+ UINT32_C(0x07B849C2), UINT32_C(0x01F684C1), UINT32_C(0x03D27FA7),
+ UINT32_C(0x0ECDF852), UINT32_C(0x067A0FF9), UINT32_C(0x01170172),
+ UINT32_C(0x06847341), UINT32_C(0x0384EC35), UINT32_C(0x097FA0B1),
+ UINT32_C(0x056D5954), UINT32_C(0x0811FE39), UINT32_C(0x03141A8E),
+ UINT32_C(0x03197AAF) },
+ { UINT32_C(0x06B64713), UINT32_C(0x01EA477B), UINT32_C(0x0401B800),
+ UINT32_C(0x056A093F), UINT32_C(0x0B18523C), UINT32_C(0x05FBF38B),
+ UINT32_C(0x0000837C), UINT32_C(0x0205CC9C), UINT32_C(0x0211586E),
+ UINT32_C(0x00E95959), UINT32_C(0x011034DB), UINT32_C(0x0705835C),
+ UINT32_C(0x0534A7CA), UINT32_C(0x01BEEAE0), UINT32_C(0x011191B1),
+ UINT32_C(0x06AC6C8E), UINT32_C(0x0F65A0B0), UINT32_C(0x01E452CE),
+ UINT32_C(0x07AA591C) } },
+ { { UINT32_C(0x04BE78BD), UINT32_C(0x06F41AA4), UINT32_C(0x09895DC2),
+ UINT32_C(0x05E43C02), UINT32_C(0x0F5ED50D), UINT32_C(0x0055BA85),
+ UINT32_C(0x04B88B8C), UINT32_C(0x07C05237), UINT32_C(0x06B089B3),
+ UINT32_C(0x09D41AEF), UINT32_C(0x07A77F2E), UINT32_C(0x0B03794F),
+ UINT32_C(0x0272136B), UINT32_C(0x013E2617), UINT32_C(0x039B53A2),
+ UINT32_C(0x04704526), UINT32_C(0x0958114F), UINT32_C(0x01DF2245),
+ UINT32_C(0x0736ACD3) },
+ { UINT32_C(0x020FED74), UINT32_C(0x0142B2B5), UINT32_C(0x00BC648B),
+ UINT32_C(0x045D8303), UINT32_C(0x01238CE7), UINT32_C(0x041E6696),
+ UINT32_C(0x07794FE3), UINT32_C(0x02BC0623), UINT32_C(0x04D21409),
+ UINT32_C(0x05FABD03), UINT32_C(0x074FAEA0), UINT32_C(0x08FD5BE6),
+ UINT32_C(0x041F41AC), UINT32_C(0x046062AA), UINT32_C(0x06780730),
+ UINT32_C(0x035F4E6F), UINT32_C(0x016D4890), UINT32_C(0x05B93E77),
+ UINT32_C(0x01E38302) } },
+ { { UINT32_C(0x0736B7A8), UINT32_C(0x049E4056), UINT32_C(0x01935194),
+ UINT32_C(0x056AFE87), UINT32_C(0x0526EB80), UINT32_C(0x0763756F),
+ UINT32_C(0x0438F678), UINT32_C(0x074903F5), UINT32_C(0x0305EF19),
+ UINT32_C(0x0434448D), UINT32_C(0x05186915), UINT32_C(0x00E55244),
+ UINT32_C(0x017BD6D1), UINT32_C(0x0747C684), UINT32_C(0x0FEE9906),
+ UINT32_C(0x07BEA2FE), UINT32_C(0x04C3FEC5), UINT32_C(0x05EAB892),
+ UINT32_C(0x03E3B341) },
+ { UINT32_C(0x0DEF19D6), UINT32_C(0x03A56FE1), UINT32_C(0x09F33CC0),
+ UINT32_C(0x03E3A7C9), UINT32_C(0x04712359), UINT32_C(0x02515669),
+ UINT32_C(0x035C962B), UINT32_C(0x08C45240), UINT32_C(0x033CCA10),
+ UINT32_C(0x06965FA2), UINT32_C(0x04F88D82), UINT32_C(0x0FDE595A),
+ UINT32_C(0x0241F5B1), UINT32_C(0x03F203E1), UINT32_C(0x0BB7CDF8),
+ UINT32_C(0x046409AD), UINT32_C(0x08E4A186), UINT32_C(0x01723DD8),
+ UINT32_C(0x02B93AF0) } },
+ { { UINT32_C(0x0FACC519), UINT32_C(0x027F5A2C), UINT32_C(0x0CA8C450),
+ UINT32_C(0x03EC651F), UINT32_C(0x0B47E880), UINT32_C(0x01B9DB47),
+ UINT32_C(0x06895D1C), UINT32_C(0x0F1857B2), UINT32_C(0x06CC04B3),
+ UINT32_C(0x01C2D89D), UINT32_C(0x04525759), UINT32_C(0x0B6EACB4),
+ UINT32_C(0x07770FC8), UINT32_C(0x04A7FC79), UINT32_C(0x03B56F1C),
+ UINT32_C(0x0248A360), UINT32_C(0x0A73C4C6), UINT32_C(0x04BA5188),
+ UINT32_C(0x0400E477) },
+ { UINT32_C(0x0AEA3E6E), UINT32_C(0x05DA167B), UINT32_C(0x02C8D4B1),
+ UINT32_C(0x074DB11C), UINT32_C(0x05DB2724), UINT32_C(0x04492C83),
+ UINT32_C(0x00B62A05), UINT32_C(0x03A036B6), UINT32_C(0x07BC9211),
+ UINT32_C(0x05739939), UINT32_C(0x00FD8C64), UINT32_C(0x0E68B0EC),
+ UINT32_C(0x050FC3F3), UINT32_C(0x0446466F), UINT32_C(0x0A598C89),
+ UINT32_C(0x062CB99D), UINT32_C(0x0C97B1FA), UINT32_C(0x077F1F42),
+ UINT32_C(0x051B5A92) } },
+ { { UINT32_C(0x09C36058), UINT32_C(0x05929A37), UINT32_C(0x079147E4),
+ UINT32_C(0x0546B4E8), UINT32_C(0x0C41B43A), UINT32_C(0x05F16140),
+ UINT32_C(0x0124A189), UINT32_C(0x0D01EFB0), UINT32_C(0x00FCDC74),
+ UINT32_C(0x0D3E796F), UINT32_C(0x0597A54B), UINT32_C(0x097F7DE8),
+ UINT32_C(0x0677C89A), UINT32_C(0x036C6165), UINT32_C(0x0DFFFA33),
+ UINT32_C(0x0782CAAE), UINT32_C(0x07E6FE65), UINT32_C(0x04887038),
+ UINT32_C(0x0636D482) },
+ { UINT32_C(0x071EFA02), UINT32_C(0x07F91B7E), UINT32_C(0x0950028E),
+ UINT32_C(0x069527C7), UINT32_C(0x09CE6F6C), UINT32_C(0x01FEEAA0),
+ UINT32_C(0x014DED92), UINT32_C(0x0D94B717), UINT32_C(0x014B513D),
+ UINT32_C(0x0A97F421), UINT32_C(0x075448FA), UINT32_C(0x041A5F24),
+ UINT32_C(0x0721201F), UINT32_C(0x0444C83A), UINT32_C(0x07F6AE04),
+ UINT32_C(0x030824B5), UINT32_C(0x0246F2D9), UINT32_C(0x05F21CD9),
+ UINT32_C(0x06817477) } },
+ { { UINT32_C(0x0DDEF055), UINT32_C(0x01C63F00), UINT32_C(0x0570BDE9),
+ UINT32_C(0x07433A8A), UINT32_C(0x099522A9), UINT32_C(0x051DEDFE),
+ UINT32_C(0x01712838), UINT32_C(0x0C8ECC33), UINT32_C(0x04846773),
+ UINT32_C(0x0D5E2042), UINT32_C(0x017373E7), UINT32_C(0x04742EE4),
+ UINT32_C(0x01053131), UINT32_C(0x01BD8B10), UINT32_C(0x01A5A425),
+ UINT32_C(0x072BB78A), UINT32_C(0x01A26990), UINT32_C(0x02CD45F0),
+ UINT32_C(0x03124D19) },
+ { UINT32_C(0x01A2F1BD), UINT32_C(0x02C1057A), UINT32_C(0x07B6C2D1),
+ UINT32_C(0x00B79FA6), UINT32_C(0x09B44B1B), UINT32_C(0x0428D7E8),
+ UINT32_C(0x04C94C23), UINT32_C(0x0DFB15C5), UINT32_C(0x02F5DBF7),
+ UINT32_C(0x0BC452A9), UINT32_C(0x044F06AF), UINT32_C(0x06C3295D),
+ UINT32_C(0x0661CB9B), UINT32_C(0x0001E990), UINT32_C(0x022A6D5E),
+ UINT32_C(0x03420E57), UINT32_C(0x0D5E7F7E), UINT32_C(0x0593D853),
+ UINT32_C(0x00938C95) } },
+ { { UINT32_C(0x0899A80A), UINT32_C(0x063E3726), UINT32_C(0x08972EC5),
+ UINT32_C(0x037C93BE), UINT32_C(0x031E1342), UINT32_C(0x07C51EDF),
+ UINT32_C(0x03702DD4), UINT32_C(0x086F89E1), UINT32_C(0x047EBB47),
+ UINT32_C(0x06A291B7), UINT32_C(0x0685EBFA), UINT32_C(0x0EF566F4),
+ UINT32_C(0x02FC8735), UINT32_C(0x03A7F885), UINT32_C(0x0963A567),
+ UINT32_C(0x02DEC9A4), UINT32_C(0x033285D3), UINT32_C(0x0049779E),
+ UINT32_C(0x05AB7D24) },
+ { UINT32_C(0x04E67976), UINT32_C(0x03AD342E), UINT32_C(0x006D58B0),
+ UINT32_C(0x0490C968), UINT32_C(0x0428E13C), UINT32_C(0x0183F7B5),
+ UINT32_C(0x0168EF02), UINT32_C(0x031E9F33), UINT32_C(0x079C2D32),
+ UINT32_C(0x0EC6C4B2), UINT32_C(0x06334DE3), UINT32_C(0x04E10D5F),
+ UINT32_C(0x0431C81B), UINT32_C(0x001EE024), UINT32_C(0x01F6A3D0),
+ UINT32_C(0x0009B04D), UINT32_C(0x0A95C815), UINT32_C(0x06C721B5),
+ UINT32_C(0x07DEE1A8) } },
+ { { UINT32_C(0x0C112CB8), UINT32_C(0x00691E2E), UINT32_C(0x01DBEB00),
+ UINT32_C(0x077CCE8A), UINT32_C(0x03E91FE4), UINT32_C(0x0690BBBF),
+ UINT32_C(0x0577CA8A), UINT32_C(0x00B5C974), UINT32_C(0x029377A0),
+ UINT32_C(0x06FDF488), UINT32_C(0x00872436), UINT32_C(0x0506D32E),
+ UINT32_C(0x055C17BB), UINT32_C(0x03B00666), UINT32_C(0x0D26AAA8),
+ UINT32_C(0x03829C3F), UINT32_C(0x08B67A64), UINT32_C(0x0475D296),
+ UINT32_C(0x027FEFC5) },
+ { UINT32_C(0x06814D18), UINT32_C(0x01588692), UINT32_C(0x0D4F0EDD),
+ UINT32_C(0x007DFA60), UINT32_C(0x042E603A), UINT32_C(0x00885394),
+ UINT32_C(0x05F797E2), UINT32_C(0x041238B4), UINT32_C(0x052305E5),
+ UINT32_C(0x0D9515E8), UINT32_C(0x05B10FCD), UINT32_C(0x08F6C6F8),
+ UINT32_C(0x043FB734), UINT32_C(0x014BE940), UINT32_C(0x0E882EEE),
+ UINT32_C(0x0077B050), UINT32_C(0x02093150), UINT32_C(0x05A0B712),
+ UINT32_C(0x06E640E8) } },
+ { { UINT32_C(0x0BE77EA4), UINT32_C(0x03634A86), UINT32_C(0x01F8DFF4),
+ UINT32_C(0x005A0F6B), UINT32_C(0x0D30990A), UINT32_C(0x0712090D),
+ UINT32_C(0x048C153A), UINT32_C(0x029E8CA3), UINT32_C(0x052B7982),
+ UINT32_C(0x01355D1B), UINT32_C(0x00109FDB), UINT32_C(0x029EF3CE),
+ UINT32_C(0x02FA1090), UINT32_C(0x033F025F), UINT32_C(0x03D1969F),
+ UINT32_C(0x052EDB5F), UINT32_C(0x04D2BEF3), UINT32_C(0x06BF5DE5),
+ UINT32_C(0x00C8983F) },
+ { UINT32_C(0x04B8EB93), UINT32_C(0x0058C176), UINT32_C(0x00A13CB4),
+ UINT32_C(0x053DF577), UINT32_C(0x0156AEB4), UINT32_C(0x005E3851),
+ UINT32_C(0x069CEAE2), UINT32_C(0x0030FF4F), UINT32_C(0x001DA227),
+ UINT32_C(0x05AF81D3), UINT32_C(0x03D80D8D), UINT32_C(0x0A3E8600),
+ UINT32_C(0x03D228FC), UINT32_C(0x0665245C), UINT32_C(0x09E5CE2E),
+ UINT32_C(0x03843A9B), UINT32_C(0x02F2D31B), UINT32_C(0x041832DC),
+ UINT32_C(0x02E66351) } },
+ { { UINT32_C(0x05730C8D), UINT32_C(0x06092618), UINT32_C(0x079F5AFA),
+ UINT32_C(0x06F3E0CF), UINT32_C(0x092BC672), UINT32_C(0x0276DE36),
+ UINT32_C(0x02D07EDC), UINT32_C(0x0FC6A29F), UINT32_C(0x0486EFA2),
+ UINT32_C(0x0909E264), UINT32_C(0x056F98E8), UINT32_C(0x08A33777),
+ UINT32_C(0x007820C7), UINT32_C(0x07E651CF), UINT32_C(0x0928B418),
+ UINT32_C(0x05EF7EA1), UINT32_C(0x0BE35987), UINT32_C(0x023FE702),
+ UINT32_C(0x04B874D9) },
+ { UINT32_C(0x001A8D36), UINT32_C(0x03FC40DA), UINT32_C(0x00561AB4),
+ UINT32_C(0x036E4547), UINT32_C(0x0D462FB9), UINT32_C(0x07B2E89D),
+ UINT32_C(0x0616BF2B), UINT32_C(0x02FA3373), UINT32_C(0x067EE578),
+ UINT32_C(0x02B81792), UINT32_C(0x03A32F95), UINT32_C(0x019591EC),
+ UINT32_C(0x047F05AA), UINT32_C(0x058E2F29), UINT32_C(0x04CECEE9),
+ UINT32_C(0x07DF3632), UINT32_C(0x02BFB16E), UINT32_C(0x03AB1AD0),
+ UINT32_C(0x0610FCE9) } },
+ { { UINT32_C(0x0CE87EAC), UINT32_C(0x00235BF1), UINT32_C(0x0EAE0AF1),
+ UINT32_C(0x03D89DD3), UINT32_C(0x0B789073), UINT32_C(0x01AC0815),
+ UINT32_C(0x055721C2), UINT32_C(0x0B2BAD77), UINT32_C(0x05787CF1),
+ UINT32_C(0x00C70041), UINT32_C(0x00EEE049), UINT32_C(0x0D01B922),
+ UINT32_C(0x022A24F8), UINT32_C(0x0317FAC7), UINT32_C(0x0D5F402C),
+ UINT32_C(0x0439541B), UINT32_C(0x07D56CC2), UINT32_C(0x00EB80BF),
+ UINT32_C(0x00E40AA6) },
+ { UINT32_C(0x0A01F6F0), UINT32_C(0x020DA18A), UINT32_C(0x073C68C0),
+ UINT32_C(0x05338AFA), UINT32_C(0x0DDC8CB0), UINT32_C(0x001C0CED),
+ UINT32_C(0x07A82BBC), UINT32_C(0x081BF5E1), UINT32_C(0x00B876DD),
+ UINT32_C(0x09864ED3), UINT32_C(0x07F89153), UINT32_C(0x0A066C82),
+ UINT32_C(0x042461BC), UINT32_C(0x07592D13), UINT32_C(0x02DBFA28),
+ UINT32_C(0x0371D64F), UINT32_C(0x0326B139), UINT32_C(0x0545030E),
+ UINT32_C(0x03B02EDD) } },
+ { { UINT32_C(0x0C8AA41D), UINT32_C(0x02999435), UINT32_C(0x011470BE),
+ UINT32_C(0x02448ABD), UINT32_C(0x0C3A559A), UINT32_C(0x03DE4EDA),
+ UINT32_C(0x0267ACAB), UINT32_C(0x05B64BAF), UINT32_C(0x06167A36),
+ UINT32_C(0x080925DF), UINT32_C(0x0748EB2E), UINT32_C(0x0262E572),
+ UINT32_C(0x06655A71), UINT32_C(0x02DC7E31), UINT32_C(0x009FA448),
+ UINT32_C(0x05991E95), UINT32_C(0x0FA3D04A), UINT32_C(0x0484BE25),
+ UINT32_C(0x0438E396) },
+ { UINT32_C(0x044C41BB), UINT32_C(0x02EFDFC2), UINT32_C(0x0F459DA9),
+ UINT32_C(0x04A94A2D), UINT32_C(0x03F47C03), UINT32_C(0x07FA71AF),
+ UINT32_C(0x03DC178C), UINT32_C(0x0129963B), UINT32_C(0x021E1FD4),
+ UINT32_C(0x0E7487EB), UINT32_C(0x00C3DDB0), UINT32_C(0x06EE0434),
+ UINT32_C(0x06D2712F), UINT32_C(0x07842656), UINT32_C(0x013F8F26),
+ UINT32_C(0x01F9766F), UINT32_C(0x061BD12C), UINT32_C(0x02B96EB7),
+ UINT32_C(0x01F8FA20) } },
+ { { UINT32_C(0x0FB80E07), UINT32_C(0x050B08F2), UINT32_C(0x064554C9),
+ UINT32_C(0x078E1F81), UINT32_C(0x09ED8841), UINT32_C(0x0596ADC2),
+ UINT32_C(0x034DF164), UINT32_C(0x020E6E12), UINT32_C(0x018EDA4D),
+ UINT32_C(0x0174E31B), UINT32_C(0x03B107F1), UINT32_C(0x010EC155),
+ UINT32_C(0x07FA899A), UINT32_C(0x0717505D), UINT32_C(0x05819825),
+ UINT32_C(0x0542EC55), UINT32_C(0x038DD6D7), UINT32_C(0x0497E5A0),
+ UINT32_C(0x03081495) },
+ { UINT32_C(0x064986F4), UINT32_C(0x03BD600B), UINT32_C(0x04B78E0D),
+ UINT32_C(0x0098465F), UINT32_C(0x0E7E78C0), UINT32_C(0x0127CC0E),
+ UINT32_C(0x07A3BC64), UINT32_C(0x001DBF18), UINT32_C(0x06A78B45),
+ UINT32_C(0x0D3A5A6B), UINT32_C(0x0682C6C2), UINT32_C(0x0B8EE95B),
+ UINT32_C(0x066E64B3), UINT32_C(0x04178CB0), UINT32_C(0x0FC2F66E),
+ UINT32_C(0x04EABB3C), UINT32_C(0x084AF2DE), UINT32_C(0x04C297C1),
+ UINT32_C(0x0136B06E) } },
+ { { UINT32_C(0x07DF6D6E), UINT32_C(0x01F00ED6), UINT32_C(0x02705D3E),
+ UINT32_C(0x038023D6), UINT32_C(0x0A85D53D), UINT32_C(0x01C4664A),
+ UINT32_C(0x0610B36C), UINT32_C(0x02BAE274), UINT32_C(0x03566DBB),
+ UINT32_C(0x0854659C), UINT32_C(0x00F106D4), UINT32_C(0x09D0A630),
+ UINT32_C(0x01B5D98A), UINT32_C(0x01B27CA8), UINT32_C(0x0F254343),
+ UINT32_C(0x075491B9), UINT32_C(0x025D2274), UINT32_C(0x04F17B63),
+ UINT32_C(0x06865DA3) },
+ { UINT32_C(0x0D4C1CFE), UINT32_C(0x0612B559), UINT32_C(0x0D29CCC2),
+ UINT32_C(0x06835607), UINT32_C(0x0E442A4F), UINT32_C(0x003F2EA3),
+ UINT32_C(0x04DA7E80), UINT32_C(0x079ABF17), UINT32_C(0x062A7A50),
+ UINT32_C(0x0FE31E03), UINT32_C(0x044D195D), UINT32_C(0x01A9DC51),
+ UINT32_C(0x05B8C361), UINT32_C(0x06390D3D), UINT32_C(0x0544BD42),
+ UINT32_C(0x02DB7A09), UINT32_C(0x0367E705), UINT32_C(0x01B34C53),
+ UINT32_C(0x055F8181) } },
+ { { UINT32_C(0x0F3F00C1), UINT32_C(0x04C36A17), UINT32_C(0x0CB05A60),
+ UINT32_C(0x05742C4B), UINT32_C(0x029DC7BA), UINT32_C(0x00946765),
+ UINT32_C(0x01F6280B), UINT32_C(0x0A250657), UINT32_C(0x057853BE),
+ UINT32_C(0x027C17D4), UINT32_C(0x061E6EE7), UINT32_C(0x068934C0),
+ UINT32_C(0x0225275D), UINT32_C(0x004E706A), UINT32_C(0x08A0E33D),
+ UINT32_C(0x02EFB382), UINT32_C(0x0231B332), UINT32_C(0x045E20A6),
+ UINT32_C(0x076538EE) },
+ { UINT32_C(0x072461C9), UINT32_C(0x071D932B), UINT32_C(0x099D4C01),
+ UINT32_C(0x0401E666), UINT32_C(0x07DB6FB0), UINT32_C(0x049F43E4),
+ UINT32_C(0x056167EA), UINT32_C(0x0D49C41D), UINT32_C(0x05F10CA9),
+ UINT32_C(0x080EC5BB), UINT32_C(0x05C98C31), UINT32_C(0x01E1F452),
+ UINT32_C(0x07E42338), UINT32_C(0x04049AA9), UINT32_C(0x032E5588),
+ UINT32_C(0x01E28C9C), UINT32_C(0x04BCDC8D), UINT32_C(0x04309C54),
+ UINT32_C(0x02042514) } },
+ },
+ {
+ { { UINT32_C(0x02648196), UINT32_C(0x01BF352B), UINT32_C(0x0FCEC15F),
+ UINT32_C(0x02D3A085), UINT32_C(0x011002A5), UINT32_C(0x026E7651),
+ UINT32_C(0x021C2A73), UINT32_C(0x0E3392B7), UINT32_C(0x01A26456),
+ UINT32_C(0x00E05940), UINT32_C(0x05C6D0D8), UINT32_C(0x085D0F62),
+ UINT32_C(0x03B743E5), UINT32_C(0x05B2C76F), UINT32_C(0x0B270AB3),
+ UINT32_C(0x076B0EF8), UINT32_C(0x0E5EF80C), UINT32_C(0x0751E040),
+ UINT32_C(0x0769C73A) },
+ { UINT32_C(0x0D9BC7BB), UINT32_C(0x01B398D4), UINT32_C(0x094E3D5E),
+ UINT32_C(0x0679261C), UINT32_C(0x0F579BC0), UINT32_C(0x0087234F),
+ UINT32_C(0x01C48CDA), UINT32_C(0x01065BB9), UINT32_C(0x04A8A1F3),
+ UINT32_C(0x097D469B), UINT32_C(0x046FC17A), UINT32_C(0x00CAE969),
+ UINT32_C(0x02E690B5), UINT32_C(0x0187C437), UINT32_C(0x000FCD13),
+ UINT32_C(0x07C0FA30), UINT32_C(0x02F0D63C), UINT32_C(0x0583AE53),
+ UINT32_C(0x036A77FE) } },
+ { { UINT32_C(0x01DE62A2), UINT32_C(0x03B6F417), UINT32_C(0x08D8470C),
+ UINT32_C(0x041AB290), UINT32_C(0x0D3155E4), UINT32_C(0x043123A7),
+ UINT32_C(0x06EC3DAC), UINT32_C(0x09575F29), UINT32_C(0x05CC8C01),
+ UINT32_C(0x028CF2E0), UINT32_C(0x00BB01F9), UINT32_C(0x01E4C554),
+ UINT32_C(0x07B3F1F5), UINT32_C(0x00E4DC2E), UINT32_C(0x0F6F4AA9),
+ UINT32_C(0x03F7C702), UINT32_C(0x0EC18583), UINT32_C(0x02949031),
+ UINT32_C(0x05C16F04) },
+ { UINT32_C(0x03BFC242), UINT32_C(0x06AF3468), UINT32_C(0x0509C734),
+ UINT32_C(0x002581C3), UINT32_C(0x0CD6F167), UINT32_C(0x068B6408),
+ UINT32_C(0x07D05F00), UINT32_C(0x0D520CDF), UINT32_C(0x02C463E5),
+ UINT32_C(0x003D2B75), UINT32_C(0x02640D09), UINT32_C(0x0C38D324),
+ UINT32_C(0x016E198B), UINT32_C(0x01BF3B79), UINT32_C(0x08EFB3AE),
+ UINT32_C(0x01B11ADD), UINT32_C(0x0428FEBD), UINT32_C(0x0288A4BC),
+ UINT32_C(0x02ED3D8D) } },
+ { { UINT32_C(0x0FE3927A), UINT32_C(0x004463DC), UINT32_C(0x0A23634B),
+ UINT32_C(0x02C96252), UINT32_C(0x088ACC38), UINT32_C(0x003687F2),
+ UINT32_C(0x07070A41), UINT32_C(0x0A3D6F58), UINT32_C(0x02ACC6F9),
+ UINT32_C(0x07A117B7), UINT32_C(0x04BF3041), UINT32_C(0x006C3D57),
+ UINT32_C(0x05E2A443), UINT32_C(0x00D534BB), UINT32_C(0x01838CCA),
+ UINT32_C(0x07E9698D), UINT32_C(0x0463E2DC), UINT32_C(0x05A8243F),
+ UINT32_C(0x02BC2618) },
+ { UINT32_C(0x0EBC6638), UINT32_C(0x04B3F3FB), UINT32_C(0x0A7F699B),
+ UINT32_C(0x070541A8), UINT32_C(0x00275BF7), UINT32_C(0x0335548D),
+ UINT32_C(0x00C681F5), UINT32_C(0x0AE9575E), UINT32_C(0x02032835),
+ UINT32_C(0x027F35BF), UINT32_C(0x00A83998), UINT32_C(0x04869978),
+ UINT32_C(0x04F819CA), UINT32_C(0x075D1DAF), UINT32_C(0x0B79E387),
+ UINT32_C(0x033A57AB), UINT32_C(0x057298F2), UINT32_C(0x0583C4E3),
+ UINT32_C(0x067E752D) } },
+ { { UINT32_C(0x06B4D0F2), UINT32_C(0x059C637E), UINT32_C(0x0515A54F),
+ UINT32_C(0x01CB93DA), UINT32_C(0x0AF87FEF), UINT32_C(0x07247119),
+ UINT32_C(0x0368E1D8), UINT32_C(0x0287508B), UINT32_C(0x04E3B00B),
+ UINT32_C(0x03EDF00C), UINT32_C(0x0060EB2B), UINT32_C(0x009B64B7),
+ UINT32_C(0x0059A064), UINT32_C(0x02C48CC2), UINT32_C(0x0D938166),
+ UINT32_C(0x039A77EF), UINT32_C(0x04F26973), UINT32_C(0x015B1DA7),
+ UINT32_C(0x048D6DB3) },
+ { UINT32_C(0x011EBBDB), UINT32_C(0x06BC0045), UINT32_C(0x0275B56E),
+ UINT32_C(0x03B89420), UINT32_C(0x013420FC), UINT32_C(0x076F18E5),
+ UINT32_C(0x00A74F63), UINT32_C(0x0E0F64B7), UINT32_C(0x00503282),
+ UINT32_C(0x094735D1), UINT32_C(0x013CC6D6), UINT32_C(0x0E5C0E1C),
+ UINT32_C(0x015BA8D6), UINT32_C(0x07D45F0A), UINT32_C(0x0A29FE38),
+ UINT32_C(0x0029F319), UINT32_C(0x03AC2D85), UINT32_C(0x027ECAF3),
+ UINT32_C(0x029D9051) } },
+ { { UINT32_C(0x0EA400A9), UINT32_C(0x0158306B), UINT32_C(0x015222F8),
+ UINT32_C(0x07A029A5), UINT32_C(0x01BD2907), UINT32_C(0x0570C0F6),
+ UINT32_C(0x0751FAE1), UINT32_C(0x07964BF7), UINT32_C(0x009AA3B7),
+ UINT32_C(0x03DF8285), UINT32_C(0x005D2075), UINT32_C(0x0DDBE6E5),
+ UINT32_C(0x04FB407B), UINT32_C(0x05ABE7D8), UINT32_C(0x0C49401A),
+ UINT32_C(0x04BA9696), UINT32_C(0x03CCE450), UINT32_C(0x04636480),
+ UINT32_C(0x03F1ABE9) },
+ { UINT32_C(0x03EA1F68), UINT32_C(0x0676F7FA), UINT32_C(0x078995D6),
+ UINT32_C(0x01690C80), UINT32_C(0x0DDD1529), UINT32_C(0x007F78C9),
+ UINT32_C(0x0408771E), UINT32_C(0x0513A792), UINT32_C(0x003B85AB),
+ UINT32_C(0x016D7EB5), UINT32_C(0x05E5699C), UINT32_C(0x0BECEE12),
+ UINT32_C(0x00107C5D), UINT32_C(0x00E4EB89), UINT32_C(0x02F4C652),
+ UINT32_C(0x04E39F7A), UINT32_C(0x034AED07), UINT32_C(0x0212550E),
+ UINT32_C(0x0188E07E) } },
+ { { UINT32_C(0x0FBBA24C), UINT32_C(0x01E20A63), UINT32_C(0x0FA95AAC),
+ UINT32_C(0x01C44416), UINT32_C(0x0F08DC76), UINT32_C(0x043CBDF1),
+ UINT32_C(0x012ABC29), UINT32_C(0x0F6C4233), UINT32_C(0x06107D90),
+ UINT32_C(0x002CBE36), UINT32_C(0x05234963), UINT32_C(0x059E8B8F),
+ UINT32_C(0x06167695), UINT32_C(0x04B21ABA), UINT32_C(0x094ABDA3),
+ UINT32_C(0x01B5AF79), UINT32_C(0x00351EF1), UINT32_C(0x03FE1EFE),
+ UINT32_C(0x03E83BD1) },
+ { UINT32_C(0x04ADEFE3), UINT32_C(0x028AF72F), UINT32_C(0x09E0C0D6),
+ UINT32_C(0x0104ED8F), UINT32_C(0x0AE0148F), UINT32_C(0x02B05ACD),
+ UINT32_C(0x066B1ED0), UINT32_C(0x0A3C6BFA), UINT32_C(0x032BBFF9),
+ UINT32_C(0x0F66AD88), UINT32_C(0x04A9A376), UINT32_C(0x0AF0D447),
+ UINT32_C(0x047BD087), UINT32_C(0x005F677C), UINT32_C(0x014088B0),
+ UINT32_C(0x00EDD8EE), UINT32_C(0x0598516D), UINT32_C(0x03FE1205),
+ UINT32_C(0x073098DE) } },
+ { { UINT32_C(0x02841A85), UINT32_C(0x0451A0F7), UINT32_C(0x076BCBFC),
+ UINT32_C(0x027E002B), UINT32_C(0x04ACD1B5), UINT32_C(0x03AADBAC),
+ UINT32_C(0x011F71FA), UINT32_C(0x0E1089CF), UINT32_C(0x058740CA),
+ UINT32_C(0x06DB26BB), UINT32_C(0x02494970), UINT32_C(0x07CCD9E0),
+ UINT32_C(0x05749062), UINT32_C(0x061E24EF), UINT32_C(0x0BA44927),
+ UINT32_C(0x01396A99), UINT32_C(0x0C2129A5), UINT32_C(0x06C4E538),
+ UINT32_C(0x02D308F2) },
+ { UINT32_C(0x0E7B0D82), UINT32_C(0x0295DE15), UINT32_C(0x059C10B0),
+ UINT32_C(0x0240D76A), UINT32_C(0x0AA33AC3), UINT32_C(0x02D5D368),
+ UINT32_C(0x05DF8706), UINT32_C(0x0A4B7001), UINT32_C(0x031DBF6C),
+ UINT32_C(0x0BC72CD8), UINT32_C(0x046962A7), UINT32_C(0x0D13BB53),
+ UINT32_C(0x039B98C0), UINT32_C(0x05AA84ED), UINT32_C(0x058D2735),
+ UINT32_C(0x0508AB59), UINT32_C(0x085DF0E3), UINT32_C(0x06AA60D9),
+ UINT32_C(0x0192578B) } },
+ { { UINT32_C(0x052517BF), UINT32_C(0x07C0E587), UINT32_C(0x038A5531),
+ UINT32_C(0x03EE1FF1), UINT32_C(0x062AB6E8), UINT32_C(0x06EF4CCB),
+ UINT32_C(0x00A09F25), UINT32_C(0x0DBE8342), UINT32_C(0x01D7E02F),
+ UINT32_C(0x094C49AE), UINT32_C(0x01445CE4), UINT32_C(0x0F435B7F),
+ UINT32_C(0x07CDF16E), UINT32_C(0x009B8491), UINT32_C(0x0B24E6F7),
+ UINT32_C(0x01648959), UINT32_C(0x00615CA9), UINT32_C(0x014879FC),
+ UINT32_C(0x015CCCCE) },
+ { UINT32_C(0x0BB6E5C0), UINT32_C(0x072270A8), UINT32_C(0x02BC713E),
+ UINT32_C(0x0194AF0E), UINT32_C(0x0745C682), UINT32_C(0x00066C6F),
+ UINT32_C(0x03D36CF5), UINT32_C(0x0593CBB1), UINT32_C(0x05AE790D),
+ UINT32_C(0x06B1FF53), UINT32_C(0x0620A507), UINT32_C(0x0CB462BF),
+ UINT32_C(0x068C215C), UINT32_C(0x06AB108C), UINT32_C(0x0B7E3900),
+ UINT32_C(0x03D88910), UINT32_C(0x0539E087), UINT32_C(0x04AE3141),
+ UINT32_C(0x035ED7D6) } },
+ { { UINT32_C(0x0254F3D7), UINT32_C(0x06792204), UINT32_C(0x0230569F),
+ UINT32_C(0x03D3FDA9), UINT32_C(0x0B84DD99), UINT32_C(0x07725C4C),
+ UINT32_C(0x06B0E7C3), UINT32_C(0x0B78D3DF), UINT32_C(0x078AC360),
+ UINT32_C(0x06CAB919), UINT32_C(0x02F4F70A), UINT32_C(0x013A8BD5),
+ UINT32_C(0x021D73E0), UINT32_C(0x044B1B4D), UINT32_C(0x0E88A7D4),
+ UINT32_C(0x05BAA6EC), UINT32_C(0x0526DE60), UINT32_C(0x01D8806A),
+ UINT32_C(0x04244303) },
+ { UINT32_C(0x0108C612), UINT32_C(0x0395A34F), UINT32_C(0x0339198F),
+ UINT32_C(0x01F179EC), UINT32_C(0x0708D6F3), UINT32_C(0x01DF5235),
+ UINT32_C(0x0232C546), UINT32_C(0x030C41B0), UINT32_C(0x015FE8CF),
+ UINT32_C(0x0F21BBB4), UINT32_C(0x0323FD77), UINT32_C(0x06DD81ED),
+ UINT32_C(0x04136906), UINT32_C(0x054B66A1), UINT32_C(0x0CBBD05A),
+ UINT32_C(0x0336CEE8), UINT32_C(0x0FCF1FFD), UINT32_C(0x041BBD8F),
+ UINT32_C(0x07AB12C9) } },
+ { { UINT32_C(0x0BBE227D), UINT32_C(0x05858F23), UINT32_C(0x04BF491E),
+ UINT32_C(0x05728183), UINT32_C(0x079C714E), UINT32_C(0x022A1FCF),
+ UINT32_C(0x01EF871B), UINT32_C(0x09EDB7B8), UINT32_C(0x01D525A3),
+ UINT32_C(0x0A87DA27), UINT32_C(0x043F0A4E), UINT32_C(0x09B1CDD1),
+ UINT32_C(0x00B92721), UINT32_C(0x00B6CCD6), UINT32_C(0x0D63DB15),
+ UINT32_C(0x023CE576), UINT32_C(0x0C4080E4), UINT32_C(0x033F2061),
+ UINT32_C(0x031AA1D9) },
+ { UINT32_C(0x07EC3A20), UINT32_C(0x01C69A3A), UINT32_C(0x001C25C7),
+ UINT32_C(0x0210B9C8), UINT32_C(0x08BDFFA8), UINT32_C(0x02E8214B),
+ UINT32_C(0x017C3E9B), UINT32_C(0x084D91D9), UINT32_C(0x038B3D24),
+ UINT32_C(0x0EC9081E), UINT32_C(0x026E58E8), UINT32_C(0x032908AE),
+ UINT32_C(0x02B2F37D), UINT32_C(0x058B11CB), UINT32_C(0x07538C24),
+ UINT32_C(0x06945091), UINT32_C(0x0F538568), UINT32_C(0x064897F5),
+ UINT32_C(0x03110AAF) } },
+ { { UINT32_C(0x093E7BB1), UINT32_C(0x026B09F0), UINT32_C(0x0763D63D),
+ UINT32_C(0x01CAD134), UINT32_C(0x053290E7), UINT32_C(0x03190F55),
+ UINT32_C(0x05929346), UINT32_C(0x090E1278), UINT32_C(0x01D360D4),
+ UINT32_C(0x0AE8B6AE), UINT32_C(0x036A79E4), UINT32_C(0x08B891A0),
+ UINT32_C(0x0448F896), UINT32_C(0x02316FA4), UINT32_C(0x0B3F9158),
+ UINT32_C(0x045DAD8C), UINT32_C(0x073BD91F), UINT32_C(0x0407FC71),
+ UINT32_C(0x0403F724) },
+ { UINT32_C(0x0C0213B3), UINT32_C(0x04667E35), UINT32_C(0x0E2CEB9C),
+ UINT32_C(0x064EC72A), UINT32_C(0x0A339F01), UINT32_C(0x01E44700),
+ UINT32_C(0x029951E3), UINT32_C(0x0F9E1903), UINT32_C(0x0760075A),
+ UINT32_C(0x0B3FB167), UINT32_C(0x015349C6), UINT32_C(0x04915326),
+ UINT32_C(0x06972404), UINT32_C(0x03D0B541), UINT32_C(0x0FFB253E),
+ UINT32_C(0x0670C067), UINT32_C(0x017EDCC3), UINT32_C(0x06348A30),
+ UINT32_C(0x0755DC54) } },
+ { { UINT32_C(0x0D72BA02), UINT32_C(0x07FF1EEA), UINT32_C(0x0066BDAD),
+ UINT32_C(0x039D956A), UINT32_C(0x04E892D7), UINT32_C(0x052419F2),
+ UINT32_C(0x034B725A), UINT32_C(0x095A35DA), UINT32_C(0x05559103),
+ UINT32_C(0x018A8F9F), UINT32_C(0x04FC3975), UINT32_C(0x0D1740D2),
+ UINT32_C(0x0375B900), UINT32_C(0x0761403F), UINT32_C(0x0B953A5F),
+ UINT32_C(0x04F2FF71), UINT32_C(0x0E1B0B58), UINT32_C(0x07D8573F),
+ UINT32_C(0x053E8C3E) },
+ { UINT32_C(0x055A3B73), UINT32_C(0x04EBD845), UINT32_C(0x0D3A5D27),
+ UINT32_C(0x03216043), UINT32_C(0x0A2D5A11), UINT32_C(0x03D32430),
+ UINT32_C(0x063F87FD), UINT32_C(0x0DBF84E0), UINT32_C(0x04C9934A),
+ UINT32_C(0x08BE9480), UINT32_C(0x02F6DE30), UINT32_C(0x052DB294),
+ UINT32_C(0x03230313), UINT32_C(0x04592516), UINT32_C(0x0B992B10),
+ UINT32_C(0x03125EE2), UINT32_C(0x0445BCF9), UINT32_C(0x07349143),
+ UINT32_C(0x05A112C7) } },
+ { { UINT32_C(0x0EA0B318), UINT32_C(0x03F1B159), UINT32_C(0x0487E52E),
+ UINT32_C(0x05D27B9C), UINT32_C(0x0EBAD615), UINT32_C(0x0459C5D9),
+ UINT32_C(0x073079D5), UINT32_C(0x078FD2D4), UINT32_C(0x006B7643),
+ UINT32_C(0x0A73DC2C), UINT32_C(0x041938CF), UINT32_C(0x098897E0),
+ UINT32_C(0x07660928), UINT32_C(0x058BF110), UINT32_C(0x0696BC61),
+ UINT32_C(0x07DE18FC), UINT32_C(0x0B815951), UINT32_C(0x04662BC8),
+ UINT32_C(0x054FF046) },
+ { UINT32_C(0x052466CC), UINT32_C(0x02C9E253), UINT32_C(0x07D1C495),
+ UINT32_C(0x024A0473), UINT32_C(0x0E5AEABA), UINT32_C(0x06DFF20F),
+ UINT32_C(0x03CCEFD9), UINT32_C(0x0F806D4B), UINT32_C(0x0192D911),
+ UINT32_C(0x06A7E064), UINT32_C(0x0136BD6C), UINT32_C(0x03CF3E59),
+ UINT32_C(0x036C910C), UINT32_C(0x02852F51), UINT32_C(0x0D2261F6),
+ UINT32_C(0x07B11789), UINT32_C(0x05D5440C), UINT32_C(0x068EB2BF),
+ UINT32_C(0x07C9D3D2) } },
+ { { UINT32_C(0x03F78C83), UINT32_C(0x026282EB), UINT32_C(0x0E7E58C8),
+ UINT32_C(0x01460384), UINT32_C(0x07F8288C), UINT32_C(0x004DDB38),
+ UINT32_C(0x068A22C1), UINT32_C(0x03B4E4B7), UINT32_C(0x046EC7F7),
+ UINT32_C(0x0F499BF8), UINT32_C(0x00E98F9D), UINT32_C(0x0201835A),
+ UINT32_C(0x06CDC18D), UINT32_C(0x054E87E0), UINT32_C(0x09E1190B),
+ UINT32_C(0x07C8570C), UINT32_C(0x0EE788C0), UINT32_C(0x003B8466),
+ UINT32_C(0x0513D8F7) },
+ { UINT32_C(0x082AE76F), UINT32_C(0x0467154F), UINT32_C(0x090D360C),
+ UINT32_C(0x04725E35), UINT32_C(0x077F0A4A), UINT32_C(0x01658344),
+ UINT32_C(0x07BFD41E), UINT32_C(0x0816DFE5), UINT32_C(0x01A64B33),
+ UINT32_C(0x07DEC344), UINT32_C(0x0404AABD), UINT32_C(0x0DD22DB3),
+ UINT32_C(0x0372E5A1), UINT32_C(0x01DD7525), UINT32_C(0x01C8CACD),
+ UINT32_C(0x06A4B923), UINT32_C(0x0CD78815), UINT32_C(0x03B62E43),
+ UINT32_C(0x0182DCE0) } },
+ { { UINT32_C(0x04B1FB35), UINT32_C(0x0061A026), UINT32_C(0x099D37D7),
+ UINT32_C(0x046459E6), UINT32_C(0x0E8A57EF), UINT32_C(0x001BD06E),
+ UINT32_C(0x04A92B84), UINT32_C(0x06098C4C), UINT32_C(0x0358B593),
+ UINT32_C(0x0D4DFE1C), UINT32_C(0x063599D3), UINT32_C(0x02DD18DC),
+ UINT32_C(0x03007901), UINT32_C(0x01E9DD8D), UINT32_C(0x0400CC35),
+ UINT32_C(0x0778E5F5), UINT32_C(0x05D5B6A3), UINT32_C(0x02FD411C),
+ UINT32_C(0x02B425A2) },
+ { UINT32_C(0x03812C10), UINT32_C(0x03B78EFC), UINT32_C(0x09532CE4),
+ UINT32_C(0x04F7D4A9), UINT32_C(0x0F7C04C8), UINT32_C(0x0683AE68),
+ UINT32_C(0x011B6140), UINT32_C(0x0156737D), UINT32_C(0x035A4EB9),
+ UINT32_C(0x0A0B7443), UINT32_C(0x064319EB), UINT32_C(0x0B315217),
+ UINT32_C(0x049C0FB2), UINT32_C(0x004E46BC), UINT32_C(0x0318D072),
+ UINT32_C(0x052D3EA9), UINT32_C(0x06A15FA8), UINT32_C(0x02E0D5AB),
+ UINT32_C(0x008DD356) } },
+ { { UINT32_C(0x0D00894F), UINT32_C(0x0415F67D), UINT32_C(0x0C243D11),
+ UINT32_C(0x02B8C573), UINT32_C(0x05C886B6), UINT32_C(0x073E2A37),
+ UINT32_C(0x01B4E4FA), UINT32_C(0x09A09251), UINT32_C(0x020282E5),
+ UINT32_C(0x0BCA7D2D), UINT32_C(0x066FF292), UINT32_C(0x09926C99),
+ UINT32_C(0x03617A48), UINT32_C(0x01530215), UINT32_C(0x063E7DBA),
+ UINT32_C(0x078B1DFB), UINT32_C(0x0C3844B7), UINT32_C(0x03201272),
+ UINT32_C(0x0778B4FA) },
+ { UINT32_C(0x09305F18), UINT32_C(0x04DACE51), UINT32_C(0x0D07FE4D),
+ UINT32_C(0x04990FE7), UINT32_C(0x07120719), UINT32_C(0x07AE031B),
+ UINT32_C(0x003430FE), UINT32_C(0x00C1FBD4), UINT32_C(0x036A0A51),
+ UINT32_C(0x0A6A12BB), UINT32_C(0x072B00FE), UINT32_C(0x0F112F16),
+ UINT32_C(0x002D898C), UINT32_C(0x00D7F3F0), UINT32_C(0x02CCB574),
+ UINT32_C(0x076345FF), UINT32_C(0x02C9358F), UINT32_C(0x017BCB4B),
+ UINT32_C(0x0579734A) } },
+ },
+ {
+ { { UINT32_C(0x0F0DB502), UINT32_C(0x007283D0), UINT32_C(0x08EF623D),
+ UINT32_C(0x03EA8C5E), UINT32_C(0x0A209E1F), UINT32_C(0x03A40740),
+ UINT32_C(0x02F81888), UINT32_C(0x0722A969), UINT32_C(0x03DCF02A),
+ UINT32_C(0x0B8BF42D), UINT32_C(0x046BF6EC), UINT32_C(0x04E7DE79),
+ UINT32_C(0x032FE5DF), UINT32_C(0x01C17AC3), UINT32_C(0x088F43CD),
+ UINT32_C(0x06D316FF), UINT32_C(0x00B6FB94), UINT32_C(0x03A7A692),
+ UINT32_C(0x03E132AC) },
+ { UINT32_C(0x045CE248), UINT32_C(0x0462F43F), UINT32_C(0x09F103B7),
+ UINT32_C(0x03CE6503), UINT32_C(0x02C55CD7), UINT32_C(0x01FAC8B9),
+ UINT32_C(0x07F7D41F), UINT32_C(0x049B3922), UINT32_C(0x0538164A),
+ UINT32_C(0x0C32168B), UINT32_C(0x021D15D5), UINT32_C(0x0FBE7AB4),
+ UINT32_C(0x049ABD36), UINT32_C(0x06689278), UINT32_C(0x090906E0),
+ UINT32_C(0x02853127), UINT32_C(0x032C40D9), UINT32_C(0x0284E722),
+ UINT32_C(0x05B9DA3D) } },
+ { { UINT32_C(0x08B06389), UINT32_C(0x039D7B29), UINT32_C(0x026E0D8E),
+ UINT32_C(0x038E31F2), UINT32_C(0x0F482001), UINT32_C(0x046C5627),
+ UINT32_C(0x0153F461), UINT32_C(0x0FC4C626), UINT32_C(0x035A22C9),
+ UINT32_C(0x0CB5BCED), UINT32_C(0x032AE85F), UINT32_C(0x097105A2),
+ UINT32_C(0x0661090D), UINT32_C(0x02190C38), UINT32_C(0x05F88BB1),
+ UINT32_C(0x020AFD4B), UINT32_C(0x07693E86), UINT32_C(0x036234B0),
+ UINT32_C(0x0201EE7C) },
+ { UINT32_C(0x05177EBC), UINT32_C(0x07334497), UINT32_C(0x021FB6DB),
+ UINT32_C(0x00E242A1), UINT32_C(0x06ACC48D), UINT32_C(0x0617860E),
+ UINT32_C(0x04002467), UINT32_C(0x006684B4), UINT32_C(0x005E7367),
+ UINT32_C(0x02210321), UINT32_C(0x06AE2E12), UINT32_C(0x0A170483),
+ UINT32_C(0x06811FED), UINT32_C(0x02AF7598), UINT32_C(0x099B28F0),
+ UINT32_C(0x04B2EAC3), UINT32_C(0x03144E87), UINT32_C(0x052C741C),
+ UINT32_C(0x00219EE8) } },
+ { { UINT32_C(0x00581DC0), UINT32_C(0x076911B9), UINT32_C(0x03F907DF),
+ UINT32_C(0x00FD8CCC), UINT32_C(0x0BD0DFDF), UINT32_C(0x0388BBE8),
+ UINT32_C(0x0549C09A), UINT32_C(0x0387AC55), UINT32_C(0x07AF40E6),
+ UINT32_C(0x0981B7A5), UINT32_C(0x05ADE4BE), UINT32_C(0x052D5C55),
+ UINT32_C(0x076A04D2), UINT32_C(0x032751B9), UINT32_C(0x0BCE279F),
+ UINT32_C(0x034D2A39), UINT32_C(0x0AEDCDAE), UINT32_C(0x00365DC7),
+ UINT32_C(0x03453CBF) },
+ { UINT32_C(0x0FAB453E), UINT32_C(0x011CF084), UINT32_C(0x09E21C47),
+ UINT32_C(0x06CF3197), UINT32_C(0x00831296), UINT32_C(0x057F4CE5),
+ UINT32_C(0x020F8EE8), UINT32_C(0x05B31872), UINT32_C(0x0779598D),
+ UINT32_C(0x07C7AC32), UINT32_C(0x05B64DC4), UINT32_C(0x0E058DB2),
+ UINT32_C(0x060142F5), UINT32_C(0x0757FAC8), UINT32_C(0x0320EFE8),
+ UINT32_C(0x03D158EA), UINT32_C(0x025240D2), UINT32_C(0x0116989D),
+ UINT32_C(0x04BFB887) } },
+ { { UINT32_C(0x0DB8A57B), UINT32_C(0x0056DCD3), UINT32_C(0x0355B904),
+ UINT32_C(0x03D5725A), UINT32_C(0x007C7371), UINT32_C(0x00CF4193),
+ UINT32_C(0x020AD78C), UINT32_C(0x0305EFAF), UINT32_C(0x03715E8F),
+ UINT32_C(0x04E06800), UINT32_C(0x0464FE0B), UINT32_C(0x041671C5),
+ UINT32_C(0x07289FAC), UINT32_C(0x045EC338), UINT32_C(0x049BEE4D),
+ UINT32_C(0x06F62A0E), UINT32_C(0x04025E36), UINT32_C(0x05D25CE9),
+ UINT32_C(0x07C568B5) },
+ { UINT32_C(0x0D4BD6B6), UINT32_C(0x00933993), UINT32_C(0x0B7EEBBA),
+ UINT32_C(0x0281309E), UINT32_C(0x065E8268), UINT32_C(0x035579CF),
+ UINT32_C(0x05550C9A), UINT32_C(0x0D7980B4), UINT32_C(0x0531F076),
+ UINT32_C(0x0CD2F37E), UINT32_C(0x03059FC3), UINT32_C(0x00281179),
+ UINT32_C(0x019AAC99), UINT32_C(0x017555A7), UINT32_C(0x0FF849A4),
+ UINT32_C(0x04EE5361), UINT32_C(0x08C87DDE), UINT32_C(0x004920CB),
+ UINT32_C(0x0472AE6B) } },
+ { { UINT32_C(0x05AD0B4E), UINT32_C(0x0000D01D), UINT32_C(0x0A1C822E),
+ UINT32_C(0x004A7A0A), UINT32_C(0x0AA08F1E), UINT32_C(0x05917BCC),
+ UINT32_C(0x073D4A38), UINT32_C(0x06389FF3), UINT32_C(0x047A94F0),
+ UINT32_C(0x06710D9B), UINT32_C(0x0752964E), UINT32_C(0x030EF732),
+ UINT32_C(0x01AE9023), UINT32_C(0x0752E2B4), UINT32_C(0x0343C25C),
+ UINT32_C(0x04C0A3C3), UINT32_C(0x0B4EFABB), UINT32_C(0x079ACB07),
+ UINT32_C(0x05BEE507) },
+ { UINT32_C(0x03494AD9), UINT32_C(0x05EA99AF), UINT32_C(0x0389480B),
+ UINT32_C(0x05160DCE), UINT32_C(0x010C3CBB), UINT32_C(0x04B92C2A),
+ UINT32_C(0x05F2D771), UINT32_C(0x0A57A2FD), UINT32_C(0x007C232D),
+ UINT32_C(0x0ECF6652), UINT32_C(0x06762C3E), UINT32_C(0x0531B5E7),
+ UINT32_C(0x03E82FC8), UINT32_C(0x01820A9D), UINT32_C(0x010298C1),
+ UINT32_C(0x040BB915), UINT32_C(0x06C4DE5F), UINT32_C(0x00F95873),
+ UINT32_C(0x00D564BB) } },
+ { { UINT32_C(0x06647B76), UINT32_C(0x05951386), UINT32_C(0x01C3CEEE),
+ UINT32_C(0x05B4A2A9), UINT32_C(0x00C0D10D), UINT32_C(0x07198ABC),
+ UINT32_C(0x0344EBA4), UINT32_C(0x01102AAD), UINT32_C(0x00A6BD8E),
+ UINT32_C(0x041FD3B9), UINT32_C(0x072FD40E), UINT32_C(0x04DF271A),
+ UINT32_C(0x07951CEE), UINT32_C(0x0434A805), UINT32_C(0x03CBC676),
+ UINT32_C(0x07E6DD9D), UINT32_C(0x037A89AF), UINT32_C(0x01076ABD),
+ UINT32_C(0x00509445) },
+ { UINT32_C(0x0D8A2C33), UINT32_C(0x05E083E6), UINT32_C(0x05C0317D),
+ UINT32_C(0x0602A2EA), UINT32_C(0x00A16254), UINT32_C(0x065050EB),
+ UINT32_C(0x014C68D6), UINT32_C(0x0EA8DF00), UINT32_C(0x002096BA),
+ UINT32_C(0x00D2E7B4), UINT32_C(0x03580F1C), UINT32_C(0x0237FA0E),
+ UINT32_C(0x01C7F56A), UINT32_C(0x054A6A4F), UINT32_C(0x03E879F4),
+ UINT32_C(0x008B47F5), UINT32_C(0x0EDF35FC), UINT32_C(0x01F3F7F0),
+ UINT32_C(0x03E78806) } },
+ { { UINT32_C(0x038F6A40), UINT32_C(0x05B8DCB9), UINT32_C(0x07D27CDC),
+ UINT32_C(0x03392DA1), UINT32_C(0x066611C2), UINT32_C(0x066344AA),
+ UINT32_C(0x05F431C8), UINT32_C(0x07255E87), UINT32_C(0x0135642A),
+ UINT32_C(0x051CFCBA), UINT32_C(0x045D25F5), UINT32_C(0x08BB7E3A),
+ UINT32_C(0x022605AB), UINT32_C(0x00C874AA), UINT32_C(0x0195652F),
+ UINT32_C(0x00E16A23), UINT32_C(0x0D18A297), UINT32_C(0x024B6188),
+ UINT32_C(0x025A9403) },
+ { UINT32_C(0x04F1EAD3), UINT32_C(0x03669651), UINT32_C(0x0E87093B),
+ UINT32_C(0x05F1CF35), UINT32_C(0x019B74E6), UINT32_C(0x0177BF8B),
+ UINT32_C(0x036B76B9), UINT32_C(0x0B817B29), UINT32_C(0x009C77FA),
+ UINT32_C(0x0202860C), UINT32_C(0x01D1AB54), UINT32_C(0x0B180712),
+ UINT32_C(0x06B274AA), UINT32_C(0x0121DBED), UINT32_C(0x0AEA446B),
+ UINT32_C(0x044661E9), UINT32_C(0x0C3EE1D4), UINT32_C(0x045027EE),
+ UINT32_C(0x014C275F) } },
+ { { UINT32_C(0x004023FD), UINT32_C(0x01669241), UINT32_C(0x0693C19B),
+ UINT32_C(0x0058FB3D), UINT32_C(0x0756B182), UINT32_C(0x075D0BEC),
+ UINT32_C(0x07A393EF), UINT32_C(0x0B75B610), UINT32_C(0x07D0B5FD),
+ UINT32_C(0x060DEE19), UINT32_C(0x02373BD5), UINT32_C(0x0A1D84BA),
+ UINT32_C(0x07E8F3AA), UINT32_C(0x01D80791), UINT32_C(0x09D535D0),
+ UINT32_C(0x01AB79C2), UINT32_C(0x0D7911BC), UINT32_C(0x03496555),
+ UINT32_C(0x0370FC52) },
+ { UINT32_C(0x0CA626DD), UINT32_C(0x018A8079), UINT32_C(0x02E35F36),
+ UINT32_C(0x00EF1C67), UINT32_C(0x0942648A), UINT32_C(0x05578B93),
+ UINT32_C(0x07DDB397), UINT32_C(0x095E9BED), UINT32_C(0x07DEB648),
+ UINT32_C(0x020D82EB), UINT32_C(0x02384172), UINT32_C(0x0988C739),
+ UINT32_C(0x035C1ACA), UINT32_C(0x053C61ED), UINT32_C(0x036A12D0),
+ UINT32_C(0x070600B9), UINT32_C(0x05505FED), UINT32_C(0x04D77717),
+ UINT32_C(0x04E32DD7) } },
+ { { UINT32_C(0x0F32AB3F), UINT32_C(0x03271637), UINT32_C(0x01E6E3C1),
+ UINT32_C(0x04B433DF), UINT32_C(0x0313D761), UINT32_C(0x01F05C43),
+ UINT32_C(0x01B6E232), UINT32_C(0x0B782E36), UINT32_C(0x0142A283),
+ UINT32_C(0x06A37377), UINT32_C(0x063B9255), UINT32_C(0x05FF47C8),
+ UINT32_C(0x02270CEE), UINT32_C(0x04B3AC67), UINT32_C(0x07D72B62),
+ UINT32_C(0x006133F9), UINT32_C(0x0BFDFB85), UINT32_C(0x04FE3C0B),
+ UINT32_C(0x0406E239) },
+ { UINT32_C(0x0737D38E), UINT32_C(0x07FBCD12), UINT32_C(0x00F51FBD),
+ UINT32_C(0x02A182A2), UINT32_C(0x062DA827), UINT32_C(0x01D9AB6A),
+ UINT32_C(0x0539AEBA), UINT32_C(0x0AB608B0), UINT32_C(0x0226B3BB),
+ UINT32_C(0x0ED7323F), UINT32_C(0x04ADDB11), UINT32_C(0x05B1E5DF),
+ UINT32_C(0x013ECB65), UINT32_C(0x0282983F), UINT32_C(0x02BDD0BD),
+ UINT32_C(0x07F0D675), UINT32_C(0x0C80C17E), UINT32_C(0x06B40353),
+ UINT32_C(0x01D570D9) } },
+ { { UINT32_C(0x0D4D4113), UINT32_C(0x0371ACBF), UINT32_C(0x076D0600),
+ UINT32_C(0x06867748), UINT32_C(0x0267DC5C), UINT32_C(0x04199EE8),
+ UINT32_C(0x015FF11F), UINT32_C(0x01DBB00A), UINT32_C(0x03C8E489),
+ UINT32_C(0x0218373A), UINT32_C(0x00180AE9), UINT32_C(0x0A2CAFBC),
+ UINT32_C(0x016437D1), UINT32_C(0x058A25D0), UINT32_C(0x0AB57613),
+ UINT32_C(0x07DF8B7E), UINT32_C(0x0985AF6A), UINT32_C(0x04CCAE37),
+ UINT32_C(0x0300D01F) },
+ { UINT32_C(0x092A3113), UINT32_C(0x05B20515), UINT32_C(0x0F0E530A),
+ UINT32_C(0x0605CBBF), UINT32_C(0x05FD19B3), UINT32_C(0x01593B38),
+ UINT32_C(0x003D988A), UINT32_C(0x03D76657), UINT32_C(0x017E79DC),
+ UINT32_C(0x02EC918C), UINT32_C(0x069A3B0F), UINT32_C(0x06FB78CA),
+ UINT32_C(0x07B0B30F), UINT32_C(0x0224A884), UINT32_C(0x0FF6CD50),
+ UINT32_C(0x07D9D639), UINT32_C(0x0D753C54), UINT32_C(0x04ED3D38),
+ UINT32_C(0x01E9C727) } },
+ { { UINT32_C(0x0201CD59), UINT32_C(0x01D5BE35), UINT32_C(0x0B2E0772),
+ UINT32_C(0x04E8E2C3), UINT32_C(0x06C76E20), UINT32_C(0x01464A0E),
+ UINT32_C(0x056C1CE9), UINT32_C(0x04E3B528), UINT32_C(0x037AAFAB),
+ UINT32_C(0x06CE134F), UINT32_C(0x06158AF6), UINT32_C(0x02AF338B),
+ UINT32_C(0x025085B6), UINT32_C(0x07AABBFC), UINT32_C(0x0670F3BE),
+ UINT32_C(0x0108503F), UINT32_C(0x0DC85D51), UINT32_C(0x07F4439A),
+ UINT32_C(0x046E6FC9) },
+ { UINT32_C(0x08FFB263), UINT32_C(0x01FF6045), UINT32_C(0x0C4E1676),
+ UINT32_C(0x038E4F62), UINT32_C(0x06DD24CD), UINT32_C(0x0142D912),
+ UINT32_C(0x015AAC36), UINT32_C(0x0DF58E09), UINT32_C(0x038F3D3B),
+ UINT32_C(0x014D0412), UINT32_C(0x0123F0AF), UINT32_C(0x0021ED27),
+ UINT32_C(0x0004843B), UINT32_C(0x05BF4326), UINT32_C(0x05A672B0),
+ UINT32_C(0x02B6453D), UINT32_C(0x0C7F1450), UINT32_C(0x04A895A4),
+ UINT32_C(0x061C3DF9) } },
+ { { UINT32_C(0x0E593E49), UINT32_C(0x07ABFF21), UINT32_C(0x076E69C7),
+ UINT32_C(0x05C81656), UINT32_C(0x0858D39E), UINT32_C(0x041FC1FA),
+ UINT32_C(0x03599A84), UINT32_C(0x0ECF483C), UINT32_C(0x0190C4E8),
+ UINT32_C(0x08EA24D2), UINT32_C(0x03536BE7), UINT32_C(0x0E3746C4),
+ UINT32_C(0x0632F6BA), UINT32_C(0x05CFBDCC), UINT32_C(0x060097CB),
+ UINT32_C(0x04B0546F), UINT32_C(0x0AB5C45F), UINT32_C(0x04F8975E),
+ UINT32_C(0x04C5D61F) },
+ { UINT32_C(0x062B46F6), UINT32_C(0x07516E20), UINT32_C(0x0C1F955C),
+ UINT32_C(0x001F66A2), UINT32_C(0x0ED0D917), UINT32_C(0x0406AF99),
+ UINT32_C(0x069CF83E), UINT32_C(0x0D4D8A00), UINT32_C(0x03D763C5),
+ UINT32_C(0x0E1FD9A7), UINT32_C(0x0056211F), UINT32_C(0x07531A2F),
+ UINT32_C(0x00973B69), UINT32_C(0x021DCD32), UINT32_C(0x09D0AC99),
+ UINT32_C(0x0549BFEA), UINT32_C(0x0305E319), UINT32_C(0x01342656),
+ UINT32_C(0x001B80FB) } },
+ { { UINT32_C(0x031FFCBB), UINT32_C(0x06BC2475), UINT32_C(0x090EA8B2),
+ UINT32_C(0x0716EDFB), UINT32_C(0x0418E2AE), UINT32_C(0x0381C978),
+ UINT32_C(0x05591029), UINT32_C(0x09BD26C6), UINT32_C(0x0460D4D5),
+ UINT32_C(0x07DAA20D), UINT32_C(0x01560E68), UINT32_C(0x04AAAB23),
+ UINT32_C(0x01EA985C), UINT32_C(0x0631896F), UINT32_C(0x0FD13830),
+ UINT32_C(0x0416257F), UINT32_C(0x069B78E7), UINT32_C(0x0016004F),
+ UINT32_C(0x07B5E05F) },
+ { UINT32_C(0x0749B010), UINT32_C(0x0716A42F), UINT32_C(0x0DEDE224),
+ UINT32_C(0x06E403DB), UINT32_C(0x01FC6739), UINT32_C(0x07F5928B),
+ UINT32_C(0x04FF09AE), UINT32_C(0x096D2235), UINT32_C(0x032412BF),
+ UINT32_C(0x0635ABB1), UINT32_C(0x0480F063), UINT32_C(0x0BA557CC),
+ UINT32_C(0x05C0FEF3), UINT32_C(0x01C7CB5C), UINT32_C(0x09482C2A),
+ UINT32_C(0x003CF65B), UINT32_C(0x0F39C07C), UINT32_C(0x00902580),
+ UINT32_C(0x053F7D95) } },
+ { { UINT32_C(0x00C6A752), UINT32_C(0x0600187B), UINT32_C(0x031FD29E),
+ UINT32_C(0x07202D01), UINT32_C(0x08706FD9), UINT32_C(0x003A8DA7),
+ UINT32_C(0x02BC4807), UINT32_C(0x0108B8E2), UINT32_C(0x03DCB4C3),
+ UINT32_C(0x00E5D109), UINT32_C(0x0133EBE8), UINT32_C(0x0DBC9FDB),
+ UINT32_C(0x037A84B4), UINT32_C(0x000D902A), UINT32_C(0x0B159D44),
+ UINT32_C(0x0385B949), UINT32_C(0x0BB24FD6), UINT32_C(0x05FFC44B),
+ UINT32_C(0x0402B0EA) },
+ { UINT32_C(0x0AFA8C2B), UINT32_C(0x03A224AC), UINT32_C(0x08FD7C67),
+ UINT32_C(0x072E1371), UINT32_C(0x01FA5FB1), UINT32_C(0x060D59B5),
+ UINT32_C(0x004D1058), UINT32_C(0x0193E727), UINT32_C(0x0093B083),
+ UINT32_C(0x0ABA0999), UINT32_C(0x07F25ECC), UINT32_C(0x0E8D4648),
+ UINT32_C(0x045B908B), UINT32_C(0x02C916E0), UINT32_C(0x052F14F8),
+ UINT32_C(0x00430404), UINT32_C(0x0B8E9A2B), UINT32_C(0x00F4BF45),
+ UINT32_C(0x03F0A1D1) } },
+ { { UINT32_C(0x0CEE5802), UINT32_C(0x00880798), UINT32_C(0x01C63FFC),
+ UINT32_C(0x071B8526), UINT32_C(0x0C1068FB), UINT32_C(0x052F9DB3),
+ UINT32_C(0x01DDC849), UINT32_C(0x0E84AF14), UINT32_C(0x06CD446D),
+ UINT32_C(0x0A9F92C6), UINT32_C(0x01676037), UINT32_C(0x02A0264C),
+ UINT32_C(0x0467C53C), UINT32_C(0x051C4EE1), UINT32_C(0x01F47FF0),
+ UINT32_C(0x022246B4), UINT32_C(0x07D42402), UINT32_C(0x0287119F),
+ UINT32_C(0x04434D4E) },
+ { UINT32_C(0x018DA0C0), UINT32_C(0x042E86EE), UINT32_C(0x08509770),
+ UINT32_C(0x04EDAEB9), UINT32_C(0x0A4009B5), UINT32_C(0x0335CB55),
+ UINT32_C(0x064D21EC), UINT32_C(0x0647F463), UINT32_C(0x07A167F4),
+ UINT32_C(0x023FB0E4), UINT32_C(0x062A970D), UINT32_C(0x00205267),
+ UINT32_C(0x036D3513), UINT32_C(0x07ABD182), UINT32_C(0x0B51FDBA),
+ UINT32_C(0x077B5CD0), UINT32_C(0x0896BFE4), UINT32_C(0x0300338E),
+ UINT32_C(0x06FF9581) } },
+ { { UINT32_C(0x054184BF), UINT32_C(0x02DCF217), UINT32_C(0x0880D0D9),
+ UINT32_C(0x019760C7), UINT32_C(0x0662BD25), UINT32_C(0x06A962DD),
+ UINT32_C(0x04C69173), UINT32_C(0x019D4A19), UINT32_C(0x05AD5A5F),
+ UINT32_C(0x0E23BF0B), UINT32_C(0x07D3C575), UINT32_C(0x0BCDA9CF),
+ UINT32_C(0x019497F7), UINT32_C(0x01914517), UINT32_C(0x027F0C56),
+ UINT32_C(0x048ED5F5), UINT32_C(0x078B0933), UINT32_C(0x01A7EB30),
+ UINT32_C(0x066D17B3) },
+ { UINT32_C(0x00A95EDC), UINT32_C(0x0386D25E), UINT32_C(0x039DE915),
+ UINT32_C(0x076A16CE), UINT32_C(0x05DCE4A7), UINT32_C(0x07C40607),
+ UINT32_C(0x06F1B7C2), UINT32_C(0x0A817858), UINT32_C(0x0147CB22),
+ UINT32_C(0x0D109609), UINT32_C(0x0454D2C5), UINT32_C(0x0D788CF4),
+ UINT32_C(0x03DCA054), UINT32_C(0x02A7B716), UINT32_C(0x05C66166),
+ UINT32_C(0x01AC2B32), UINT32_C(0x0D0C246B), UINT32_C(0x02E38AD2),
+ UINT32_C(0x039CDC10) } },
+ }
+};
+
+/*-
+ * Finite field inversion: output = t1^(2^521 - 3), i.e. t1^(p - 2) for p = 2^521 - 1.
+ * Computed with exponentiation via FLT (Fermat's little theorem).
+ * Autogenerated: ecp/secp521r1/fe_inv.op3
+ * custom repunit addition chain: each saved value tN below equals t1^(2^N - 1)
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ */
+static void
+fiat_secp521r1_inv(fe_t output, const fe_t t1)
+{
+ int i;
+ /* temporary variables: acc is the running power, tN are saved chain values */
+ fe_t acc, t128, t16, t2, t256, t32, t4, t512, t516, t518, t519, t64, t8;
+
+ fiat_secp521r1_carry_square(acc, t1);
+ fiat_secp521r1_carry_mul(t2, acc, t1); /* t2 = t1^(2^2 - 1) */
+ fiat_secp521r1_carry_square(acc, t2);
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t4, acc, t2); /* t4 = t1^(2^4 - 1) */
+ fiat_secp521r1_carry_square(acc, t4);
+ for (i = 0; i < 3; i++) /* 1 + 3 = 4 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t8, acc, t4); /* t8 = t1^(2^8 - 1) */
+ fiat_secp521r1_carry_square(acc, t8);
+ for (i = 0; i < 7; i++) /* 8 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t16, acc, t8); /* t16 = t1^(2^16 - 1) */
+ fiat_secp521r1_carry_square(acc, t16);
+ for (i = 0; i < 15; i++) /* 16 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t32, acc, t16); /* t32 = t1^(2^32 - 1) */
+ fiat_secp521r1_carry_square(acc, t32);
+ for (i = 0; i < 31; i++) /* 32 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t64, acc, t32); /* t64 = t1^(2^64 - 1) */
+ fiat_secp521r1_carry_square(acc, t64);
+ for (i = 0; i < 63; i++) /* 64 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t128, acc, t64); /* t128 = t1^(2^128 - 1) */
+ fiat_secp521r1_carry_square(acc, t128);
+ for (i = 0; i < 127; i++) /* 128 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t256, acc, t128); /* t256 = t1^(2^256 - 1) */
+ fiat_secp521r1_carry_square(acc, t256);
+ for (i = 0; i < 255; i++) /* 256 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t512, acc, t256); /* t512 = t1^(2^512 - 1) */
+ fiat_secp521r1_carry_square(acc, t512);
+ for (i = 0; i < 3; i++) /* 4 squarings in total */
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t516, acc, t4); /* t516 = t1^(2^516 - 1) */
+ fiat_secp521r1_carry_square(acc, t516);
+ fiat_secp521r1_carry_square(acc, acc);
+ fiat_secp521r1_carry_mul(t518, acc, t2); /* t518 = t1^(2^518 - 1) */
+ fiat_secp521r1_carry_square(acc, t518);
+ fiat_secp521r1_carry_mul(t519, acc, t1); /* t519 = t1^(2^519 - 1) */
+ fiat_secp521r1_carry_square(acc, t519);
+ fiat_secp521r1_carry_square(acc, acc); /* acc = t1^(2^521 - 4) */
+ fiat_secp521r1_carry_mul(output, acc, t1); /* only write to output, so output may alias t1 if carry_mul permits in-place use */
+}
+
+/*-
+ * Q := 2P, both projective, Q and P same pointers OK
+ * Autogenerated: op3/dbl_proj.op3 -- statement order is significant, do not reorder
+ * https://eprint.iacr.org/2015/1060 Alg 6
+ * ASSERT: a = -3 (the only curve constant used below is b, read from const_b)
+ */
+static void
+point_double(pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith: X, Y, Z are inputs (P), X3, Y3, Z3 outputs (Q) */
+ const limb_t *X = P->X;
+ const limb_t *Y = P->Y;
+ const limb_t *Z = P->Z;
+ limb_t *X3 = Q->X;
+ limb_t *Y3 = Q->Y;
+ limb_t *Z3 = Q->Z;
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_square(t0, X);
+ fiat_secp521r1_carry_square(t1, Y);
+ fiat_secp521r1_carry_square(t2, Z);
+ fiat_secp521r1_carry_mul(t3, X, Y);
+ fiat_secp521r1_carry_add(t3, t3, t3);
+ fiat_secp521r1_carry_mul(t4, Y, Z);
+ fiat_secp521r1_carry_mul(Z3, X, Z); /* last read of P and first write to Q: why Q == P is OK */
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+ fiat_secp521r1_carry_mul(Y3, b, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, Z3);
+ fiat_secp521r1_carry_add(X3, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, X3, Y3);
+ fiat_secp521r1_carry_sub(X3, t1, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Y3);
+ fiat_secp521r1_carry_mul(X3, X3, t3);
+ fiat_secp521r1_carry_add(t3, t2, t2);
+ fiat_secp521r1_carry_add(t2, t2, t3);
+ fiat_secp521r1_carry_mul(Z3, b, Z3);
+ fiat_secp521r1_carry_sub(Z3, Z3, t2);
+ fiat_secp521r1_carry_sub(Z3, Z3, t0);
+ fiat_secp521r1_carry_add(t3, Z3, Z3);
+ fiat_secp521r1_carry_add(Z3, Z3, t3);
+ fiat_secp521r1_carry_add(t3, t0, t0);
+ fiat_secp521r1_carry_add(t0, t3, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t0, t0, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t0, t4, t4);
+ fiat_secp521r1_carry_mul(Z3, t0, Z3);
+ fiat_secp521r1_carry_sub(X3, X3, Z3);
+ fiat_secp521r1_carry_mul(Z3, t0, t1);
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+ fiat_secp521r1_carry_add(Z3, Z3, Z3);
+}
+
+/*-
+ * out1 = (arg1 == 0) ? 0 : nz, where nz is the bitwise OR of all LIMB_CNT limbs
+ * NB: this is not a "mod p equiv" 0, but literal 0 (every limb must be zero)
+ * NB: this is not a real fiat-crypto function, just named that way for consistency.
+ */
+static void
+fiat_secp521r1_nonzero(limb_t *out1, const fe_t arg1)
+{
+ limb_t x1 = 0;
+ int i;
+
+ for (i = 0; i < LIMB_CNT; i++) /* fixed trip count, no branch on limb values */
+ x1 |= arg1[i];
+ *out1 = x1;
+}
+
+/*-
+ * R := Q + P where R and Q are projective, P affine.
+ * R and Q same pointers OK (the sum is built in locals and only copied to R at the end)
+ * R and P same pointers not OK
+ * Autogenerated: op3/add_mixed.op3 -- statement order is significant, do not reorder
+ * https://eprint.iacr.org/2015/1060 Alg 5
+ * ASSERT: a = -3; P counts as affine infinity when every limb of P->Y is zero
+ */
+static void
+point_add_mixed(pt_prj_t *R, const pt_prj_t *Q, const pt_aff_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith: (X1, Y1, Z1) = Q, (X2, Y2) = P */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ fe_t X3; /* local result, selected into R at the end */
+ fe_t Y3;
+ fe_t Z3;
+ limb_t nz; /* zero iff P->Y is literally all-zero */
+
+ /* check P for affine inf */
+ fiat_secp521r1_nonzero(&nz, P->Y);
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_mul(t0, X1, X2);
+ fiat_secp521r1_carry_mul(t1, Y1, Y2);
+ fiat_secp521r1_carry_add(t3, X2, Y2);
+ fiat_secp521r1_carry_add(t4, X1, Y1);
+ fiat_secp521r1_carry_mul(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, t0, t1);
+ fiat_secp521r1_carry_sub(t3, t3, t4);
+ fiat_secp521r1_carry_mul(t4, Y2, Z1);
+ fiat_secp521r1_carry_add(t4, t4, Y1);
+ fiat_secp521r1_carry_mul(Y3, X2, Z1);
+ fiat_secp521r1_carry_add(Y3, Y3, X1);
+ fiat_secp521r1_carry_mul(Z3, b, Z1);
+ fiat_secp521r1_carry_sub(X3, Y3, Z3);
+ fiat_secp521r1_carry_add(Z3, X3, X3);
+ fiat_secp521r1_carry_add(X3, X3, Z3);
+ fiat_secp521r1_carry_sub(Z3, t1, X3);
+ fiat_secp521r1_carry_add(X3, t1, X3);
+ fiat_secp521r1_carry_mul(Y3, b, Y3);
+ fiat_secp521r1_carry_add(t1, Z1, Z1);
+ fiat_secp521r1_carry_add(t2, t1, Z1);
+ fiat_secp521r1_carry_sub(Y3, Y3, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t1, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_add(t1, t0, t0);
+ fiat_secp521r1_carry_add(t0, t1, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t1, t4, Y3);
+ fiat_secp521r1_carry_mul(t2, t0, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t2);
+ fiat_secp521r1_carry_mul(X3, t3, X3);
+ fiat_secp521r1_carry_sub(X3, X3, t1);
+ fiat_secp521r1_carry_mul(Z3, t4, Z3);
+ fiat_secp521r1_carry_mul(t1, t3, t0);
+ fiat_secp521r1_carry_add(Z3, Z3, t1);
+
+ /* if P is inf (nz == 0), throw all that away and take Q; otherwise take the sum */
+ fiat_secp521r1_selectznz(R->X, nz, Q->X, X3);
+ fiat_secp521r1_selectznz(R->Y, nz, Q->Y, Y3);
+ fiat_secp521r1_selectznz(R->Z, nz, Q->Z, Z3);
+}
+
+/*-
+ * R := Q + P all projective.
+ * R and Q same pointers OK (Q is no longer read once R is first written)
+ * R and P same pointers not OK (R->X is written before P->X and P->Z are last read)
+ * Autogenerated: op3/add_proj.op3 -- statement order is significant, do not reorder
+ * https://eprint.iacr.org/2015/1060 Alg 4
+ * ASSERT: a = -3
+ */
+static void
+point_add_proj(pt_prj_t *R, const pt_prj_t *Q, const pt_prj_t *P)
+{
+ /* temporary variables */
+ fe_t t0, t1, t2, t3, t4, t5;
+ /* constants */
+ const limb_t *b = const_b;
+ /* set pointers for legacy curve arith: 1 = Q, 2 = P, 3 = R (written in place) */
+ const limb_t *X1 = Q->X;
+ const limb_t *Y1 = Q->Y;
+ const limb_t *Z1 = Q->Z;
+ const limb_t *X2 = P->X;
+ const limb_t *Y2 = P->Y;
+ const limb_t *Z2 = P->Z;
+ limb_t *X3 = R->X;
+ limb_t *Y3 = R->Y;
+ limb_t *Z3 = R->Z;
+
+ /* the curve arith formula */
+ fiat_secp521r1_carry_mul(t0, X1, X2);
+ fiat_secp521r1_carry_mul(t1, Y1, Y2);
+ fiat_secp521r1_carry_mul(t2, Z1, Z2);
+ fiat_secp521r1_carry_add(t3, X1, Y1);
+ fiat_secp521r1_carry_add(t4, X2, Y2);
+ fiat_secp521r1_carry_mul(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, t0, t1);
+ fiat_secp521r1_carry_sub(t3, t3, t4);
+ fiat_secp521r1_carry_add(t4, Y1, Z1);
+ fiat_secp521r1_carry_add(t5, Y2, Z2);
+ fiat_secp521r1_carry_mul(t4, t4, t5);
+ fiat_secp521r1_carry_add(t5, t1, t2);
+ fiat_secp521r1_carry_sub(t4, t4, t5);
+ fiat_secp521r1_carry_add(X3, X1, Z1); /* first write to R; last read of Q */
+ fiat_secp521r1_carry_add(Y3, X2, Z2); /* last read of P */
+ fiat_secp521r1_carry_mul(X3, X3, Y3);
+ fiat_secp521r1_carry_add(Y3, t0, t2);
+ fiat_secp521r1_carry_sub(Y3, X3, Y3);
+ fiat_secp521r1_carry_mul(Z3, b, t2);
+ fiat_secp521r1_carry_sub(X3, Y3, Z3);
+ fiat_secp521r1_carry_add(Z3, X3, X3);
+ fiat_secp521r1_carry_add(X3, X3, Z3);
+ fiat_secp521r1_carry_sub(Z3, t1, X3);
+ fiat_secp521r1_carry_add(X3, t1, X3);
+ fiat_secp521r1_carry_mul(Y3, b, Y3);
+ fiat_secp521r1_carry_add(t1, t2, t2);
+ fiat_secp521r1_carry_add(t2, t1, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t2);
+ fiat_secp521r1_carry_sub(Y3, Y3, t0);
+ fiat_secp521r1_carry_add(t1, Y3, Y3);
+ fiat_secp521r1_carry_add(Y3, t1, Y3);
+ fiat_secp521r1_carry_add(t1, t0, t0);
+ fiat_secp521r1_carry_add(t0, t1, t0);
+ fiat_secp521r1_carry_sub(t0, t0, t2);
+ fiat_secp521r1_carry_mul(t1, t4, Y3);
+ fiat_secp521r1_carry_mul(t2, t0, Y3);
+ fiat_secp521r1_carry_mul(Y3, X3, Z3);
+ fiat_secp521r1_carry_add(Y3, Y3, t2);
+ fiat_secp521r1_carry_mul(X3, t3, X3);
+ fiat_secp521r1_carry_sub(X3, X3, t1);
+ fiat_secp521r1_carry_mul(Z3, t4, Z3);
+ fiat_secp521r1_carry_mul(t1, t3, t0);
+ fiat_secp521r1_carry_add(Z3, Z3, t1);
+}
+
+/* constants: window parameters shared by the wnaf/comb scalar recoding below */
+#define RADIX 5 /* window width in bits */
+#define DRADIX (1 << RADIX) /* 32; tables hold DRADIX / 2 = 16 odd multiples */
+#define DRADIX_WNAF ((DRADIX) << 1) /* 64; DRADIX_WNAF - 1 is the RADIX + 1 bit window mask */
+
+/*-
+ * precomp for wnaf scalar multiplication, i.e. precomp[i] = (2i + 1)P:
+ * precomp[0] = 1P
+ * precomp[1] = 3P
+ * precomp[2] = 5P
+ * precomp[3] = 7P
+ * precomp[4] = 9P
+ * ... up to precomp[DRADIX / 2 - 1]; P is lifted to projective with Z = const_one
+ */
+static void
+precomp_wnaf(pt_prj_t precomp[DRADIX / 2], const pt_aff_t *P)
+{
+ int i;
+
+ fe_copy(precomp[0].X, P->X);
+ fe_copy(precomp[0].Y, P->Y);
+ fe_copy(precomp[0].Z, const_one);
+ point_double(&precomp[DRADIX / 2 - 1], &precomp[0]); /* last slot temporarily holds 2P */
+
+ for (i = 1; i < DRADIX / 2; i++) /* precomp[i] = 2P + precomp[i - 1]; the final pass overwrites the 2P slot */
+ point_add_proj(&precomp[i], &precomp[DRADIX / 2 - 1], &precomp[i - 1]);
+}
+
+/* fetch bit idx of a 66-byte scalar (bit 0 = lsb of in[0]); out-of-range idx yields 0 */
+static int
+scalar_get_bit(const unsigned char in[66], int idx)
+{
+ int widx, rshift;
+
+ widx = idx >> 3; /* byte index */
+ rshift = idx & 0x7; /* bit position within that byte */
+
+ if (idx < 0 || widx >= 66) /* bits beyond the buffer read as zero */
+ return 0;
+
+ return (in[widx] >> rshift) & 0x1;
+}
+
+/*-
+ * Compute "regular" wnaf representation of a scalar.
+ * See "Exponent Recoding and Regular Exponentiation Algorithms",
+ * Tunstall et al., AfricaCrypt 2009, Alg 6.
+ * It forces an odd scalar (low bit of in[0] is set) and outputs 106 digits in
+ * {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...}
+ * i.e. signed odd digits with _no zeroes_ -- that makes it "regular".
+ */
+static void
+scalar_rwnaf(int8_t out[106], const unsigned char in[66])
+{
+ int i;
+ int8_t window, d;
+
+ window = (in[0] & (DRADIX_WNAF - 1)) | 1; /* low RADIX + 1 bits, forced odd */
+ for (i = 0; i < 105; i++) {
+ d = (window & (DRADIX_WNAF - 1)) - DRADIX; /* re-centre the window around zero */
+ out[i] = d;
+ window = (window - d) >> RADIX; /* carry the remainder into the next digit */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 1) << 1; /* pull in the next RADIX scalar bits */
+ window += scalar_get_bit(in, (i + 1) * RADIX + 2) << 2;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 3) << 3;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 4) << 4;
+ window += scalar_get_bit(in, (i + 1) * RADIX + 5) << 5;
+ }
+ out[i] = window; /* i == 105 here: the top digit is whatever remains */
+}
+
+/*-
+ * Compute "textbook" wnaf representation of a scalar: 529 digits, each zero or odd.
+ * NB: not constant time (branches on scalar bits)
+ */
+static void
+scalar_wnaf(int8_t out[529], const unsigned char in[66])
+{
+ int i;
+ int8_t window, d;
+
+ window = in[0] & (DRADIX_WNAF - 1);
+ for (i = 0; i < 529; i++) {
+ d = 0; /* even window: emit a zero digit */
+ if ((window & 1) && ((d = window & (DRADIX_WNAF - 1)) & DRADIX)) /* odd window with the DRADIX bit set */
+ d -= DRADIX_WNAF; /* ... becomes a negative digit */
+ out[i] = d;
+ window = (window - d) >> 1; /* advance by one bit */
+ window += scalar_get_bit(in, i + 1 + RADIX) << RADIX; /* shift in the next scalar bit at the top */
+ }
+}
+
+/*-
+ * Simultaneous scalar multiplication: interleaved "textbook" wnaf.
+ * Digits of a index lut_cmb[0] (affine), digits of b index the table built from P.
+ * out may alias P: P is only read by precomp_wnaf, before out is written.
+ * NB: not constant time
+ */
+static void
+var_smul_wnaf_two(pt_aff_t *out, const unsigned char a[66],
+ const unsigned char b[66], const pt_aff_t *P)
+{
+ int i, d, is_neg, is_inf = 1, flipped = 0; /* flipped: Q.Y currently holds the negated Y */
+ int8_t anaf[529] = { 0 };
+ int8_t bnaf[529] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P);
+ scalar_wnaf(anaf, a);
+ scalar_wnaf(bnaf, b);
+
+ for (i = 528; i >= 0; i--) { /* most significant digit first */
+ if (!is_inf) /* nothing to double until the accumulator has been set */
+ point_double(&Q, &Q);
+ if ((d = bnaf[i])) {
+ if ((is_neg = d < 0) != flipped) { /* negate the accumulator rather than the table entry */
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &precomp[d].X);
+ fe_copy(Q.Y, &precomp[d].Y);
+ fe_copy(Q.Z, &precomp[d].Z);
+ is_inf = 0;
+ } else
+ point_add_proj(&Q, &Q, &precomp[d]);
+ }
+ if ((d = anaf[i])) {
+ if ((is_neg = d < 0) != flipped) { /* same sign handling as for bnaf above */
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ flipped ^= 1;
+ }
+ d = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ if (is_inf) {
+ /* initialize accumulator */
+ fe_copy(Q.X, &lut_cmb[0][d].X);
+ fe_copy(Q.Y, &lut_cmb[0][d].Y);
+ fe_copy(Q.Z, const_one);
+ is_inf = 0;
+ } else
+ point_add_mixed(&Q, &Q, &lut_cmb[0][d]);
+ }
+ }
+
+ if (is_inf) {
+ /* initialize accumulator to inf: all-zero scalars */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+ }
+
+ if (flipped) {
+ /* correct sign */
+ fiat_secp521r1_carry_opp(Q.Y, Q.Y);
+ }
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Variable point scalar multiplication with "regular" wnaf.
+ * Here "regular" means _no zeroes_, so the sequence of
+ * EC arithmetic ops is fixed. out may alias P; out->Y doubles as scratch in the loop.
+ */
+static void
+var_smul_rwnaf(pt_aff_t *out, const unsigned char scalar[66],
+ const pt_aff_t *P)
+{
+ int i, j, d, diff, is_neg;
+ int8_t rnaf[106] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, lut = { { 0 }, { 0 }, { 0 } };
+ pt_prj_t precomp[DRADIX / 2];
+
+ precomp_wnaf(precomp, P); /* the only read of P; later steps use precomp[] */
+ scalar_rwnaf(rnaf, scalar);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ /* initialize accumulator to high digit */
+ d = (rnaf[105] - 1) >> 1;
+ for (j = 0; j < DRADIX / 2; j++) { /* scan the whole table so the access pattern is fixed */
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == j (relies on arithmetic right shift) */
+ fiat_secp521r1_selectznz(Q.X, diff, Q.X, precomp[j].X);
+ fiat_secp521r1_selectznz(Q.Y, diff, Q.Y, precomp[j].Y);
+ fiat_secp521r1_selectznz(Q.Z, diff, Q.Z, precomp[j].Z);
+ }
+
+ for (i = 104; i >= 0; i--) {
+ for (j = 0; j < RADIX; j++) /* Q := 2^RADIX * Q */
+ point_double(&Q, &Q);
+ d = rnaf[i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ for (j = 0; j < DRADIX / 2; j++) {
+ diff = (1 - (-(d ^ j) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == j */
+ fiat_secp521r1_selectznz(lut.X, diff, lut.X, precomp[j].X);
+ fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, precomp[j].Y);
+ fiat_secp521r1_selectznz(lut.Z, diff, lut.Z, precomp[j].Z);
+ }
+ /* negate lut point if digit is negative (out->Y is scratch for -lut.Y) */
+ fiat_secp521r1_carry_opp(out->Y, lut.Y);
+ fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
+ point_add_proj(&Q, &Q, &lut);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even (lut := Q - P is always computed) */
+ fe_copy(lut.X, precomp[0].X);
+ fiat_secp521r1_carry_opp(lut.Y, precomp[0].Y);
+ fe_copy(lut.Z, precomp[0].Z);
+ point_add_proj(&lut, &lut, &Q);
+ fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, lut.X, Q.X); /* odd scalar keeps Q, even takes lut */
+ fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, lut.Y, Q.Y);
+ fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, lut.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Fixed scalar multiplication: comb with interleaving (rwnaf digit j * 9 + i uses table lut_cmb[j]).
+ */
+static void
+fixed_smul_cmb(pt_aff_t *out, const unsigned char scalar[66])
+{
+ int i, j, k, d, diff, is_neg = 0;
+ int8_t rnaf[106] = { 0 };
+ pt_prj_t Q = { { 0 }, { 0 }, { 0 } }, R = { { 0 }, { 0 }, { 0 } };
+ pt_aff_t lut = { { 0 }, { 0 } };
+
+ scalar_rwnaf(rnaf, scalar);
+
+ /* initialize accumulator to inf */
+ fe_set_zero(Q.X);
+ fe_copy(Q.Y, const_one);
+ fe_set_zero(Q.Z);
+
+#if defined(_MSC_VER)
+ /* result still unsigned: yes we know */
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+
+ for (i = 8; i >= 0; i--) {
+ for (j = 0; i != 8 && j < RADIX; j++) /* RADIX doublings per pass, skipped on the first pass (i == 8) */
+ point_double(&Q, &Q);
+ for (j = 0; j < 13; j++) {
+ if (j * 9 + i > 105) /* rnaf has only 106 digits */
+ continue;
+ d = rnaf[j * 9 + i];
+ /* is_neg = (d < 0) ? 1 : 0 */
+ is_neg = (d >> (8 * sizeof(int) - 1)) & 1;
+ /* d = abs(d) */
+ d = (d ^ -is_neg) + is_neg;
+ d = (d - 1) >> 1; /* table index (|d| - 1) / 2 */
+ for (k = 0; k < DRADIX / 2; k++) { /* scan the whole table so the access pattern is fixed */
+ diff = (1 - (-(d ^ k) >> (8 * sizeof(int) - 1))) & 1; /* 1 iff d == k */
+ fiat_secp521r1_selectznz(lut.X, diff, lut.X, lut_cmb[j][k].X);
+ fiat_secp521r1_selectznz(lut.Y, diff, lut.Y, lut_cmb[j][k].Y);
+ }
+ /* negate lut point if digit is negative (out->Y is scratch for -lut.Y) */
+ fiat_secp521r1_carry_opp(out->Y, lut.Y);
+ fiat_secp521r1_selectznz(lut.Y, is_neg, lut.Y, out->Y);
+ point_add_mixed(&Q, &Q, &lut);
+ }
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* conditionally subtract P if the scalar was even (here P is lut_cmb[0][0]; R := Q - P is always computed) */
+ fe_copy(lut.X, lut_cmb[0][0].X);
+ fiat_secp521r1_carry_opp(lut.Y, lut_cmb[0][0].Y);
+ point_add_mixed(&R, &Q, &lut);
+ fiat_secp521r1_selectznz(Q.X, scalar[0] & 1, R.X, Q.X); /* odd scalar keeps Q, even takes R */
+ fiat_secp521r1_selectznz(Q.Y, scalar[0] & 1, R.Y, Q.Y);
+ fiat_secp521r1_selectznz(Q.Z, scalar[0] & 1, R.Z, Q.Z);
+
+ /* convert to affine -- NB depends on coordinate system */
+ fiat_secp521r1_inv(Q.Z, Q.Z);
+ fiat_secp521r1_carry_mul(out->X, Q.X, Q.Z);
+ fiat_secp521r1_carry_mul(out->Y, Q.Y, Q.Z);
+}
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering (66-byte buffers). Uses the non-constant-time wnaf path.
+ */
+void
+point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char a[66],
+ const unsigned char b[66],
+ const unsigned char inx[66],
+ const unsigned char iny[66])
+{
+ pt_aff_t P; /* holds the input point, then the result */
+
+ fiat_secp521r1_from_bytes(P.X, inx);
+ fiat_secp521r1_from_bytes(P.Y, iny);
+ /* simultaneous scalar multiplication (P is both input and output) */
+ var_smul_wnaf_two(&P, a, b, &P);
+
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering (66-byte buffers).
+ */
+void
+point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char scalar[66])
+{
+ pt_aff_t P; /* receives the affine result */
+
+ /* fixed scmul function */
+ fixed_smul_cmb(&P, scalar);
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering (66-byte buffers).
+ */
+void
+point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char scalar[66],
+ const unsigned char inx[66],
+ const unsigned char iny[66])
+{
+ pt_aff_t P; /* holds the input point, then the result */
+
+ fiat_secp521r1_from_bytes(P.X, inx);
+ fiat_secp521r1_from_bytes(P.Y, iny);
+ /* var scmul function (P is both input and output) */
+ var_smul_rwnaf(&P, scalar, &P);
+ fiat_secp521r1_to_bytes(outx, P.X);
+ fiat_secp521r1_to_bytes(outy, P.Y);
+}
+
+#endif /* __SIZEOF_INT128__ */
diff --git a/security/nss/lib/freebl/ecl/ecp_secp521r1.h b/security/nss/lib/freebl/ecl/ecp_secp521r1.h
new file mode 100644
index 0000000000..317a201845
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp521r1.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecp_secp521r1_h_
+#define __ecp_secp521r1_h_
+
+/*-
+ * Wrapper: simultaneous scalar multiplication.
+ * outx, outy := a * G + b * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering (all buffers are 66 bytes).
+ */
+void point_mul_two_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char a[66], const unsigned char b[66], const unsigned char inx[66], const unsigned char iny[66]);
+
+/*-
+ * Wrapper: fixed scalar multiplication.
+ * outx, outy := scalar * G
+ * Everything is LE byte ordering (all buffers are 66 bytes).
+ */
+void point_mul_g_secp521r1(unsigned char outx[66], unsigned char outy[66],
+ const unsigned char scalar[66]);
+
+/*-
+ * Wrapper: variable point scalar multiplication.
+ * outx, outy := scalar * P
+ * where P = (inx, iny).
+ * Everything is LE byte ordering (all buffers are 66 bytes).
+ */
+void point_mul_secp521r1(unsigned char outx[66], unsigned char outy[66], const unsigned char scalar[66], const unsigned char inx[66], const unsigned char iny[66]);
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecp_secp521r1_wrap.c b/security/nss/lib/freebl/ecl/ecp_secp521r1_wrap.c
new file mode 100644
index 0000000000..b767624966
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_secp521r1_wrap.c
@@ -0,0 +1,255 @@
+/*-
+ * MIT License
+ * -
+ * Copyright (c) 2020 Luis Rivera-Zamarripa, Jesús-Javier Chi-Domínguez, Billy Bob Brumley
+ * -
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * -
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * -
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#undef RADIX
+#include "ecp.h"
+#include "ecp_secp521r1.h"
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/*-
+ * reverse the 66 bytes of a in place (a[i] <-> a[65 - i], i = 0..32) -- total hack
+ * NB: a is expanded unparenthesized many times, so pass a plain array name */
+#define MP_BE2LE(a) \
+ do { \
+ unsigned char z_bswap; \
+ z_bswap = a[0]; \
+ a[0] = a[65]; \
+ a[65] = z_bswap; \
+ z_bswap = a[1]; \
+ a[1] = a[64]; \
+ a[64] = z_bswap; \
+ z_bswap = a[2]; \
+ a[2] = a[63]; \
+ a[63] = z_bswap; \
+ z_bswap = a[3]; \
+ a[3] = a[62]; \
+ a[62] = z_bswap; \
+ z_bswap = a[4]; \
+ a[4] = a[61]; \
+ a[61] = z_bswap; \
+ z_bswap = a[5]; \
+ a[5] = a[60]; \
+ a[60] = z_bswap; \
+ z_bswap = a[6]; \
+ a[6] = a[59]; \
+ a[59] = z_bswap; \
+ z_bswap = a[7]; \
+ a[7] = a[58]; \
+ a[58] = z_bswap; \
+ z_bswap = a[8]; \
+ a[8] = a[57]; \
+ a[57] = z_bswap; \
+ z_bswap = a[9]; \
+ a[9] = a[56]; \
+ a[56] = z_bswap; \
+ z_bswap = a[10]; \
+ a[10] = a[55]; \
+ a[55] = z_bswap; \
+ z_bswap = a[11]; \
+ a[11] = a[54]; \
+ a[54] = z_bswap; \
+ z_bswap = a[12]; \
+ a[12] = a[53]; \
+ a[53] = z_bswap; \
+ z_bswap = a[13]; \
+ a[13] = a[52]; \
+ a[52] = z_bswap; \
+ z_bswap = a[14]; \
+ a[14] = a[51]; \
+ a[51] = z_bswap; \
+ z_bswap = a[15]; \
+ a[15] = a[50]; \
+ a[50] = z_bswap; \
+ z_bswap = a[16]; \
+ a[16] = a[49]; \
+ a[49] = z_bswap; \
+ z_bswap = a[17]; \
+ a[17] = a[48]; \
+ a[48] = z_bswap; \
+ z_bswap = a[18]; \
+ a[18] = a[47]; \
+ a[47] = z_bswap; \
+ z_bswap = a[19]; \
+ a[19] = a[46]; \
+ a[46] = z_bswap; \
+ z_bswap = a[20]; \
+ a[20] = a[45]; \
+ a[45] = z_bswap; \
+ z_bswap = a[21]; \
+ a[21] = a[44]; \
+ a[44] = z_bswap; \
+ z_bswap = a[22]; \
+ a[22] = a[43]; \
+ a[43] = z_bswap; \
+ z_bswap = a[23]; \
+ a[23] = a[42]; \
+ a[42] = z_bswap; \
+ z_bswap = a[24]; \
+ a[24] = a[41]; \
+ a[41] = z_bswap; \
+ z_bswap = a[25]; \
+ a[25] = a[40]; \
+ a[40] = z_bswap; \
+ z_bswap = a[26]; \
+ a[26] = a[39]; \
+ a[39] = z_bswap; \
+ z_bswap = a[27]; \
+ a[27] = a[38]; \
+ a[38] = z_bswap; \
+ z_bswap = a[28]; \
+ a[28] = a[37]; \
+ a[37] = z_bswap; \
+ z_bswap = a[29]; \
+ a[29] = a[36]; \
+ a[36] = z_bswap; \
+ z_bswap = a[30]; \
+ a[30] = a[35]; \
+ a[35] = z_bswap; \
+ z_bswap = a[31]; \
+ a[31] = a[34]; \
+ a[34] = z_bswap; \
+ z_bswap = a[32]; \
+ a[32] = a[33]; \
+ a[33] = z_bswap; \
+ } while (0)
+
+static mp_err
+point_mul_g_secp521r1_wrap(const mp_int *n, mp_int *out_x,
+ mp_int *out_y, const ECGroup *group)
+{ /* (out_x, out_y) := n * G via point_mul_g_secp521r1; group is not used in this body */
+ unsigned char b_x[66];
+ unsigned char b_y[66];
+ unsigned char b_n[66];
+ mp_err res;
+
+ ARGCHK(n != NULL && out_x != NULL && out_y != NULL, MP_BADARG);
+
+ /* fail on out of range scalars: more than 521 bits, or not strictly positive */
+ if (mpl_significant_bits(n) > 521 || mp_cmp_z(n) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66));
+ MP_BE2LE(b_n); /* reverse into the LE order point_mul_g_secp521r1 expects */
+ point_mul_g_secp521r1(b_x, b_y, b_n);
+ MP_BE2LE(b_x); /* reverse the LE results back before reading them as mp_ints */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66));
+
+CLEANUP:
+ return res;
+}
+
+static mp_err
+point_mul_secp521r1_wrap(const mp_int *n, const mp_int *in_x,
+ const mp_int *in_y, mp_int *out_x,
+ mp_int *out_y, const ECGroup *group)
+{ /* (out_x, out_y) := n * (in_x, in_y) via point_mul_secp521r1; group is not used in this body */
+ unsigned char b_x[66];
+ unsigned char b_y[66];
+ unsigned char b_n[66];
+ mp_err res;
+
+ ARGCHK(n != NULL && in_x != NULL && in_y != NULL && out_x != NULL &&
+ out_y != NULL,
+ MP_BADARG);
+
+ /* fail on out of range scalars: more than 521 bits, or not strictly positive */
+ if (mpl_significant_bits(n) > 521 || mp_cmp_z(n) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n, b_n, 66));
+ MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 66));
+ MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 66));
+ MP_BE2LE(b_x); /* reverse all inputs into the LE order point_mul_secp521r1 expects */
+ MP_BE2LE(b_y);
+ MP_BE2LE(b_n);
+ point_mul_secp521r1(b_x, b_y, b_n, b_x, b_y); /* b_x and b_y are reused as input and output */
+ MP_BE2LE(b_x); /* reverse the LE results back before reading them as mp_ints */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66));
+
+CLEANUP:
+ return res;
+}
+
+static mp_err
+point_mul_two_secp521r1_wrap(const mp_int *n1, const mp_int *n2,
+ const mp_int *in_x,
+ const mp_int *in_y, mp_int *out_x,
+ mp_int *out_y,
+ const ECGroup *group)
+{ /* (out_x, out_y) := n1 * G + n2 * (in_x, in_y); group is only forwarded to the single-scalar wrappers */
+ unsigned char b_x[66];
+ unsigned char b_y[66];
+ unsigned char b_n1[66];
+ unsigned char b_n2[66];
+ mp_err res;
+
+ /* If n2 == NULL or 0, this is just a base-point multiplication. */
+ if (n2 == NULL || mp_cmp_z(n2) == MP_EQ)
+ return point_mul_g_secp521r1_wrap(n1, out_x, out_y, group);
+
+ /* If n1 == NULL or 0, this is just an arbitrary-point multiplication. */
+ if (n1 == NULL || mp_cmp_z(n1) == MP_EQ)
+ return point_mul_secp521r1_wrap(n2, in_x, in_y, out_x, out_y, group);
+
+ ARGCHK(in_x != NULL && in_y != NULL && out_x != NULL && out_y != NULL,
+ MP_BADARG);
+
+ /* fail on out of range scalars: more than 521 bits, or not strictly positive */
+ if (mpl_significant_bits(n1) > 521 || mp_cmp_z(n1) != MP_GT ||
+ mpl_significant_bits(n2) > 521 || mp_cmp_z(n2) != MP_GT)
+ return MP_RANGE;
+
+ MP_CHECKOK(mp_to_fixlen_octets(n1, b_n1, 66));
+ MP_CHECKOK(mp_to_fixlen_octets(n2, b_n2, 66));
+ MP_CHECKOK(mp_to_fixlen_octets(in_x, b_x, 66));
+ MP_CHECKOK(mp_to_fixlen_octets(in_y, b_y, 66));
+ MP_BE2LE(b_x); /* reverse all inputs into the LE order point_mul_two_secp521r1 expects */
+ MP_BE2LE(b_y);
+ MP_BE2LE(b_n1);
+ MP_BE2LE(b_n2);
+ point_mul_two_secp521r1(b_x, b_y, b_n1, b_n2, b_x, b_y); /* b_x and b_y are reused as input and output */
+ MP_BE2LE(b_x); /* reverse the LE results back before reading them as mp_ints */
+ MP_BE2LE(b_y);
+ MP_CHECKOK(mp_read_unsigned_octets(out_x, b_x, 66));
+ MP_CHECKOK(mp_read_unsigned_octets(out_y, b_y, 66));
+
+CLEANUP:
+ return res;
+}
+
+mp_err
+ec_group_set_secp521r1(ECGroup *group, ECCurveName name)
+{ /* installs the secp521r1 wrappers on group for ECCurve_NIST_P521; always returns MP_OKAY */
+ if (name == ECCurve_NIST_P521) { /* any other curve name leaves group untouched */
+ group->base_point_mul = &point_mul_g_secp521r1_wrap;
+ group->point_mul = &point_mul_secp521r1_wrap;
+ group->points_mul = &point_mul_two_secp521r1_wrap;
+ }
+ return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/exports.gyp b/security/nss/lib/freebl/exports.gyp
new file mode 100644
index 0000000000..af5c782a53
--- /dev/null
+++ b/security/nss/lib/freebl/exports.gyp
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+ 'includes': [
+ '../../coreconf/config.gypi'
+ ],
+ 'targets': [
+ {
+ 'target_name': 'lib_freebl_exports',
+ 'type': 'none', # this target only lists header copies
+ 'copies': [
+ { # headers copied to the public dist dir
+ 'files': [
+ 'blapit.h',
+ 'ecl/ecl-exp.h',
+ 'shsign.h'
+ ],
+ 'conditions': [
+ [ 'OS=="linux"', { # nsslowhash.h is listed only for linux
+ 'files': [
+ 'nsslowhash.h',
+ ],
+ }],
+ ],
+ 'destination': '<(nss_public_dist_dir)/<(module)'
+ },
+ { # headers copied to the private dist dir
+ 'files': [
+ 'cmac.h',
+ 'alghmac.h',
+ 'blapi.h',
+ 'blake2b.h',
+ 'chacha20poly1305.h',
+ 'ec.h',
+ 'ecl/ecl-curve.h',
+ 'ecl/ecl.h',
+ 'ecl/eclt.h',
+ 'hmacct.h',
+ 'secmpi.h',
+ 'secrng.h'
+ ],
+ 'destination': '<(nss_private_dist_dir)/<(module)'
+ }
+ ]
+ }
+ ],
+ 'variables': {
+ 'module': 'nss' # expands <(module) in the destinations above
+ }
+}
diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c
new file mode 100644
index 0000000000..e532a636ee
--- /dev/null
+++ b/security/nss/lib/freebl/fipsfreebl.c
@@ -0,0 +1,2059 @@
+/*
+ * PKCS #11 FIPS Power-Up Self Test.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */
+
+#ifndef NSS_FIPS_DISABLED
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "seccomon.h" /* Required for RSA. */
+#include "secerr.h"
+#include "prtypes.h"
+#include "secitem.h"
+#include "pkcs11t.h"
+#include "cmac.h"
+
+#include "ec.h" /* Required for EC */
+
+/*
+ * Different platforms have different ways of calling an initial entry point
+ * when the dll/.so is loaded. Most platforms support either a posix pragma
+ * or the GCC attribute. Some platforms support a pre-defined name, and some
+ * platforms have a link line way of invoking this function.
+ */
+
+/* The pragma: when USE_INIT_PRAGMA is defined, name bl_startup_tests in a
+ * "#pragma init" directive. */
+#if defined(USE_INIT_PRAGMA)
+#pragma init(bl_startup_tests)
+#endif
+
+/* GCC Attribute: with a GNU-compatible compiler, and unless
+ * NSS_NO_INIT_SUPPORT is defined, INIT_FUNCTION expands to the constructor
+ * attribute; in every other configuration it expands to nothing. */
+#if defined(__GNUC__) && !defined(NSS_NO_INIT_SUPPORT)
+#define INIT_FUNCTION __attribute__((constructor))
+#else
+#define INIT_FUNCTION
+#endif
+
+static void INIT_FUNCTION bl_startup_tests(void); /* forward declaration; returns nothing, so callers cannot observe a test result through it */
+
+/* Windows pre-defined entry */
+#if defined(XP_WIN) && !defined(NSS_NO_INIT_SUPPORT)
+#include <windows.h>
+
+/*
+ * Windows pre-defined DLL entry point.
+ *
+ * Starts the power-up self tests when the DLL is attached to a process.
+ * Thread attach, thread detach and process detach need no work here.
+ * TRUE is returned for every notification reason.
+ */
+BOOL WINAPI
+DllMain(
+    HINSTANCE hinstDLL, // handle to DLL module
+    DWORD fdwReason,    // reason for calling function
+    LPVOID lpReserved)  // reserved
+{
+    // Initialize once for each new process; every other reason is a no-op.
+    if (fdwReason == DLL_PROCESS_ATTACH) {
+        bl_startup_tests();
+    }
+
+    // bl_startup_tests() is declared void, so the DLL load is never
+    // failed from here.
+    return TRUE;
+}
+#endif
+
+/* insert other platform dependent init entry points here, or modify
+ * the linker line */
+
+/* FIPS preprocessor directives for RC2-ECB and RC2-CBC. */
+#define FIPS_RC2_KEY_LENGTH 5 /* 40-bits */
+#define FIPS_RC2_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_RC2_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for RC4. */
+#define FIPS_RC4_KEY_LENGTH 5 /* 40-bits */
+#define FIPS_RC4_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_RC4_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for DES-ECB and DES-CBC. */
+#define FIPS_DES_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_DES_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for DES3-CBC and DES3-ECB. */
+#define FIPS_DES3_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_DES3_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for AES (used by the ECB, CBC, GCM and CMAC
+ * self tests).  All sizes are in bytes. */
+#define FIPS_AES_BLOCK_SIZE 16 /* 128-bits */
+#define FIPS_AES_ENCRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_DECRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_CMAC_LENGTH 16 /* 128-bits */
+#define FIPS_AES_128_KEY_SIZE 16 /* 128-bits */
+#define FIPS_AES_192_KEY_SIZE 24 /* 192-bits */
+#define FIPS_AES_256_KEY_SIZE 32 /* 256-bits */
+
+/* FIPS preprocessor directives for message digests: the number of bytes of
+ * known_hash_message that the hash, HMAC and TLS self tests feed in. */
+#define FIPS_KNOWN_HASH_MESSAGE_LENGTH 64 /* 512-bits */
+
+/* FIPS preprocessor directives for RSA. */
+#define FIPS_RSA_TYPE siBuffer
+#define FIPS_RSA_PUBLIC_EXPONENT_LENGTH 3 /* 24-bits */
+#define FIPS_RSA_PRIVATE_VERSION_LENGTH 1 /* 8-bits */
+#define FIPS_RSA_MESSAGE_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_COEFFICIENT_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIME0_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIME1_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_EXPONENT0_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_EXPONENT1_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIVATE_EXPONENT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_ENCRYPT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_DECRYPT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_SIGNATURE_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_MODULUS_LENGTH 256 /* 2048-bits */
+
+/* FIPS preprocessor directives for RNG. */
+#define FIPS_RNG_XKEY_LENGTH 32 /* 256-bits */
+/*
+ * Power-up known-answer self test for Triple-DES (NSS_DES_EDE3 and
+ * NSS_DES_EDE3_CBC).
+ *
+ * Runs four single-block checks in order: ECB encrypt, ECB decrypt,
+ * CBC encrypt and CBC decrypt.  Each check creates a fresh DES context,
+ * processes one 8-byte block, destroys the context, and then compares the
+ * call status, the reported output length and the output bytes against the
+ * fixed vectors defined below.
+ *
+ * Returns SECSuccess when all four checks pass.  Returns SECFailure with
+ * SEC_ERROR_NO_MEMORY if a context cannot be created, or SECFailure with
+ * SEC_ERROR_LIBRARY_FAILURE on the first status, length or data mismatch.
+ */
+static SECStatus
+freebl_fips_DES3_PowerUpSelfTest(void)
+{
+ /* DES3 Known Key: the literal holds 24 characters (192 bits) of key
+ * material, not 56 bits; the array additionally carries the string's
+ * terminating NUL. */
+ static const PRUint8 des3_known_key[] = { "ANSI Triple-DES Key Data" };
+
+ /* DES3-CBC Known Initialization Vector (64-bits). */
+ static const PRUint8 des3_cbc_known_initialization_vector[] = { "Security" };
+
+ /* DES3 Known Plaintext (64-bits). */
+ static const PRUint8 des3_ecb_known_plaintext[] = { "Netscape" };
+ static const PRUint8 des3_cbc_known_plaintext[] = { "Netscape" };
+
+ /* DES3 Known Ciphertext (64-bits). */
+ static const PRUint8 des3_ecb_known_ciphertext[] = {
+ 0x55, 0x8e, 0xad, 0x3c, 0xee, 0x49, 0x69, 0xbe
+ };
+ static const PRUint8 des3_cbc_known_ciphertext[] = {
+ 0x43, 0xdc, 0x6a, 0xc1, 0xaf, 0xa6, 0x32, 0xf5
+ };
+
+ /* DES3 variables. */
+ PRUint8 des3_computed_ciphertext[FIPS_DES3_ENCRYPT_LENGTH];
+ PRUint8 des3_computed_plaintext[FIPS_DES3_DECRYPT_LENGTH];
+ DESContext *des3_context;
+ unsigned int des3_bytes_encrypted;
+ unsigned int des3_bytes_decrypted;
+ SECStatus des3_status;
+
+ /*******************************************************/
+ /* DES3-ECB Single-Round Known Answer Encryption Test. */
+ /*******************************************************/
+
+ des3_context = DES_CreateContext(des3_known_key, NULL,
+ NSS_DES_EDE3, PR_TRUE);
+
+ if (des3_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext,
+ &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH,
+ des3_ecb_known_plaintext,
+ FIPS_DES3_DECRYPT_LENGTH); /* input length; the ENCRYPT and DECRYPT lengths are both 8 */
+
+ DES_DestroyContext(des3_context, PR_TRUE); /* context is released before the result is examined, so no failure path leaks it */
+
+ if ((des3_status != SECSuccess) ||
+ (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) ||
+ (PORT_Memcmp(des3_computed_ciphertext, des3_ecb_known_ciphertext,
+ FIPS_DES3_ENCRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /*******************************************************/
+ /* DES3-ECB Single-Round Known Answer Decryption Test. */
+ /*******************************************************/
+
+ des3_context = DES_CreateContext(des3_known_key, NULL,
+ NSS_DES_EDE3, PR_FALSE);
+
+ if (des3_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ des3_status = DES_Decrypt(des3_context, des3_computed_plaintext,
+ &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH,
+ des3_ecb_known_ciphertext,
+ FIPS_DES3_ENCRYPT_LENGTH);
+
+ DES_DestroyContext(des3_context, PR_TRUE);
+
+ if ((des3_status != SECSuccess) ||
+ (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) ||
+ (PORT_Memcmp(des3_computed_plaintext, des3_ecb_known_plaintext,
+ FIPS_DES3_DECRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /*******************************************************/
+ /* DES3-CBC Single-Round Known Answer Encryption Test. */
+ /*******************************************************/
+
+ des3_context = DES_CreateContext(des3_known_key,
+ des3_cbc_known_initialization_vector,
+ NSS_DES_EDE3_CBC, PR_TRUE);
+
+ if (des3_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext,
+ &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH,
+ des3_cbc_known_plaintext,
+ FIPS_DES3_DECRYPT_LENGTH);
+
+ DES_DestroyContext(des3_context, PR_TRUE);
+
+ if ((des3_status != SECSuccess) ||
+ (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) ||
+ (PORT_Memcmp(des3_computed_ciphertext, des3_cbc_known_ciphertext,
+ FIPS_DES3_ENCRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /*******************************************************/
+ /* DES3-CBC Single-Round Known Answer Decryption Test. */
+ /*******************************************************/
+
+ des3_context = DES_CreateContext(des3_known_key,
+ des3_cbc_known_initialization_vector,
+ NSS_DES_EDE3_CBC, PR_FALSE);
+
+ if (des3_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ des3_status = DES_Decrypt(des3_context, des3_computed_plaintext,
+ &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH,
+ des3_cbc_known_ciphertext,
+ FIPS_DES3_ENCRYPT_LENGTH);
+
+ DES_DestroyContext(des3_context, PR_TRUE);
+
+ if ((des3_status != SECSuccess) ||
+ (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) ||
+ (PORT_Memcmp(des3_computed_plaintext, des3_cbc_known_plaintext,
+ FIPS_DES3_DECRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ return (SECSuccess);
+}
+
+/*
+ * Power-up known-answer self test for AES with a 128-bit, 192-bit or
+ * 256-bit key.
+ *
+ * aes_key_size is the key length in bytes and must equal
+ * FIPS_AES_128_KEY_SIZE, FIPS_AES_192_KEY_SIZE or FIPS_AES_256_KEY_SIZE;
+ * any other value fails with SEC_ERROR_LIBRARY_FAILURE before any cipher
+ * call is made.
+ *
+ * Checks, in order: ECB encrypt, ECB decrypt, CBC encrypt, CBC decrypt,
+ * GCM encrypt, GCM decrypt and CMAC.  Each one compares the call status,
+ * the reported output length and the output bytes with the vector that
+ * matches the requested key size.
+ *
+ * Returns SECSuccess when every check passes.  Returns SECFailure with
+ * SEC_ERROR_NO_MEMORY if a cipher or CMAC context cannot be created, or
+ * SECFailure with SEC_ERROR_LIBRARY_FAILURE on the first failed check.
+ */
+static SECStatus
+freebl_fips_AES_PowerUpSelfTest(int aes_key_size)
+{
+ /* AES Known Key (up to 256-bits): 32 characters; aes_key_size is passed
+ * to the context constructors as the key length. */
+ static const PRUint8 aes_known_key[] = { "AES-128 RIJNDAELLEADNJIR 821-SEA" };
+
+ /* AES-CBC Known Initialization Vector (128-bits). */
+ static const PRUint8 aes_cbc_known_initialization_vector[] = { "SecurityytiruceS" };
+
+ /* AES Known Plaintext (128-bits). (blocksize is 128-bits) */
+ static const PRUint8 aes_known_plaintext[] = { "NetscapeepacsteN" };
+ /* AES-GCM Known Additional Authenticated Data. */
+ static const PRUint8 aes_gcm_known_aad[] = { "MozillaallizoM" };
+
+ /* AES Known Ciphertext (128-bit key). */
+ static const PRUint8 aes_ecb128_known_ciphertext[] = {
+ 0x3c, 0xa5, 0x96, 0xf3, 0x34, 0x6a, 0x96, 0xc1,
+ 0x03, 0x88, 0x16, 0x7b, 0x20, 0xbf, 0x35, 0x47
+ };
+
+ static const PRUint8 aes_cbc128_known_ciphertext[] = {
+ 0xcf, 0x15, 0x1d, 0x4f, 0x96, 0xe4, 0x4f, 0x63,
+ 0x15, 0x54, 0x14, 0x1d, 0x4e, 0xd8, 0xd5, 0xea
+ };
+
+ static const PRUint8 aes_gcm128_known_ciphertext[] = {
+ 0x63, 0xf4, 0x95, 0x28, 0xe6, 0x78, 0xee, 0x6e,
+ 0x4f, 0xe0, 0xfc, 0x8d, 0xd7, 0xa2, 0xb1, 0xff,
+ 0x0c, 0x97, 0x1b, 0x0a, 0xdd, 0x97, 0x75, 0xed,
+ 0x8b, 0xde, 0xbf, 0x16, 0x5e, 0x57, 0x6b, 0x4f
+ };
+
+ static const PRUint8 aes_cmac128_known_ciphertext[] = {
+ 0x54, 0x11, 0xe2, 0x57, 0xbd, 0x2a, 0xdf, 0x9d,
+ 0x1a, 0x89, 0x72, 0x80, 0x84, 0x4c, 0x7e, 0x93
+ };
+
+ /* AES Known Ciphertext (192-bit key). */
+ static const PRUint8 aes_ecb192_known_ciphertext[] = {
+ 0xa0, 0x18, 0x62, 0xed, 0x88, 0x19, 0xcb, 0x62,
+ 0x88, 0x1d, 0x4d, 0xfe, 0x84, 0x02, 0x89, 0x0e
+ };
+
+ static const PRUint8 aes_cbc192_known_ciphertext[] = {
+ 0x83, 0xf7, 0xa4, 0x76, 0xd1, 0x6f, 0x07, 0xbe,
+ 0x07, 0xbc, 0x43, 0x2f, 0x6d, 0xad, 0x29, 0xe1
+ };
+
+ static const PRUint8 aes_gcm192_known_ciphertext[] = {
+ 0xc1, 0x0b, 0x92, 0x1d, 0x68, 0x21, 0xf4, 0x25,
+ 0x41, 0x61, 0x20, 0x2d, 0x59, 0x7f, 0x53, 0xde,
+ 0x93, 0x39, 0xab, 0x09, 0x76, 0x41, 0x57, 0x2b,
+ 0x90, 0x2e, 0x44, 0xbb, 0x52, 0x03, 0xe9, 0x07
+ };
+
+ static const PRUint8 aes_cmac192_known_ciphertext[] = {
+ 0x0e, 0x07, 0x99, 0x1e, 0xf6, 0xee, 0xfa, 0x2c,
+ 0x1b, 0xfc, 0xce, 0x94, 0x92, 0x2d, 0xf1, 0xab
+ };
+
+ /* AES Known Ciphertext (256-bit key). */
+ static const PRUint8 aes_ecb256_known_ciphertext[] = {
+ 0xdb, 0xa6, 0x52, 0x01, 0x8a, 0x70, 0xae, 0x66,
+ 0x3a, 0x99, 0xd8, 0x95, 0x7f, 0xfb, 0x01, 0x67
+ };
+
+ static const PRUint8 aes_cbc256_known_ciphertext[] = {
+ 0x37, 0xea, 0x07, 0x06, 0x31, 0x1c, 0x59, 0x27,
+ 0xc5, 0xc5, 0x68, 0x71, 0x6e, 0x34, 0x40, 0x16
+ };
+
+ static const PRUint8 aes_gcm256_known_ciphertext[] = {
+ 0x5d, 0x9e, 0xd2, 0xa2, 0x74, 0x9c, 0xd9, 0x1c,
+ 0xd1, 0xc9, 0xee, 0x5d, 0xb6, 0xf2, 0xc9, 0xb6,
+ 0x79, 0x27, 0x53, 0x02, 0xa3, 0xdc, 0x22, 0xce,
+ 0xf4, 0xb0, 0xc1, 0x8c, 0x86, 0x51, 0xf5, 0xa1
+ };
+
+ static const PRUint8 aes_cmac256_known_ciphertext[] = {
+ 0xc1, 0x26, 0x69, 0x32, 0x51, 0x13, 0x65, 0xac,
+ 0x71, 0x23, 0xe4, 0xe7, 0xb9, 0x0c, 0x88, 0x9f
+
+ };
+
+ const PRUint8 *aes_ecb_known_ciphertext =
+ (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_ecb128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_ecb192_known_ciphertext : aes_ecb256_known_ciphertext;
+
+ const PRUint8 *aes_cbc_known_ciphertext =
+ (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cbc128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_cbc192_known_ciphertext : aes_cbc256_known_ciphertext;
+
+ const PRUint8 *aes_gcm_known_ciphertext =
+ (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_gcm128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_gcm192_known_ciphertext : aes_gcm256_known_ciphertext;
+
+ const PRUint8 *aes_cmac_known_ciphertext =
+ (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cmac128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_cmac192_known_ciphertext : aes_cmac256_known_ciphertext;
+
+ /* AES variables. */
+ PRUint8 aes_computed_ciphertext[FIPS_AES_ENCRYPT_LENGTH * 2];
+ PRUint8 aes_computed_plaintext[FIPS_AES_DECRYPT_LENGTH * 2];
+ AESContext *aes_context;
+ CMACContext *cmac_context;
+ unsigned int aes_bytes_encrypted;
+ unsigned int aes_bytes_decrypted;
+ CK_NSS_GCM_PARAMS gcmParams;
+ SECStatus aes_status;
+
+ /* Reject any key size other than 16, 24 or 32 bytes (128, 192 or 256
+ * bits).  The vector pointers selected above fall back to the 256-bit
+ * tables for such a size, but this early return keeps them from ever
+ * being used. */
+ if ((aes_key_size != FIPS_AES_128_KEY_SIZE) &&
+ (aes_key_size != FIPS_AES_192_KEY_SIZE) &&
+ (aes_key_size != FIPS_AES_256_KEY_SIZE)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-ECB Single-Round Known Answer Encryption Test: */
+ /******************************************************/
+
+ aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_TRUE,
+ aes_key_size, FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext,
+ &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH,
+ aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) ||
+ (PORT_Memcmp(aes_computed_ciphertext, aes_ecb_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-ECB Single-Round Known Answer Decryption Test: */
+ /******************************************************/
+
+ aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_FALSE,
+ aes_key_size, FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Decrypt(aes_context, aes_computed_plaintext,
+ &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH,
+ aes_ecb_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) ||
+ (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-CBC Single-Round Known Answer Encryption Test. */
+ /******************************************************/
+
+ aes_context = AES_CreateContext(aes_known_key,
+ aes_cbc_known_initialization_vector,
+ NSS_AES_CBC, PR_TRUE, aes_key_size,
+ FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext,
+ &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH,
+ aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) ||
+ (PORT_Memcmp(aes_computed_ciphertext, aes_cbc_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-CBC Single-Round Known Answer Decryption Test. */
+ /******************************************************/
+
+ aes_context = AES_CreateContext(aes_known_key,
+ aes_cbc_known_initialization_vector,
+ NSS_AES_CBC, PR_FALSE, aes_key_size,
+ FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Decrypt(aes_context, aes_computed_plaintext,
+ &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH,
+ aes_cbc_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) ||
+ (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-GCM Single-Round Known Answer Encryption Test. */
+ /******************************************************/
+
+ gcmParams.pIv = (PRUint8 *)aes_cbc_known_initialization_vector; /* the GCM test reuses the CBC IV bytes */
+ gcmParams.ulIvLen = FIPS_AES_BLOCK_SIZE;
+ gcmParams.pAAD = (PRUint8 *)aes_gcm_known_aad;
+ gcmParams.ulAADLen = sizeof(aes_gcm_known_aad); /* sizeof, so the string's terminating NUL is counted as AAD */
+ gcmParams.ulTagBits = FIPS_AES_BLOCK_SIZE * 8; /* 128-bit tag */
+ aes_context = AES_CreateContext(aes_known_key,
+ (PRUint8 *)&gcmParams,
+ NSS_AES_GCM, PR_TRUE, aes_key_size,
+ FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext,
+ &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH * 2,
+ aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH * 2) || /* 32 bytes expected for a 16-byte input; presumably ciphertext followed by the tag - confirm against AES_Encrypt */
+ (PORT_Memcmp(aes_computed_ciphertext, aes_gcm_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH * 2) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-GCM Single-Round Known Answer Decryption Test. */
+ /******************************************************/
+
+ aes_context = AES_CreateContext(aes_known_key,
+ (PRUint8 *)&gcmParams,
+ NSS_AES_GCM, PR_FALSE, aes_key_size,
+ FIPS_AES_BLOCK_SIZE);
+
+ if (aes_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = AES_Decrypt(aes_context, aes_computed_plaintext,
+ &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH * 2,
+ aes_gcm_known_ciphertext,
+ FIPS_AES_ENCRYPT_LENGTH * 2);
+
+ AES_DestroyContext(aes_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || /* 32 bytes in, 16 bytes of plaintext expected out */
+ (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /******************************************************/
+ /* AES-CMAC Known Answer Encryption Test. */
+ /******************************************************/
+ cmac_context = CMAC_Create(CMAC_AES, aes_known_key, aes_key_size);
+
+ if (cmac_context == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ return (SECFailure);
+ }
+
+ aes_status = CMAC_Begin(cmac_context);
+ if (aes_status != SECSuccess) {
+ CMAC_Destroy(cmac_context, PR_TRUE);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ aes_status = CMAC_Update(cmac_context, aes_known_plaintext,
+ FIPS_AES_DECRYPT_LENGTH);
+ if (aes_status != SECSuccess) {
+ CMAC_Destroy(cmac_context, PR_TRUE);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ aes_status = CMAC_Finish(cmac_context, aes_computed_ciphertext,
+ &aes_bytes_encrypted, FIPS_AES_CMAC_LENGTH); /* the MAC is written into the ciphertext scratch buffer */
+
+ CMAC_Destroy(cmac_context, PR_TRUE);
+
+ if ((aes_status != SECSuccess) ||
+ (aes_bytes_encrypted != FIPS_AES_CMAC_LENGTH) ||
+ (PORT_Memcmp(aes_computed_ciphertext, aes_cmac_known_ciphertext,
+ FIPS_AES_CMAC_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ return (SECSuccess);
+}
+
+/* Known Hash Message (512-bits). Used for all hashes (incl. SHA-N [N>1]).
+ * The text is 64 characters long; users pass FIPS_KNOWN_HASH_MESSAGE_LENGTH
+ * (64) as the length, so the string's terminating NUL is not processed.
+ * It also serves as the HMAC message and as the TLS PRF seed in the self
+ * tests below. */
+static const PRUint8 known_hash_message[] = {
+ "The test message for the MD2, MD5, and SHA-1 hashing algorithms."
+};
+
+/*
+ * Single-round HMAC helper for the SHA-X known-answer tests.
+ *
+ * Computes the HMAC of `message` (message_length bytes) keyed with
+ * `secret_key` (secret_key_length bytes), using the raw hash object looked
+ * up for `hashAlg`, and writes the result to `hmac_computed`.  The output
+ * limit passed to HMAC_Finish is that hash object's `length`, so the buffer
+ * must be at least that large.
+ *
+ * Returns SECFailure if no raw hash object is available for hashAlg or if
+ * the HMAC context cannot be created; otherwise returns the status reported
+ * by HMAC_Finish.
+ */
+static SECStatus
+freebl_fips_HMAC(unsigned char *hmac_computed,
+                 const PRUint8 *secret_key,
+                 unsigned int secret_key_length,
+                 const PRUint8 *message,
+                 unsigned int message_length,
+                 HASH_HashType hashAlg)
+{
+    unsigned int mac_len = 0;
+    SECStatus rv = SECFailure;
+    HMACContext *ctx = NULL;
+    SECHashObject *hash = (SECHashObject *)HASH_GetRawHashObject(hashAlg);
+
+    if (hash != NULL) {
+        /* PR_TRUE for in FIPS mode */
+        ctx = HMAC_Create(hash, secret_key, secret_key_length, PR_TRUE);
+    }
+
+    if (ctx != NULL) {
+        HMAC_Begin(ctx);
+        HMAC_Update(ctx, message, message_length);
+        rv = HMAC_Finish(ctx, hmac_computed, &mac_len, hash->length);
+
+        HMAC_Destroy(ctx, PR_TRUE);
+    }
+
+    return rv;
+}
+/*
+ * Power-up known-answer self test for HMAC with SHA-1, SHA-224, SHA-256,
+ * SHA-384 and SHA-512.
+ *
+ * For each hash, in that order, freebl_fips_HMAC() computes the HMAC of the
+ * first FIPS_KNOWN_HASH_MESSAGE_LENGTH bytes of known_hash_message under
+ * HMAC_known_secret_key, and the leading <HASH>_LENGTH bytes of the result
+ * are compared with the corresponding known vector.
+ *
+ * Returns SECSuccess when all five comparisons match.  Returns SECFailure
+ * with SEC_ERROR_LIBRARY_FAILURE on the first failing computation or
+ * mismatch.
+ */
+static SECStatus
+freebl_fips_HMAC_PowerUpSelfTest(void)
+{
+ static const PRUint8 HMAC_known_secret_key[] = {
+ "Firefox and ThunderBird are awesome!"
+ };
+
+ static const PRUint8 HMAC_known_secret_key_length = sizeof HMAC_known_secret_key; /* sizeof, so the string's terminating NUL is part of the key */
+
+ /* known SHA1 hmac (20 bytes) */
+ static const PRUint8 known_SHA1_hmac[] = {
+ 0xd5, 0x85, 0xf6, 0x5b, 0x39, 0xfa, 0xb9, 0x05,
+ 0x3b, 0x57, 0x1d, 0x61, 0xe7, 0xb8, 0x84, 0x1e,
+ 0x5d, 0x0e, 0x1e, 0x11
+ };
+
+ /* known SHA224 hmac (28 bytes) */
+ static const PRUint8 known_SHA224_hmac[] = {
+ 0x1c, 0xc3, 0x06, 0x8e, 0xce, 0x37, 0x68, 0xfb,
+ 0x1a, 0x82, 0x4a, 0xbe, 0x2b, 0x00, 0x51, 0xf8,
+ 0x9d, 0xb6, 0xe0, 0x90, 0x0d, 0x00, 0xc9, 0x64,
+ 0x9a, 0xb8, 0x98, 0x4e
+ };
+
+ /* known SHA256 hmac (32 bytes) */
+ static const PRUint8 known_SHA256_hmac[] = {
+ 0x05, 0x75, 0x9a, 0x9e, 0x70, 0x5e, 0xe7, 0x44,
+ 0xe2, 0x46, 0x4b, 0x92, 0x22, 0x14, 0x22, 0xe0,
+ 0x1b, 0x92, 0x8a, 0x0c, 0xfe, 0xf5, 0x49, 0xe9,
+ 0xa7, 0x1b, 0x56, 0x7d, 0x1d, 0x29, 0x40, 0x48
+ };
+
+ /* known SHA384 hmac (48 bytes) */
+ static const PRUint8 known_SHA384_hmac[] = {
+ 0xcd, 0x56, 0x14, 0xec, 0x05, 0x53, 0x06, 0x2b,
+ 0x7e, 0x9c, 0x8a, 0x18, 0x5e, 0xea, 0xf3, 0x91,
+ 0x33, 0xfb, 0x64, 0xf6, 0xe3, 0x9f, 0x89, 0x0b,
+ 0xaf, 0xbe, 0x83, 0x4d, 0x3f, 0x3c, 0x43, 0x4d,
+ 0x4a, 0x0c, 0x56, 0x98, 0xf8, 0xca, 0xb4, 0xaa,
+ 0x9a, 0xf4, 0x0a, 0xaf, 0x4f, 0x69, 0xca, 0x87
+ };
+
+ /* known SHA512 hmac (64 bytes) */
+ static const PRUint8 known_SHA512_hmac[] = {
+ 0xf6, 0x0e, 0x97, 0x12, 0x00, 0x67, 0x6e, 0xb9,
+ 0x0c, 0xb2, 0x63, 0xf0, 0x60, 0xac, 0x75, 0x62,
+ 0x70, 0x95, 0x2a, 0x52, 0x22, 0xee, 0xdd, 0xd2,
+ 0x71, 0xb1, 0xe8, 0x26, 0x33, 0xd3, 0x13, 0x27,
+ 0xcb, 0xff, 0x44, 0xef, 0x87, 0x97, 0x16, 0xfb,
+ 0xd3, 0x0b, 0x48, 0xbe, 0x12, 0x4e, 0xda, 0xb1,
+ 0x89, 0x90, 0xfb, 0x06, 0x0c, 0xbe, 0xe5, 0xc4,
+ 0xff, 0x24, 0x37, 0x3d, 0xc7, 0xe4, 0xe4, 0x37
+ };
+
+ SECStatus hmac_status;
+ PRUint8 hmac_computed[HASH_LENGTH_MAX]; /* one scratch buffer, reused for every digest size below */
+
+ /***************************************************/
+ /* HMAC SHA-1 Single-Round Known Answer HMAC Test. */
+ /***************************************************/
+
+ hmac_status = freebl_fips_HMAC(hmac_computed,
+ HMAC_known_secret_key,
+ HMAC_known_secret_key_length,
+ known_hash_message,
+ FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+ HASH_AlgSHA1);
+
+ if ((hmac_status != SECSuccess) ||
+ (PORT_Memcmp(hmac_computed, known_SHA1_hmac,
+ SHA1_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* HMAC SHA-224 Single-Round Known Answer Test. */
+ /***************************************************/
+
+ hmac_status = freebl_fips_HMAC(hmac_computed,
+ HMAC_known_secret_key,
+ HMAC_known_secret_key_length,
+ known_hash_message,
+ FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+ HASH_AlgSHA224);
+
+ if ((hmac_status != SECSuccess) ||
+ (PORT_Memcmp(hmac_computed, known_SHA224_hmac,
+ SHA224_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* HMAC SHA-256 Single-Round Known Answer Test. */
+ /***************************************************/
+
+ hmac_status = freebl_fips_HMAC(hmac_computed,
+ HMAC_known_secret_key,
+ HMAC_known_secret_key_length,
+ known_hash_message,
+ FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+ HASH_AlgSHA256);
+
+ if ((hmac_status != SECSuccess) ||
+ (PORT_Memcmp(hmac_computed, known_SHA256_hmac,
+ SHA256_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* HMAC SHA-384 Single-Round Known Answer Test. */
+ /***************************************************/
+
+ hmac_status = freebl_fips_HMAC(hmac_computed,
+ HMAC_known_secret_key,
+ HMAC_known_secret_key_length,
+ known_hash_message,
+ FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+ HASH_AlgSHA384);
+
+ if ((hmac_status != SECSuccess) ||
+ (PORT_Memcmp(hmac_computed, known_SHA384_hmac,
+ SHA384_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* HMAC SHA-512 Single-Round Known Answer Test. */
+ /***************************************************/
+
+ hmac_status = freebl_fips_HMAC(hmac_computed,
+ HMAC_known_secret_key,
+ HMAC_known_secret_key_length,
+ known_hash_message,
+ FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+ HASH_AlgSHA512);
+
+ if ((hmac_status != SECSuccess) ||
+ (PORT_Memcmp(hmac_computed, known_SHA512_hmac,
+ SHA512_LENGTH) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ return (SECSuccess);
+}
+/*
+ * Power-up known-answer self test for the TLS key-derivation functions.
+ *
+ * Always runs the TLS 1.0 PRF check (TLS_PRF) and the TLS 1.2 SHA-256
+ * check (TLS_P_hash).  The SHA-224, SHA-384 and SHA-512 TLS_P_hash checks
+ * are compiled in only when NSS_FULL_POST is defined.
+ *
+ * Every check uses TLS_known_secret_key as the secret, "fips test label"
+ * as the label and the first FIPS_KNOWN_HASH_MESSAGE_LENGTH bytes of
+ * known_hash_message as the seed.  It requires the call to succeed, the
+ * result length to equal HASH_LENGTH_MAX, and the first HASH_LENGTH_MAX
+ * output bytes to match the known vector.
+ *
+ * Returns SECSuccess when every compiled-in check passes.  Returns
+ * SECFailure with SEC_ERROR_LIBRARY_FAILURE on the first failure.
+ *
+ * NOTE(review): unlike the other self tests visible here this function is
+ * not declared static - presumably it is referenced from another file;
+ * confirm.
+ */
+SECStatus
+freebl_fips_TLS_PowerUpSelfTest(void)
+{
+ static const PRUint8 TLS_known_secret_key[] = {
+ "Firefox and ThunderBird are awesome!"
+ };
+
+ static const PRUint8 TLS_known_secret_key_length = sizeof TLS_known_secret_key; /* sizeof, so the string's terminating NUL is part of the secret */
+
+ /* known tls prf with sha1/md5 */
+ static const PRUint8 known_TLS_PRF[] = {
+ 0x87, 0x4c, 0xc0, 0xc5, 0x15, 0x14, 0x2b, 0xdc,
+ 0x73, 0x48, 0x9e, 0x88, 0x9d, 0xf5, 0x83, 0x2f,
+ 0x2d, 0x66, 0x1e, 0x78, 0x6c, 0x54, 0x78, 0x29,
+ 0xb9, 0xa4, 0x4c, 0x90, 0x5e, 0xa2, 0xe6, 0x5c,
+ 0xf1, 0x4f, 0xb5, 0x95, 0xa5, 0x54, 0xc0, 0x9f,
+ 0x84, 0x47, 0xb4, 0x4c, 0xda, 0xae, 0x19, 0x29,
+ 0x2b, 0x91, 0x2a, 0x81, 0x9d, 0x3a, 0x30, 0x40,
+ 0xc5, 0xdf, 0xbb, 0xfa, 0xd8, 0x4c, 0xbc, 0x18
+ };
+
+ /* known SHA256 tls mac */
+ static const PRUint8 known_TLS_SHA256[] = {
+ 0x66, 0xd6, 0x94, 0xd4, 0x0d, 0x32, 0x61, 0x38,
+ 0x26, 0xf6, 0x8b, 0xfe, 0x9e, 0xac, 0xa2, 0xf5,
+ 0x40, 0x52, 0x74, 0x3f, 0xbe, 0xb8, 0xca, 0x94,
+ 0xc3, 0x64, 0xd6, 0x02, 0xf5, 0x88, 0x98, 0x35,
+ 0x73, 0x9f, 0xce, 0xaa, 0x68, 0xe3, 0x7c, 0x93,
+ 0x30, 0x21, 0x45, 0xec, 0xe9, 0x8f, 0x1c, 0x7e,
+ 0xd1, 0x54, 0xf5, 0xbe, 0xff, 0xc8, 0xd7, 0x72,
+ 0x7f, 0x9c, 0x0c, 0x7f, 0xa9, 0xd3, 0x4a, 0xd2
+ };
+
+#ifdef NSS_FULL_POST /* vectors for the optional SHA-224/384/512 checks */
+ /* known SHA224 tls mac */
+ static const PRUint8 known_TLS_SHA224[] = {
+ 0xd8, 0x68, 0x15, 0xff, 0xa1, 0xa2, 0x5e, 0x16,
+ 0xce, 0xb1, 0xfd, 0xbd, 0xda, 0x39, 0xbc, 0xa7,
+ 0x27, 0x32, 0x78, 0x94, 0x66, 0xf0, 0x84, 0xcf,
+ 0x46, 0xc0, 0x22, 0x76, 0xdc, 0x6b, 0x2e, 0xed,
+ 0x1d, 0x2d, 0xd2, 0x93, 0xfd, 0xae, 0xca, 0xf9,
+ 0xe0, 0x4c, 0x17, 0x23, 0x22, 0x5a, 0x73, 0x93,
+ 0x20, 0x0a, 0xbd, 0xa0, 0x72, 0xf8, 0x8b, 0x74,
+ 0xfb, 0xf1, 0xab, 0xb7, 0xe0, 0xec, 0x34, 0xc9
+ };
+
+ /* known SHA384 tls mac */
+ static const PRUint8 known_TLS_SHA384[] = {
+ 0xb2, 0xac, 0x06, 0x10, 0xad, 0x50, 0xd5, 0xdc,
+ 0xdb, 0x01, 0xea, 0xa6, 0x2d, 0x8a, 0x34, 0xb6,
+ 0xeb, 0x84, 0xbc, 0x37, 0xc9, 0x9f, 0xa1, 0x9c,
+ 0xd5, 0xbd, 0x4e, 0x66, 0x16, 0x24, 0xe5, 0x3d,
+ 0xce, 0x74, 0xe0, 0x30, 0x41, 0x5c, 0xdb, 0xb7,
+ 0x52, 0x1d, 0x2d, 0x4d, 0x9b, 0xbe, 0x6b, 0x86,
+ 0xda, 0x8a, 0xca, 0x73, 0x39, 0xb4, 0xc7, 0x8f,
+ 0x03, 0xb1, 0xf9, 0x7e, 0x65, 0xae, 0x17, 0x10
+ };
+
+ /* known SHA512 tls mac */
+ static const PRUint8 known_TLS_SHA512[] = {
+ 0x73, 0x21, 0x4f, 0x40, 0x81, 0x1e, 0x90, 0xa1,
+ 0x16, 0x40, 0x1e, 0x33, 0x69, 0xc5, 0x00, 0xc7,
+ 0xc4, 0x81, 0xa3, 0x4f, 0xa7, 0xcc, 0x4a, 0xeb,
+ 0x1a, 0x66, 0x00, 0x82, 0x52, 0xe2, 0x2f, 0x69,
+ 0x14, 0x59, 0x05, 0x7c, 0xb0, 0x32, 0xce, 0xcc,
+ 0xb7, 0xc9, 0xab, 0x0f, 0x73, 0x00, 0xe5, 0x52,
+ 0x9d, 0x6b, 0x0e, 0x66, 0x4b, 0xb3, 0x0b, 0x0d,
+ 0x34, 0x53, 0x97, 0x13, 0x84, 0x18, 0x31, 0x7a
+ };
+#endif /* NSS_FULL_POST */
+
+ SECStatus status;
+ PRUint8 tls_computed[HASH_LENGTH_MAX];
+ SECItem secret;
+ SECItem seed;
+ SECItem result;
+ const char *tls_label = "fips test label";
+
+ secret.data = (unsigned char *)TLS_known_secret_key;
+ secret.len = TLS_known_secret_key_length;
+ seed.data = (unsigned char *)known_hash_message;
+ seed.len = FIPS_KNOWN_HASH_MESSAGE_LENGTH;
+ result.data = tls_computed;
+ result.len = sizeof(tls_computed); /* set once here; every check below requires the length to still equal HASH_LENGTH_MAX */
+
+ /***************************************************/
+ /* TLS 1.0 PRF Known Answer Test */
+ /***************************************************/
+
+ status = TLS_PRF(&secret, tls_label, &seed, &result, PR_TRUE);
+
+ if ((status != SECSuccess) ||
+ (result.len != HASH_LENGTH_MAX) ||
+ (PORT_Memcmp(tls_computed, known_TLS_PRF,
+ HASH_LENGTH_MAX) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* TLS 1.2 SHA-256 Known Answer Test. */
+ /***************************************************/
+
+ status = TLS_P_hash(HASH_AlgSHA256, &secret, tls_label,
+ &seed, &result, PR_TRUE);
+
+ if ((status != SECSuccess) ||
+ (result.len != HASH_LENGTH_MAX) ||
+ (PORT_Memcmp(tls_computed, known_TLS_SHA256,
+ HASH_LENGTH_MAX) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+#ifdef NSS_FULL_POST /* optional extended checks */
+ /***************************************************/
+ /* TLS 1.2 SHA-224 Known Answer Test. */
+ /***************************************************/
+
+ status = TLS_P_hash(HASH_AlgSHA224, &secret, tls_label,
+ &seed, &result, PR_TRUE);
+
+ if ((status != SECSuccess) ||
+ (result.len != HASH_LENGTH_MAX) ||
+ (PORT_Memcmp(tls_computed, known_TLS_SHA224,
+ HASH_LENGTH_MAX) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* TLS 1.2 SHA-384 Known Answer Test. */
+ /***************************************************/
+
+ status = TLS_P_hash(HASH_AlgSHA384, &secret, tls_label,
+ &seed, &result, PR_TRUE);
+
+ if ((status != SECSuccess) ||
+ (result.len != HASH_LENGTH_MAX) ||
+ (PORT_Memcmp(tls_computed, known_TLS_SHA384,
+ HASH_LENGTH_MAX) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+
+ /***************************************************/
+ /* TLS 1.2 SHA-512 Known Answer Test. */
+ /***************************************************/
+
+ status = TLS_P_hash(HASH_AlgSHA512, &secret, tls_label,
+ &seed, &result, PR_TRUE);
+
+ if ((status != SECSuccess) ||
+ (result.len != HASH_LENGTH_MAX) ||
+ (PORT_Memcmp(tls_computed, known_TLS_SHA512,
+ HASH_LENGTH_MAX) != 0)) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+ }
+#endif /* NSS_FULL_POST */
+
+ return (SECSuccess);
+}
+
+/*
+ * Power-up known-answer test for the SHA family.
+ *
+ * Hashes known_hash_message once with each of SHA-1, SHA-224, SHA-256,
+ * SHA-384 and SHA-512 and compares every digest with its expected value.
+ *
+ * Returns SECSuccess when all five digests match.  On the first failed hash
+ * call or digest mismatch it sets SEC_ERROR_LIBRARY_FAILURE and returns
+ * SECFailure.
+ */
+static SECStatus
+freebl_fips_SHA_PowerUpSelfTest(void)
+{
+    /* Expected SHA-1 digest of the known message (160 bits). */
+    static const PRUint8 sha1_known_digest[] = {
+        0x0a, 0x6d, 0x07, 0xba, 0x1e, 0xbd, 0x8a, 0x1b,
+        0x72, 0xf6, 0xc7, 0x22, 0xf1, 0x27, 0x9f, 0xf0,
+        0xe0, 0x68, 0x47, 0x7a
+    };
+
+    /* Expected SHA-224 digest of the known message (224 bits). */
+    static const PRUint8 sha224_known_digest[] = {
+        0x89, 0x5e, 0x7f, 0xfd, 0x0e, 0xd8, 0x35, 0x6f,
+        0x64, 0x6d, 0xf2, 0xde, 0x5e, 0xed, 0xa6, 0x7f,
+        0x29, 0xd1, 0x12, 0x73, 0x42, 0x84, 0x95, 0x4f,
+        0x8e, 0x08, 0xe5, 0xcb
+    };
+
+    /* Expected SHA-256 digest of the known message (256 bits). */
+    static const PRUint8 sha256_known_digest[] = {
+        0x38, 0xa9, 0xc1, 0xf0, 0x35, 0xf6, 0x5d, 0x61,
+        0x11, 0xd4, 0x0b, 0xdc, 0xce, 0x35, 0x14, 0x8d,
+        0xf2, 0xdd, 0xaf, 0xaf, 0xcf, 0xb7, 0x87, 0xe9,
+        0x96, 0xa5, 0xd2, 0x83, 0x62, 0x46, 0x56, 0x79
+    };
+
+    /* Expected SHA-384 digest of the known message (384 bits). */
+    static const PRUint8 sha384_known_digest[] = {
+        0x11, 0xfe, 0x1c, 0x00, 0x89, 0x48, 0xde, 0xb3,
+        0x99, 0xee, 0x1c, 0x18, 0xb4, 0x10, 0xfb, 0xfe,
+        0xe3, 0xa8, 0x2c, 0xf3, 0x04, 0xb0, 0x2f, 0xc8,
+        0xa3, 0xc4, 0x5e, 0xea, 0x7e, 0x60, 0x48, 0x7b,
+        0xce, 0x2c, 0x62, 0xf7, 0xbc, 0xa7, 0xe8, 0xa3,
+        0xcf, 0x24, 0xce, 0x9c, 0xe2, 0x8b, 0x09, 0x72
+    };
+
+    /* Expected SHA-512 digest of the known message (512 bits). */
+    static const PRUint8 sha512_known_digest[] = {
+        0xc8, 0xb3, 0x27, 0xf9, 0x0b, 0x24, 0xc8, 0xbf,
+        0x4c, 0xba, 0x33, 0x54, 0xf2, 0x31, 0xbf, 0xdb,
+        0xab, 0xfd, 0xb3, 0x15, 0xd7, 0xfa, 0x48, 0x99,
+        0x07, 0x60, 0x0f, 0x57, 0x41, 0x1a, 0xdd, 0x28,
+        0x12, 0x55, 0x25, 0xac, 0xba, 0x3a, 0x99, 0x12,
+        0x2c, 0x7a, 0x8f, 0x75, 0x3a, 0xe1, 0x06, 0x6f,
+        0x30, 0x31, 0xc9, 0x33, 0xc6, 0x1b, 0x90, 0x1a,
+        0x6c, 0x98, 0x9a, 0x87, 0xd0, 0xb2, 0xf8, 0x07
+    };
+
+    /* Scratch buffer reused by every algorithm. */
+    PRUint8 digest[HASH_LENGTH_MAX];
+
+    /* SHA-1 single-round known answer test. */
+    if (SHA1_HashBuf(digest, known_hash_message,
+                     FIPS_KNOWN_HASH_MESSAGE_LENGTH) != SECSuccess ||
+        PORT_Memcmp(digest, sha1_known_digest, SHA1_LENGTH) != 0) {
+        goto loser;
+    }
+
+    /* SHA-224 single-round known answer test. */
+    if (SHA224_HashBuf(digest, known_hash_message,
+                       FIPS_KNOWN_HASH_MESSAGE_LENGTH) != SECSuccess ||
+        PORT_Memcmp(digest, sha224_known_digest, SHA224_LENGTH) != 0) {
+        goto loser;
+    }
+
+    /* SHA-256 single-round known answer test. */
+    if (SHA256_HashBuf(digest, known_hash_message,
+                       FIPS_KNOWN_HASH_MESSAGE_LENGTH) != SECSuccess ||
+        PORT_Memcmp(digest, sha256_known_digest, SHA256_LENGTH) != 0) {
+        goto loser;
+    }
+
+    /* SHA-384 single-round known answer test. */
+    if (SHA384_HashBuf(digest, known_hash_message,
+                       FIPS_KNOWN_HASH_MESSAGE_LENGTH) != SECSuccess ||
+        PORT_Memcmp(digest, sha384_known_digest, SHA384_LENGTH) != 0) {
+        goto loser;
+    }
+
+    /* SHA-512 single-round known answer test. */
+    if (SHA512_HashBuf(digest, known_hash_message,
+                       FIPS_KNOWN_HASH_MESSAGE_LENGTH) != SECSuccess ||
+        PORT_Memcmp(digest, sha512_known_digest, SHA512_LENGTH) != 0) {
+        goto loser;
+    }
+
+    return SECSuccess;
+
+loser:
+    /* Single failure exit shared by all five algorithms. */
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * Power-up known-answer test for the raw RSA primitives.
+ *
+ * Runs RSA_PublicKeyOp on a fixed plaintext and compares the output with a
+ * fixed ciphertext, then runs RSA_PrivateKeyOp on that fixed ciphertext and
+ * compares the output with the original plaintext.
+ *
+ * Returns SECSuccess when both comparisons match; otherwise sets
+ * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_RSA_PowerUpSelfTest(void)
+{
+    /* RSA known modulus shared by the public and private key (2048 bits). */
+    static const PRUint8 rsa_modulus[FIPS_RSA_MODULUS_LENGTH] = {
+        0xb8, 0x15, 0x00, 0x33, 0xda, 0x0c, 0x9d, 0xa5,
+        0x14, 0x8c, 0xde, 0x1f, 0x23, 0x07, 0x54, 0xe2,
+        0xc6, 0xb9, 0x51, 0x04, 0xc9, 0x65, 0x24, 0x6e,
+        0x0a, 0x46, 0x34, 0x5c, 0x37, 0x86, 0x6b, 0x88,
+        0x24, 0x27, 0xac, 0xa5, 0x02, 0x79, 0xfb, 0xed,
+        0x75, 0xc5, 0x3f, 0x6e, 0xdf, 0x05, 0x5f, 0x0f,
+        0x20, 0x70, 0xa0, 0x5b, 0x85, 0xdb, 0xac, 0xb9,
+        0x5f, 0x02, 0xc2, 0x64, 0x1e, 0x84, 0x5b, 0x3e,
+        0xad, 0xbf, 0xf6, 0x2e, 0x51, 0xd6, 0xad, 0xf7,
+        0xa7, 0x86, 0x75, 0x86, 0xec, 0xa7, 0xe1, 0xf7,
+        0x08, 0xbf, 0xdc, 0x56, 0xb1, 0x3b, 0xca, 0xd8,
+        0xfc, 0x51, 0xdf, 0x9a, 0x2a, 0x37, 0x06, 0xf2,
+        0xd1, 0x6b, 0x9a, 0x5e, 0x2a, 0xe5, 0x20, 0x57,
+        0x35, 0x9f, 0x1f, 0x98, 0xcf, 0x40, 0xc7, 0xd6,
+        0x98, 0xdb, 0xde, 0xf5, 0x64, 0x53, 0xf7, 0x9d,
+        0x45, 0xf3, 0xd6, 0x78, 0xb9, 0xe3, 0xa3, 0x20,
+        0xcd, 0x79, 0x43, 0x35, 0xef, 0xd7, 0xfb, 0xb9,
+        0x80, 0x88, 0x27, 0x2f, 0x63, 0xa8, 0x67, 0x3d,
+        0x4a, 0xfa, 0x06, 0xc6, 0xd2, 0x86, 0x0b, 0xa7,
+        0x28, 0xfd, 0xe0, 0x1e, 0x93, 0x4b, 0x17, 0x2e,
+        0xb0, 0x11, 0x6f, 0xc6, 0x2b, 0x98, 0x0f, 0x15,
+        0xe3, 0x87, 0x16, 0x7a, 0x7c, 0x67, 0x3e, 0x12,
+        0x2b, 0xf8, 0xbe, 0x48, 0xc1, 0x97, 0x47, 0xf4,
+        0x1f, 0x81, 0x80, 0x12, 0x28, 0xe4, 0x7b, 0x1e,
+        0xb7, 0x00, 0xa4, 0xde, 0xaa, 0xfb, 0x0f, 0x77,
+        0x84, 0xa3, 0xd6, 0xb2, 0x03, 0x48, 0xdd, 0x53,
+        0x8b, 0x46, 0x41, 0x28, 0x52, 0xc4, 0x53, 0xf0,
+        0x1c, 0x95, 0xd9, 0x36, 0xe0, 0x0f, 0x26, 0x46,
+        0x9c, 0x61, 0x0e, 0x80, 0xca, 0x86, 0xaf, 0x39,
+        0x95, 0xe5, 0x60, 0x43, 0x61, 0x3e, 0x2b, 0xb4,
+        0xe8, 0xbd, 0x8d, 0x77, 0x62, 0xf5, 0x32, 0x43,
+        0x2f, 0x4b, 0x65, 0x82, 0x14, 0xdd, 0x29, 0x5b
+    };
+
+    /* RSA known public exponent (24 bits). */
+    static const PRUint8 rsa_public_exponent[FIPS_RSA_PUBLIC_EXPONENT_LENGTH] = { 0x01, 0x00, 0x01 };
+
+    /*
+     * RSA known private key values:
+     *   version            8 bits
+     *   private exponent   2048 bits
+     *   prime0, prime1     1024 bits each
+     *   exponent0/1        1024 bits each
+     *   coefficient        1024 bits
+     */
+    static const PRUint8 rsa_version[] = { 0x00 };
+
+    static const PRUint8 rsa_private_exponent[FIPS_RSA_PRIVATE_EXPONENT_LENGTH] = {
+        0x29, 0x08, 0x05, 0x53, 0x89, 0x76, 0xe6, 0x6c,
+        0xb5, 0x77, 0xf0, 0xca, 0xdf, 0xf3, 0xf2, 0x67,
+        0xda, 0x03, 0xd4, 0x9b, 0x4c, 0x88, 0xce, 0xe5,
+        0xf8, 0x44, 0x4d, 0xc7, 0x80, 0x58, 0xe5, 0xff,
+        0x22, 0x8f, 0xf5, 0x5b, 0x92, 0x81, 0xbe, 0x35,
+        0xdf, 0xda, 0x67, 0x99, 0x3e, 0xfc, 0xe3, 0x83,
+        0x6b, 0xa7, 0xaf, 0x16, 0xb7, 0x6f, 0x8f, 0xc0,
+        0x81, 0xfd, 0x0b, 0x77, 0x65, 0x95, 0xfb, 0x00,
+        0xad, 0x99, 0xec, 0x35, 0xc6, 0xe8, 0x23, 0x3e,
+        0xe0, 0x88, 0x88, 0x09, 0xdb, 0x16, 0x50, 0xb7,
+        0xcf, 0xab, 0x74, 0x61, 0x9e, 0x7f, 0xc5, 0x67,
+        0x38, 0x56, 0xc7, 0x90, 0x85, 0x78, 0x5e, 0x84,
+        0x21, 0x49, 0xea, 0xce, 0xb2, 0xa0, 0xff, 0xe4,
+        0x70, 0x7f, 0x57, 0x7b, 0xa8, 0x36, 0xb8, 0x54,
+        0x8d, 0x1d, 0xf5, 0x44, 0x9d, 0x68, 0x59, 0xf9,
+        0x24, 0x6e, 0x85, 0x8f, 0xc3, 0x5f, 0x8a, 0x2c,
+        0x94, 0xb7, 0xbc, 0x0e, 0xa5, 0xef, 0x93, 0x06,
+        0x38, 0xcd, 0x07, 0x0c, 0xae, 0xb8, 0x44, 0x1a,
+        0xd8, 0xe7, 0xf5, 0x9a, 0x1e, 0x9c, 0x18, 0xc7,
+        0x6a, 0xc2, 0x7f, 0x28, 0x01, 0x4f, 0xb4, 0xb8,
+        0x90, 0x97, 0x5a, 0x43, 0x38, 0xad, 0xe8, 0x95,
+        0x68, 0x83, 0x1a, 0x1b, 0x10, 0x07, 0xe6, 0x02,
+        0x52, 0x1f, 0xbf, 0x76, 0x6b, 0x46, 0xd6, 0xfb,
+        0xc3, 0xbe, 0xb5, 0xac, 0x52, 0x53, 0x01, 0x1c,
+        0xf3, 0xc5, 0xeb, 0x64, 0xf2, 0x1e, 0xc4, 0x38,
+        0xe9, 0xaa, 0xd9, 0xc3, 0x72, 0x51, 0xa5, 0x44,
+        0x58, 0x69, 0x0b, 0x1b, 0x98, 0x7f, 0xf2, 0x23,
+        0xff, 0xeb, 0xf0, 0x75, 0x24, 0xcf, 0xc5, 0x1e,
+        0xb8, 0x6a, 0xc5, 0x2f, 0x4f, 0x23, 0x50, 0x7d,
+        0x15, 0x9d, 0x19, 0x7a, 0x0b, 0x82, 0xe0, 0x21,
+        0x5b, 0x5f, 0x9d, 0x50, 0x2b, 0x83, 0xe4, 0x48,
+        0xcc, 0x39, 0xe5, 0xfb, 0x13, 0x7b, 0x6f, 0x81
+    };
+
+    static const PRUint8 rsa_prime0[FIPS_RSA_PRIME0_LENGTH] = {
+        0xe4, 0xbf, 0x21, 0x62, 0x9b, 0xa9, 0x77, 0x40,
+        0x8d, 0x2a, 0xce, 0xa1, 0x67, 0x5a, 0x4c, 0x96,
+        0x45, 0x98, 0x67, 0xbd, 0x75, 0x22, 0x33, 0x6f,
+        0xe6, 0xcb, 0x77, 0xde, 0x9e, 0x97, 0x7d, 0x96,
+        0x8c, 0x5e, 0x5d, 0x34, 0xfb, 0x27, 0xfc, 0x6d,
+        0x74, 0xdb, 0x9d, 0x2e, 0x6d, 0xf6, 0xea, 0xfc,
+        0xce, 0x9e, 0xda, 0xa7, 0x25, 0xa2, 0xf4, 0x58,
+        0x6d, 0x0a, 0x3f, 0x01, 0xc2, 0xb4, 0xab, 0x38,
+        0xc1, 0x14, 0x85, 0xb6, 0xfa, 0x94, 0xc3, 0x85,
+        0xf9, 0x3c, 0x2e, 0x96, 0x56, 0x01, 0xe7, 0xd6,
+        0x14, 0x71, 0x4f, 0xfb, 0x4c, 0x85, 0x52, 0xc4,
+        0x61, 0x1e, 0xa5, 0x1e, 0x96, 0x13, 0x0d, 0x8f,
+        0x66, 0xae, 0xa0, 0xcd, 0x7d, 0x25, 0x66, 0x19,
+        0x15, 0xc2, 0xcf, 0xc3, 0x12, 0x3c, 0xe8, 0xa4,
+        0x52, 0x4c, 0xcb, 0x28, 0x3c, 0xc4, 0xbf, 0x95,
+        0x33, 0xe3, 0x81, 0xea, 0x0c, 0x6c, 0xa2, 0x05
+    };
+    static const PRUint8 rsa_prime1[FIPS_RSA_PRIME1_LENGTH] = {
+        0xce, 0x03, 0x94, 0xf4, 0xa9, 0x2c, 0x1e, 0x06,
+        0xe7, 0x40, 0x30, 0x01, 0xf7, 0xbb, 0x68, 0x8c,
+        0x27, 0xd2, 0x15, 0xe3, 0x28, 0x49, 0x5b, 0xa8,
+        0xc1, 0x9a, 0x42, 0x7e, 0x31, 0xf9, 0x08, 0x34,
+        0x81, 0xa2, 0x0f, 0x04, 0x61, 0x34, 0xe3, 0x36,
+        0x92, 0xb1, 0x09, 0x2b, 0xe9, 0xef, 0x84, 0x88,
+        0xbe, 0x9c, 0x98, 0x60, 0xa6, 0x60, 0x84, 0xe9,
+        0x75, 0x6f, 0xcc, 0x81, 0xd1, 0x96, 0xef, 0xdd,
+        0x2e, 0xca, 0xc4, 0xf5, 0x42, 0xfb, 0x13, 0x2b,
+        0x57, 0xbf, 0x14, 0x5e, 0xc2, 0x7f, 0x77, 0x35,
+        0x29, 0xc4, 0xe5, 0xe0, 0xf9, 0x6d, 0x15, 0x4a,
+        0x42, 0x56, 0x1c, 0x3e, 0x0c, 0xc5, 0xce, 0x70,
+        0x08, 0x63, 0x1e, 0x73, 0xdb, 0x7e, 0x74, 0x05,
+        0x32, 0x01, 0xc6, 0x36, 0x32, 0x75, 0x6b, 0xed,
+        0x9d, 0xfe, 0x7c, 0x7e, 0xa9, 0x57, 0xb4, 0xe9,
+        0x22, 0xe4, 0xe7, 0xfe, 0x36, 0x07, 0x9b, 0xdf
+    };
+    static const PRUint8 rsa_exponent0[FIPS_RSA_EXPONENT0_LENGTH] = {
+        0x04, 0x5a, 0x3a, 0xa9, 0x64, 0xaa, 0xd9, 0xd1,
+        0x09, 0x9e, 0x99, 0xe5, 0xea, 0x50, 0x86, 0x8a,
+        0x89, 0x72, 0x77, 0xee, 0xdb, 0xee, 0xb5, 0xa9,
+        0xd8, 0x6b, 0x60, 0xb1, 0x84, 0xb4, 0xff, 0x37,
+        0xc1, 0x1d, 0xfe, 0x8a, 0x06, 0x89, 0x61, 0x3d,
+        0x37, 0xef, 0x01, 0xd3, 0xa3, 0x56, 0x02, 0x6c,
+        0xa3, 0x05, 0xd4, 0xc5, 0x3f, 0x6b, 0x15, 0x59,
+        0x25, 0x61, 0xff, 0x86, 0xea, 0x0c, 0x84, 0x01,
+        0x85, 0x72, 0xfd, 0x84, 0x58, 0xca, 0x41, 0xda,
+        0x27, 0xbe, 0xe4, 0x68, 0x09, 0xe4, 0xe9, 0x63,
+        0x62, 0x6a, 0x31, 0x8a, 0x67, 0x8f, 0x55, 0xde,
+        0xd4, 0xb6, 0x3f, 0x90, 0x10, 0x6c, 0xf6, 0x62,
+        0x17, 0x23, 0x15, 0x7e, 0x33, 0x76, 0x65, 0xb5,
+        0xee, 0x7b, 0x11, 0x76, 0xf5, 0xbe, 0xe0, 0xf2,
+        0x57, 0x7a, 0x8c, 0x97, 0x0c, 0x68, 0xf5, 0xf8,
+        0x41, 0xcf, 0x7f, 0x66, 0x53, 0xac, 0x31, 0x7d
+    };
+    static const PRUint8 rsa_exponent1[FIPS_RSA_EXPONENT1_LENGTH] = {
+        0x93, 0x54, 0x14, 0x6e, 0x73, 0x9d, 0x4d, 0x4b,
+        0xfa, 0x8c, 0xf8, 0xc8, 0x2f, 0x76, 0x22, 0xea,
+        0x38, 0x80, 0x11, 0x8f, 0x05, 0xfc, 0x90, 0x44,
+        0x3b, 0x50, 0x2a, 0x45, 0x3d, 0x4f, 0xaf, 0x02,
+        0x7d, 0xc2, 0x7b, 0xa2, 0xd2, 0x31, 0x94, 0x5c,
+        0x2e, 0xc3, 0xd4, 0x9f, 0x47, 0x09, 0x37, 0x6a,
+        0xe3, 0x85, 0xf1, 0xa3, 0x0c, 0xd8, 0xf1, 0xb4,
+        0x53, 0x7b, 0xc4, 0x71, 0x02, 0x86, 0x42, 0xbb,
+        0x96, 0xff, 0x03, 0xa3, 0xb2, 0x67, 0x03, 0xea,
+        0x77, 0x31, 0xfb, 0x4b, 0x59, 0x24, 0xf7, 0x07,
+        0x59, 0xfb, 0xa9, 0xba, 0x1e, 0x26, 0x58, 0x97,
+        0x66, 0xa1, 0x56, 0x49, 0x39, 0xb1, 0x2c, 0x55,
+        0x0a, 0x6a, 0x78, 0x18, 0xba, 0xdb, 0xcf, 0xf4,
+        0xf7, 0x32, 0x35, 0xa2, 0x04, 0xab, 0xdc, 0xa7,
+        0x6d, 0xd9, 0xd5, 0x06, 0x6f, 0xec, 0x7d, 0x40,
+        0x4c, 0xe8, 0x0e, 0xd0, 0xc9, 0xaa, 0xdf, 0x59
+    };
+    static const PRUint8 rsa_coefficient[FIPS_RSA_COEFFICIENT_LENGTH] = {
+        0x17, 0xd7, 0xf5, 0x0a, 0xf0, 0x68, 0x97, 0x96,
+        0xc4, 0x29, 0x18, 0x77, 0x9a, 0x1f, 0xe3, 0xf3,
+        0x12, 0x13, 0x0f, 0x7e, 0x7b, 0xb9, 0xc1, 0x91,
+        0xf9, 0xc7, 0x08, 0x56, 0x5c, 0xa4, 0xbc, 0x83,
+        0x71, 0xf9, 0x78, 0xd9, 0x2b, 0xec, 0xfe, 0x6b,
+        0xdc, 0x2f, 0x63, 0xc9, 0xcd, 0x50, 0x14, 0x5b,
+        0xd3, 0x6e, 0x85, 0x4d, 0x0c, 0xa2, 0x0b, 0xa0,
+        0x09, 0xb6, 0xca, 0x34, 0x9c, 0xc2, 0xc1, 0x4a,
+        0xb0, 0xbc, 0x45, 0x93, 0xa5, 0x7e, 0x99, 0xb5,
+        0xbd, 0xe4, 0x69, 0x29, 0x08, 0x28, 0xd2, 0xcd,
+        0xab, 0x24, 0x78, 0x48, 0x41, 0x26, 0x0b, 0x37,
+        0xa3, 0x43, 0xd1, 0x95, 0x1a, 0xd6, 0xee, 0x22,
+        0x1c, 0x00, 0x0b, 0xc2, 0xb7, 0xa4, 0xa3, 0x21,
+        0xa9, 0xcd, 0xe4, 0x69, 0xd3, 0x45, 0x02, 0xb1,
+        0xb7, 0x3a, 0xbf, 0x51, 0x35, 0x1b, 0x78, 0xc2,
+        0xcf, 0x0c, 0x0d, 0x60, 0x09, 0xa9, 0x44, 0x02
+    };
+
+    /*
+     * RSA known plaintext message.
+     * NOTE(review): the historical comment here said "1024-bits"; the array
+     * is sized by FIPS_RSA_MESSAGE_LENGTH and is fed to a 2048-bit key
+     * operation, so the intended size is presumably 2048 bits, i.e. eight
+     * 32-byte fragments - TODO confirm that the spacing inside the fragments
+     * below has not been collapsed.
+     */
+    static const PRUint8 rsa_known_plaintext_msg[FIPS_RSA_MESSAGE_LENGTH] = {
+        "Known plaintext message utilized"
+        "for RSA Encryption & Decryption"
+        "blocks SHA256, SHA384 and "
+        "SHA512 RSA Signature KAT tests. "
+        "Known plaintext message utilized"
+        "for RSA Encryption & Decryption"
+        "blocks SHA256, SHA384 and "
+        "SHA512 RSA Signature KAT tests."
+    };
+
+    /* RSA known ciphertext (2048 bits). */
+    static const PRUint8 rsa_known_ciphertext[] = {
+        0x04, 0x12, 0x46, 0xe3, 0x6a, 0xee, 0xde, 0xdd,
+        0x49, 0xa1, 0xd9, 0x83, 0xf7, 0x35, 0xf9, 0x70,
+        0x88, 0x03, 0x2d, 0x01, 0x8b, 0xd1, 0xbf, 0xdb,
+        0xe5, 0x1c, 0x85, 0xbe, 0xb5, 0x0b, 0x48, 0x45,
+        0x7a, 0xf0, 0xa0, 0xe3, 0xa2, 0xbb, 0x4b, 0xf6,
+        0x27, 0xd0, 0x1b, 0x12, 0xe3, 0x77, 0x52, 0x34,
+        0x9e, 0x8e, 0x03, 0xd2, 0xf8, 0x79, 0x6e, 0x39,
+        0x79, 0x53, 0x3c, 0x44, 0x14, 0x94, 0xbb, 0x8d,
+        0xaa, 0x14, 0x44, 0xa0, 0x7b, 0xa5, 0x8c, 0x93,
+        0x5f, 0x99, 0xa4, 0xa3, 0x6e, 0x7a, 0x38, 0x40,
+        0x78, 0xfa, 0x36, 0x91, 0x5e, 0x9a, 0x9c, 0xba,
+        0x1e, 0xd4, 0xf9, 0xda, 0x4b, 0x0f, 0xa8, 0xa3,
+        0x1c, 0xf3, 0x3a, 0xd1, 0xa5, 0xb4, 0x51, 0x16,
+        0xed, 0x4b, 0xcf, 0xec, 0x93, 0x7b, 0x90, 0x21,
+        0xbc, 0x3a, 0xf4, 0x0b, 0xd1, 0x3a, 0x2b, 0xba,
+        0xa6, 0x7d, 0x5b, 0x53, 0xd8, 0x64, 0xf9, 0x29,
+        0x7b, 0x7f, 0x77, 0x3e, 0x51, 0x4c, 0x9a, 0x94,
+        0xd2, 0x4b, 0x4a, 0x8d, 0x61, 0x74, 0x97, 0xae,
+        0x53, 0x6a, 0xf4, 0x90, 0xc2, 0x2c, 0x49, 0xe2,
+        0xfa, 0xeb, 0x91, 0xc5, 0xe5, 0x83, 0x13, 0xc9,
+        0x44, 0x4b, 0x95, 0x2c, 0x57, 0x70, 0x15, 0x5c,
+        0x64, 0x8d, 0x1a, 0xfd, 0x2a, 0xc7, 0xb2, 0x9c,
+        0x5c, 0x99, 0xd3, 0x4a, 0xfd, 0xdd, 0xf6, 0x82,
+        0x87, 0x8c, 0x5a, 0xc4, 0xa8, 0x0d, 0x2a, 0xef,
+        0xc3, 0xa2, 0x7e, 0x8e, 0x67, 0x9f, 0x6f, 0x63,
+        0xdb, 0xbb, 0x1d, 0x31, 0xc4, 0xbb, 0xbc, 0x13,
+        0x3f, 0x54, 0xc6, 0xf6, 0xc5, 0x28, 0x32, 0xab,
+        0x96, 0x42, 0x10, 0x36, 0x40, 0x92, 0xbb, 0x57,
+        0x55, 0x38, 0xf5, 0x43, 0x7e, 0x43, 0xc4, 0x65,
+        0x47, 0x64, 0xaa, 0x0f, 0x4c, 0xe9, 0x49, 0x16,
+        0xec, 0x6a, 0x50, 0xfd, 0x14, 0x49, 0xca, 0xdb,
+        0x44, 0x54, 0xca, 0xbe, 0xa3, 0x0e, 0x5f, 0xef
+    };
+
+    /* Read-only key templates that point at the tables above. */
+    static const RSAPublicKey bl_public_key = {
+        NULL,
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+          FIPS_RSA_MODULUS_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+          FIPS_RSA_PUBLIC_EXPONENT_LENGTH }
+    };
+    static const RSAPrivateKey bl_private_key = {
+        NULL,
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_version,
+          FIPS_RSA_PRIVATE_VERSION_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+          FIPS_RSA_MODULUS_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+          FIPS_RSA_PUBLIC_EXPONENT_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_private_exponent,
+          FIPS_RSA_PRIVATE_EXPONENT_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_prime0,
+          FIPS_RSA_PRIME0_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_prime1,
+          FIPS_RSA_PRIME1_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent0,
+          FIPS_RSA_EXPONENT0_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent1,
+          FIPS_RSA_EXPONENT1_LENGTH },
+        { FIPS_RSA_TYPE, (unsigned char *)rsa_coefficient,
+          FIPS_RSA_COEFFICIENT_LENGTH }
+    };
+
+    /*
+     * The key operations are handed writable copies of the templates
+     * (presumably because they take non-const key pointers - verify
+     * against the blapi.h prototypes).
+     */
+    RSAPublicKey pubKey = bl_public_key;
+    RSAPrivateKey privKey = bl_private_key;
+
+    PRUint8 cipherOut[FIPS_RSA_ENCRYPT_LENGTH];
+    PRUint8 plainOut[FIPS_RSA_DECRYPT_LENGTH];
+    SECStatus rv;
+
+    /* Known answer encryption: plaintext -> expected ciphertext. */
+    rv = RSA_PublicKeyOp(&pubKey, cipherOut, rsa_known_plaintext_msg);
+    if (rv == SECSuccess &&
+        PORT_Memcmp(cipherOut, rsa_known_ciphertext,
+                    FIPS_RSA_ENCRYPT_LENGTH) == 0) {
+
+        /* Known answer decryption: known ciphertext -> plaintext. */
+        rv = RSA_PrivateKeyOp(&privKey, plainOut, rsa_known_ciphertext);
+        if (rv == SECSuccess &&
+            PORT_Memcmp(plainOut, rsa_known_plaintext_msg,
+                        FIPS_RSA_DECRYPT_LENGTH) == 0) {
+            return SECSuccess;
+        }
+    }
+
+    /* Any failed operation or mismatch ends up here. */
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * ECDSA known-answer test on the curve described by ecparams.
+ *
+ * Steps, in order:
+ *   1. build a key pair from a fixed seed with EC_NewKeyFromSeed;
+ *   2. validate the resulting public value;
+ *   3. sign the SHA-256 digest of a fixed message, passing the same fixed
+ *      seed to ECDSA_SignDigestWithSeed;
+ *   4. compare the signature with knownSignature (knownSignatureLen bytes);
+ *   5. verify the signature with the public key.
+ *
+ * Returns SECSuccess if every step succeeds; otherwise sets
+ * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_ECDSA_Test(ECParams *ecparams,
+                       const PRUint8 *knownSignature,
+                       unsigned int knownSignatureLen)
+{
+
+    /* ECDSA known seed, used for both key generation and signing. */
+    static const PRUint8 ecdsa_Known_Seed[] = {
+        0x6a, 0x9b, 0xf6, 0xf7, 0xce, 0xed, 0x79, 0x11,
+        0xf0, 0xc7, 0xc8, 0x9a, 0xa5, 0xd1, 0x57, 0xb1,
+        0x7b, 0x5a, 0x3b, 0x76, 0x4e, 0x7b, 0x7c, 0xbc,
+        0xf2, 0x76, 0x1c, 0x1c, 0x7f, 0xc5, 0x53, 0x2f
+    };
+
+    static const PRUint8 msg[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    unsigned char sha256[SHA256_LENGTH]; /* SHA-256 hash (256 bits) */
+    unsigned char sig[2 * MAX_ECKEY_LEN];
+    SECItem signature, digest;
+    ECPrivateKey *ecdsa_private_key = NULL;
+    ECPublicKey ecdsa_public_key;
+    SECStatus ecdsaStatus = SECSuccess;
+
+    /* Build the key pair from the fixed seed. */
+    ecdsaStatus = EC_NewKeyFromSeed(ecparams, &ecdsa_private_key,
+                                    ecdsa_Known_Seed,
+                                    sizeof(ecdsa_Known_Seed));
+    if (ecdsaStatus != SECSuccess) {
+        /* No key was created, so there is nothing to free on this path. */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /*
+     * Construct the public key from the private key.  These are shallow
+     * struct copies, so the public key refers to the same parameter and
+     * point data as the private key.
+     */
+    ecdsa_public_key.ecParams = ecdsa_private_key->ecParams;
+    ecdsa_public_key.publicValue = ecdsa_private_key->publicValue;
+
+    /*
+     * Validate the public key value.  One call is enough: the public key
+     * above and the private key's embedded public value are the same data.
+     */
+    ecdsaStatus = EC_ValidatePublicKey(&ecdsa_public_key.ecParams,
+                                       &ecdsa_public_key.publicValue);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    /***************************************************/
+    /* ECDSA Single-Round Known Answer Signature Test. */
+    /***************************************************/
+
+    /*
+     * sizeof msg counts the string's terminating NUL, so that byte is part
+     * of the hashed input and the known signature depends on it.
+     */
+    ecdsaStatus = SHA256_HashBuf(sha256, msg, sizeof msg);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+    digest.type = siBuffer;
+    digest.data = sha256;
+    digest.len = SHA256_LENGTH;
+
+    memset(sig, 0, sizeof sig);
+    signature.type = siBuffer;
+    signature.data = sig;
+    signature.len = sizeof sig;
+
+    ecdsaStatus = ECDSA_SignDigestWithSeed(ecdsa_private_key, &signature,
+                                           &digest, ecdsa_Known_Seed, sizeof ecdsa_Known_Seed);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    /* Both the length and the bytes of the signature must match. */
+    if ((signature.len != knownSignatureLen) ||
+        (PORT_Memcmp(signature.data, knownSignature,
+                     knownSignatureLen) != 0)) {
+        ecdsaStatus = SECFailure;
+        goto loser;
+    }
+
+    /******************************************************/
+    /* ECDSA Single-Round Known Answer Verification Test. */
+    /******************************************************/
+
+    /* Perform ECDSA verification process. */
+    ecdsaStatus = ECDSA_VerifyDigest(&ecdsa_public_key, &signature, &digest);
+
+loser:
+    /* free the memory for the private key arena */
+    PORT_FreeArena(ecdsa_private_key->ecParams.arena, PR_FALSE);
+
+    if (ecdsaStatus != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+    return (SECSuccess);
+}
+
+/*
+ * ECDH known-answer test on the curve described by ecparams.
+ *
+ * Calls ECDH_Derive with a fixed public key and a fixed private key,
+ * hashes the derived value with SHA-256 and compares the hash with a stored
+ * result.
+ *
+ * Returns SECSuccess on a match; otherwise sets SEC_ERROR_LIBRARY_FAILURE
+ * and returns SECFailure.  The derived item is released on every path.
+ */
+static SECStatus
+freebl_fips_ECDH_Test(ECParams *ecparams)
+{
+
+    /* ECDH known values (reused old CAVS vector): public key of party 1. */
+    static const PRUint8 ecdh_known_pub_key_1[] = {
+        EC_POINT_FORM_UNCOMPRESSED,
+        /* pubX */
+        0x16, 0x81, 0x32, 0x86, 0xc8, 0xe4, 0x3a, 0x1f,
+        0x5d, 0xe3, 0x06, 0x22, 0x8b, 0x99, 0x14, 0x25,
+        0xf7, 0x9c, 0x5b, 0x1e, 0x96, 0x84, 0x85, 0x3b,
+        0x17, 0xfe, 0xf3, 0x1c, 0x0e, 0xed, 0xc4, 0xce,
+        /* pubY */
+        0x7a, 0x44, 0xfe, 0xbd, 0x91, 0x71, 0x7d, 0x73,
+        0xd9, 0x45, 0xea, 0xae, 0x66, 0x78, 0xfa, 0x6e,
+        0x46, 0xcd, 0xfa, 0x95, 0x15, 0x47, 0x62, 0x5d,
+        0xbb, 0x1b, 0x9f, 0xe6, 0x39, 0xfc, 0xfd, 0x47
+    };
+    /* Private key of party 2. */
+    static const PRUint8 ecdh_known_priv_key_2[] = {
+        0xb4, 0x2a, 0xe3, 0x69, 0x19, 0xec, 0xf0, 0x42,
+        0x6d, 0x45, 0x8c, 0x94, 0x4a, 0x26, 0xa7, 0x5c,
+        0xea, 0x9d, 0xd9, 0x0f, 0x59, 0xe0, 0x1a, 0x9d,
+        0x7c, 0xb7, 0x1c, 0x04, 0x53, 0xb8, 0x98, 0x5a
+    };
+    /* Expected SHA-256 hash of the derived value. */
+    static const PRUint8 ecdh_known_hash_result[] = {
+        0x16, 0xf3, 0x85, 0xa2, 0x41, 0xf3, 0x7f, 0xc4,
+        0x0b, 0x56, 0x47, 0xee, 0xa7, 0x74, 0xb9, 0xdb,
+        0xe1, 0xfa, 0x22, 0xe9, 0x04, 0xf1, 0xb6, 0x12,
+        0x4b, 0x44, 0x8a, 0xbb, 0xbc, 0x08, 0x2b, 0xa7
+    };
+
+    SECItem peerPub, ownPriv;
+    SECItem ZZ = { 0, 0, 0 };
+    PRUint8 hashOut[HASH_LENGTH_MAX];
+    SECStatus rv;
+
+    peerPub.data = (PRUint8 *)ecdh_known_pub_key_1;
+    peerPub.len = sizeof(ecdh_known_pub_key_1);
+    ownPriv.data = (PRUint8 *)ecdh_known_priv_key_2;
+    ownPriv.len = sizeof(ecdh_known_priv_key_2);
+
+    /* Derive ZZ from party 1's public key and party 2's private key. */
+    rv = ECDH_Derive(&peerPub, ecparams, &ownPriv, PR_FALSE, &ZZ);
+
+    /* The derived value is checked through its SHA-256 hash. */
+    if (rv == SECSuccess) {
+        rv = SHA256_HashBuf(hashOut, ZZ.data, ZZ.len);
+    }
+    if (rv == SECSuccess &&
+        PORT_Memcmp(hashOut, ecdh_known_hash_result,
+                    sizeof(ecdh_known_hash_result)) != 0) {
+        rv = SECFailure;
+    }
+
+    /* Release the derived item whether or not the test passed. */
+    if (ZZ.data) {
+        SECITEM_FreeItem(&ZZ, PR_FALSE);
+    }
+
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Power-up self-test for elliptic-curve operations.
+ *
+ * Builds a local copy of hard-coded nistp256 (ECCurve_X9_62_PRIME_256V1)
+ * parameters and runs the ECDSA known-answer test followed by the ECDH
+ * known-answer test on it.
+ *
+ * Returns SECSuccess if both tests pass, SECFailure as soon as one fails.
+ */
+static SECStatus
+freebl_fips_EC_PowerUpSelfTest(void)
+{
+
+    /* EC Known curve nistp256 == ECCCurve_X9_62_PRIME_256V1 params */
+    static const unsigned char p256_prime[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+    };
+    static const unsigned char p256_a[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC
+    };
+    static const unsigned char p256_b[] = {
+        0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55, 0x76,
+        0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, 0x3B, 0xCE,
+        0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B
+    };
+    static const unsigned char p256_base[] = {
+        0x04,
+        0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63,
+        0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1,
+        0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96,
+        0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C,
+        0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6,
+        0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5
+    };
+    static const unsigned char p256_order[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9,
+        0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+    };
+    static const unsigned char p256_encoding[] = {
+        0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07
+    };
+    static const ECParams ec_known_P256_Params = {
+        NULL,
+        ec_params_named, /* arena, type */
+        /* fieldID */
+        { 256, ec_field_GFp,                                                   /* size and type */
+          { { siBuffer, (unsigned char *)p256_prime, sizeof(p256_prime) } }, /* u.prime */
+          0,
+          0,
+          0 },
+        /* curve */
+        { /* a = curvea b = curveb */
+          /* curve.a */
+          { siBuffer, (unsigned char *)p256_a, sizeof(p256_a) },
+          /* curve.b */
+          { siBuffer, (unsigned char *)p256_b, sizeof(p256_b) },
+          /* curve.seed */
+          { siBuffer, NULL, 0 } },
+        /* base = 04xy*/
+        { siBuffer, (unsigned char *)p256_base, sizeof(p256_base) },
+        /* order */
+        { siBuffer, (unsigned char *)p256_order, sizeof(p256_order) },
+        1, /* cofactor */
+        /* DEREncoding */
+        { siBuffer, (unsigned char *)p256_encoding, sizeof(p256_encoding) },
+        ECCurve_X9_62_PRIME_256V1,
+        /* curveOID: the DER encoding without its first two bytes */
+        { siBuffer, (unsigned char *)(p256_encoding) + 2, sizeof(p256_encoding) - 2 },
+    };
+
+    /* Signature the ECDSA test is expected to produce on this curve. */
+    static const PRUint8 ecdsa_known_P256_signature[] = {
+        0x07, 0xb1, 0xcb, 0x57, 0x20, 0xa7, 0x10, 0xd6,
+        0x9d, 0x37, 0x4b, 0x1c, 0xdc, 0x35, 0x90, 0xff,
+        0x1a, 0x2d, 0x98, 0x95, 0x1b, 0x2f, 0xeb, 0x7f,
+        0xbb, 0x81, 0xca, 0xc0, 0x69, 0x75, 0xea, 0xc5,
+        0xa7, 0xd2, 0x20, 0xdd, 0x45, 0xf9, 0x2b, 0xdd,
+        0xda, 0x98, 0x99, 0x5b, 0x1c, 0x02, 0x3a, 0x27,
+        0x8b, 0x7d, 0xb6, 0xed, 0x0e, 0xe0, 0xa7, 0xac,
+        0xaa, 0x36, 0x2c, 0xfa, 0x1a, 0xdf, 0x0d, 0xe1
+    };
+
+    ECParams ecparams;
+
+    SECStatus rv;
+
+    /* The tests take a non-const ECParams *, so work on a local copy. */
+    ecparams = ec_known_P256_Params;
+
+    /* ECDSA GF(p) prime field curve test */
+    rv = freebl_fips_ECDSA_Test(&ecparams,
+                                ecdsa_known_P256_signature,
+                                sizeof ecdsa_known_P256_signature);
+    if (rv != SECSuccess) {
+        return (SECFailure);
+    }
+    /* ECDH GF(p) prime field curve test */
+    rv = freebl_fips_ECDH_Test(&ecparams);
+    if (rv != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+/*
+ * Power-up known-answer test for Diffie-Hellman.
+ *
+ * Calls DH_Derive with a fixed prime, a fixed public value and a fixed
+ * private value, hashes the derived value with SHA-256 and compares the
+ * hash with a stored result.
+ *
+ * Returns SECSuccess on a match; otherwise sets SEC_ERROR_LIBRARY_FAILURE
+ * and returns SECFailure.  The derived item is released on every path.
+ */
+static SECStatus
+freebl_fips_DH_PowerUpSelfTest(void)
+{
+    /* DH Known P (2048-bits) */
+    static const PRUint8 dh_known_P[] = {
+        0xc2, 0x79, 0xbb, 0x76, 0x32, 0x0d, 0x43, 0xfd,
+        0x1b, 0x8c, 0xa2, 0x3c, 0x00, 0xdd, 0x6d, 0xef,
+        0xf8, 0x1a, 0xd9, 0xc1, 0xa2, 0xf5, 0x73, 0x2b,
+        0xdb, 0x1a, 0x3e, 0x84, 0x90, 0xeb, 0xe7, 0x8e,
+        0x5f, 0x5c, 0x6b, 0xb6, 0x61, 0x89, 0xd1, 0x03,
+        0xb0, 0x5f, 0x91, 0xe4, 0xd2, 0x82, 0x90, 0xfc,
+        0x3c, 0x49, 0x69, 0x59, 0xc1, 0x51, 0x6a, 0x85,
+        0x71, 0xe7, 0x5d, 0x72, 0x5a, 0x45, 0xad, 0x01,
+        0x6f, 0x82, 0xae, 0xec, 0x91, 0x08, 0x2e, 0x7c,
+        0x64, 0x93, 0x46, 0x1c, 0x68, 0xef, 0xc2, 0x03,
+        0x28, 0x1d, 0x75, 0x3a, 0xeb, 0x9c, 0x46, 0xf0,
+        0xc9, 0xdb, 0x99, 0x95, 0x13, 0x66, 0x4d, 0xd5,
+        0x1a, 0x78, 0x92, 0x51, 0x89, 0x72, 0x28, 0x7f,
+        0x20, 0x70, 0x41, 0x49, 0xa2, 0x86, 0xe9, 0xf9,
+        0x78, 0x5f, 0x8d, 0x2e, 0x5d, 0xfa, 0xdb, 0x57,
+        0xd4, 0x71, 0xdf, 0x66, 0xe3, 0x9e, 0x88, 0x70,
+        0xa4, 0x21, 0x44, 0x6a, 0xc7, 0xae, 0x30, 0x2c,
+        0x9c, 0x1f, 0x91, 0x57, 0xc8, 0x24, 0x34, 0x2d,
+        0x7a, 0x4a, 0x43, 0xc2, 0x5f, 0xab, 0x64, 0x2e,
+        0xaa, 0x28, 0x32, 0x95, 0x42, 0x7b, 0xa0, 0xcc,
+        0xdf, 0xfd, 0x22, 0xc8, 0x56, 0x84, 0xc1, 0x62,
+        0x15, 0xb2, 0x77, 0x86, 0x81, 0xfc, 0xa5, 0x12,
+        0x3c, 0xca, 0x28, 0x17, 0x8f, 0x03, 0x16, 0x6e,
+        0xb8, 0x24, 0xfa, 0x1b, 0x15, 0x02, 0xfd, 0x8b,
+        0xb6, 0x0a, 0x1a, 0xf7, 0x47, 0x41, 0xc5, 0x2b,
+        0x37, 0x3e, 0xa1, 0xbf, 0x68, 0xda, 0x1c, 0x55,
+        0x44, 0xc3, 0xee, 0xa1, 0x63, 0x07, 0x11, 0x3b,
+        0x5f, 0x00, 0x84, 0xb4, 0xc4, 0xe4, 0xa7, 0x97,
+        0x29, 0xf8, 0xce, 0xab, 0xfc, 0x27, 0x3e, 0x34,
+        0xe4, 0xc7, 0x81, 0x52, 0x32, 0x0e, 0x27, 0x3c,
+        0xa6, 0x70, 0x3f, 0x4a, 0x54, 0xda, 0xdd, 0x60,
+        0x26, 0xb3, 0x6e, 0x45, 0x26, 0x19, 0x41, 0x6f
+    };
+
+    /* Known public value of party 1. */
+    static const PRUint8 dh_known_Y_1[] = {
+        0xb4, 0xc7, 0x85, 0xba, 0xa6, 0x98, 0xb3, 0x77,
+        0x41, 0x2b, 0xd9, 0x9a, 0x72, 0x90, 0xa4, 0xac,
+        0xc4, 0xf7, 0xc2, 0x23, 0x9a, 0x68, 0xe2, 0x7d,
+        0x3a, 0x54, 0x45, 0x91, 0xc1, 0xd7, 0x8a, 0x17,
+        0x54, 0xd3, 0x37, 0xaa, 0x0c, 0xcd, 0x0b, 0xe2,
+        0xf2, 0x34, 0x0f, 0x17, 0xa8, 0x07, 0x88, 0xaf,
+        0xed, 0xc1, 0x02, 0xd4, 0xdb, 0xdc, 0x0f, 0x22,
+        0x51, 0x23, 0x40, 0xb9, 0x65, 0x6d, 0x39, 0xf4,
+        0xe1, 0x8b, 0x57, 0x7d, 0xb6, 0xd3, 0xf2, 0x6b,
+        0x02, 0xa9, 0x36, 0xf0, 0x0d, 0xe3, 0xdb, 0x9a,
+        0xbf, 0x20, 0x00, 0x4d, 0xec, 0x6f, 0x68, 0x95,
+        0xee, 0x59, 0x4e, 0x3c, 0xb6, 0xda, 0x7b, 0x19,
+        0x08, 0x9a, 0xef, 0x61, 0x43, 0xf5, 0xfb, 0x25,
+        0x70, 0x19, 0xc1, 0x5f, 0x0e, 0x0f, 0x6a, 0x63,
+        0x44, 0xe9, 0xcf, 0x33, 0xce, 0x13, 0x4f, 0x34,
+        0x3c, 0x94, 0x40, 0x8d, 0xf2, 0x65, 0x42, 0xef,
+        0x70, 0x54, 0xdd, 0x5f, 0xc1, 0xd7, 0x0b, 0xa6,
+        0x06, 0xd5, 0xa6, 0x47, 0xae, 0x2c, 0x1f, 0x5a,
+        0xa6, 0xb3, 0xc1, 0x38, 0x3a, 0x3b, 0x60, 0x94,
+        0xa2, 0x95, 0xab, 0xb2, 0x86, 0x82, 0xc5, 0x3b,
+        0xb8, 0x6f, 0x3e, 0x55, 0x86, 0x84, 0xe0, 0x00,
+        0xe5, 0xef, 0xca, 0x5c, 0xec, 0x7e, 0x38, 0x0f,
+        0x82, 0xa2, 0xb1, 0xee, 0x48, 0x1b, 0x32, 0xbb,
+        0x5a, 0x33, 0xa5, 0x01, 0xba, 0xca, 0xa6, 0x64,
+        0x61, 0xb6, 0xe5, 0x5c, 0x0e, 0x5f, 0x2c, 0x66,
+        0x0d, 0x01, 0x6a, 0x20, 0x04, 0x70, 0x68, 0x82,
+        0x93, 0x29, 0x15, 0x3b, 0x7a, 0x06, 0xb2, 0x92,
+        0x61, 0xcd, 0x7e, 0xa4, 0xc1, 0x15, 0x64, 0x3b,
+        0x3c, 0x51, 0x10, 0x4c, 0x87, 0xa6, 0xaf, 0x07,
+        0xce, 0x46, 0x82, 0x75, 0xf3, 0x90, 0xf3, 0x21,
+        0x55, 0x74, 0xc2, 0xe4, 0x96, 0x7d, 0xc3, 0xe6,
+        0x33, 0xa5, 0xc6, 0x51, 0xef, 0xec, 0x90, 0x08
+    };
+
+    /* Known private value of party 2. */
+    static const PRUint8 dh_known_x_2[] = {
+        0x9e, 0x9b, 0xc3, 0x25, 0x53, 0xf9, 0xfc, 0x92,
+        0xb6, 0xae, 0x54, 0x8e, 0x23, 0x4c, 0x94, 0xba,
+        0x41, 0xe6, 0x29, 0x33, 0xb9, 0xdb, 0xff, 0x6d,
+        0xa8, 0xb8, 0x48, 0x49, 0x66, 0x11, 0xa6, 0x13
+    };
+
+    /* Expected SHA-256 hash of the derived value. */
+    static const PRUint8 dh_known_hash_result[] = {
+        0x93, 0xa2, 0x89, 0x1c, 0x8a, 0xc3, 0x70, 0xbf,
+        0xa7, 0xdf, 0xb6, 0xd7, 0x82, 0xfb, 0x87, 0x81,
+        0x09, 0x47, 0xf3, 0x9f, 0x5a, 0xbf, 0x4f, 0x3f,
+        0x8e, 0x5e, 0x06, 0xca, 0x30, 0xa7, 0xaf, 0x10
+    };
+
+    SECItem prime;
+    SECItem peerPub;
+    SECItem ownPriv;
+    SECItem ZZ = { 0, 0, 0 };
+    PRUint8 hashOut[HASH_LENGTH_MAX];
+    SECStatus rv;
+
+    prime.data = (PRUint8 *)dh_known_P;
+    prime.len = sizeof(dh_known_P);
+    peerPub.data = (PRUint8 *)dh_known_Y_1;
+    peerPub.len = sizeof(dh_known_Y_1);
+    ownPriv.data = (PRUint8 *)dh_known_x_2;
+    ownPriv.len = sizeof(dh_known_x_2);
+
+    /* Derive ZZ; the prime's length is passed as the final argument. */
+    rv = DH_Derive(&peerPub, &prime, &ownPriv, &ZZ, prime.len);
+
+    /* The derived value is checked through its SHA-256 hash. */
+    if (rv == SECSuccess) {
+        rv = SHA256_HashBuf(hashOut, ZZ.data, ZZ.len);
+    }
+    if (rv == SECSuccess &&
+        PORT_Memcmp(hashOut, dh_known_hash_result,
+                    sizeof(dh_known_hash_result)) != 0) {
+        rv = SECFailure;
+    }
+
+    /* Release the derived item whether or not the test passed. */
+    if (ZZ.data) {
+        SECITEM_FreeItem(&ZZ, PR_FALSE);
+    }
+
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Power-up self-test for the random number generator.
+ *
+ * Runs the SP 800-90 health tests via PRNGTEST_RunHealthTests.  Returns
+ * SECSuccess if they pass; otherwise sets SEC_ERROR_LIBRARY_FAILURE and
+ * returns SECFailure.
+ */
+static SECStatus
+freebl_fips_RNG_PowerUpSelfTest(void)
+{
+    if (PRNGTEST_RunHealthTests() != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Software integrity test: checks that the check-file signature for the
+ * library named by libname is OK.
+ *
+ * Returns SECSuccess when BLAPI_VerifySelf accepts libname, SECFailure
+ * otherwise.  No error code is set here.
+ */
+static SECStatus
+freebl_fipsSoftwareIntegrityTest(const char *libname)
+{
+    return BLAPI_VerifySelf(libname) ? SECSuccess : SECFailure;
+}
+
+/* Bit flags selecting which groups of self-tests to run. */
+#define DO_FREEBL 1
+#define DO_REST 2
+
+/*
+ * Run the power-up self-tests selected by the tests bit mask.
+ *
+ *   DO_FREEBL  runs the SHA test only.
+ *   DO_REST    runs, in order: RNG, DES3, AES (128-, 192- and 256-bit
+ *              keys), HMAC, TLS PRF, RSA, DH and EC.
+ *
+ * Stops at the first failing test and returns that test's status; returns
+ * SECSuccess when every selected test passes (or none was selected).
+ */
+static SECStatus
+freebl_fipsPowerUpSelfTest(unsigned int tests)
+{
+    /* AES key sizes exercised by the DO_REST group, in test order. */
+    static const int aesKeySizes[] = {
+        FIPS_AES_128_KEY_SIZE,
+        FIPS_AES_192_KEY_SIZE,
+        FIPS_AES_256_KEY_SIZE
+    };
+    SECStatus rv;
+    unsigned int i;
+
+    /* Tests available to stand-alone freebl: the hashes. */
+    if (tests & DO_FREEBL) {
+        rv = freebl_fips_SHA_PowerUpSelfTest();
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    }
+
+    /* Everything below belongs to the algorithms not reached through
+     * stand-alone freebl. */
+    if (!(tests & DO_REST)) {
+        return SECSuccess;
+    }
+
+    /* RNG Power-Up SelfTest(s). */
+    rv = freebl_fips_RNG_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* DES3 Power-Up SelfTest(s). */
+    rv = freebl_fips_DES3_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* AES Power-Up SelfTest(s), one pass per key size. */
+    for (i = 0; i < sizeof(aesKeySizes) / sizeof(aesKeySizes[0]); i++) {
+        rv = freebl_fips_AES_PowerUpSelfTest(aesKeySizes[i]);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    }
+
+    /* HMAC SHA-X Power-Up SelfTest(s). */
+    rv = freebl_fips_HMAC_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* TLS PRF Power-Up SelfTest(s). */
+    rv = freebl_fips_TLS_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* RSA Power-Up SelfTest(s).
+     * NOTE: RSA can only be tested in full freebl. It requires access to
+     * the locking primitives. */
+    rv = freebl_fips_RSA_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* DH Power-Up SelfTest(s). */
+    rv = freebl_fips_DH_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* EC Power-Up SelfTest(s). */
+    rv = freebl_fips_EC_PowerUpSelfTest();
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* Passed Power-Up SelfTest(s). */
+    return SECSuccess;
+}
+
+/*
+ * state variables. NOTE: freebl has two uses: a standalone use which
+ * provides limited access to the hash functions through the NSSLOWHASH_
+ * interface and a joint use from softoken, using the function pointer
+ * table. The standalone use can operate without nspr or nss-util, while
+ * the joint use requires both to be loaded. Certain functions (like RSA)
+ * need locking from NSPR, for instance.
+ *
+ * At load time, we need to handle the two uses separately. If nspr and
+ * nss-util are loaded, then we can run all the selftests, but if nspr and
+ * nss-util are not loaded, then we can't run all the selftests, and we need
+ * to prevent the softoken function pointer table from operating until the
+ * libraries are loaded and we try to use them.
+ */
+/* set by bl_startup_tests() as soon as it starts running the tests */
+static PRBool self_tests_freebl_ran = PR_FALSE;
+/* set once the full test set (not just the standalone freebl tests) has
+ * been started, either by bl_startup_tests() or later by BL_POSTRan() */
+static PRBool self_tests_ran = PR_FALSE;
+/* set by bl_startup_tests() when the tests it ran and the software
+ * integrity test passed; enough for freebl_only callers of BL_FIPSEntryOK() */
+static PRBool self_tests_freebl_success = PR_FALSE;
+/* set when the full test set passed; checked by BL_FIPSEntryOK() */
+static PRBool self_tests_success = PR_FALSE;
+
+/*
+ * accessors for freebl
+ */
+
+/*
+ * Report whether the power-on self-tests have been run, running the
+ * deferred part of them first if necessary.
+ *
+ * freebl_only: PR_TRUE when the caller only cares about the standalone
+ *              freebl tests.
+ *
+ * Returns PR_FALSE if the load-time tests never ran, PR_TRUE otherwise.
+ * The return value says that the tests ran, not that they passed: the
+ * outcome of the deferred tests is recorded in self_tests_success.
+ */
+PRBool
+BL_POSTRan(PRBool freebl_only)
+{
+    /* if the freebl self tests didn't run, there is something wrong with
+     * our on load tests */
+    if (!self_tests_freebl_ran) {
+        return PR_FALSE;
+    }
+
+    /* we are good if all the self tests have already run, or if the
+     * caller only cares about the freebl tests */
+    if (self_tests_ran || freebl_only) {
+        return PR_TRUE;
+    }
+
+    /* We get here if freebl was loaded without the rest of the support
+     * libraries, but now more than just a standalone freebl is wanted.
+     * That requires the other libraries to be loaded. If they are, try to
+     * run the rest of the selftests; if the tests fail, self_tests_success
+     * stays PR_FALSE, which disables access to these algorithms. */
+    self_tests_ran = PR_TRUE;
+    BL_Init();     /* required by RSA */
+    RNG_RNGInit(); /* required by RSA */
+    if (freebl_fipsPowerUpSelfTest(DO_REST) == SECSuccess) {
+        self_tests_success = PR_TRUE;
+    }
+
+    return PR_TRUE;
+}
+
+#include "blname.c"
+
+/*
+ * This function is called at dll load time; the code that makes this
+ * happen is platform specific and defined above.
+ *
+ * It runs the power-up self-tests followed by the software integrity test
+ * and records the outcome in the self_tests_* state flags. It returns
+ * nothing: on any failure it returns early and leaves the success flags
+ * at PR_FALSE.
+ */
+static void
+bl_startup_tests(void)
+{
+ const char *libraryName;
+ PRBool freebl_only = PR_FALSE;
+ SECStatus rv;
+
+ PORT_Assert(self_tests_freebl_ran == PR_FALSE);
+ PORT_Assert(self_tests_success == PR_FALSE);
+ self_tests_freebl_ran = PR_TRUE; /* we are running the tests */
+ self_tests_success = PR_FALSE; /* force it just in case */
+ self_tests_freebl_success = PR_FALSE; /* force it just in case */
+
+#ifdef FREEBL_NO_DEPEND
+ /* if the stubs cannot be initialized, fall back to running only the
+ * standalone freebl tests */
+ rv = FREEBL_InitStubs();
+ if (rv != SECSuccess) {
+ freebl_only = PR_TRUE;
+ }
+#endif
+
+ /* same value as already stored above */
+ self_tests_freebl_ran = PR_TRUE; /* we are running the tests */
+
+ if (!freebl_only) {
+ self_tests_ran = PR_TRUE; /* we're running all the tests */
+ BL_Init(); /* needs to be called before RSA can be used */
+ RNG_RNGInit();
+ }
+
+ /* always run the post tests */
+ rv = freebl_fipsPowerUpSelfTest(freebl_only ? DO_FREEBL : DO_FREEBL | DO_REST);
+ if (rv != SECSuccess) {
+ return;
+ }
+
+ /* the integrity test runs only after the power-up tests passed */
+ libraryName = getLibName();
+ rv = freebl_fipsSoftwareIntegrityTest(libraryName);
+ if (rv != SECSuccess) {
+ return;
+ }
+
+ /* posts are happy, allow the fips module to function now */
+ self_tests_freebl_success = PR_TRUE; /* we always test the freebl stuff */
+ if (!freebl_only) {
+ self_tests_success = PR_TRUE;
+ }
+}
+
+/*
+ * this is called from the freebl init entry points that control access to
+ * all other freebl functions. This prevents freebl from operating if our
+ * power on selftest failed.
+ *
+ * freebl_only: PR_TRUE when the caller only needs the standalone freebl
+ *              functionality; passing the freebl self-tests is then
+ *              sufficient.
+ * rerun:       PR_TRUE to discard the recorded self-test state and run
+ *              the startup tests again before deciding.
+ *
+ * Returns SECSuccess if the required self-tests succeeded. Otherwise sets
+ * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+SECStatus
+BL_FIPSEntryOK(PRBool freebl_only, PRBool rerun)
+{
+#ifdef NSS_NO_INIT_SUPPORT
+    /* this should only be set on platforms that can't handle one of the INIT
+     * schemes. This code allows those platforms to continue to function,
+     * though they don't meet the strict NIST requirements. If NSS_NO_INIT_SUPPORT
+     * is not set, and init support has not been properly enabled, freebl
+     * will always fail because of the test below
+     */
+    if (!self_tests_freebl_ran) {
+        bl_startup_tests();
+    }
+#endif
+    if (rerun) {
+        /* reset all four state flags. self_tests_ran must be cleared too:
+         * if the rerun ends up in freebl-only mode, a stale PR_TRUE would
+         * make BL_POSTRan() skip the remaining tests for good. */
+        self_tests_freebl_ran = PR_FALSE;
+        self_tests_ran = PR_FALSE;
+        self_tests_success = PR_FALSE;
+        self_tests_freebl_success = PR_FALSE;
+        bl_startup_tests();
+    }
+    /* if the general self tests succeeded, we're done */
+    if (self_tests_success) {
+        return SECSuccess;
+    }
+    /* standalone freebl can initialize */
+    if (freebl_only && self_tests_freebl_success) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+#endif
diff --git a/security/nss/lib/freebl/freebl.def b/security/nss/lib/freebl/freebl.def
new file mode 100644
index 0000000000..164c843fdc
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.def
@@ -0,0 +1,26 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp
new file mode 100644
index 0000000000..14f213c1bf
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.gyp
@@ -0,0 +1,952 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+ 'includes': [
+ '../../coreconf/config.gypi'
+ ],
+ 'targets': [
+ {
+ 'target_name': 'intel-gcm-s_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'intel-aes.s',
+ 'intel-gcm.s',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'cc_is_clang==1 and force_integrated_as!=1', {
+ 'cflags': [
+ '-no-integrated-as',
+ ],
+ 'cflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ 'asflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'intel-gcm-wrap_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'intel-gcm-wrap.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ '(OS=="linux" or OS=="android") and target_arch=="x64"', {
+ 'dependencies': [
+ 'intel-gcm-s_lib',
+ ],
+ }],
+ ],
+ 'cflags': [
+ '-mssse3',
+ ],
+ 'cflags_mozilla': [
+ '-mssse3'
+ ],
+ },
+ {
+ 'target_name': 'hw-acc-crypto-avx',
+ 'type': 'static_library',
+ # 'sources': [
+ # All AVX hardware accelerated crypto currently requires x64
+ # ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'cflags': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2'
+ ],
+ 'cflags_mozilla': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2',
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ ],
+ # GCC doesn't define this.
+ 'defines': [
+ '__SSSE3__',
+ ],
+ }],
+ [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or \
+ OS=="netbsd" or OS=="openbsd"', {
+ 'cflags': [
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ ],
+ }],
+ # macOS build doesn't use cflags.
+ [ 'OS=="mac" or OS=="ios"', {
+ 'xcode_settings': {
+ 'OTHER_CFLAGS': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2',
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ ],
+ },
+ }],
+ [ 'target_arch=="arm"', {
+ # Gecko doesn't support non-NEON platform on Android, but tier-3
+ # platform such as Linux/arm will need it
+ 'cflags_mozilla': [
+ '-mfpu=neon'
+ ],
+ }],
+ [ 'target_arch=="x64"', {
+ 'sources': [
+ 'verified/Hacl_Poly1305_128.c',
+ 'verified/Hacl_Chacha20_Vec128.c',
+ 'verified/Hacl_Chacha20Poly1305_128.c',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'hw-acc-crypto-avx2',
+ 'type': 'static_library',
+ # 'sources': [
+ # All AVX2 hardware accelerated crypto currently requires x64
+ # ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'cflags': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2'
+ ],
+ 'cflags_mozilla': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2',
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ '-mavx2',
+ ],
+ # GCC doesn't define this.
+ 'defines': [
+ '__SSSE3__',
+ ],
+ }],
+ [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or \
+ OS=="netbsd" or OS=="openbsd"', {
+ 'cflags': [
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ '-mavx2',
+ ],
+ }],
+ # macOS build doesn't use cflags.
+ [ 'OS=="mac" or OS=="ios"', {
+ 'xcode_settings': {
+ 'OTHER_CFLAGS': [
+ '-mssse3',
+ '-msse4.1',
+ '-msse4.2',
+ '-mpclmul',
+ '-maes',
+ '-mavx',
+ '-mavx2',
+ ],
+ },
+ }],
+ [ 'target_arch=="arm"', {
+ # Gecko doesn't support non-NEON platform on Android, but tier-3
+ # platform such as Linux/arm will need it
+ 'cflags_mozilla': [
+ '-mfpu=neon'
+ ],
+ }],
+ [ 'target_arch=="x64"', {
+ 'sources': [
+ 'verified/Hacl_Poly1305_256.c',
+ 'verified/Hacl_Chacha20_Vec256.c',
+ 'verified/Hacl_Chacha20Poly1305_256.c',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'gcm-aes-x86_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'gcm-x86.c', 'aes-x86.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ # Enable isa option for pclmul and aes-ni; supported since gcc 4.4.
+ # This is only supported by x86/x64. It's not needed for Windows,
+ # unless clang-cl is used.
+ 'cflags_mozilla': [
+ '-mpclmul', '-maes'
+ ],
+ 'conditions': [
+ [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', {
+ 'cflags': [
+ '-mpclmul', '-maes'
+ ],
+ }],
+ # macOS build doesn't use cflags.
+ [ 'OS=="mac" or OS=="ios"', {
+ 'xcode_settings': {
+ 'OTHER_CFLAGS': [
+ '-mpclmul', '-maes'
+ ],
+ },
+ }]
+ ]
+ },
+ {
+ 'target_name': 'sha-x86_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'sha256-x86.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'cflags': [
+ '-msha',
+ '-mssse3',
+ '-msse4.1'
+ ],
+ 'cflags_mozilla': [
+ '-msha',
+ '-mssse3',
+ '-msse4.1'
+ ],
+ 'conditions': [
+ # macOS build doesn't use cflags.
+ [ 'OS=="mac" or OS=="ios"', {
+ 'xcode_settings': {
+ 'OTHER_CFLAGS': [
+ '-msha',
+ '-mssse3',
+ '-msse4.1'
+ ],
+ },
+ }]
+ ]
+ },
+ {
+ 'target_name': 'gcm-aes-arm32-neon_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'gcm-arm32-neon.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'cflags': [
+ '-march=armv7',
+ '-mfpu=neon',
+ '<@(softfp_cflags)',
+ ],
+ 'cflags_mozilla': [
+ '-mfpu=neon',
+ '<@(softfp_cflags)',
+ ]
+ },
+ {
+ 'target_name': 'gcm-aes-aarch64_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'gcm-aarch64.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'cflags': [
+ '-march=armv8-a+crypto'
+ ],
+ 'cflags_mozilla': [
+ '-march=armv8-a+crypto'
+ ]
+ },
+ {
+ 'target_name': 'gcm-aes-ppc_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'gcm-ppc.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'disable_crypto_vsx==0', {
+ 'cflags': [
+ '-mcrypto',
+ '-maltivec'
+ ],
+ 'cflags_mozilla': [
+ '-mcrypto',
+ '-maltivec'
+ ],
+ }, 'disable_crypto_vsx==1', {
+ 'cflags': [
+ '-maltivec'
+ ],
+ 'cflags_mozilla': [
+ '-maltivec'
+ ],
+ }],
+ [ 'ppc_abi==2', {
+ 'sources': [
+ 'sha512-p8.s',
+ ],
+ }],
+ ]
+ },
+ {
+ 'target_name': 'gcm-aes-ppc_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm.s',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'cc_is_clang==1 and force_integrated_as!=1', {
+ 'cflags': [
+ '-no-integrated-as',
+ ],
+ 'cflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ 'asflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': 'ppc-gcm-wrap-nodepend_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm-wrap.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'gcm-aes-ppc_lib',
+ ],
+ },
+ {
+ 'target_name': 'ppc-gcm-wrap_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'ppc-gcm-wrap.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'gcm-aes-ppc_lib',
+ ],
+ 'defines!': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ },
+ {
+ 'target_name': 'gcm-sha512-nodepend-ppc_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'sha512.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'disable_crypto_vsx==0', {
+ 'cflags': [
+ '-mcrypto',
+ '-maltivec',
+ '-mvsx',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ 'cflags_mozilla': [
+ '-mcrypto',
+ '-maltivec',
+ '-mvsx',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ }, 'disable_crypto_vsx==1', {
+ 'cflags': [
+ '-maltivec',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ 'cflags_mozilla': [
+ '-maltivec',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ }]
+ ]
+ },
+ {
+ 'target_name': 'gcm-sha512-ppc_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'sha512.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'disable_crypto_vsx==0', {
+ 'cflags': [
+ '-mcrypto',
+ '-maltivec',
+ '-mvsx',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ 'cflags_mozilla': [
+ '-mcrypto',
+ '-maltivec',
+ '-mvsx',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ }, 'disable_crypto_vsx==1', {
+ 'cflags': [
+ '-maltivec',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ 'cflags_mozilla': [
+ '-maltivec',
+ '-funroll-loops',
+ '-fpeel-loops'
+ ],
+ }]
+ ],
+ 'defines!': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ },
+ {
+ 'target_name': 'chacha20-ppc_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'chacha20poly1305-ppc.c',
+ 'chacha20-ppc64le.S',
+ ]
+ },
+ {
+ 'target_name': 'armv8_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'aes-armv8.c',
+ 'sha1-armv8.c',
+ 'sha256-armv8.c',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'conditions': [
+ [ 'target_arch=="arm"', {
+ 'cflags': [
+ '-march=armv8-a',
+ '-mfpu=crypto-neon-fp-armv8',
+ '<@(softfp_cflags)',
+ ],
+ 'cflags_mozilla': [
+ '-march=armv8-a',
+ '-mfpu=crypto-neon-fp-armv8',
+ '<@(softfp_cflags)',
+ ],
+ }, 'target_arch=="arm64" or target_arch=="aarch64"', {
+ 'cflags': [
+ '-march=armv8-a+crypto'
+ ],
+ 'cflags_mozilla': [
+ '-march=armv8-a+crypto'
+ ],
+ }]
+ ]
+ },
+ {
+ 'target_name': 'freebl',
+ 'type': 'static_library',
+ 'sources': [
+ 'loader.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ]
+ },
+ # Build a static freebl library so we can statically link it into
+ # the binary. This way we don't have to dlopen() the shared lib
+ # but can directly call freebl functions.
+ {
+ 'target_name': 'freebl_static',
+ 'type': 'static_library',
+ 'includes': [
+ 'freebl_base.gypi',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'hw-acc-crypto-avx',
+ 'hw-acc-crypto-avx2',
+ ],
+ 'conditions': [
+ [ 'target_arch=="ia32" or target_arch=="x64"', {
+ 'dependencies': [
+ 'gcm-aes-x86_c_lib',
+ ],
+ }, '(disable_arm_hw_aes==0 or disable_arm_hw_sha1==0 or disable_arm_hw_sha2==0) and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'dependencies': [
+ 'armv8_c_lib'
+ ],
+ }],
+ [ '(target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', {
+ 'dependencies': [
+ 'sha-x86_c_lib',
+ ],
+ }],
+ [ 'disable_arm32_neon==0 and target_arch=="arm"', {
+ 'dependencies': [
+ 'gcm-aes-arm32-neon_c_lib',
+ ],
+ }],
+ [ 'disable_arm32_neon==1 and target_arch=="arm"', {
+ 'defines!': [
+ 'NSS_DISABLE_ARM32_NEON',
+ ],
+ }],
+ [ 'target_arch=="arm64" or target_arch=="aarch64"', {
+ 'dependencies': [
+ 'gcm-aes-aarch64_c_lib',
+ ],
+ }],
+ [ 'disable_altivec==0 and target_arch=="ppc64"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-ppc_c_lib',
+ ],
+ }],
+ [ 'disable_altivec==0 and target_arch=="ppc64le"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-ppc_c_lib',
+ 'chacha20-ppc_lib',
+ 'ppc-gcm-wrap_c_lib',
+ ],
+ }],
+ [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', {
+ 'defines!': [
+ 'NSS_DISABLE_ALTIVEC',
+ ],
+ }],
+ [ 'disable_crypto_vsx==1 and (target_arch=="ppc" or target_arch=="ppc64" or target_arch=="ppc64le")', {
+ 'defines!': [
+ 'NSS_DISABLE_CRYPTO_VSX',
+ ],
+ }],
+ [ 'OS=="linux"', {
+ 'defines!': [
+ 'FREEBL_NO_DEPEND',
+ 'FREEBL_LOWHASH',
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ 'PPC_GCM',
+ ],
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ # The AES assembler code doesn't work in static builds.
+ # The linker complains about non-relocatable code, and I
+ # currently don't know how to fix this properly.
+ 'sources!': [
+ 'intel-aes.s',
+ 'intel-gcm.s',
+ ],
+ }],
+ ],
+ }],
+ ],
+ },
+ {
+ 'target_name': '<(freebl_name)',
+ 'type': 'shared_library',
+ 'includes': [
+ 'freebl_base.gypi',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'hw-acc-crypto-avx',
+ 'hw-acc-crypto-avx2',
+ ],
+ 'conditions': [
+ [ 'target_arch=="ia32" or target_arch=="x64"', {
+ 'dependencies': [
+ 'gcm-aes-x86_c_lib',
+ ]
+ }, 'target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64"', {
+ 'dependencies': [
+ 'armv8_c_lib',
+ ],
+ }],
+ [ '(target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', {
+ 'dependencies': [
+ 'sha-x86_c_lib',
+ ],
+ }],
+ [ 'disable_arm32_neon==0 and target_arch=="arm"', {
+ 'dependencies': [
+ 'gcm-aes-arm32-neon_c_lib',
+ ],
+ }],
+ [ 'disable_arm32_neon==1 and target_arch=="arm"', {
+ 'defines!': [
+ 'NSS_DISABLE_ARM32_NEON',
+ ],
+ }],
+ [ 'target_arch=="arm64" or target_arch=="aarch64"', {
+ 'dependencies': [
+ 'gcm-aes-aarch64_c_lib',
+ ],
+ }],
+ [ 'disable_altivec==0', {
+ 'conditions': [
+ [ 'target_arch=="ppc64"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-nodepend-ppc_c_lib',
+ ],
+ }, 'target_arch=="ppc64le"', {
+ 'dependencies': [
+ 'gcm-aes-ppc_c_lib',
+ 'gcm-sha512-nodepend-ppc_c_lib',
+ 'ppc-gcm-wrap-nodepend_c_lib',
+ ],
+ }],
+ ],
+ }],
+ [ 'disable_altivec==1 and (target_arch=="ppc64" or target_arch=="ppc64le")', {
+ 'defines!': [
+ 'NSS_DISABLE_ALTIVEC',
+ ],
+ }],
+ [ 'disable_crypto_vsx==1 and (target_arch=="ppc" or target_arch=="ppc64" or target_arch=="ppc64le")', {
+ 'defines!': [
+ 'NSS_DISABLE_CRYPTO_VSX',
+ ],
+ }],
+ [ 'OS!="linux"', {
+ 'conditions': [
+ [ 'moz_fold_libs==0', {
+ 'dependencies': [
+ '<(DEPTH)/lib/util/util.gyp:nssutil3',
+ ],
+ }, {
+ 'libraries': [
+ '<(moz_folded_library_name)',
+ ],
+ }],
+ ],
+ }],
+ [ '(OS=="linux" or OS=="android") and target_arch=="x64"', {
+ 'dependencies': [
+ 'intel-gcm-wrap_c_lib',
+ ],
+ }],
+ [ 'OS=="win" and (target_arch=="ia32" or target_arch=="x64") and cc_is_clang==1', {
+ 'dependencies': [
+ 'intel-gcm-wrap_c_lib',
+ ],
+ }],
+ [ 'OS=="linux"', {
+ 'sources': [
+ 'nsslowhash.c',
+ 'stubs.c',
+ ],
+ }],
+ ],
+ 'variables': {
+ 'conditions': [
+ [ 'OS=="linux"', {
+ 'mapfile': 'freebl_hash_vector.def',
+ }, {
+ 'mapfile': 'freebl.def',
+ }],
+ ]
+ },
+ },
+ {
+ 'target_name': 'freebl_64int_3',
+ 'includes': [
+ 'freebl_base.gypi',
+ ],
+ 'type': 'shared_library',
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'hw-acc-crypto-avx',
+ 'hw-acc-crypto-avx2',
+ ],
+ },
+ {
+ 'target_name': 'freebl_64fpu_3',
+ 'includes': [
+ 'freebl_base.gypi',
+ ],
+ 'type': 'shared_library',
+ 'sources': [
+ 'mpi/mpi_sparc.c',
+ 'mpi/mpv_sparcv9.s',
+ 'mpi/montmulfv9.s',
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ 'hw-acc-crypto-avx',
+ 'hw-acc-crypto-avx2',
+ ],
+ 'asflags_mozilla': [
+ '-mcpu=v9', '-Wa,-xarch=v9a'
+ ],
+ 'defines': [
+ 'MP_NO_MP_WORD',
+ 'MP_USE_UINT_DIGIT',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_USING_MONT_MULF',
+ 'MP_MONT_USE_MP_MUL',
+ ],
+ },
+ ],
+ 'conditions': [
+ [ 'OS=="linux"', {
+ # stub build
+ 'targets': [
+ {
+ 'target_name': 'freebl3',
+ 'type': 'shared_library',
+ 'defines': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ 'sources': [
+ 'lowhash_vector.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'variables': {
+ 'mapfile': 'freebl_hash.def'
+ }
+ },
+ ],
+ }],
+ ],
+ 'target_defaults': {
+ 'include_dirs': [
+ 'mpi',
+ 'ecl',
+ 'verified',
+ 'verified/internal',
+ 'verified/karamel/include',
+ 'verified/karamel/krmllib/dist/minimal',
+ 'deprecated',
+ ],
+ 'defines': [
+ 'SHLIB_SUFFIX=\"<(dll_suffix)\"',
+ 'SHLIB_PREFIX=\"<(dll_prefix)\"',
+ 'SHLIB_VERSION=\"3\"',
+ 'SOFTOKEN_SHLIB_VERSION=\"3\"',
+ 'RIJNDAEL_INCLUDE_TABLES',
+ 'MP_API_COMPATIBLE'
+ ],
+ 'conditions': [
+ [ 'OS=="win" and target_arch=="ia32"', {
+ 'msvs_settings': {
+ 'VCCLCompilerTool': {
+ #TODO: -Ox optimize flags
+ 'PreprocessorDefinitions': [
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ 'MP_USE_UINT_DIGIT',
+ 'MP_NO_MP_WORD',
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ },
+ },
+ }],
+ [ 'OS=="win" and target_arch=="x64"', {
+ 'msvs_settings': {
+ 'VCCLCompilerTool': {
+ #TODO: -Ox optimize flags
+ 'PreprocessorDefinitions': [
+ # Should be copied to mingw defines below
+ 'MP_IS_LITTLE_ENDIAN',
+ 'NSS_BEVAND_ARCFOUR',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ },
+ },
+ }],
+ [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="ia32" or target_arch=="x64") and disable_intel_hw_sha==0', {
+ 'defines': [
+ 'USE_HW_SHA2',
+ ],
+ }],
+ [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_aes==0', {
+ 'defines': [
+ 'USE_HW_AES',
+ ],
+ }],
+ [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha1==0', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
+ [ '(OS=="win" or OS=="mac" or OS=="ios") and (target_arch=="arm64" or target_arch=="aarch64") and disable_arm_hw_sha2==0', {
+ 'defines': [
+ 'USE_HW_SHA2',
+ ],
+ }],
+ [ 'cc_use_gnu_ld==1 and OS=="win" and target_arch=="x64"', {
+ # mingw x64
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ ],
+ }],
+ # Poly1305_256 requires the flag to run
+ ['target_arch=="x64"', {
+ 'defines':[
+ 'HACL_CAN_COMPILE_VEC128',
+ 'HACL_CAN_COMPILE_VEC256',
+ 'HACL_CAN_COMPILE_INTRINSICS',
+ ],
+ }],
+ # MSVC has no __int128 type. Use emulated int128 and leave
+ # have_int128_support as-is for Curve25519 impl. selection.
+ [ 'have_int128_support==1 and (OS!="win" or cc_is_clang==1 or cc_is_gcc==1)', {
+ 'defines': [
+ # The Makefile does version-tests on GCC, but we're not doing that here.
+ 'HAVE_INT128_SUPPORT',
+ 'HACL_CAN_COMPILE_UINT128'
+ ],
+ }, {
+ 'defines': [
+ 'KRML_VERIFIED_UINT128',
+ ],
+ }],
+ [ 'OS=="linux"', {
+ 'defines': [
+ 'FREEBL_LOWHASH',
+ 'FREEBL_NO_DEPEND',
+ ],
+ 'conditions': [
+ [ 'disable_altivec==0 and target_arch=="ppc64le"', {
+ 'defines': [
+ 'PPC_GCM',
+ ],
+ }],
+ ],
+ }],
+ [ 'OS=="linux" or OS=="android"', {
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'NSS_BEVAND_ARCFOUR',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ ],
+ }],
+ [ 'target_arch=="x64"', {
+ 'defines': [
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ }],
+ [ 'target_arch=="ia32"', {
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ 'MP_USE_UINT_DIGIT',
+ ],
+ }],
+ [ 'target_arch=="arm"', {
+ 'defines': [
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_USE_UINT_DIGIT',
+ 'SHA_NO_LONG_LONG',
+ 'ARMHF',
+ ],
+ }],
+ [ 'disable_intel_hw_sha==0 and (target_arch=="ia32" or target_arch=="x64")', {
+ 'defines': [
+ 'USE_HW_SHA2',
+ ],
+ }],
+ [ 'disable_arm_hw_aes==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'defines': [
+ 'USE_HW_AES',
+ ],
+ }],
+ [ 'disable_arm_hw_sha1==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'defines': [
+ 'USE_HW_SHA1',
+ ],
+ }],
+ [ 'disable_arm_hw_sha2==0 and (target_arch=="arm" or target_arch=="arm64" or target_arch=="aarch64")', {
+ 'defines': [
+ 'USE_HW_SHA2',
+ ],
+ }],
+ ],
+ }],
+ ],
+ },
+ 'variables': {
+ 'module': 'nss',
+ 'conditions': [
+ [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', {
+ 'have_int128_support%': 1,
+ }, {
+ 'have_int128_support%': 0,
+ }],
+ [ 'target_arch=="arm"', {
+ # When the compiler uses the softfloat ABI, we want to use the compatible softfp ABI when enabling NEON for these objects.
+ # Confusingly, __SOFTFP__ is the name of the define for the softfloat ABI, not for the softfp ABI.
+ 'softfp_cflags': '<!(sh -c "${CC:-cc} -o - -E -dM - ${CFLAGS} < /dev/null | grep __SOFTFP__ > /dev/null && echo -mfloat-abi=softfp || true")',
+ }],
+ [ 'target_arch=="ppc64" or target_arch=="ppc64le"', {
+ 'ppc_abi': '<!(sh -c "${CC:-cc} -dM -E - < /dev/null | awk \'\\$2 == \\"_CALL_ELF\\" {print \\$3}\'")',
+ }],
+ ],
+ }
+}
diff --git a/security/nss/lib/freebl/freebl.rc b/security/nss/lib/freebl/freebl.rc
new file mode 100644
index 0000000000..444ae5d03f
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.rc
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "softkver.h"
+#include <winver.h>
+
+#define MY_LIBNAME "freebl"
+#define MY_FILEDESCRIPTION "NSS freebl Library"
+
+#define STRINGIZE(x) #x
+#define STRINGIZE2(x) STRINGIZE(x)
+#define SOFTOKEN_VMAJOR_STR STRINGIZE2(SOFTOKEN_VMAJOR)
+
+#ifdef _DEBUG
+#define MY_DEBUG_STR " (debug)"
+#define MY_FILEFLAGS_1 VS_FF_DEBUG
+#else
+#define MY_DEBUG_STR ""
+#define MY_FILEFLAGS_1 0x0L
+#endif
+#if SOFTOKEN_BETA
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1|VS_FF_PRERELEASE
+#else
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1
+#endif
+
+#ifdef WINNT
+#define MY_FILEOS VOS_NT_WINDOWS32
+#else
+#define MY_FILEOS VOS__WINDOWS32
+#endif
+
+#define MY_INTERNAL_NAME MY_LIBNAME SOFTOKEN_VMAJOR_STR
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version-information resource
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ PRODUCTVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS MY_FILEFLAGS_2
+ FILEOS MY_FILEOS
+ FILETYPE VFT_DLL
+ FILESUBTYPE 0x0L // not used
+
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904B0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "CompanyName", "Mozilla Foundation\0"
+ VALUE "FileDescription", MY_FILEDESCRIPTION MY_DEBUG_STR "\0"
+ VALUE "FileVersion", SOFTOKEN_VERSION "\0"
+ VALUE "InternalName", MY_INTERNAL_NAME "\0"
+ VALUE "OriginalFilename", MY_INTERNAL_NAME ".dll\0"
+ VALUE "ProductName", "Network Security Services\0"
+ VALUE "ProductVersion", SOFTOKEN_VERSION "\0"
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi
new file mode 100644
index 0000000000..8ef46da630
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_base.gypi
@@ -0,0 +1,249 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+ 'sources': [
+ 'aeskeywrap.c',
+ 'cmac.c',
+ 'alghmac.c',
+ 'arcfive.c',
+ 'arcfour.c',
+ 'blake2b.c',
+ 'camellia.c',
+ 'chacha20poly1305.c',
+ 'crypto_primitives.c',
+ 'ctr.c',
+ 'cts.c',
+ 'des.c',
+ 'desblapi.c',
+ 'dh.c',
+ 'drbg.c',
+ 'dsa.c',
+ 'ec.c',
+ 'ecdecode.c',
+ 'ecl/ec_naf.c',
+ 'ecl/ecl.c',
+ 'ecl/ecl_gf.c',
+ 'ecl/ecl_mult.c',
+ 'ecl/ecp_25519.c',
+ 'ecl/ecp_256.c',
+ 'ecl/ecp_256_32.c',
+ 'ecl/ecp_384.c',
+ 'ecl/ecp_521.c',
+ 'ecl/ecp_aff.c',
+ 'ecl/ecp_jac.c',
+ 'ecl/ecp_jm.c',
+ 'ecl/ecp_mont.c',
+ 'ecl/ecp_secp256r1.c',
+ 'ecl/ecp_secp384r1.c',
+ 'ecl/ecp_secp384r1_wrap.c',
+ 'ecl/ecp_secp521r1.c',
+ 'ecl/ecp_secp521r1_wrap.c',
+ 'verified/Hacl_P256.c',
+ 'verified/Hacl_P384.c',
+ 'verified/Hacl_P521.c',
+ 'fipsfreebl.c',
+ 'blinit.c',
+ 'freeblver.c',
+ 'gcm.c',
+ 'hmacct.c',
+ 'jpake.c',
+ 'kyber.c',
+ 'kyber-pqcrystals-ref.c',
+ 'ldvector.c',
+ 'md2.c',
+ 'md5.c',
+ 'mpi/mp_gf2m.c',
+ 'mpi/mpcpucache.c',
+ 'mpi/mpi.c',
+ 'mpi/mplogic.c',
+ 'mpi/mpmontg.c',
+ 'mpi/mpprime.c',
+ 'pqg.c',
+ 'rawhash.c',
+ 'rijndael.c',
+ 'rsa.c',
+ 'rsa_blind.c',
+ 'rsapkcs.c',
+ 'sha_fast.c',
+ 'shvfy.c',
+ 'sysrand.c',
+ 'tlsprfalg.c',
+ 'secmpi.c',
+ 'verified/Hacl_Hash_SHA3.c',
+ 'sha3.c',
+ 'shake.c',
+ ],
+ 'defines': [
+ # For kyber-pqcrystals-ref.c. If we ever decide to support Kyber512 or
+ # Kyber1024, we'll need to build separate static libraries with different
+ # values of KYBER_K.
+ 'KYBER_K=3',
+ ],
+ 'conditions': [
+ [ 'OS=="linux" or OS=="android"', {
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'sources': [
+ 'arcfour-amd64-gas.s',
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_common.S',
+ 'mpi/mp_comba.c',
+ ],
+ 'conditions': [
+ [ 'cc_is_clang==1 and fuzz!=1 and coverage!=1 and force_integrated_as!=1', {
+ 'cflags': [
+ '-no-integrated-as',
+ ],
+ 'cflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ 'asflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ }],
+ ],
+ }],
+ [ 'target_arch=="ia32"', {
+ 'sources': [
+ 'mpi/mpi_x86.s',
+ ],
+ }],
+ [ 'target_arch=="arm"', {
+ 'sources': [
+ 'mpi/mpi_arm.c',
+ ],
+ }],
+ [ 'target_arch=="ppc64le"', {
+ 'sources': [
+ 'chacha20poly1305-ppc.c',
+ 'chacha20-ppc64le.S',
+ ],
+ }]
+ ],
+ }],
+ [ 'OS=="win"', {
+ 'libraries': [
+ '-ladvapi32',
+ ],
+ 'conditions': [
+ [ 'cc_use_gnu_ld!=1 and target_arch=="x64"', {
+ 'sources': [
+ 'arcfour-amd64-masm.asm',
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_masm.asm',
+ 'mpi/mp_comba_amd64_masm.asm',
+ 'intel-aes-x64-masm.asm',
+ 'intel-gcm-x64-masm.asm',
+ ],
+ }],
+ [ 'cc_use_gnu_ld!=1 and target_arch=="ia32"', {
+ 'sources': [
+ 'mpi/mpi_x86_asm.c',
+ 'intel-aes-x86-masm.asm',
+ 'intel-gcm-x86-masm.asm',
+ ],
+ }],
+ [ 'cc_use_gnu_ld==1', {
+ # mingw
+ 'sources': [
+ ],
+ }],
+ [ 'cc_is_clang!=1', {
+ # MSVC
+ 'sources': [
+ 'intel-gcm-wrap.c',
+ ],
+ }],
+ ],
+ }],
+ ['have_int128_support==1', {
+ 'sources': [
+ # All intel x64 and 64-bit ARM architectures get the 64 bit version.
+ 'ecl/curve25519_64.c',
+ 'verified/Hacl_Curve25519_51.c',
+ ],
+ }, {
+ 'sources': [
+ # All other architectures get the generic 32 bit implementation.
+ 'ecl/curve25519_32.c',
+ ],
+ }],
+ ['(target_arch!="ppc64" and target_arch!="ppc64le") or disable_altivec==1', {
+ 'sources': [
+ # Gyp does not support per-file cflags, so working around like this.
+ # ppc performance greatly benefits from specific flags.
+ 'sha512.c',
+ ],
+ }],
+ [ 'disable_chachapoly==0', {
+ # The ChaCha20 code is linked in through the static ssse3-crypto lib on
+ # all platforms that support SSSE3. There are runtime checks in place to
+ # choose the correct ChaCha implementation at runtime.
+ 'sources': [
+ 'verified/Hacl_Chacha20.c',
+ 'verified/Hacl_Chacha20Poly1305_32.c',
+ 'verified/Hacl_Poly1305_32.c',
+ ],
+ }],
+ [ 'disable_deprecated_seed==0', {
+ 'sources': [
+ 'deprecated/seed.c',
+ ],
+ }],
+ [ 'disable_deprecated_rc2==0', {
+ 'sources': [
+ 'deprecated/alg2268.c',
+ ],
+ }],
+ [ 'fuzz==1', {
+ 'sources!': [ 'drbg.c' ],
+ 'sources': [ 'det_rng.c' ],
+ }],
+ [ 'fuzz_tls==1', {
+ 'defines': [
+ 'UNSAFE_FUZZER_MODE',
+ ],
+ }],
+ [ 'ct_verif==1', {
+ 'defines': [
+ 'CT_VERIF',
+ ],
+ }],
+ [ 'only_dev_random==1', {
+ 'defines': [
+ 'SEED_ONLY_DEV_URANDOM',
+ ]
+ }],
+ [ 'OS=="mac"', {
+ 'conditions': [
+ [ 'target_arch=="ia32"', {
+ 'sources': [
+ 'mpi/mpi_sse2.s',
+ ],
+ 'defines': [
+ 'MP_USE_UINT_DIGIT',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ ],
+ }, 'target_arch=="x64"', {
+ 'sources': [
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_common.S',
+ 'mpi/mp_comba.c',
+ ],
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ ],
+ }],
+ ],
+ }],
+ ],
+ 'ldflags': [
+ '-Wl,-Bsymbolic'
+ ],
+}
diff --git a/security/nss/lib/freebl/freebl_hash.def b/security/nss/lib/freebl/freebl_hash.def
new file mode 100644
index 0000000000..9fd27367e1
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash.def
@@ -0,0 +1,39 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
+;+NSSRAWHASH_3.12.3 { # NSS 3.12.3 release
+;+ global:
+NSSLOW_Init;
+NSSLOW_Shutdown;
+NSSLOWHASH_Length;
+NSSLOWHASH_Begin;
+NSSLOWHASH_Destroy;
+NSSLOWHASH_End;
+NSSLOWHASH_NewContext;
+NSSLOWHASH_Update;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freebl_hash_vector.def b/security/nss/lib/freebl/freebl_hash_vector.def
new file mode 100644
index 0000000000..9d7d07d545
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash_vector.def
@@ -0,0 +1,34 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
+;+NSSprivate_3.16 { # NSS 3.16 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+NSSLOW_GetVector;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freeblver.c b/security/nss/lib/freebl/freeblver.c
new file mode 100644
index 0000000000..9136f0b0bd
--- /dev/null
+++ b/security/nss/lib/freebl/freeblver.c
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Library identity and versioning */
+
+#include "softkver.h"
+
+/*
+ * Suffix appended to the version string in DEBUG builds. The macro name
+ * avoids the leading-underscore-plus-uppercase form, which the C standard
+ * reserves for the implementation.
+ */
+#if defined(DEBUG)
+#define FREEBL_DEBUG_STRING " (debug)"
+#else
+#define FREEBL_DEBUG_STRING ""
+#endif
+
+/*
+ * Version information: "Version: NSS " followed by SOFTOKEN_VERSION (from
+ * softkver.h) and the debug suffix, concatenated at compile time.
+ */
+const char __nss_freebl_version[] = "Version: NSS " SOFTOKEN_VERSION FREEBL_DEBUG_STRING;
diff --git a/security/nss/lib/freebl/gcm-aarch64.c b/security/nss/lib/freebl/gcm-aarch64.c
new file mode 100644
index 0000000000..3f3c046d75
--- /dev/null
+++ b/security/nss/lib/freebl/gcm-aarch64.c
@@ -0,0 +1,96 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "gcm.h"
+#include "secerr.h"
+
+/* old gcc doesn't support some poly64x2_t intrinsic */
+#if defined(__aarch64__) && defined(IS_LITTLE_ENDIAN) && \
+ (defined(__clang__) || defined(__GNUC__) && __GNUC__ > 6)
+
+#include <arm_neon.h>
+
+/*
+ * Write the 16-byte GHASH accumulator ghash->x to outbuf, reversing the bit
+ * order inside every byte (vrbitq_u8) on the way out. Always returns
+ * SECSuccess.
+ */
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+    vst1q_u8(outbuf, vrbitq_u8(vreinterpretq_u8_u64(ghash->x)));
+    return SECSuccess;
+}
+/*
+ * Fold `count` consecutive 16-byte blocks from `buf` into the GHASH state.
+ * For each block the input bytes are bit-reversed (vrbitq_u8) and XORed into
+ * the running value, which is then multiplied by ghash->h with 64x64-bit
+ * polynomial multiplies (vmull_p64 / vmull_high_p64) and folded back to 128
+ * bits using the constant 0x87. The final value is stored in ghash->x.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ const poly64x2_t p = vdupq_n_p64(0x87); /* reduction constant in both lanes */
+ const uint8x16_t zero = vdupq_n_u8(0);
+ const uint64x2_t h = ghash->h;
+ uint64x2_t ci = ghash->x;
+ unsigned int i;
+ uint8x16_t z_low, z_high;
+ uint8x16_t t_low, t_high;
+ poly64x2_t t1;
+ uint8x16_t t2;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ ci = vreinterpretq_u64_u8(veorq_u8(vreinterpretq_u8_u64(ci),
+ vrbitq_u8(vld1q_u8(buf))));
+
+ /* Do binary mult ghash->X = Ci * ghash->H.
+ * z_low is low(ci) * low(h) and z_high is high(ci) * high(h). t1 is h
+ * with its two 64-bit halves swapped, so t_low and t_high are the two
+ * cross products; their XOR (t2) is split over z_low and z_high. */
+ z_low = vreinterpretq_u8_p128(
+ vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u64(ci)),
+ (poly64_t)vget_low_p64(vreinterpretq_p64_u64(h))));
+ z_high = vreinterpretq_u8_p128(
+ vmull_high_p64(vreinterpretq_p64_u64(ci), vreinterpretq_p64_u64(h)));
+ t1 = vreinterpretq_p64_u8(
+ vextq_u8(vreinterpretq_u8_u64(h), vreinterpretq_u8_u64(h), 8));
+ t_low = vreinterpretq_u8_p128(
+ vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u64(ci)),
+ (poly64_t)vget_low_p64(t1)));
+ t_high = vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u64(ci), t1));
+ t2 = veorq_u8(t_high, t_low);
+ z_low = veorq_u8(z_low, vextq_u8(zero, t2, 8));
+ z_high = veorq_u8(z_high, vextq_u8(t2, zero, 8));
+
+ /* polynomial reduction: the upper half of z_high is multiplied by p and
+ * folded down first, then the (updated) lower half of z_high. */
+ t2 = vreinterpretq_u8_p128(vmull_high_p64(vreinterpretq_p64_u8(z_high), p));
+ z_high = veorq_u8(z_high, vextq_u8(t2, zero, 8));
+ z_low = veorq_u8(z_low, vextq_u8(zero, t2, 8));
+ ci = veorq_u64(vreinterpretq_u64_u8(z_low),
+ vreinterpretq_u64_p128(
+ vmull_p64((poly64_t)vget_low_p64(vreinterpretq_p64_u8(z_high)),
+ (poly64_t)vget_low_p64(p))));
+ }
+
+ ghash->x = ci;
+ return SECSuccess;
+}
+
+/*
+ * Prepare a gcmHashContext for the AArch64 PMULL path: clear the accumulator,
+ * register gcm_HashMult_hw, and store the hash key in ghash->h with h_high in
+ * lane 0 and h_low in lane 1. Each half is byte-swapped and then bit-reversed
+ * per byte, i.e. all 64 bits of each half end up reversed.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+    const uint64_t swapped_low = __builtin_bswap64(ghash->h_low);
+    const uint64_t swapped_high = __builtin_bswap64(ghash->h_high);
+    /* Start from a zeroed vector; this is also the workaround for the
+     * "used uninitialized in this function" error. */
+    uint64x2_t key = vdupq_n_u64(0);
+
+    key = vsetq_lane_u64(swapped_low, key, 1);
+    key = vsetq_lane_u64(swapped_high, key, 0);
+
+    ghash->h = vreinterpretq_u64_u8(vrbitq_u8(vreinterpretq_u8_u64(key)));
+    ghash->x = vdupq_n_u64(0);
+    ghash->ghash_mul = gcm_HashMult_hw;
+    ghash->hw = PR_TRUE;
+    return SECSuccess;
+}
+
+/* Reset the vector accumulator ghash->x to all zeroes; returns SECSuccess. */
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+    const uint64x2_t cleared = vdupq_n_u64(0);
+
+    ghash->x = cleared;
+    return SECSuccess;
+}
+
+#endif /* defined(__clang__) || (defined(__GNUC__) && __GNUC__ > 6) */
diff --git a/security/nss/lib/freebl/gcm-arm32-neon.c b/security/nss/lib/freebl/gcm-arm32-neon.c
new file mode 100644
index 0000000000..be04247701
--- /dev/null
+++ b/security/nss/lib/freebl/gcm-arm32-neon.c
@@ -0,0 +1,202 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "secerr.h"
+#include "prtypes.h"
+
+#if defined(IS_LITTLE_ENDIAN)
+
+#include <arm_neon.h>
+
+/*
+ * Write the GHASH accumulator to outbuf: the byte-reversed x_high goes to
+ * outbuf[0..7] and the byte-reversed x_low to outbuf[8..15].
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+    const uint8x8_t high_bytes = vrev64_u8(vcreate_u8(ghash->x_high));
+    const uint8x8_t low_bytes = vrev64_u8(vcreate_u8(ghash->x_low));
+
+    vst1_u8(outbuf, high_bytes);
+    vst1_u8(outbuf + 8, low_bytes);
+    return SECSuccess;
+}
+
+/* Carry-less multiplication. a * b = ret.
+ *
+ * The 64x64-bit product is assembled from 8x8-bit polynomial multiplies
+ * (vmull_p8) of a and b against byte-rotated copies of each other (vext_u8).
+ * The partial products are paired up (L, M, N, K), masked with k48/k32/k16,
+ * rotated into position with vextq_u8 and XORed onto D = A * B.
+ */
+static inline uint8x16_t
+clmul(const uint8x8_t a, const uint8x8_t b)
+{
+ uint8x16_t d, e, f, g, h, i, j, k, l, m, n;
+ uint8x8_t t_high, t_low;
+ uint8x16_t t0, t1, t2, t3;
+ const uint8x8_t k16 = vcreate_u8(0xffff); /* keeps the low 2 bytes */
+ const uint8x8_t k32 = vcreate_u8(0xffffffff); /* keeps the low 4 bytes */
+ const uint8x8_t k48 = vcreate_u8(0xffffffffffff); /* keeps the low 6 bytes */
+
+ // D = A * B
+ d = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a),
+ vreinterpret_p8_u8(b)));
+ // E = A * B1
+ e = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a),
+ vreinterpret_p8_u8(vext_u8(b, b, 1))));
+ // F = A1 * B
+ f = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 1)),
+ vreinterpret_p8_u8(b)));
+ // G = A * B2
+ g = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a),
+ vreinterpret_p8_u8(vext_u8(b, b, 2))));
+ // H = A2 * B
+ h = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 2)),
+ vreinterpret_p8_u8(b)));
+ // I = A * B3
+ i = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a),
+ vreinterpret_p8_u8(vext_u8(b, b, 3))));
+ // J = A3 * B
+ j = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(vext_u8(a, a, 3)),
+ vreinterpret_p8_u8(b)));
+ // K = A * B4
+ k = vreinterpretq_u8_p16(vmull_p8(vreinterpret_p8_u8(a),
+ vreinterpret_p8_u8(vext_u8(b, b, 4))));
+ // L = E + F
+ l = veorq_u8(e, f);
+ // M = G + H
+ m = veorq_u8(g, h);
+ // N = I + J
+ n = veorq_u8(i, j);
+
+ // t0 = (L) (P0 + P1) << 8
+ t_high = vget_high_u8(l);
+ t_low = vget_low_u8(l);
+ t_low = veor_u8(t_low, t_high);
+ t_high = vand_u8(t_high, k48);
+ t_low = veor_u8(t_low, t_high);
+ t0 = vcombine_u8(t_low, t_high);
+ t0 = vextq_u8(t0, t0, 15); // rotate the 16-byte vector by 15 bytes
+
+ // t1 = (M) (P2 + P3) << 16
+ t_high = vget_high_u8(m);
+ t_low = vget_low_u8(m);
+ t_low = veor_u8(t_low, t_high);
+ t_high = vand_u8(t_high, k32);
+ t_low = veor_u8(t_low, t_high);
+ t1 = vcombine_u8(t_low, t_high);
+ t1 = vextq_u8(t1, t1, 14);
+
+ // t2 = (N) (P4 + P5) << 24
+ t_high = vget_high_u8(n);
+ t_low = vget_low_u8(n);
+ t_low = veor_u8(t_low, t_high);
+ t_high = vand_u8(t_high, k16);
+ t_low = veor_u8(t_low, t_high);
+ t2 = vcombine_u8(t_low, t_high);
+ t2 = vextq_u8(t2, t2, 13);
+
+ // t3 = (K) (P6 + P7) << 32
+ t_high = vget_high_u8(k);
+ t_low = vget_low_u8(k);
+ t_low = veor_u8(t_low, t_high);
+ t_high = vdup_n_u8(0); // no mask step here: the high half is simply cleared
+ t3 = vcombine_u8(t_low, t_high);
+ t3 = vextq_u8(t3, t3, 12);
+
+ t0 = veorq_u8(t0, t1);
+ t2 = veorq_u8(t2, t3);
+ return veorq_u8(veorq_u8(d, t0), t2);
+}
+/*
+ * Fold `count` consecutive 16-byte blocks from `buf` into the GHASH state
+ * using NEON. The running value starts from x_low/x_high; each block is
+ * byte-reversed per 64-bit half (vrev64_u8) and XORed in, multiplied by
+ * h_low/h_high with three clmul() calls (Karatsuba), shifted left by one bit
+ * and reduced. The result is written back to x_high/x_low.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ const uint8x8_t h_low = vcreate_u8(ghash->h_low);
+ const uint8x8_t h_high = vcreate_u8(ghash->h_high);
+ uint8x16_t ci;
+ uint8x8_t ci_low;
+ uint8x8_t ci_high;
+ uint8x16_t z0, z2, z1a;
+ uint8x16_t z_high, z_low;
+ uint8x16_t t;
+ int64x2_t t1, t2, t3;
+ uint64x2_t z_low_l, z_low_r, z_high_l, z_high_r;
+ size_t i;
+
+ ci = vcombine_u8(vcreate_u8(ghash->x_low), vcreate_u8(ghash->x_high));
+
+ for (i = 0; i < count; i++, buf += 16) {
+ ci = veorq_u8(ci, vcombine_u8(vrev64_u8(vld1_u8(buf + 8)),
+ vrev64_u8(vld1_u8(buf))));
+ ci_high = vget_high_u8(ci);
+ ci_low = vget_low_u8(ci);
+
+ /* Do binary mult ghash->X = C * ghash->H (Karatsuba).
+ * z0 = low * low, z2 = high * high, z1a = middle term after XORing
+ * out z0 and z2; z_high:z_low is the combined 256-bit product. */
+ z0 = clmul(ci_low, h_low);
+ z2 = clmul(ci_high, h_high);
+ z1a = clmul(veor_u8(ci_high, ci_low), veor_u8(h_high, h_low));
+ z1a = veorq_u8(z0, z1a);
+ z1a = veorq_u8(z2, z1a);
+ z_high = vcombine_u8(veor_u8(vget_low_u8(z2), vget_high_u8(z1a)),
+ vget_high_u8(z2));
+ z_low = vcombine_u8(vget_low_u8(z0),
+ veor_u8(vget_high_u8(z0), vget_low_u8(z1a)));
+
+ /* Shift one (multiply by x) as gcm spec is stupid.
+ * Each 64-bit lane is shifted left by one; the bit shifted out of a
+ * lane (the *_r values) is ORed into the next higher lane. */
+ z_low_l = vshlq_n_u64(vreinterpretq_u64_u8(z_low), 1);
+ z_low_r = vshrq_n_u64(vreinterpretq_u64_u8(z_low), 63);
+ z_high_l = vshlq_n_u64(vreinterpretq_u64_u8(z_high), 1);
+ z_high_r = vshrq_n_u64(vreinterpretq_u64_u8(z_high), 63);
+ z_low = vreinterpretq_u8_u64(
+ vcombine_u64(vget_low_u64(z_low_l),
+ vorr_u64(vget_high_u64(z_low_l),
+ vget_low_u64(z_low_r))));
+ z_high = vreinterpretq_u8_u64(
+ vcombine_u64(vorr_u64(vget_low_u64(z_high_l),
+ vget_high_u64(z_low_r)),
+ vorr_u64(vget_high_u64(z_high_l),
+ vget_low_u64(z_high_r))));
+
+ /* Reduce */
+ t1 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 57);
+ t2 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 62);
+ t3 = vshlq_n_s64(vreinterpretq_s64_u8(z_low), 63);
+ t = vreinterpretq_u8_s64(veorq_s64(t1, veorq_s64(t2, t3)));
+
+ z_low = vcombine_u8(vget_low_u8(z_low),
+ veor_u8(vget_high_u8(z_low), vget_low_u8(t)));
+ z_high = vcombine_u8(veor_u8(vget_low_u8(z_high), vget_high_u8(t)),
+ vget_high_u8(z_high));
+
+ t = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(z_low), 1));
+ z_high = veorq_u8(z_high, z_low);
+ z_low = veorq_u8(z_low, t);
+ t = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(t), 6));
+ z_low = vreinterpretq_u8_u64(
+ vshrq_n_u64(vreinterpretq_u64_u8(z_low), 1));
+ z_low = veorq_u8(z_low, z_high);
+ ci = veorq_u8(z_low, t);
+ }
+
+ vst1_u8((uint8_t *)&ghash->x_high, vget_high_u8(ci));
+ vst1_u8((uint8_t *)&ghash->x_low, vget_low_u8(ci));
+ return SECSuccess;
+}
+
+/*
+ * Prepare a gcmHashContext for the ARM32 NEON path: zero the accumulator
+ * words, mark the context as hardware-backed and register gcm_HashMult_hw.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+    ghash->x_high = 0;
+    ghash->x_low = 0;
+    ghash->hw = PR_TRUE;
+    ghash->ghash_mul = gcm_HashMult_hw;
+    return SECSuccess;
+}
+
+/* Reset both halves of the accumulator (x_high, x_low); returns SECSuccess. */
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+    ghash->x_high = 0;
+    ghash->x_low = 0;
+    return SECSuccess;
+}
+
+#endif /* IS_LITTLE_ENDIAN */
diff --git a/security/nss/lib/freebl/gcm-ppc.c b/security/nss/lib/freebl/gcm-ppc.c
new file mode 100644
index 0000000000..9bd4f29569
--- /dev/null
+++ b/security/nss/lib/freebl/gcm-ppc.c
@@ -0,0 +1,109 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "gcm.h"
+#include "secerr.h"
+
+#if defined(USE_PPC_CRYPTO)
+
+/*
+ * Store the vector accumulator ghash->x to outbuf with vec_xst_be at
+ * offset 0. Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+    const vec_u8 acc_bytes = (vec_u8)ghash->x;
+
+    vec_xst_be(acc_bytes, 0, outbuf);
+    return SECSuccess;
+}
+/*
+ * Thin wrapper around the POWER `vpmsumd` instruction applied to a and b.
+ * Three build-time variants: the clang builtin, the GCC builtin on compiler
+ * versions listed as unaffected by GCC bug 91275, and inline assembly on
+ * the remaining GCC versions.
+ * NOTE(review): vpmsumd is presumably the carry-less multiply-sum of the
+ * 64-bit lanes - confirm against the Power ISA.
+ */
+static vec_u64
+vpmsumd(const vec_u64 a, const vec_u64 b)
+{
+#if defined(__clang__)
+ /* Clang uses a different name */
+ return __builtin_altivec_crypto_vpmsumd(a, b);
+#elif (__GNUC__ >= 10) || (__GNUC__ == 9 && __GNUC_MINOR__ >= 3) || \
+ (__GNUC__ == 8 && __GNUC_MINOR__ >= 4) || \
+ (__GNUC__ == 7 && __GNUC_MINOR__ >= 5)
+ /* GCC versions not affected by https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91275 */
+ return __builtin_crypto_vpmsumd(a, b);
+#else
+ /* GCC versions where this builtin is buggy */
+ vec_u64 vr;
+ __asm("vpmsumd %0, %1, %2"
+ : "=v"(vr)
+ : "v"(a), "v"(b));
+ return vr;
+#endif
+}
+/*
+ * Fold `count` consecutive 16-byte blocks from `buf` into the GHASH state
+ * using vpmsumd(). Each block is loaded with vec_xl_be and XORed into the
+ * running value, multiplied by ghash->h with three vpmsumd() calls
+ * (Karatsuba), shifted left by one bit and reduced with the constant
+ * 0xc2 << 56. The result is stored in ghash->x. Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ const vec_u8 leftshift = vec_splat_u8(1);
+ const vec_u64 onebit = (vec_u64){ 1, 0 };
+ const unsigned long long pd = 0xc2LLU << 56; /* reduction constant */
+
+ vec_u64 ci, v, r0, r1;
+ vec_u64 hibit;
+ unsigned i;
+
+ ci = ghash->x;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ /* clang needs the following cast away from const; maybe a bug in 7.0.0 */
+ v = (vec_u64)vec_xl_be(0, (unsigned char *)buf);
+ ci ^= v;
+
+ /* Do binary mult ghash->X = C * ghash->H (Karatsuba).
+ * Each vpmsumd() call gets a zero second lane, so it yields a single
+ * 64x64 product. r0 and r1 are the products of the matching lanes;
+ * v becomes the middle term, which is then spread over r0 and r1. */
+ r0 = vpmsumd((vec_u64){ ci[0], 0 }, (vec_u64){ ghash->h[0], 0 });
+ r1 = vpmsumd((vec_u64){ ci[1], 0 }, (vec_u64){ ghash->h[1], 0 });
+ v = (vec_u64){ ci[0] ^ ci[1], ghash->h[0] ^ ghash->h[1] };
+ v = vpmsumd((vec_u64){ v[0], 0 }, (vec_u64){ v[1], 0 });
+ v ^= r0;
+ v ^= r1;
+ r0 ^= (vec_u64){ 0, v[0] };
+ r1 ^= (vec_u64){ v[1], 0 };
+
+ /* Shift one (multiply by x) as gcm spec is stupid.
+ * hibit captures the bit shifted out of r0 so it can be ORed into r1. */
+ hibit = (vec_u64)vec_splat((vec_u8)r0, 15);
+ hibit = (vec_u64)vec_rl((vec_u8)hibit, leftshift);
+ hibit &= onebit;
+ r0 = vec_sll(r0, leftshift);
+ r1 = vec_sll(r1, leftshift);
+ r1 |= hibit;
+
+ /* Reduce */
+ v = vpmsumd((vec_u64){ r0[0], 0 }, (vec_u64){ pd, 0 });
+ r0 ^= (vec_u64){ 0, v[0] };
+ r1 ^= (vec_u64){ v[1], 0 };
+ v = vpmsumd((vec_u64){ r0[1], 0 }, (vec_u64){ pd, 0 });
+ r1 ^= v;
+ ci = r0 ^ r1;
+ }
+
+ ghash->x = ci;
+
+ return SECSuccess;
+}
+
+/*
+ * Prepare a gcmHashContext for the POWER crypto path: pack h_low/h_high into
+ * the vector ghash->h (in that element order), zero the accumulator, mark the
+ * context as hardware-backed and register gcm_HashMult_hw.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+    const vec_u64 hash_key = (vec_u64){ ghash->h_low, ghash->h_high };
+
+    ghash->h = hash_key;
+    ghash->x = (vec_u64)vec_splat_u32(0);
+    ghash->hw = PR_TRUE;
+    ghash->ghash_mul = gcm_HashMult_hw;
+    return SECSuccess;
+}
+
+/* Reset the vector accumulator ghash->x to all zeroes; returns SECSuccess. */
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+    const vec_u64 cleared = (vec_u64)vec_splat_u32(0);
+
+    ghash->x = cleared;
+    return SECSuccess;
+}
+
+#endif /* defined(USE_PPC_CRYPTO) */
diff --git a/security/nss/lib/freebl/gcm-x86.c b/security/nss/lib/freebl/gcm-x86.c
new file mode 100644
index 0000000000..e34d633943
--- /dev/null
+++ b/security/nss/lib/freebl/gcm-x86.c
@@ -0,0 +1,127 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "gcm.h"
+#include "secerr.h"
+
+#include <wmmintrin.h> /* clmul */
+
+#define WRITE64(x, bytes) \
+ (bytes)[0] = (x) >> 56; \
+ (bytes)[1] = (x) >> 48; \
+ (bytes)[2] = (x) >> 40; \
+ (bytes)[3] = (x) >> 32; \
+ (bytes)[4] = (x) >> 24; \
+ (bytes)[5] = (x) >> 16; \
+ (bytes)[6] = (x) >> 8; \
+ (bytes)[7] = (x);
+
+/*
+ * Write the 128-bit accumulator ghash->x to outbuf[0..15]: the upper 64-bit
+ * half first, then the lower half, each most significant byte first.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+    /* halves[0] receives the low 64 bits of x, halves[1] the high 64 bits. */
+    uint64_t halves[2];
+
+    _mm_storeu_si128((__m128i *)halves, ghash->x);
+    /* maxout must be larger than 16 byte (checked by the caller). */
+    WRITE64(halves[1], outbuf);
+    WRITE64(halves[0], outbuf + 8);
+    return SECSuccess;
+}
+/*
+ * Fold `count` consecutive 16-byte blocks from `buf` into the GHASH state
+ * using PCLMULQDQ. Each block is assembled into a vector with buf[0..1] in
+ * the highest 16-bit lane, XORed with ghash->x, multiplied by ghash->h with
+ * four _mm_clmulepi64_si128 partial products, shifted left by one bit and
+ * reduced. ghash->x is updated after every block.
+ * Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ size_t i;
+ pre_align __m128i z_high post_align;
+ pre_align __m128i z_low post_align;
+ pre_align __m128i C post_align;
+ pre_align __m128i D post_align;
+ pre_align __m128i E post_align;
+ pre_align __m128i F post_align;
+ pre_align __m128i bin post_align;
+ pre_align __m128i Ci post_align;
+ pre_align __m128i tmp post_align;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
+ ((uint16_t)buf[2] << 8) | buf[3],
+ ((uint16_t)buf[4] << 8) | buf[5],
+ ((uint16_t)buf[6] << 8) | buf[7],
+ ((uint16_t)buf[8] << 8) | buf[9],
+ ((uint16_t)buf[10] << 8) | buf[11],
+ ((uint16_t)buf[12] << 8) | buf[13],
+ ((uint16_t)buf[14] << 8) | buf[15]);
+ Ci = _mm_xor_si128(bin, ghash->x);
+
+ /* Do binary mult ghash->X = Ci * ghash->H.
+ * C = low * low, D = high * high, E and F are the two cross products
+ * (selected by the immediate); z_high:z_low is the 256-bit result. */
+ C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
+ D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
+ E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
+ F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
+ tmp = _mm_xor_si128(E, F);
+ z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
+ z_high = _mm_unpackhi_epi64(z_high, D);
+ z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
+ z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
+
+ /* Shift one to the left (multiply by x) as gcm spec is stupid. */
+ C = _mm_slli_si128(z_low, 8);
+ E = _mm_srli_epi64(C, 63);
+ D = _mm_slli_si128(z_high, 8);
+ F = _mm_srli_epi64(D, 63);
+ /* Carry over */
+ C = _mm_srli_si128(z_low, 8);
+ D = _mm_srli_epi64(C, 63);
+ z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
+ z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
+
+ /* Reduce */
+ C = _mm_slli_si128(z_low, 8);
+ /* D = z_low << 127 */
+ D = _mm_slli_epi64(C, 63);
+ /* E = z_low << 126 */
+ E = _mm_slli_epi64(C, 62);
+ /* F = z_low << 121 */
+ F = _mm_slli_epi64(C, 57);
+ /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
+ z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
+ C = _mm_srli_si128(z_low, 8);
+ /* D = z_low >> 1 */
+ D = _mm_slli_epi64(C, 63);
+ D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
+ /* E = z_low >> 2 */
+ E = _mm_slli_epi64(C, 62);
+ E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
+ /* F = z_low >> 7 */
+ F = _mm_slli_epi64(C, 57);
+ F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
+ /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
+ ghash->x = _mm_xor_si128(_mm_xor_si128(
+ _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
+ F);
+ }
+ return SECSuccess;
+}
+
+/*
+ * Prepare a gcmHashContext for the PCLMULQDQ path: zero the accumulator,
+ * load the hash key into ghash->h (h_high in the upper 64 bits, h_low in the
+ * lower 64 bits), mark the context as hardware-backed and register
+ * gcm_HashMult_hw. Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+    const uint32_t high_upper = (uint32_t)(ghash->h_high >> 32);
+    const uint32_t high_lower = (uint32_t)ghash->h_high;
+    const uint32_t low_upper = (uint32_t)(ghash->h_low >> 32);
+    const uint32_t low_lower = (uint32_t)ghash->h_low;
+
+    /* MSVC requires __m64 to load epi64. */
+    ghash->h = _mm_set_epi32(high_upper, high_lower, low_upper, low_lower);
+    ghash->x = _mm_setzero_si128();
+    ghash->hw = PR_TRUE;
+    ghash->ghash_mul = gcm_HashMult_hw;
+    return SECSuccess;
+}
+
+/* Reset the vector accumulator ghash->x to all zeroes; returns SECSuccess. */
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+    const __m128i cleared = _mm_setzero_si128();
+
+    ghash->x = cleared;
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c
new file mode 100644
index 0000000000..2dae72419c
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.c
@@ -0,0 +1,1171 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Thanks to Thomas Pornin for the ideas on how to implement the constant-time
+ * binary multiplication. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapii.h"
+#include "blapit.h"
+#include "blapi.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+/* old gcc doesn't support some poly64x2_t intrinsic */
+#if defined(__aarch64__) && defined(IS_LITTLE_ENDIAN) && \
+ (defined(__clang__) || defined(__GNUC__) && __GNUC__ > 6)
+#define USE_ARM_GCM
+#elif defined(__arm__) && defined(IS_LITTLE_ENDIAN) && \
+ !defined(NSS_DISABLE_ARM32_NEON)
+/* We don't test on big endian platform, so disable this on big endian. */
+#define USE_ARM_GCM
+#endif
+
+/* Forward declarations */
+SECStatus gcm_HashInit_hw(gcmHashContext *ghash);
+SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf);
+SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
+SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash);
+SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
+SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
+
+/* Stub definitions for the above *_hw functions, which shouldn't be
+ * used unless NSS_X86_OR_X64, USE_ARM_GCM or USE_PPC_CRYPTO is defined */
+#if !defined(NSS_X86_OR_X64) && !defined(USE_ARM_GCM) && !defined(USE_PPC_CRYPTO)
+/* Shared body of the *_hw stubs: record a library failure and report it. */
+static SECStatus
+gcm_hw_unavailable(void)
+{
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/* Stub: sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure. */
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+    return gcm_hw_unavailable();
+}
+
+/* Stub: sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure. */
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+                unsigned int count)
+{
+    return gcm_hw_unavailable();
+}
+
+/* Stub: sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure. */
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+    return gcm_hw_unavailable();
+}
+
+/* Stub: sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure. */
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+    return gcm_hw_unavailable();
+}
+#endif /* !NSS_X86_OR_X64 && !USE_ARM_GCM && !USE_PPC_CRYPTO */
+
+/*
+ * Decode bytes[0..7] as a 64-bit integer, most significant byte first.
+ */
+uint64_t
+get64(const unsigned char *bytes)
+{
+    uint64_t value = 0;
+    unsigned int idx;
+
+    for (idx = 0; idx < 8; idx++) {
+        value = (value << 8) | bytes[idx];
+    }
+    return value;
+}
+
+/* Initialize a gcmHashContext
+ *
+ * Resets the length counter, the buffer fill level and counterBuf, and reads
+ * the 16-byte hash key H as two big-endian 64-bit words into h_high (H[0..7])
+ * and h_low (H[8..15]). It then selects the multiplication routine:
+ *  - if the support check compiled in for this build (arm_pmull_support,
+ *    arm_neon_support, ppc_crypto_support or clmul_support) succeeds and
+ *    `sw` is false, gcm_HashInit_hw() sets up the hardware path;
+ *  - otherwise the software routine is installed (gcm_HashMult_sftw with
+ *    HAVE_INT128_SUPPORT, gcm_HashMult_sftw32 without) and x_high/x_low are
+ *    zeroed.
+ * Returns the result of gcm_HashInit_hw() on the hardware path and
+ * SECSuccess on the software path.
+ */
+SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
+{
+ SECStatus rv = SECSuccess;
+
+ ghash->cLen = 0;
+ ghash->bufLen = 0;
+ PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+
+ ghash->h_low = get64(H + 8);
+ ghash->h_high = get64(H);
+#ifdef USE_ARM_GCM
+#if defined(__aarch64__)
+ if (arm_pmull_support() && !sw) {
+#else
+ if (arm_neon_support() && !sw) {
+#endif
+#elif defined(USE_PPC_CRYPTO)
+ if (ppc_crypto_support() && !sw) {
+#else
+ if (clmul_support() && !sw) {
+#endif
+ rv = gcm_HashInit_hw(ghash);
+ } else {
+/* We fall back to the software implementation if we can't use / don't
+ * want to use pclmul. */
+#ifdef HAVE_INT128_SUPPORT
+ ghash->ghash_mul = gcm_HashMult_sftw;
+#else
+ ghash->ghash_mul = gcm_HashMult_sftw32;
+#endif
+ ghash->x_high = ghash->x_low = 0;
+ ghash->hw = PR_FALSE;
+ }
+ return rv;
+}
+
+#ifdef HAVE_INT128_SUPPORT
+/* Binary multiplication x * y = r_high << 64 | r_low.
+ *
+ * Carry-less multiply built from ordinary integer multiplications: x and y
+ * are each split into five parts with masks that keep every fifth bit
+ * (m2..m5 are m1 shifted left by 1..4 bits, truncated to 128 bits). The
+ * parts are multiplied pairwise, the products XORed, and each XOR is masked
+ * again so that only the bit positions belonging to that mask survive.
+ * The 128-bit result is returned through r_high and r_low.
+ */
+void
+bmul(uint64_t x, uint64_t y, uint64_t *r_high, uint64_t *r_low)
+{
+ uint128_t x1, x2, x3, x4, x5;
+ uint128_t y1, y2, y3, y4, y5;
+ uint128_t r, z;
+
+ uint128_t m1 = (uint128_t)0x2108421084210842 << 64 | 0x1084210842108421;
+ uint128_t m2 = (uint128_t)0x4210842108421084 << 64 | 0x2108421084210842;
+ uint128_t m3 = (uint128_t)0x8421084210842108 << 64 | 0x4210842108421084;
+ uint128_t m4 = (uint128_t)0x0842108421084210 << 64 | 0x8421084210842108;
+ uint128_t m5 = (uint128_t)0x1084210842108421 << 64 | 0x0842108421084210;
+
+ x1 = x & m1;
+ y1 = y & m1;
+ x2 = x & m2;
+ y2 = y & m2;
+ x3 = x & m3;
+ y3 = y & m3;
+ x4 = x & m4;
+ y4 = y & m4;
+ x5 = x & m5;
+ y5 = y & m5;
+
+ z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2);
+ r = z & m1;
+ z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3);
+ r |= z & m2;
+ z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4);
+ r |= z & m3;
+ z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5);
+ r |= z & m4;
+ z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1);
+ r |= z & m5;
+
+ *r_high = (uint64_t)(r >> 64);
+ *r_low = (uint64_t)r;
+}
+/*
+ * Software GHASH multiply for builds with 128-bit integer support. Folds
+ * `count` consecutive 16-byte blocks from `buf` into x_high/x_low: each
+ * block is read as two big-endian 64-bit words (get64) and XORed into the
+ * running value, multiplied by h_high/h_low with three bmul() calls
+ * (Karatsuba), shifted left by one bit and reduced. The result is written
+ * back to x_low/x_high after the last block. Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ uint64_t ci_low, ci_high;
+ size_t i;
+ uint64_t z2_low, z2_high, z0_low, z0_high, z1a_low, z1a_high;
+ uint128_t z_high = 0, z_low = 0;
+
+ ci_low = ghash->x_low;
+ ci_high = ghash->x_high;
+ for (i = 0; i < count; i++, buf += 16) {
+ ci_low ^= get64(buf + 8);
+ ci_high ^= get64(buf);
+
+ /* Do binary mult ghash->X = C * ghash->H (Karatsuba).
+ * z2 = high * high, z0 = low * low, z1a = middle term after XORing
+ * out z2 and z0; z_high:z_low is the combined 256-bit product. */
+ bmul(ci_high, ghash->h_high, &z2_high, &z2_low);
+ bmul(ci_low, ghash->h_low, &z0_high, &z0_low);
+ bmul(ci_high ^ ci_low, ghash->h_high ^ ghash->h_low, &z1a_high, &z1a_low);
+ z1a_high ^= z2_high ^ z0_high;
+ z1a_low ^= z2_low ^ z0_low;
+ z_high = ((uint128_t)z2_high << 64) | (z2_low ^ z1a_high);
+ z_low = (((uint128_t)z0_high << 64) | z0_low) ^ (((uint128_t)z1a_low) << 64);
+
+ /* Shift one (multiply by x) as gcm spec is stupid. */
+ z_high = (z_high << 1) | (z_low >> 127);
+ z_low <<= 1;
+
+ /* Reduce */
+ z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121);
+ z_high ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7);
+ ci_low = (uint64_t)z_high;
+ ci_high = (uint64_t)(z_high >> 64);
+ }
+ ghash->x_low = ci_low;
+ ghash->x_high = ci_high;
+ return SECSuccess;
+}
+#else
+/*
+ * Binary multiplication x * y = r_high << 32 | r_low.
+ *
+ * x and y are each split into four parts holding every fourth bit. Part
+ * products whose bit offsets add up to the same residue modulo 4 are XORed
+ * together and masked so that only the bits of that residue class survive.
+ */
+void
+bmul32(uint32_t x, uint32_t y, uint32_t *r_high, uint32_t *r_low)
+{
+    /* Every fourth bit starting at bit 0, in 32-bit and 64-bit width. */
+    const uint32_t lane32 = (uint32_t)0x11111111;
+    const uint64_t lane64 = ((uint64_t)lane32 << 32) | lane32;
+    uint32_t xs[4];
+    uint32_t ys[4];
+    uint64_t product = 0;
+    unsigned int k;
+    unsigned int j;
+
+    for (k = 0; k < 4; k++) {
+        xs[k] = x & (lane32 << k);
+        ys[k] = y & (lane32 << k);
+    }
+
+    for (k = 0; k < 4; k++) {
+        uint64_t acc = 0;
+
+        /* Pair xs[j] with the ys part whose offset makes the sum k (mod 4). */
+        for (j = 0; j < 4; j++) {
+            acc ^= (uint64_t)xs[j] * ys[(k - j) & 3];
+        }
+        product |= acc & (lane64 << k);
+    }
+
+    *r_high = (uint32_t)(product >> 32);
+    *r_low = (uint32_t)product;
+}
+/*
+ * Software GHASH multiply for builds without 128-bit integer support. Folds
+ * `count` consecutive 16-byte blocks from `buf` into x_high/x_low. Each
+ * block is read as two big-endian 64-bit words (get64), XORed with the
+ * current state, and multiplied by h_high/h_low using nine bmul32() calls
+ * arranged as a two-level Karatsuba; the 256-bit product is then shifted
+ * left by one bit and reduced. ghash->x_high/x_low are updated after every
+ * block. Always returns SECSuccess.
+ */
+SECStatus
+gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ size_t i;
+ uint64_t ci_low, ci_high;
+ uint64_t z_high_h, z_high_l, z_low_h, z_low_l;
+ uint32_t ci_high_h, ci_high_l, ci_low_h, ci_low_l;
+ uint32_t b_a_h, b_a_l, a_a_h, a_a_l, b_b_h, b_b_l;
+ uint32_t a_b_h, a_b_l, b_c_h, b_c_l, a_c_h, a_c_l, c_c_h, c_c_l;
+ uint32_t ci_highXlow_h, ci_highXlow_l, c_a_h, c_a_l, c_b_h, c_b_l;
+
+ uint32_t h_high_h = (uint32_t)(ghash->h_high >> 32);
+ uint32_t h_high_l = (uint32_t)ghash->h_high;
+ uint32_t h_low_h = (uint32_t)(ghash->h_low >> 32);
+ uint32_t h_low_l = (uint32_t)ghash->h_low;
+ uint32_t h_highXlow_h = h_high_h ^ h_low_h;
+ uint32_t h_highXlow_l = h_high_l ^ h_low_l;
+ uint32_t h_highX_xored = h_highXlow_h ^ h_highXlow_l;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ ci_low = ghash->x_low ^ get64(buf + 8);
+ ci_high = ghash->x_high ^ get64(buf);
+ ci_low_h = (uint32_t)(ci_low >> 32);
+ ci_low_l = (uint32_t)ci_low;
+ ci_high_h = (uint32_t)(ci_high >> 32);
+ ci_high_l = (uint32_t)ci_high;
+ ci_highXlow_h = ci_high_h ^ ci_low_h;
+ ci_highXlow_l = ci_high_l ^ ci_low_l;
+
+ /* Do binary mult ghash->X = C * ghash->H (recursive Karatsuba). */
+ bmul32(ci_high_h, h_high_h, &a_a_h, &a_a_l);
+ bmul32(ci_high_l, h_high_l, &a_b_h, &a_b_l);
+ bmul32(ci_high_h ^ ci_high_l, h_high_h ^ h_high_l, &a_c_h, &a_c_l);
+ a_c_h ^= a_a_h ^ a_b_h;
+ a_c_l ^= a_a_l ^ a_b_l;
+ a_a_l ^= a_c_h;
+ a_b_h ^= a_c_l;
+ /* ci_high * h_high = a_a_h:a_a_l:a_b_h:a_b_l */
+
+ bmul32(ci_low_h, h_low_h, &b_a_h, &b_a_l);
+ bmul32(ci_low_l, h_low_l, &b_b_h, &b_b_l);
+ bmul32(ci_low_h ^ ci_low_l, h_low_h ^ h_low_l, &b_c_h, &b_c_l);
+ b_c_h ^= b_a_h ^ b_b_h;
+ b_c_l ^= b_a_l ^ b_b_l;
+ b_a_l ^= b_c_h;
+ b_b_h ^= b_c_l;
+ /* ci_low * h_low = b_a_h:b_a_l:b_b_h:b_b_l */
+
+ bmul32(ci_highXlow_h, h_highXlow_h, &c_a_h, &c_a_l);
+ bmul32(ci_highXlow_l, h_highXlow_l, &c_b_h, &c_b_l);
+ bmul32(ci_highXlow_h ^ ci_highXlow_l, h_highX_xored, &c_c_h, &c_c_l);
+ c_c_h ^= c_a_h ^ c_b_h;
+ c_c_l ^= c_a_l ^ c_b_l;
+ c_a_l ^= c_c_h;
+ c_b_h ^= c_c_l;
+ /* (ci_high ^ ci_low) * (h_high ^ h_low) = c_a_h:c_a_l:c_b_h:c_b_l */
+
+ c_a_h ^= b_a_h ^ a_a_h;
+ c_a_l ^= b_a_l ^ a_a_l;
+ c_b_h ^= b_b_h ^ a_b_h;
+ c_b_l ^= b_b_l ^ a_b_l;
+ z_high_h = ((uint64_t)a_a_h << 32) | a_a_l;
+ z_high_l = (((uint64_t)a_b_h << 32) | a_b_l) ^
+ (((uint64_t)c_a_h << 32) | c_a_l);
+ z_low_h = (((uint64_t)b_a_h << 32) | b_a_l) ^
+ (((uint64_t)c_b_h << 32) | c_b_l);
+ z_low_l = ((uint64_t)b_b_h << 32) | b_b_l;
+
+ /* Shift one (multiply by x) as gcm spec is stupid.
+ * The top bit of each 64-bit word is carried into the next higher one. */
+ z_high_h = z_high_h << 1 | z_high_l >> 63;
+ z_high_l = z_high_l << 1 | z_low_h >> 63;
+ z_low_h = z_low_h << 1 | z_low_l >> 63;
+ z_low_l <<= 1;
+
+ /* Reduce */
+ z_low_h ^= (z_low_l << 63) ^ (z_low_l << 62) ^ (z_low_l << 57);
+ z_high_h ^= z_low_h ^ (z_low_h >> 1) ^ (z_low_h >> 2) ^ (z_low_h >> 7);
+ z_high_l ^= z_low_l ^ (z_low_l >> 1) ^ (z_low_l >> 2) ^ (z_low_l >> 7) ^
+ (z_low_h << 63) ^ (z_low_h << 62) ^ (z_low_h << 57);
+ ghash->x_high = z_high_h;
+ ghash->x_low = z_high_l;
+ }
+ return SECSuccess;
+}
+#endif /* HAVE_INT128_SUPPORT */
+
+/*
+ * Reset the GHASH accumulator. When the context uses the hardware path the
+ * vector state is cleared through gcm_HashZeroX_hw() and its status is
+ * returned; x_high/x_low are zeroed in every case.
+ */
+static SECStatus
+gcm_zeroX(gcmHashContext *ghash)
+{
+    const SECStatus status = ghash->hw ? gcm_HashZeroX_hw(ghash) : SECSuccess;
+
+    ghash->x_low = 0;
+    ghash->x_high = 0;
+    return status;
+}
+
+/*
+ * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult
+ * function always takes AES_BLOCK_SIZE lengths of data. gcmHash_Update will
+ * format the data properly.
+ *
+ * ghash - hash context; cLen is advanced by the number of bits in `len`.
+ * buf   - input data.
+ * len   - number of bytes in buf.
+ *
+ * Bytes that do not fill a whole block are kept in ghash->buffer and hashed
+ * once later input (or gcmHash_Sync) completes the block.
+ * Returns SECSuccess, or SECFailure if the multiplication routine fails.
+ */
+SECStatus
+gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
+               unsigned int len)
+{
+    unsigned int blocks;
+    SECStatus rv;
+
+    /* Widen before multiplying: the bit count of a large `len` does not fit
+     * in unsigned int, and a wrapped product would corrupt the length that
+     * gcmHash_Sync later serializes from cLen. */
+    ghash->cLen += (uint64_t)len * PR_BITS_PER_BYTE;
+
+    /* first deal with the current buffer of data. Try to fill it out so
+     * we can hash it */
+    if (ghash->bufLen) {
+        unsigned int needed = PR_MIN(len, AES_BLOCK_SIZE - ghash->bufLen);
+        if (needed != 0) {
+            PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed);
+        }
+        buf += needed;
+        len -= needed;
+        ghash->bufLen += needed;
+        if (len == 0) {
+            /* didn't add enough to hash the data, nothing more to do */
+            return SECSuccess;
+        }
+        PORT_Assert(ghash->bufLen == AES_BLOCK_SIZE);
+        /* hash the buffer and clear it */
+        rv = ghash->ghash_mul(ghash, ghash->buffer, 1);
+        PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE);
+        ghash->bufLen = 0;
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+    }
+    /* now hash any full blocks remaining in the data stream */
+    blocks = len / AES_BLOCK_SIZE;
+    if (blocks) {
+        rv = ghash->ghash_mul(ghash, buf, blocks);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        buf += blocks * AES_BLOCK_SIZE;
+        len -= blocks * AES_BLOCK_SIZE;
+    }
+
+    /* save any remainder in the buffer to be hashed with the next call */
+    if (len != 0) {
+        PORT_Memcpy(ghash->buffer, buf, len);
+        ghash->bufLen = len;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Close out the current data segment: remember its bit length for the
+ * final length block and push any buffered partial block, zero padded,
+ * through the GHASH multiply.
+ */
+static SECStatus
+gcmHash_Sync(gcmHashContext *ghash)
+{
+    uint64_t bitCount = ghash->cLen;
+    SECStatus status;
+    int idx;
+
+    /* The length recorded by the previous sync moves to the first half of
+     * counterBuf ... */
+    PORT_Memcpy(ghash->counterBuf, &ghash->counterBuf[GCM_HASH_LEN_LEN],
+                GCM_HASH_LEN_LEN);
+    /* ... and the length of this segment is stored big-endian in the second
+     * half, filled from the least significant byte backwards. */
+    for (idx = GCM_HASH_LEN_LEN - 1; idx >= 0; idx--) {
+        ghash->counterBuf[GCM_HASH_LEN_LEN + idx] = bitCount & 0xff;
+        bitCount >>= PR_BITS_PER_BYTE;
+    }
+    ghash->cLen = 0;
+
+    if (ghash->bufLen == 0) {
+        /* nothing buffered, so nothing to pad and hash */
+        return SECSuccess;
+    }
+
+    /* zero fill the tail of the buffer, hash it, then wipe it */
+    PORT_Memset(ghash->buffer + ghash->bufLen, 0,
+                AES_BLOCK_SIZE - ghash->bufLen);
+    status = ghash->ghash_mul(ghash, ghash->buffer, 1);
+    PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE);
+    ghash->bufLen = 0;
+
+    return (status == SECSuccess) ? SECSuccess : SECFailure;
+}
+
+#define WRITE64(x, bytes) \
+ (bytes)[0] = (x) >> 56; \
+ (bytes)[1] = (x) >> 48; \
+ (bytes)[2] = (x) >> 40; \
+ (bytes)[3] = (x) >> 32; \
+ (bytes)[4] = (x) >> 24; \
+ (bytes)[5] = (x) >> 16; \
+ (bytes)[6] = (x) >> 8; \
+ (bytes)[7] = (x);
+
+/*
+ * Finish the hash: sync the last data segment, hash the block holding the
+ * two recorded lengths, and copy at most AES_BLOCK_SIZE bytes of the
+ * result "T" to outbuf. *outlen receives the number of bytes copied.
+ * Nothing is written to outbuf or *outlen on failure.
+ */
+SECStatus
+gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+              unsigned int *outlen, unsigned int maxout)
+{
+    unsigned char digest[MAX_BLOCK_SIZE];
+    unsigned int copyLen = (maxout > AES_BLOCK_SIZE) ? AES_BLOCK_SIZE : maxout;
+    SECStatus status;
+
+    status = gcmHash_Sync(ghash);
+    if (status == SECSuccess) {
+        /* counterBuf holds both 64-bit lengths, i.e. one AES block */
+        status = ghash->ghash_mul(ghash, ghash->counterBuf,
+                                  (GCM_HASH_LEN_LEN * 2) / AES_BLOCK_SIZE);
+    }
+    if (status == SECSuccess) {
+        if (ghash->hw) {
+            status = gcm_HashWrite_hw(ghash, digest);
+        } else {
+            /* software accumulator: high word first, then low word */
+            WRITE64(ghash->x_high, digest);
+            WRITE64(ghash->x_low, digest + 8);
+        }
+    }
+    if (status == SECSuccess) {
+        PORT_Memcpy(outbuf, digest, copyLen);
+        *outlen = copyLen;
+    }
+
+    /* never leave the hash value behind on the stack */
+    PORT_Memset(digest, 0, sizeof(digest));
+    return status;
+}
+
+/*
+ * Restart the hash for a new message: clear the lengths, the partial-block
+ * buffer and the accumulator, then hash the Additional Authenticated Data
+ * (if any) and sync so that its length is recorded.
+ */
+SECStatus
+gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
+              unsigned int AADLen)
+{
+    /* SP800-38D limit on the AAD length, in bytes */
+    const unsigned long long maxAadBytes = (1ULL << 61) - 1;
+    SECStatus status;
+
+    /* The limit can only be exceeded where AADLen is at least 64 bits wide */
+    if (sizeof(AADLen) >= 8 && (unsigned long long)AADLen > maxAadBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    ghash->bufLen = 0;
+    ghash->cLen = 0;
+    PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2);
+    status = gcm_zeroX(ghash);
+    if (status != SECSuccess) {
+        return status;
+    }
+
+    if (AADLen == 0) {
+        return SECSuccess;
+    }
+
+    /* kick things off with the AAD, then close that segment */
+    if (gcmHash_Update(ghash, AAD, AADLen) != SECSuccess ||
+        gcmHash_Sync(ghash) != SECSuccess) {
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/**************************************************************************
+ * Now implement the GCM using gcmHash and CTR *
+ **************************************************************************/
+
+/* state to handle the full GCM operation (hash and counter) */
+struct GCMContextStr {
+ /* GHASH state; allocated in GCM_CreateContext and released in
+ * GCM_DestroyContext through its 'mem' pointer */
+ gcmHashContext *ghash_context;
+ /* embedded counter-mode state, set up by gcm_InitCounter */
+ CTRContext ctr_context;
+ /* underlying block cipher function and its context; the context is
+ * supplied by the caller of GCM_CreateContext */
+ freeblCipherFunc cipher;
+ void *cipher_context;
+ /* requested tag length in bits, validated in gcm_InitCounter */
+ unsigned long tagBits;
+ /* counter-mode output over a zero block, computed in gcm_InitCounter and
+ * XORed into the hash result by gcm_GetTag */
+ unsigned char tagKey[MAX_BLOCK_SIZE];
+ /* PR_TRUE when GCM_CreateContext was given parameters and initialized the
+ * counter itself; PR_FALSE for the per-message (AEAD) interface */
+ PRBool ctr_context_init;
+ /* IV generation state used by GCM_EncryptAEAD */
+ gcmIVContext gcm_iv;
+};
+
+/* Forward declaration: derives the initial counter block from iv, sets up
+ * gcm->ctr_context, computes gcm->tagKey and restarts the hash with aad.
+ * Defined after GCM_CreateContext, which calls it. */
+SECStatus gcm_InitCounter(GCMContext *gcm, const unsigned char *iv,
+ unsigned int ivLen, unsigned int tagBits,
+ const unsigned char *aad, unsigned int aadLen);
+
+/*
+ * Allocate a GCM context that wraps the block cipher 'cipher' and its
+ * context 'context'.
+ *
+ * The hash subkey H is obtained by running the cipher over an all-zero
+ * block and is handed to gcmHash_InitContext. If 'params' is non-NULL it is
+ * read as CK_NSS_GCM_PARAMS and the counter, tag key and AAD are set up
+ * immediately. If 'params' is NULL only the key-dependent state is
+ * prepared and the counter is initialized per message by the AEAD entry
+ * points.
+ *
+ * Returns the new context, or NULL on failure.
+ */
+GCMContext *
+GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *params)
+{
+    GCMContext *gcm = NULL;
+    gcmHashContext *ghash = NULL;
+    unsigned char H[MAX_BLOCK_SIZE];
+    unsigned int tmp;
+    const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params;
+    SECStatus rv;
+#ifdef DISABLE_HW_GCM
+    const PRBool sw = PR_TRUE;
+#else
+    const PRBool sw = PR_FALSE;
+#endif
+
+    gcm = PORT_ZNew(GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+    gcm->cipher = cipher;
+    gcm->cipher_context = context;
+    ghash = PORT_ZNewAligned(gcmHashContext, 16, mem);
+    if (ghash == NULL) {
+        /* Bail out before a NULL hash context reaches gcmHash_InitContext.
+         * The allocator is presumed to have set the error code already. */
+        goto loser;
+    }
+
+    /* first plug in the ghash context */
+    gcm->ghash_context = ghash;
+    PORT_Memset(H, 0, AES_BLOCK_SIZE);
+    rv = (*cipher)(context, H, &tmp, AES_BLOCK_SIZE, H, AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = gcmHash_InitContext(ghash, H, sw);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    gcm_InitIVContext(&gcm->gcm_iv);
+    gcm->ctr_context_init = PR_FALSE;
+
+    /* if gcmParams is NULL, then we are creating a PKCS #11 MESSAGE
+     * style context, in which we initialize the key once, then do separate
+     * iv/aad's for each message. In that case we only initialize the key
+     * and ghash. We initialize the counter in each separate message */
+    if (gcmParams == NULL) {
+        /* OK we are finished with init, if we are doing MESSAGE interface,
+         * return from here. H is key material, so wipe it on this exit
+         * path as well. */
+        PORT_Memset(H, 0, AES_BLOCK_SIZE);
+        return gcm;
+    }
+
+    rv = gcm_InitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen,
+                         gcmParams->ulTagBits, gcmParams->pAAD,
+                         gcmParams->ulAADLen);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    PORT_Memset(H, 0, AES_BLOCK_SIZE);
+    gcm->ctr_context_init = PR_TRUE;
+    return gcm;
+
+loser:
+    PORT_Memset(H, 0, AES_BLOCK_SIZE);
+    if (ghash && ghash->mem) {
+        /* 'mem' is the pointer that must be freed; read it before wiping */
+        void *mem = ghash->mem;
+        PORT_Memset(ghash, 0, sizeof(gcmHashContext));
+        PORT_Free(mem);
+    }
+    if (gcm) {
+        PORT_ZFree(gcm, sizeof(GCMContext));
+    }
+    return NULL;
+}
+
+/*
+ * Prepare 'gcm' for one message: validate the IV length and tag size,
+ * derive the initial counter block from the IV, start counter mode,
+ * compute the tag key, and restart the hash with the AAD.
+ */
+SECStatus
+gcm_InitCounter(GCMContext *gcm, const unsigned char *iv, unsigned int ivLen,
+                unsigned int tagBits, const unsigned char *aad,
+                unsigned int aadLen)
+{
+    gcmHashContext *hash = gcm->ghash_context;
+    CK_AES_CTR_PARAMS ctr;
+    PRBool ctrLive = PR_FALSE;
+    unsigned int produced;
+    SECStatus status;
+
+    /* parameter validation */
+    if (ivLen == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto loser;
+    }
+    switch (tagBits) {
+        case 128:
+        case 120:
+        case 112:
+        case 104:
+        case 96:
+        case 64:
+        case 32:
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            goto loser;
+    }
+
+    /* build the initial counter block */
+    ctr.ulCounterBits = 32;
+    PORT_Memset(ctr.cb, 0, sizeof(ctr.cb));
+    if (ivLen != 12) {
+        /* any other IV length: the counter block is the hash of the IV */
+        status = gcmHash_Reset(hash, NULL, 0);
+        if (status != SECSuccess) {
+            goto loser;
+        }
+        status = gcmHash_Update(hash, iv, ivLen);
+        if (status != SECSuccess) {
+            goto loser;
+        }
+        status = gcmHash_Final(hash, ctr.cb, &produced, AES_BLOCK_SIZE);
+        if (status != SECSuccess) {
+            goto loser;
+        }
+    } else {
+        /* 12-byte IV: the IV followed by a 32-bit counter value of 1 */
+        PORT_Memcpy(ctr.cb, iv, ivLen);
+        ctr.cb[AES_BLOCK_SIZE - 1] = 1;
+    }
+
+    status = CTR_InitContext(&gcm->ctr_context, gcm->cipher_context,
+                             gcm->cipher, (unsigned char *)&ctr);
+    if (status != SECSuccess) {
+        goto loser;
+    }
+    ctrLive = PR_TRUE;
+
+    /* remember the tag size for the final step */
+    gcm->tagBits = tagBits;
+    /* The tag key is the counter-mode output over a zero block, so clear
+     * tagKey explicitly before encrypting it in place. */
+    PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
+    status = CTR_Update(&gcm->ctr_context, gcm->tagKey, &produced,
+                        AES_BLOCK_SIZE, gcm->tagKey, AES_BLOCK_SIZE,
+                        AES_BLOCK_SIZE);
+    if (status != SECSuccess) {
+        goto loser;
+    }
+
+    /* finally mix in the AAD data */
+    status = gcmHash_Reset(hash, aad, aadLen);
+    if (status != SECSuccess) {
+        goto loser;
+    }
+
+    PORT_Memset(&ctr, 0, sizeof ctr);
+    return SECSuccess;
+
+loser:
+    PORT_Memset(&ctr, 0, sizeof ctr);
+    if (ctrLive) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    }
+    return SECFailure;
+}
+
+/*
+ * Wipe and release the state held by 'gcm'. The GCMContext itself is only
+ * freed when 'freeit' is true.
+ */
+void
+GCM_DestroyContext(GCMContext *gcm, PRBool freeit)
+{
+    gcmHashContext *hash = gcm->ghash_context;
+    /* the hash context is released through its 'mem' pointer, which has to
+     * be read before the structure is wiped */
+    void *hashAlloc = hash->mem;
+
+    /* ctr_context is embedded in gcm, so it is destroyed in place (no free)
+     * and only when it was initialized by GCM_CreateContext */
+    if (gcm->ctr_context_init) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    }
+
+    PORT_Memset(hash, 0, sizeof(gcmHashContext));
+    PORT_Free(hashAlloc);
+
+    PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
+    PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits));
+
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(gcm);
+}
+
+/*
+ * Produce the authentication tag: finish the hash, XOR it with the tag
+ * key, and clear the low-order bits of the last byte when tagBits is not a
+ * multiple of 8. On a NULL or too-small output buffer *outlen is set to
+ * the required size and SEC_ERROR_OUTPUT_LEN is raised.
+ */
+static SECStatus
+gcm_GetTag(GCMContext *gcm, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout)
+{
+    const unsigned int tagLen =
+        (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    const unsigned int padBits = tagLen * PR_BITS_PER_BYTE - gcm->tagBits;
+    unsigned int pos;
+
+    if (outbuf == NULL || maxout < tagLen) {
+        *outlen = tagLen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* ask the hash for exactly tagLen bytes */
+    if (gcmHash_Final(gcm->ghash_context, outbuf, outlen, tagLen) != SECSuccess) {
+        return SECFailure;
+    }
+
+    for (pos = 0; pos < *outlen; pos++) {
+        outbuf[pos] ^= gcm->tagKey[pos];
+    }
+    /* mask off any extra bits we got */
+    if (padBits != 0) {
+        outbuf[tagLen - 1] &= ~((1 << padBits) - 1);
+    }
+    return SECSuccess;
+}
+
+/*
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ * GCM is counter mode with a specific initialization and a built-in MAC.
+ *
+ * Encrypts inbuf into outbuf and appends the tag, so outbuf needs room for
+ * inlen plus the tag length. *outlen receives the total number of bytes
+ * written. If hashing or tag generation fails the ciphertext already
+ * written is cleared and *outlen is set to 0.
+ */
+SECStatus
+GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned int tagLen;
+    unsigned int tagWritten;
+    SECStatus status;
+
+    PORT_Assert(blocksize == AES_BLOCK_SIZE);
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    /* inlen + tagLen must not wrap */
+    if (UINT_MAX - inlen < tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagLen) {
+        *outlen = inlen + tagLen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    status = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                        inbuf, inlen, AES_BLOCK_SIZE);
+    if (status != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* the MAC covers the ciphertext; the tag goes right behind it */
+    status = gcmHash_Update(gcm->ghash_context, outbuf, *outlen);
+    if (status == SECSuccess) {
+        status = gcm_GetTag(gcm, outbuf + *outlen, &tagWritten,
+                            maxout - *outlen);
+    }
+    if (status != SECSuccess) {
+        PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
+        *outlen = 0;
+        return SECFailure;
+    }
+
+    *outlen += tagWritten;
+    return SECSuccess;
+}
+
+/*
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ * GCM is counter mode with a specific initialization and a built-in MAC.
+ * NOTE: the only difference between Encrypt and Decrypt is when we
+ * calculate the mac. The mac must always be calculated on the cipher text,
+ * not the plain text, so for encrypt we do the CTR update first and for
+ * decrypt we do the mac first.
+ *
+ * inbuf holds the ciphertext followed by the tag. The tag is verified
+ * before anything is decrypted; on a mismatch SEC_ERROR_BAD_DATA is set and
+ * no plaintext is produced.
+ */
+SECStatus
+GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    SECStatus rv;
+    unsigned int tagBytes;
+    unsigned char tag[MAX_BLOCK_SIZE];
+    const unsigned char *intag;
+    unsigned int len;
+
+    PORT_Assert(blocksize == AES_BLOCK_SIZE);
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* Report an uninitialized counter the same way GCM_EncryptUpdate does,
+     * so both directions give callers the same error for the same misuse. */
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* get the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    inlen -= tagBytes;
+    intag = inbuf + inlen;
+
+    /* verify the block */
+    rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    /* Don't decrypt if we can't authenticate the encrypted data!
+     * This assumes that if tagBits is not a multiple of 8, intag will
+     * preserve the masked off missing bits. */
+    if (NSS_SecureMemcmp(tag, intag, tagBytes) != 0) {
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        PORT_Memset(tag, 0, sizeof(tag));
+        return SECFailure;
+    }
+    PORT_Memset(tag, 0, sizeof(tag));
+    /* finish the decryption */
+    return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                      inbuf, inlen, AES_BLOCK_SIZE);
+}
+
+/* Put an IV generator context into its initial state: no IV handed out
+ * yet, no limit computed, and CKG_GENERATE as the generator. */
+void
+gcm_InitIVContext(gcmIVContext *gcmIv)
+{
+    gcmIv->ivGen = CKG_GENERATE;
+    gcmIv->ivLen = 0;
+    gcmIv->fixedBits = 0;
+    gcmIv->max_count = 0;
+    gcmIv->counter = 0;
+}
+
+/*
+ * Generate the IV on the fly and return it to the application.
+ * This function keeps a counter, which may be used in the IV
+ * generation, or may be used simply to make sure we don't
+ * generate too many IVs from this same key.
+ * PKCS #11 defines 4 generating values:
+ * 1) CKG_NO_GENERATE: just use the passed in IV as is.
+ * 2) CKG_GENERATE: the application doesn't care what generation
+ * scheme is used (we default to counter in this code).
+ * 3) CKG_GENERATE_COUNTER: The IV is the value of a counter.
+ * 4) CKG_GENERATE_RANDOM: The IV is randomly generated.
+ * We add a fifth rule:
+ * 5) CKG_GENERATE_COUNTER_XOR: The Counter value is xor'ed with
+ * the IV.
+ * The value fixedBits specifies the number of bits that will be passed
+ * on from the original IV. The counter or the random data is loaded
+ * in the remainder of the IV not covered by fixedBits, overwriting any
+ * data there. In the xor case the counter is xor'ed with the data in the
+ * IV. In all cases only bits outside of fixedBits are modified.
+ * The number of IVs we can generate is restricted by the size of the
+ * variable part of the IV and the generation algorithm used. Because of
+ * this, we require subsequent calls on this context to use the same
+ * generator, IV len, and fixed bits as the first call.
+ *
+ * gcmIv: generator state; updated on every successful call.
+ * iv: buffer of ivLen bytes, modified in place.
+ * ivLen: IV length in bytes.
+ * fixedBits: number of leading IV bits to leave alone.
+ * ivGen: generation scheme (see the list above).
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS for
+ * inconsistent or unusable parameters and SEC_ERROR_EXTRA_INPUT once the
+ * IV budget is exhausted. A failure from the random generator is returned
+ * as is.
+ */
+SECStatus
+gcm_GenerateIV(gcmIVContext *gcmIv, unsigned char *iv, unsigned int ivLen,
+ unsigned int fixedBits, CK_GENERATOR_FUNCTION ivGen)
+{
+ unsigned int i;
+ unsigned int flexBits;
+ unsigned int ivOffset;
+ unsigned int ivNewCount;
+ unsigned char ivMask;
+ unsigned char ivSave;
+ SECStatus rv;
+
+ if (gcmIv->counter != 0) {
+ /* If we've already generated a message, make sure all subsequent
+ * messages are using the same generator */
+ if ((gcmIv->ivGen != ivGen) || (gcmIv->fixedBits != fixedBits) ||
+ (gcmIv->ivLen != ivLen)) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ } else {
+ /* remember these values. They are stored before validation; if a
+ * check below fails the counter is still 0, so the next call takes
+ * this branch again and overwrites them. */
+ gcmIv->ivGen = ivGen;
+ gcmIv->fixedBits = fixedBits;
+ gcmIv->ivLen = ivLen;
+ /* now calculate how many bits of IV we have to supply */
+ flexBits = ivLen * PR_BITS_PER_BYTE; /* bytes->bits */
+ /* first make sure we aren't going to overflow */
+ if (flexBits < fixedBits) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ flexBits -= fixedBits;
+ /* if we are generating a random number reduce the acceptable bits to
+ * avoid birthday attacks */
+ if (ivGen == CKG_GENERATE_RANDOM) {
+ if (flexBits <= GCMIV_RANDOM_BIRTHDAY_BITS) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* see freebl/blapit.h for how we calculate
+ * GCMIV_RANDOM_BIRTHDAY_BITS */
+ flexBits -= GCMIV_RANDOM_BIRTHDAY_BITS;
+ flexBits = flexBits >> 1;
+ }
+ /* this check runs for every generator, CKG_NO_GENERATE included */
+ if (flexBits == 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* Turn those bits into the number of IV's we can safely return;
+ * saturate when the shift would not fit in max_count */
+ if (flexBits >= sizeof(gcmIv->max_count) * PR_BITS_PER_BYTE) {
+ gcmIv->max_count = PR_UINT64(0xffffffffffffffff);
+ } else {
+ gcmIv->max_count = PR_UINT64(1) << flexBits;
+ }
+ }
+
+ /* no generate, accept the IV from the source. The counter is pinned to 1
+ * so that later calls are held to the same parameters. */
+ if (ivGen == CKG_NO_GENERATE) {
+ gcmIv->counter = 1;
+ return SECSuccess;
+ }
+
+ /* make sure we haven't exceeded the number of IVs we can return
+ * for this key, generator, and IV size */
+ if (gcmIv->counter >= gcmIv->max_count) {
+ /* use a unique error from just bad user input */
+ PORT_SetError(SEC_ERROR_EXTRA_INPUT);
+ return SECFailure;
+ }
+
+ /* build the mask to handle the first byte of the IV: ivOffset is the
+ * index of the first byte that gets modified, ivMask selects the bits of
+ * that byte that may change, ivNewCount is the number of bytes from
+ * ivOffset to the end of the IV.
+ * NOTE(review): for k = fixedBits % 8 != 0 this expression yields a mask
+ * with the low k bits set (e.g. k = 3 gives 0x07), not the low 8 - k
+ * bits -- confirm that this matches the intended fixedBits semantics. */
+ ivOffset = fixedBits / PR_BITS_PER_BYTE;
+ ivMask = 0xff >> ((8 - (fixedBits & 7)) & 7);
+ ivNewCount = ivLen - ivOffset;
+
+ /* finally generate the IV.
+ * NOTE(review): there is no default case; an ivGen value not listed here
+ * leaves the IV untouched, yet the counter is still advanced and
+ * SECSuccess is returned. */
+ switch (ivGen) {
+ case CKG_GENERATE: /* default to counter */
+ case CKG_GENERATE_COUNTER:
+ iv[ivOffset] = (iv[ivOffset] & ~ivMask) |
+ (PORT_GET_BYTE_BE(gcmIv->counter, 0, ivNewCount) & ivMask);
+ for (i = 1; i < ivNewCount; i++) {
+ iv[ivOffset + i] = PORT_GET_BYTE_BE(gcmIv->counter, i, ivNewCount);
+ }
+ break;
+ /* for TLS 1.3 */
+ case CKG_GENERATE_COUNTER_XOR:
+ iv[ivOffset] ^=
+ (PORT_GET_BYTE_BE(gcmIv->counter, 0, ivNewCount) & ivMask);
+ for (i = 1; i < ivNewCount; i++) {
+ iv[ivOffset + i] ^= PORT_GET_BYTE_BE(gcmIv->counter, i, ivNewCount);
+ }
+ break;
+ case CKG_GENERATE_RANDOM:
+ /* keep the protected bits of the first byte across the random fill;
+ * they are restored before the status is checked */
+ ivSave = iv[ivOffset] & ~ivMask;
+ rv = RNG_GenerateGlobalRandomBytes(iv + ivOffset, ivNewCount);
+ iv[ivOffset] = ivSave | (iv[ivOffset] & ivMask);
+ if (rv != SECSuccess) {
+ return rv;
+ }
+ break;
+ }
+ gcmIv->counter++;
+ return SECSuccess;
+}
+
+/*
+ * Per-message (PKCS #11 message interface) encryption. 'params' must point
+ * to a CK_GCM_MESSAGE_PARAMS of exactly paramLen bytes. The IV is generated
+ * or accepted according to params, the ciphertext goes to outbuf and the
+ * tag to params->pTag. Only valid on a context whose counter was not set
+ * up by GCM_CreateContext.
+ */
+SECStatus
+GCM_EncryptAEAD(GCMContext *gcm, unsigned char *outbuf,
+                unsigned int *outlen, unsigned int maxout,
+                const unsigned char *inbuf, unsigned int inlen,
+                void *params, unsigned int paramLen,
+                const unsigned char *aad, unsigned int aadLen,
+                unsigned int blocksize)
+{
+    const CK_GCM_MESSAGE_PARAMS *msg = (const CK_GCM_MESSAGE_PARAMS *)params;
+    unsigned int tagWritten;
+    SECStatus status;
+
+    PORT_Assert(blocksize == AES_BLOCK_SIZE);
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* paramLen comes all the way from the application layer, so it is
+     * checked against the structure we are about to read */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* a context initialized through C_EncryptInit must not end up here */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (gcm_GenerateIV(&gcm->gcm_iv, msg->pIv, msg->ulIvLen,
+                       msg->ulIvFixedBits, msg->ivGenerator) != SECSuccess) {
+        return SECFailure;
+    }
+    if (gcm_InitCounter(gcm, msg->pIv, msg->ulIvLen, msg->ulTagBits,
+                        aad, aadLen) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* the counter context only lives for this one message */
+    status = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                        inbuf, inlen, AES_BLOCK_SIZE);
+    CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    if (status != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* MAC the ciphertext, then emit the tag into the caller's tag buffer */
+    status = gcmHash_Update(gcm->ghash_context, outbuf, *outlen);
+    if (status == SECSuccess) {
+        status = gcm_GetTag(gcm, msg->pTag, &tagWritten,
+                            (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) /
+                                PR_BITS_PER_BYTE);
+    }
+    if (status != SECSuccess) {
+        PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
+        *outlen = 0;
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Per-message (PKCS #11 message interface) decryption. 'params' must point
+ * to a CK_GCM_MESSAGE_PARAMS of exactly paramLen bytes; the expected tag is
+ * read from params->pTag. The tag is verified before any plaintext is
+ * produced; a mismatch yields SEC_ERROR_BAD_DATA. Only valid on a context
+ * whose counter was not set up by GCM_CreateContext.
+ */
+SECStatus
+GCM_DecryptAEAD(GCMContext *gcm, unsigned char *outbuf,
+                unsigned int *outlen, unsigned int maxout,
+                const unsigned char *inbuf, unsigned int inlen,
+                void *params, unsigned int paramLen,
+                const unsigned char *aad, unsigned int aadLen,
+                unsigned int blocksize)
+{
+    const CK_GCM_MESSAGE_PARAMS *msg = (const CK_GCM_MESSAGE_PARAMS *)params;
+    unsigned char computed[MAX_BLOCK_SIZE];
+    unsigned int tagLen;
+    unsigned int tagWritten;
+    int tagMatches;
+    SECStatus status;
+
+    PORT_Assert(blocksize == AES_BLOCK_SIZE);
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* paramLen comes all the way from the application layer, so it is
+     * checked against the structure we are about to read */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* a context initialized through C_DecryptInit must not end up here */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (gcm_InitCounter(gcm, msg->pIv, msg->ulIvLen, msg->ulTagBits,
+                        aad, aadLen) != SECSuccess) {
+        return SECFailure;
+    }
+
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    PORT_Assert(tagLen != 0);
+
+    /* compute the tag over the ciphertext */
+    status = gcmHash_Update(gcm->ghash_context, inbuf, inlen);
+    if (status == SECSuccess) {
+        status = gcm_GetTag(gcm, computed, &tagWritten, AES_BLOCK_SIZE);
+    }
+    if (status != SECSuccess) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+        return SECFailure;
+    }
+
+    /* Don't decrypt if we can't authenticate the encrypted data!
+     * When tagBits is not a multiple of 8 this relies on the caller's tag
+     * preserving the masked-off bits. */
+    tagMatches = (NSS_SecureMemcmp(computed, msg->pTag, tagLen) == 0);
+    PORT_Memset(computed, 0, sizeof(computed));
+    if (!tagMatches) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    /* authenticated: finish the decryption */
+    status = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                        inbuf, inlen, AES_BLOCK_SIZE);
+    CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    return status;
+}
diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h
new file mode 100644
index 0000000000..21792e0b35
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.h
@@ -0,0 +1,125 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GCM_H
+#define GCM_H 1
+
+#include "blapii.h"
+#include "pkcs11t.h"
+#include <stdint.h>
+
+#ifdef NSS_X86_OR_X64
+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#undef NSS_DISABLE_SSE2
+#define NSS_DISABLE_SSE2 1
+#endif /* GCC <= 4.8 */
+
+#include <emmintrin.h> /* __m128i */
+
+#ifdef NSS_DISABLE_SSE2
+#undef NSS_DISABLE_SSE2
+#pragma GCC pop_options
+#endif /* NSS_DISABLE_SSE2 */
+#endif
+
+#ifdef __aarch64__
+#include <arm_neon.h>
+#endif
+
+#if defined(__powerpc64__)
+#include "ppc-crypto.h"
+#endif
+
+SEC_BEGIN_PROTOS
+
+#ifdef HAVE_INT128_SUPPORT
+typedef unsigned __int128 uint128_t;
+#endif
+
+typedef struct GCMContextStr GCMContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * GCMContext does not own context. context needs to remain valid for as long
+ * as the GCMContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ *
+ * params is either NULL (per-message interface: use the *AEAD functions)
+ * or points to a CK_NSS_GCM_PARAMS (use the *Update functions).
+ * Returns NULL on failure.
+ */
+GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *params);
+/* Wipes and releases the state held by gcm; gcm itself is freed only when
+ * freeit is true. */
+void GCM_DestroyContext(GCMContext *gcm, PRBool freeit);
+/* Encrypts inbuf and appends the tag; outbuf needs room for inlen plus the
+ * tag length. blocksize must be AES_BLOCK_SIZE. */
+SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+/* inbuf is ciphertext followed by the tag; the tag is verified before any
+ * data is decrypted. blocksize must be AES_BLOCK_SIZE. */
+SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+/* Per-message encryption: params points to a CK_GCM_MESSAGE_PARAMS of
+ * paramLen bytes; the tag is written to its pTag member. */
+SECStatus GCM_EncryptAEAD(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+/* Per-message decryption: params points to a CK_GCM_MESSAGE_PARAMS of
+ * paramLen bytes; the expected tag is read from its pTag member. */
+SECStatus GCM_DecryptAEAD(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+
+/* These functions are here only so we can test them */
+#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */
+typedef struct gcmHashContextStr gcmHashContext;
+typedef SECStatus (*ghash_t)(gcmHashContext *, const unsigned char *,
+ unsigned int);
+/* GHASH state. The structure is declared between pre_align/post_align and
+ * is allocated with 16-byte alignment by GCM_CreateContext. */
+pre_align struct gcmHashContextStr {
+ /* platform vector registers for x and h; presumably used by the hardware
+ * GHASH backends -- TODO confirm */
+#ifdef NSS_X86_OR_X64
+ __m128i x, h;
+#elif defined(__aarch64__)
+ uint64x2_t x, h;
+#elif defined(USE_PPC_CRYPTO)
+ vec_u64 x, h;
+#endif
+ /* software accumulator (x_*), written out as the hash result when hw is
+ * not set; h_* presumably hold the hash subkey halves -- TODO confirm */
+ uint64_t x_low, x_high, h_high, h_low;
+ /* partial input block awaiting more data, and how many bytes it holds */
+ unsigned char buffer[MAX_BLOCK_SIZE];
+ unsigned int bufLen;
+ /* two big-endian 64-bit bit lengths: the previous segment's followed by
+ * the most recent one's (see gcmHash_Sync) */
+ uint8_t counterBuf[16];
+ /* number of bits hashed since the last sync */
+ uint64_t cLen;
+ /* block multiply routine; the last argument is a count of blocks */
+ ghash_t ghash_mul;
+ /* when true, the *_hw helpers are used to clear and read the accumulator */
+ PRBool hw;
+ /* pointer handed to PORT_Free when the context is destroyed */
+ gcmHashContext *mem;
+} post_align;
+
+/* State kept by gcm_GenerateIV across the messages of one key. */
+typedef struct gcmIVContextStr gcmIVContext;
+struct gcmIVContextStr {
+ /* number of IVs handed out so far; 0 means no call has succeeded yet */
+ PRUint64 counter;
+ /* upper bound on counter, derived from the variable part of the IV */
+ PRUint64 max_count;
+ /* generator, fixed-bit count and IV length recorded by the first call;
+ * later calls must repeat them */
+ CK_GENERATOR_FUNCTION ivGen;
+ unsigned int fixedBits;
+ unsigned int ivLen;
+};
+
+/* Hash len bytes of buf, buffering any trailing partial block. */
+SECStatus gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int len);
+/* Set up ghash with the hash subkey H; GCM_CreateContext passes sw as true
+ * when built with DISABLE_HW_GCM. */
+SECStatus gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
+ PRBool sw);
+/* Restart the hash for a new message and hash AADLen bytes of AAD. */
+SECStatus gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
+ unsigned int AADLen);
+/* Finish the hash and copy at most AES_BLOCK_SIZE bytes of it to outbuf. */
+SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout);
+
+/* Reset an IV generator context to its initial state. */
+void gcm_InitIVContext(gcmIVContext *gcmiv);
+/* Produce (or accept) the IV for the next message in iv; see gcm.c for the
+ * generator rules. */
+SECStatus gcm_GenerateIV(gcmIVContext *gcmIv, unsigned char *iv,
+ unsigned int ivLen, unsigned int fixedBits,
+ CK_GENERATOR_FUNCTION ivGen);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/genload.c b/security/nss/lib/freebl/genload.c
new file mode 100644
index 0000000000..832deb58c4
--- /dev/null
+++ b/security/nss/lib/freebl/genload.c
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file is meant to be included by other .c files.
+ * This file takes a "parameter", the scope which includes this
+ * code shall declare this variable:
+ * const char *NameOfThisSharedLib;
+ *
+ * NameOfThisSharedLib:
+ * The file name of the shared library that shall be used as the
+ * "reference library". The loader will attempt to load the requested
+ * library from the same directory as the reference library.
+ */
+
+#ifdef XP_UNIX
+#include <unistd.h>
+#define BL_MAXSYMLINKS 20
+
+/*
+ * Resolve 'link' to the pathname it ultimately refers to.
+ *
+ * Without glibc, symbolic links are followed one readlink() at a time, up
+ * to BL_MAXSYMLINKS hops, and NULL is returned when 'link' is NULL, when
+ * memory cannot be allocated, or when the very first readlink() fails
+ * (i.e. 'link' is not a symbolic link).
+ * With glibc, realpath() is used instead, so a pathname is returned
+ * whenever realpath() succeeds, whether or not 'link' is a symbolic link.
+ *
+ * The caller should call PR_Free to free the string returned by this
+ * function.
+ */
+static char*
+loader_GetOriginalPathname(const char* link)
+{
+#ifdef __GLIBC__
+    char* tmp = realpath(link, NULL);
+    char* resolved;
+    if (!tmp)
+        return NULL;
+    /* Copy into PR_Malloc'd memory because PR_Free might not be using
+     * free(). The allocation can fail, so only copy on success; tmp is
+     * released either way. */
+    resolved = PR_Malloc(strlen(tmp) + 1);
+    if (resolved) {
+        strcpy(resolved, tmp);
+    }
+    free(tmp);
+    return resolved;
+#else
+    char* resolved = NULL;
+    char* input = NULL;
+    PRUint32 iterations = 0;
+    PRInt32 len = 0, retlen = 0;
+    if (!link) {
+        PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
+        return NULL;
+    }
+    len = PR_MAX(1024, strlen(link) + 1);
+    resolved = PR_Malloc(len);
+    input = PR_Malloc(len);
+    if (!resolved || !input) {
+        if (resolved) {
+            PR_Free(resolved);
+        }
+        if (input) {
+            PR_Free(input);
+        }
+        return NULL;
+    }
+    strcpy(input, link);
+    /* Each successful readlink() result becomes the input of the next
+     * round; the two buffers simply trade places. */
+    while ((iterations++ < BL_MAXSYMLINKS) &&
+           ((retlen = readlink(input, resolved, len - 1)) > 0)) {
+        char* tmp = input;
+        resolved[retlen] = '\0'; /* NULL termination */
+        input = resolved;
+        resolved = tmp;
+    }
+    PR_Free(resolved);
+    /* the first readlink() failed: 'link' was not a symbolic link */
+    if (iterations == 1 && retlen < 0) {
+        PR_Free(input);
+        input = NULL;
+    }
+    return input;
+#endif
+}
+#endif /* XP_UNIX */
+
+/*
+ * Load the library 'name' from the directory that contains
+ * 'referencePath'. Returns NULL when 'referencePath' has no directory
+ * separator, when memory runs out, or when the load itself fails.
+ */
+static PRLibrary*
+loader_LoadLibInReferenceDir(const char* referencePath, const char* name)
+{
+    PRLibSpec spec;
+    PRLibrary* handle;
+    char* candidate;
+    size_t dirLen;
+    const char* lastSep = strrchr(referencePath, PR_GetDirectorySeparator());
+
+    if (!lastSep) {
+        return NULL;
+    }
+
+    /* keep everything up to and including the last separator, then append
+     * the requested file name */
+    dirLen = 1 + lastSep - referencePath;
+    candidate = (char*)PORT_Alloc(strlen(name) + dirLen + 1);
+    if (!candidate) {
+        return NULL;
+    }
+    memcpy(candidate, referencePath, dirLen);
+    strcpy(candidate + dirLen, name);
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load fully-qualified %s\n",
+               candidate);
+#endif
+    spec.type = PR_LibSpec_Pathname;
+    spec.value.pathname = candidate;
+    handle = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+    PORT_Free(candidate);
+    return handle;
+}
+
+/*
+ * Load 'nameToLoad', preferring the directory that holds the shared
+ * library containing this function. The search order is:
+ *   1. the directory of the pathname PR_GetLibraryFilePathname reports;
+ *   2. on XP_UNIX, the directory of that pathname after resolving it with
+ *      loader_GetOriginalPathname;
+ *   3. the bare name, left to the platform's own library search.
+ */
+static PRLibrary*
+loader_LoadLibrary(const char* nameToLoad)
+{
+    PRLibSpec spec;
+    PRLibrary* handle = NULL;
+    /* PR_GetLibraryFilePathname works with either the base library name or
+     * a function pointer, depending on the platform. An exported symbol
+     * cannot be used for the lookup, because on some platforms symbols in
+     * loaded implicit dependencies can't be found; the address of this
+     * very function does the job. */
+    char* selfPath = PR_GetLibraryFilePathname(NameOfThisSharedLib,
+                                               (PRFuncPtr)&loader_LoadLibrary);
+
+    if (selfPath) {
+        handle = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+#ifdef XP_UNIX
+        if (!handle) {
+            /* selfPath may be a symbolic link: resolve it and retry */
+            char* realSelfPath = loader_GetOriginalPathname(selfPath);
+            if (realSelfPath) {
+                PR_Free(selfPath);
+                selfPath = realSelfPath;
+                handle = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+            }
+        }
+#endif
+        PR_Free(selfPath);
+    }
+    if (handle) {
+        return handle;
+    }
+
+    /* last resort: hand the bare name to the loader */
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load %s\n", nameToLoad);
+#endif
+    spec.type = PR_LibSpec_Pathname;
+    spec.value.pathname = nameToLoad;
+    handle = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+#ifdef DEBUG_LOADER
+    if (NULL == handle) {
+        PR_fprintf(PR_STDOUT, "\nLoading failed : %s.\n", nameToLoad);
+    }
+#endif
+    return handle;
+}
diff --git a/security/nss/lib/freebl/hmacct.c b/security/nss/lib/freebl/hmacct.c
new file mode 100644
index 0000000000..a1b2ba35a0
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.c
@@ -0,0 +1,325 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "hmacct.h"
+#include "secerr.h"
+
+/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length
+ * field. (SHA-384/512 have 128-bit length.) */
+#define MAX_HASH_BIT_COUNT_BYTES 16
+
+/* constantTimeGE yields 0xff when a >= b and 0x00 otherwise. Both a and b
+ * must be below MAX_UINT/2. */
+static unsigned char
+constantTimeGE(unsigned int a, unsigned int b)
+{
+    const unsigned char mask = PORT_CT_GE(a, b);
+
+    return mask;
+}
+
+/* constantTimeEQ yields 0xff when a == b and 0x00 otherwise. The parameters
+ * are unsigned char, so wider values supplied by a caller are converted to
+ * unsigned char before they are compared. */
+static unsigned char
+constantTimeEQ(unsigned char a, unsigned char b)
+{
+    const unsigned char mask = PORT_CT_EQ(a, b);
+
+    return mask;
+}
+
+/* MAC performs a constant time SSLv3/TLS MAC of |dataLen| bytes of |data|,
+ * where |dataLen| includes both the authenticated bytes and the MAC tag from
+ * the sender. |dataLen| must be >= the length of the MAC tag.
+ *
+ * |dataTotalLen| is >= |dataLen| and also accounts for any padding bytes
+ * that may follow the sender's MAC. (Only a single block of padding may
+ * follow in SSLv3, or up to 255 bytes in TLS.)
+ *
+ * Since the results of decryption are secret information (otherwise a
+ * padding-oracle is created), this function is constant-time with respect to
+ * |dataLen|.
+ *
+ * |header| contains either the 13-byte TLS header (containing the sequence
+ * number, record type etc), or it contains the SSLv3 header with the SSLv3
+ * padding bytes etc.
+ *
+ * |hashObj| supplies the hash; its end_raw function must be non-NULL (the
+ * public entry points check this before calling). |macSecret| is the MAC
+ * key; for TLS it is copied into a HASH_BLOCK_LENGTH_MAX-byte buffer, so
+ * |macSecretLen| must not exceed that size. |isSSLv3| selects the SSLv3
+ * construction when non-zero and HMAC otherwise.
+ *
+ * The result is produced by hashObj->end(), which is given |mdOut|,
+ * |mdOutLen| and |mdOutMax|.
+ *
+ * Returns SECSuccess, or SECFailure if the hash context cannot be created or
+ * (TLS only) |macSecretLen| is too large; in the latter case the error code
+ * is set to SEC_ERROR_INVALID_ARGS. */
+static SECStatus
+MAC(unsigned char *mdOut,
+    unsigned int *mdOutLen,
+    unsigned int mdOutMax,
+    const SECHashObject *hashObj,
+    const unsigned char *macSecret,
+    unsigned int macSecretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *data,
+    unsigned int dataLen,
+    unsigned int dataTotalLen,
+    unsigned char isSSLv3)
+{
+    void *mdState;
+    const unsigned int mdSize = hashObj->length;
+    const unsigned int mdBlockSize = hashObj->blocklength;
+    /* mdLengthSize is the number of bytes in the length field that terminates
+     * the hash.
+     *
+     * This assumes that hash functions with a 64 byte block size use a 64-bit
+     * length, and otherwise they use a 128-bit length. This is true of {MD5,
+     * SHA*} (which are all of the hash functions specified for use with TLS
+     * today). */
+    const unsigned int mdLengthSize = mdBlockSize == 64 ? 8 : 16;
+
+    const unsigned int sslv3PadLen = hashObj->type == HASH_AlgMD5 ? 48 : 40;
+
+    /* varianceBlocks is the number of blocks of the hash that we have to
+     * calculate in constant time because they could be altered by the
+     * padding value.
+     *
+     * In SSLv3, the padding must be minimal so the end of the plaintext
+     * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that
+     * the MAC size varies from 0..20 bytes.) In case the 9 bytes of hash
+     * termination (0x80 + 64-bit length) don't fit in the final block, we
+     * say that the final two blocks can vary based on the padding.
+     *
+     * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not
+     * required to be minimal. Therefore we say that the final six blocks
+     * can vary based on the padding.
+     *
+     * Later in the function, if the message is short and there obviously
+     * cannot be this many blocks then varianceBlocks can be reduced. */
+    unsigned int varianceBlocks = isSSLv3 ? 2 : 6;
+    /* From now on we're dealing with the MAC, which conceptually has 13
+     * bytes of `header' before the start of the data (TLS) or 71/75 bytes
+     * (SSLv3) */
+    const unsigned int len = dataTotalLen + headerLen;
+    /* maxMACBytes contains the maximum number of bytes in the MAC, including
+     * |header|, assuming that there's no padding. */
+    const unsigned int maxMACBytes = len - mdSize - 1;
+    /* numBlocks is the maximum number of hash blocks. */
+    const unsigned int numBlocks =
+        (maxMACBytes + 1 + mdLengthSize + mdBlockSize - 1) / mdBlockSize;
+    /* macEndOffset is the index just past the end of the data to be
+     * MACed. */
+    const unsigned int macEndOffset = dataLen + headerLen - mdSize;
+    /* c is the index of the 0x80 byte in the final hash block that
+     * contains application data. */
+    const unsigned int c = macEndOffset % mdBlockSize;
+    /* indexA is the hash block number that contains the 0x80 terminating
+     * value. */
+    const unsigned int indexA = macEndOffset / mdBlockSize;
+    /* indexB is the hash block number that contains the 64-bit hash
+     * length, in bits. */
+    const unsigned int indexB = (macEndOffset + mdLengthSize) / mdBlockSize;
+    /* bits is the hash-length in bits. It includes the additional hash
+     * block for the masked HMAC key, or whole of |header| in the case of
+     * SSLv3. */
+    unsigned int bits;
+    /* In order to calculate the MAC in constant time we have to handle
+     * the final blocks specially because the padding value could cause the
+     * end to appear somewhere in the final |varianceBlocks| blocks and we
+     * can't leak where. However, |numStartingBlocks| worth of data can
+     * be hashed right away because no padding value can affect whether
+     * they are plaintext. */
+    unsigned int numStartingBlocks = 0;
+    /* k is the starting byte offset into the conceptual header||data where
+     * we start processing. */
+    unsigned int k = 0;
+    unsigned char lengthBytes[MAX_HASH_BIT_COUNT_BYTES];
+    /* hmacPad is the masked HMAC key. */
+    unsigned char hmacPad[HASH_BLOCK_LENGTH_MAX];
+    unsigned char firstBlock[HASH_BLOCK_LENGTH_MAX];
+    unsigned char macOut[HASH_LENGTH_MAX];
+    unsigned i, j;
+
+    /* For TLS the secret is copied into |hmacPad|; a longer secret would
+     * overrun that buffer. This is checked at run time, not only in debug
+     * builds, and before anything is allocated. (SSLv3 feeds the secret to
+     * the hash directly and is not affected.) */
+    if (!isSSLv3 && macSecretLen > sizeof(hmacPad)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Without a hash context nothing below can work.
+     * NOTE(review): create() is presumed to record its own error code on
+     * failure - confirm. */
+    mdState = hashObj->create();
+    if (mdState == NULL) {
+        return SECFailure;
+    }
+
+    /* For SSLv3, if we're going to have any starting blocks then we need
+     * at least two because the header is larger than a single block. */
+    if (numBlocks > varianceBlocks + (isSSLv3 ? 1 : 0)) {
+        numStartingBlocks = numBlocks - varianceBlocks;
+        k = mdBlockSize * numStartingBlocks;
+    }
+
+    bits = 8 * macEndOffset;
+    hashObj->begin(mdState);
+    if (!isSSLv3) {
+        /* Compute the initial HMAC block. For SSLv3, the padding and
+         * secret bytes are included in |header| because they take more
+         * than a single block. */
+        bits += 8 * mdBlockSize;
+        memset(hmacPad, 0, mdBlockSize);
+        memcpy(hmacPad, macSecret, macSecretLen);
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x36;
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+    }
+
+    j = 0;
+    memset(lengthBytes, 0, sizeof(lengthBytes));
+    if (mdLengthSize == 16) {
+        j = 8;
+    }
+    if (hashObj->type == HASH_AlgMD5) {
+        /* MD5 appends a little-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[i + j] = bits >> (8 * i);
+        }
+    } else {
+        /* All other TLS hash functions use a big-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[4 + i + j] = bits >> (8 * (3 - i));
+        }
+    }
+
+    if (k > 0) {
+        if (isSSLv3) {
+            /* The SSLv3 header is larger than a single block.
+             * overhang is the number of bytes beyond a single
+             * block that the header consumes: either 7 bytes
+             * (SHA1) or 11 bytes (MD5). */
+            const unsigned int overhang = headerLen - mdBlockSize;
+            hashObj->update(mdState, header, mdBlockSize);
+            memcpy(firstBlock, header + mdBlockSize, overhang);
+            memcpy(firstBlock + overhang, data, mdBlockSize - overhang);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize - 1; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - overhang,
+                                mdBlockSize);
+            }
+        } else {
+            /* k is a multiple of mdBlockSize. */
+            memcpy(firstBlock, header, 13);
+            memcpy(firstBlock + 13, data, mdBlockSize - 13);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - 13,
+                                mdBlockSize);
+            }
+        }
+    }
+
+    memset(macOut, 0, sizeof(macOut));
+
+    /* We now process the final hash blocks. For each block, we construct
+     * it in constant time. If i == indexA then we'll include the 0x80
+     * bytes and zero pad etc. For each block we selectively copy it, in
+     * constant time, to |macOut|. */
+    for (i = numStartingBlocks; i <= numStartingBlocks + varianceBlocks; i++) {
+        unsigned char block[HASH_BLOCK_LENGTH_MAX];
+        unsigned char isBlockA = constantTimeEQ(i, indexA);
+        unsigned char isBlockB = constantTimeEQ(i, indexB);
+        for (j = 0; j < mdBlockSize; j++) {
+            unsigned char isPastC = isBlockA & constantTimeGE(j, c);
+            unsigned char isPastCPlus1 = isBlockA & constantTimeGE(j, c + 1);
+            unsigned char b = 0;
+            if (k < headerLen) {
+                b = header[k];
+            } else if (k < dataTotalLen + headerLen) {
+                b = data[k - headerLen];
+            }
+            k++;
+
+            /* If this is the block containing the end of the
+             * application data, and we are at the offset for the
+             * 0x80 value, then overwrite b with 0x80. */
+            b = (b & ~isPastC) | (0x80 & isPastC);
+            /* If this is the block containing the end of the
+             * application data and we're past the 0x80 value then
+             * just write zero. */
+            b = b & ~isPastCPlus1;
+            /* If this is indexB (the final block), but not
+             * indexA (the end of the data), then the 64-bit
+             * length didn't fit into indexA and we're having to
+             * add an extra block of zeros. */
+            b &= ~isBlockB | isBlockA;
+
+            /* The final bytes of one of the blocks contains the length. */
+            if (j >= mdBlockSize - mdLengthSize) {
+                /* If this is indexB, write a length byte. */
+                b = (b & ~isBlockB) |
+                    (isBlockB & lengthBytes[j - (mdBlockSize - mdLengthSize)]);
+            }
+            block[j] = b;
+        }
+
+        hashObj->update(mdState, block, mdBlockSize);
+        hashObj->end_raw(mdState, block, NULL, mdSize);
+        /* If this is indexB, copy the hash value to |macOut|. */
+        for (j = 0; j < mdSize; j++) {
+            macOut[j] |= block[j] & isBlockB;
+        }
+    }
+
+    hashObj->begin(mdState);
+
+    if (isSSLv3) {
+        /* We repurpose |hmacPad| to contain the SSLv3 pad2 block. */
+        for (i = 0; i < sslv3PadLen; i++)
+            hmacPad[i] = 0x5c;
+
+        hashObj->update(mdState, macSecret, macSecretLen);
+        hashObj->update(mdState, hmacPad, sslv3PadLen);
+        hashObj->update(mdState, macOut, mdSize);
+    } else {
+        /* Complete the HMAC in the standard manner. XORing with 0x6a turns
+         * the 0x36-masked key into the 0x5c-masked key (0x36 ^ 0x6a ==
+         * 0x5c). */
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x6a;
+
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+        hashObj->update(mdState, macOut, mdSize);
+    }
+
+    hashObj->end(mdState, mdOut, mdOutLen, mdOutMax);
+    hashObj->destroy(mdState, PR_TRUE);
+
+    /* Wipe the buffers that held key material or intermediate digests. */
+    PORT_Memset(lengthBytes, 0, sizeof lengthBytes);
+    PORT_Memset(hmacPad, 0, sizeof hmacPad);
+    PORT_Memset(firstBlock, 0, sizeof firstBlock);
+    PORT_Memset(macOut, 0, sizeof macOut);
+
+    return SECSuccess;
+}
+
+/* HMAC_ConstantTime computes the constant-time MAC in its TLS (HMAC) form by
+ * forwarding every argument to MAC() with the SSLv3 flag cleared.
+ *
+ * Returns SECFailure without calling MAC() when |hashObj| provides no end_raw
+ * function; otherwise returns whatever MAC() returns. */
+SECStatus
+HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    const unsigned char useSSLv3 = 0;
+
+    /* MAC() needs the raw (unpadded) digest of intermediate blocks. */
+    if (!hashObj->end_raw) {
+        return SECFailure;
+    }
+    return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen,
+               header, headerLen, body, bodyLen, bodyTotalLen, useSSLv3);
+}
+
+/* SSLv3_MAC_ConstantTime computes the constant-time MAC in its SSLv3 form by
+ * forwarding every argument to MAC() with the SSLv3 flag set.
+ *
+ * Returns SECFailure without calling MAC() when |hashObj| provides no end_raw
+ * function; otherwise returns whatever MAC() returns. */
+SECStatus
+SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    const unsigned char useSSLv3 = 1;
+
+    /* MAC() needs the raw (unpadded) digest of intermediate blocks. */
+    if (!hashObj->end_raw) {
+        return SECFailure;
+    }
+    return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen,
+               header, headerLen, body, bodyLen, bodyTotalLen, useSSLv3);
+}
diff --git a/security/nss/lib/freebl/hmacct.h b/security/nss/lib/freebl/hmacct.h
new file mode 100644
index 0000000000..a773ea89c7
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.h
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _HMACCT_H_
+#define _HMACCT_H_
+
+SEC_BEGIN_PROTOS
+
+extern SECStatus HMAC_ConstantTime( /* TLS (HMAC) form of the constant-time MAC; SECFailure if hashObj->end_raw is NULL */
+ unsigned char *result, /* out: buffer handed to hashObj->end() for the final MAC */
+ unsigned int *resultLen, /* out: length argument handed to hashObj->end() */
+ unsigned int maxResultLen, /* capacity of |result| */
+ const SECHashObject *hashObj, /* hash implementation to use */
+ const unsigned char *secret, /* MAC key */
+ unsigned int secretLen, /* length of |secret| in bytes */
+ const unsigned char *header, /* the 13-byte TLS header (sequence number, record type etc) */
+ unsigned int headerLen, /* length of |header| in bytes */
+ const unsigned char *body, /* decrypted record data: authenticated bytes, MAC tag, then padding */
+ unsigned int bodyLen, /* authenticated bytes plus MAC tag; treated as secret */
+ unsigned int bodyTotalLen); /* >= bodyLen; also counts padding after the MAC */
+
+extern SECStatus SSLv3_MAC_ConstantTime( /* SSLv3 form of the constant-time MAC; SECFailure if hashObj->end_raw is NULL */
+ unsigned char *result, /* out: buffer handed to hashObj->end() for the final MAC */
+ unsigned int *resultLen, /* out: length argument handed to hashObj->end() */
+ unsigned int maxResultLen, /* capacity of |result| */
+ const SECHashObject *hashObj, /* hash implementation to use */
+ const unsigned char *secret, /* MAC key */
+ unsigned int secretLen, /* length of |secret| in bytes */
+ const unsigned char *header, /* the SSLv3 header including the SSLv3 padding bytes */
+ unsigned int headerLen, /* length of |header| in bytes */
+ const unsigned char *body, /* decrypted record data: authenticated bytes, MAC tag, then padding */
+ unsigned int bodyLen, /* authenticated bytes plus MAC tag; treated as secret */
+ unsigned int bodyTotalLen); /* >= bodyLen; also counts padding after the MAC */
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/intel-aes-x64-masm.asm b/security/nss/lib/freebl/intel-aes-x64-masm.asm
new file mode 100644
index 0000000000..fe183bca03
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x64-masm.asm
@@ -0,0 +1,964 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh ; pshufb mask of the 128-bit key schedule: every dword picks source bytes 13,14,15,12
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h ; pshufb mask of the 192-bit key schedule: every dword picks source bytes 5,6,7,4
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh ; pshufb mask of the 256-bit key schedule (same value as Lmask)
+Lcon1 dd 1,1,1,1 ; starting aesenclast operand of the key-schedule loops; doubled with pslld after each use
+Lcon2 dd 1bh,1bh,1bh,1bh ; operand loaded after the 128-bit loop for the last two round keys
+
+.CODE
+
+ctx textequ <rcx> ; base address of the round keys: key i is read from [i*16 + ctx]
+output textequ <rdx> ; destination pointer, advanced as blocks are written
+input textequ <r8> ; source pointer; the routines below load it from the stack at entry
+inputLen textequ <r9d> ; remaining byte count; the routines below load it from the stack at entry
+
+
+aes_rnd MACRO i ; one aesenc step on xmm0-xmm7 using the round key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (previous xmm8 contents are lost)
+ aesenc xmm0, xmm8
+ aesenc xmm1, xmm8
+ aesenc xmm2, xmm8
+ aesenc xmm3, xmm8
+ aesenc xmm4, xmm8
+ aesenc xmm5, xmm8
+ aesenc xmm6, xmm8
+ aesenc xmm7, xmm8
+ ENDM
+
+aes_last_rnd MACRO i ; aesenclast on xmm0-xmm7 using the round key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (previous xmm8 contents are lost)
+ aesenclast xmm0, xmm8
+ aesenclast xmm1, xmm8
+ aesenclast xmm2, xmm8
+ aesenclast xmm3, xmm8
+ aesenclast xmm4, xmm8
+ aesenclast xmm5, xmm8
+ aesenclast xmm6, xmm8
+ aesenclast xmm7, xmm8
+ ENDM
+
+aes_dec_rnd MACRO i ; one aesdec step on xmm0-xmm7 using the round key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (previous xmm8 contents are lost)
+ aesdec xmm0, xmm8
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm7, xmm8
+ ENDM
+
+aes_dec_last_rnd MACRO i ; aesdeclast on xmm0-xmm7 using the round key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (previous xmm8 contents are lost)
+ aesdeclast xmm0, xmm8
+ aesdeclast xmm1, xmm8
+ aesdeclast xmm2, xmm8
+ aesdeclast xmm3, xmm8
+ aesdeclast xmm4, xmm8
+ aesdeclast xmm5, xmm8
+ aesdeclast xmm6, xmm8
+ aesdeclast xmm7, xmm8
+ ENDM
+
+
+gen_aes_ecb_func MACRO enc, rnds ; body of an ECB routine: enc eq 1 selects the aesenc forms, anything else aesdec; rnds = index of the last round key
+
+LOCAL loop8
+LOCAL loop1
+LOCAL bail
+
+ xor inputLen, inputLen ; value is replaced by the load two lines below
+ mov input, [rsp + 1*8 + 8*4] ; input pointer is taken from the stack
+ mov inputLen, [rsp + 1*8 + 8*5] ; byte count is taken from the stack
+
+ sub rsp, 3*16 ; room to preserve xmm6, xmm7 and xmm8
+
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+loop8:
+ cmp inputLen, 8*16 ; fewer than eight blocks left: switch to the single-block loop
+ jb loop1
+
+ movdqu xmm0, [0*16 + input] ; load eight input blocks
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+ movdqu xmm7, [7*16 + input]
+
+ movdqu xmm8, [0*16 + ctx] ; round key 0 is XORed into every block
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
+
+IF enc eq 1
+ rnd textequ <aes_rnd>
+ lastrnd textequ <aes_last_rnd>
+ aesinst textequ <aesenc>
+ aeslastinst textequ <aesenclast>
+ELSE
+ rnd textequ <aes_dec_rnd>
+ lastrnd textequ <aes_dec_last_rnd>
+ aesinst textequ <aesdec>
+ aeslastinst textequ <aesdeclast>
+ENDIF
+
+ i = 1
+ WHILE i LT rnds
+ rnd i
+ i = i+1
+ ENDM
+ lastrnd rnds
+
+ movdqu [0*16 + output], xmm0 ; store the eight result blocks
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+
+ lea input, [8*16 + input] ; advance both pointers by 128 bytes
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+
+loop1:
+ cmp inputLen, 1*16 ; less than one full block left: finish (a trailing partial block is not processed)
+ jb bail
+
+ movdqu xmm0, [input]
+ movdqu xmm7, [0*16 + ctx] ; xmm7 serves as the round-key register in this loop
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesinst xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aeslastinst xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ xor rax, rax ; return value 0
+
+ movdqu xmm6, [rsp + 0*16] ; restore the preserved xmm registers
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+ENDM
+
+intel_aes_encrypt_ecb_128 PROC ; ECB encryption, last round key index 10
+gen_aes_ecb_func 1, 10
+intel_aes_encrypt_ecb_128 ENDP
+
+intel_aes_encrypt_ecb_192 PROC ; ECB encryption, last round key index 12
+gen_aes_ecb_func 1, 12
+intel_aes_encrypt_ecb_192 ENDP
+
+intel_aes_encrypt_ecb_256 PROC ; ECB encryption, last round key index 14
+gen_aes_ecb_func 1, 14
+intel_aes_encrypt_ecb_256 ENDP
+
+intel_aes_decrypt_ecb_128 PROC ; ECB decryption, last round key index 10
+gen_aes_ecb_func 0, 10
+intel_aes_decrypt_ecb_128 ENDP
+
+intel_aes_decrypt_ecb_192 PROC ; ECB decryption, last round key index 12
+gen_aes_ecb_func 0, 12
+intel_aes_decrypt_ecb_192 ENDP
+
+intel_aes_decrypt_ecb_256 PROC ; ECB decryption, last round key index 14
+gen_aes_ecb_func 0, 14
+intel_aes_decrypt_ecb_256 ENDP
+
+
+KEY textequ <rcx> ; the key-setup routines read the user key from [KEY]
+KS textequ <rdx> ; the key-setup routines write the round keys through KS
+ITR textequ <r8> ; scratch register: table addresses, loaded key bytes and loop counters
+
+intel_aes_encrypt_init_128 PROC ; expands the 16-byte key at [KEY] into 11 round keys starting at [KS]; KS is modified
+
+ movdqu xmm1, [KEY] ; xmm1 = user key
+ movdqu [KS], xmm1 ; stored unchanged as round key 0
+ movdqa xmm2, xmm1
+
+ lea ITR, Lcon1
+ movdqa xmm0, [ITR] ; xmm0 = Lcon1 (aligned load)
+ lea ITR, Lmask
+ movdqa xmm4, [ITR] ; xmm4 = Lmask (aligned load)
+
+ mov ITR, 8 ; the loop below produces round keys 1..8
+
+Lenc_128_ks_loop:
+ lea KS, [16 + KS] ; step to the next round-key slot
+ dec ITR ; sets the flags tested by the jne at the bottom of the loop
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the constant for the next pass
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [KS], xmm1 ; store the new round key
+ movdqa xmm2, xmm1
+
+ jne Lenc_128_ks_loop ; relies on the instructions since "dec ITR" leaving the flags untouched
+
+ lea ITR, Lcon2
+ movdqa xmm0, [ITR] ; xmm0 = Lcon2 (1bh) for round key 9
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; 1bh becomes 36h for round key 10
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [16 + KS], xmm1 ; round key 9 (KS still addresses round key 8)
+ movdqa xmm2, xmm1
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [32 + KS], xmm1 ; round key 10
+ movdqa xmm2, xmm1
+
+ ret
+intel_aes_encrypt_init_128 ENDP
+
+
+intel_aes_decrypt_init_128 PROC ; builds the encryption schedule, then reverses its order and applies aesimc to round keys 1..9
+
+ push KS ; intel_aes_encrypt_init_128 advances KS, so keep the original value
+ push KEY
+
+ call intel_aes_encrypt_init_128
+
+ pop KEY
+ pop KS
+
+ movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 10 without aesimc
+ movdqu xmm1, [10*16 + KS]
+ movdqu [10*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ i = 1
+ WHILE i LT 5
+ movdqu xmm0, [i*16 + KS] ; swap round keys i and 10-i, applying aesimc to both
+ movdqu xmm1, [(10-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(10-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ movdqu xmm0, [5*16 + KS] ; the middle key stays in place and only gets aesimc
+ aesimc xmm0, xmm0
+ movdqu [5*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_128 ENDP
+
+
+intel_aes_encrypt_init_192 PROC ; expands the 24-byte key at [KEY] into the schedule starting at [KS]; KS is modified
+
+ sub rsp, 16*2 ; room to preserve xmm6 and xmm7
+ movdqu [16*0 + rsp], xmm6
+ movdqu [16*1 + rsp], xmm7
+
+ movdqu xmm1, [KEY] ; xmm1 = key bytes 0..15
+ mov ITR, [16 + KEY] ; key bytes 16..23 via the scratch register
+ movd xmm3, ITR ; xmm3 = key bytes 16..23 in its low half
+
+ movdqu [KS], xmm1 ; first 16 schedule bytes are the key bytes unchanged
+ movdqa xmm5, xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR] ; xmm0 = Lcon1
+ lea ITR, Lmask192
+ movdqu xmm4, [ITR] ; xmm4 = Lmask192
+
+ mov ITR, 4 ; four passes, each writing 48 bytes of schedule
+
+Lenc_192_ks_loop:
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the constant for the next use
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh ; broadcast the top dword of xmm1
+ pxor xmm3, xmm2
+
+ movdqa xmm6, xmm1
+ shufpd xmm5, xmm1, 00h ; xmm5 = low half of xmm5 : low half of xmm1
+ shufpd xmm6, xmm3, 01h ; xmm6 = high half of xmm1 : low half of xmm3
+
+ movdqu [16 + KS], xmm5
+ movdqu [32 + KS], xmm6
+
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the constant for the next use
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh ; broadcast the top dword of xmm1
+ pxor xmm3, xmm2
+
+ movdqu [48 + KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea KS, [48 + KS] ; advance by the 48 bytes just written
+
+ dec ITR
+ jnz Lenc_192_ks_loop
+
+ movdqu [16 + KS], xmm5 ; one more 16-byte store after the loop, at offset 16 from the final KS
+
+ movdqu xmm7, [16*1 + rsp] ; restore the preserved xmm registers
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
+ ret
+intel_aes_encrypt_init_192 ENDP
+
+intel_aes_decrypt_init_192 PROC ; builds the encryption schedule, then reverses its order and applies aesimc to round keys 1..11
+ push KS ; intel_aes_encrypt_init_192 advances KS, so keep the original value
+ push KEY
+
+ call intel_aes_encrypt_init_192
+
+ pop KEY
+ pop KS
+
+ movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 12 without aesimc
+ movdqu xmm1, [12*16 + KS]
+ movdqu [12*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ i = 1
+ WHILE i LT 6
+ movdqu xmm0, [i*16 + KS] ; swap round keys i and 12-i, applying aesimc to both
+ movdqu xmm1, [(12-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(12-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ movdqu xmm0, [6*16 + KS] ; the middle key stays in place and only gets aesimc
+ aesimc xmm0, xmm0
+ movdqu [6*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_192 ENDP
+
+
+intel_aes_encrypt_init_256 PROC ; expands the 32-byte key at [KEY] into 15 round keys starting at [KS]; KS is modified
+ sub rsp, 16*2 ; room to preserve xmm6 and xmm7
+ movdqu [16*0 + rsp], xmm6
+ movdqu [16*1 + rsp], xmm7
+
+ movdqu xmm1, [16*0 + KEY] ; xmm1 = key bytes 0..15
+ movdqu xmm3, [16*1 + KEY] ; xmm3 = key bytes 16..31
+
+ movdqu [16*0 + KS], xmm1 ; both halves are stored unchanged as round keys 0 and 1
+ movdqu [16*1 + KS], xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR] ; xmm0 = Lcon1
+ lea ITR, Lmask256
+ movdqu xmm5, [ITR] ; xmm5 = Lmask256
+
+ pxor xmm6, xmm6 ; xmm6 = 0, the operand of the second aesenclast in the loop
+
+ mov ITR, 6 ; six passes, each writing two round keys
+
+Lenc_256_ks_loop:
+
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the constant for the next use
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1 ; even-numbered round key of this pass
+
+ pshufd xmm2, xmm1, 0ffh ; broadcast the top dword of the key just produced
+ aesenclast xmm2, xmm6 ; aesenclast with an all-zero operand
+ movdqa xmm4, xmm3
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+ movdqu [16*3 + KS], xmm3 ; odd-numbered round key of this pass
+
+ lea KS, [32 + KS] ; advance by the 32 bytes just written
+ dec ITR
+ jnz Lenc_256_ks_loop
+
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1 ; final round key (index 14)
+
+ movdqu xmm7, [16*1 + rsp] ; restore the preserved xmm registers
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
+ ret
+
+intel_aes_encrypt_init_256 ENDP
+
+
+intel_aes_decrypt_init_256 PROC ; builds the encryption schedule, then reverses its order and applies aesimc to round keys 1..13
+ push KS ; intel_aes_encrypt_init_256 advances KS, so keep the original value
+ push KEY
+
+ call intel_aes_encrypt_init_256
+
+ pop KEY
+ pop KS
+
+ movdqu xmm0, [0*16 + KS] ; swap round keys 0 and 14 without aesimc
+ movdqu xmm1, [14*16 + KS]
+ movdqu [14*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ i = 1
+ WHILE i LT 7
+ movdqu xmm0, [i*16 + KS] ; swap round keys i and 14-i, applying aesimc to both
+ movdqu xmm1, [(14-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(14-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ movdqu xmm0, [7*16 + KS] ; the middle key stays in place and only gets aesimc
+ aesimc xmm0, xmm0
+ movdqu [7*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_256 ENDP
+
+
+
+gen_aes_cbc_enc_func MACRO rnds ; body of a CBC encryption routine; rnds = index of the last round key; chaining value lives at [256+ctx]
+
+LOCAL loop1
+LOCAL bail
+
+ mov input, [rsp + 1*8 + 8*4] ; input pointer is taken from the stack
+ mov inputLen, [rsp + 1*8 + 8*5] ; byte count is taken from the stack
+
+ sub rsp, 3*16 ; room to preserve xmm6, xmm7 and xmm8
+
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+ movdqu xmm0, [256+ctx] ; xmm0 = current chaining value
+
+ movdqu xmm2, [0*16 + ctx] ; round keys 0..5 are kept in xmm2..xmm7 for the whole loop
+ movdqu xmm3, [1*16 + ctx]
+ movdqu xmm4, [2*16 + ctx]
+ movdqu xmm5, [3*16 + ctx]
+ movdqu xmm6, [4*16 + ctx]
+ movdqu xmm7, [5*16 + ctx]
+
+loop1:
+ cmp inputLen, 1*16 ; less than one full block left: finish
+ jb bail
+
+ movdqu xmm1, [input]
+ pxor xmm1, xmm2 ; input block XOR round key 0
+ pxor xmm0, xmm1 ; ... XOR previous output block (chaining value)
+
+ aesenc xmm0, xmm3
+ aesenc xmm0, xmm4
+ aesenc xmm0, xmm5
+ aesenc xmm0, xmm6
+ aesenc xmm0, xmm7
+
+ i = 6
+ WHILE i LT rnds
+ movdqu xmm8, [i*16 + ctx] ; remaining round keys are fetched from memory each time
+ aesenc xmm0, xmm8
+ i = i+1
+ ENDM
+ movdqu xmm8, [rnds*16 + ctx]
+ aesenclast xmm0, xmm8
+
+ movdqu [output], xmm0 ; xmm0 is both the output block and the next chaining value
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ movdqu [256+ctx], xmm0 ; write the chaining value back for the next call
+
+ xor rax, rax ; return value 0
+
+ movdqu xmm6, [rsp + 0*16] ; restore the preserved xmm registers
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+
+ENDM
+
+gen_aes_cbc_dec_func MACRO rnds ; body of a CBC decryption routine; rnds = index of the last round key; chaining value lives at [256 + ctx]
+
+LOCAL loop8
+LOCAL loop1
+LOCAL dec1
+LOCAL bail
+
+ mov input, [rsp + 1*8 + 8*4] ; input pointer is taken from the stack
+ mov inputLen, [rsp + 1*8 + 8*5] ; byte count is taken from the stack
+
+ sub rsp, 3*16 ; room to preserve xmm6, xmm7 and xmm8
+
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+loop8:
+ cmp inputLen, 8*16 ; fewer than eight blocks left: switch to the single-block path
+ jb dec1
+
+ movdqu xmm0, [0*16 + input] ; load eight input blocks
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+ movdqu xmm7, [7*16 + input]
+
+ movdqu xmm8, [0*16 + ctx] ; round key 0 is XORed into every block
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
+
+ i = 1
+ WHILE i LT rnds
+ aes_dec_rnd i
+ i = i+1
+ ENDM
+ aes_dec_last_rnd rnds
+
+ movdqu xmm8, [256 + ctx] ; block 0 is XORed with the stored chaining value
+ pxor xmm0, xmm8
+ movdqu xmm8, [0*16 + input] ; block k (k >= 1) is XORed with input block k-1
+ pxor xmm1, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm7, xmm8
+ movdqu xmm8, [7*16 + input] ; last input block becomes the new chaining value
+
+ movdqu [0*16 + output], xmm0 ; store the eight result blocks
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+ movdqu [256 + ctx], xmm8 ; save the new chaining value
+
+ lea input, [8*16 + input] ; advance both pointers by 128 bytes
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+dec1:
+
+ movdqu xmm3, [256 + ctx] ; xmm3 = chaining value for the single-block loop
+
+loop1:
+ cmp inputLen, 1*16 ; less than one full block left: finish
+ jb bail
+
+ movdqu xmm0, [input]
+ movdqa xmm4, xmm0 ; keep a copy of the input block; it is the next chaining value
+ movdqu xmm7, [0*16 + ctx] ; xmm7 serves as the round-key register in this loop
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesdeclast xmm0, xmm7
+ pxor xmm3, xmm0 ; decrypted block XOR chaining value
+
+ movdqu [output], xmm3
+ movdqa xmm3, xmm4 ; chaining value for the next block
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ movdqu [256 + ctx], xmm3 ; write the chaining value back for the next call
+ xor rax, rax ; return value 0
+
+ movdqu xmm6, [rsp + 0*16] ; restore the preserved xmm registers
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+ENDM
+
+intel_aes_encrypt_cbc_128 PROC ; CBC encryption, last round key index 10
+gen_aes_cbc_enc_func 10
+intel_aes_encrypt_cbc_128 ENDP
+
+intel_aes_encrypt_cbc_192 PROC ; CBC encryption, last round key index 12
+gen_aes_cbc_enc_func 12
+intel_aes_encrypt_cbc_192 ENDP
+
+intel_aes_encrypt_cbc_256 PROC ; CBC encryption, last round key index 14
+gen_aes_cbc_enc_func 14
+intel_aes_encrypt_cbc_256 ENDP
+
+intel_aes_decrypt_cbc_128 PROC ; CBC decryption, last round key index 10
+gen_aes_cbc_dec_func 10
+intel_aes_decrypt_cbc_128 ENDP
+
+intel_aes_decrypt_cbc_192 PROC ; CBC decryption, last round key index 12
+gen_aes_cbc_dec_func 12
+intel_aes_decrypt_cbc_192 ENDP
+
+intel_aes_decrypt_cbc_256 PROC ; CBC decryption, last round key index 14
+gen_aes_cbc_dec_func 14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+ctrCtx textequ <r10> ; gen_aes_ctr_func keeps the incoming rcx value here
+CTR textequ <r11d> ; scratch for the byte-swapped counter word XORed with round-key-0 bytes
+CTRSave textequ <eax> ; running 32-bit counter value (after bswap)
+
+gen_aes_ctr_func MACRO rnds ; body of a CTR routine; rnds = index of the last round key; only the last 32-bit word of the counter block is incremented
+
+LOCAL loop8
+LOCAL loop1
+LOCAL enc1
+LOCAL bail
+
+ mov input, [rsp + 8*1 + 4*8] ; input pointer is taken from the stack
+ mov inputLen, [rsp + 8*1 + 5*8] ; byte count is taken from the stack
+
+ mov ctrCtx, ctx ; the incoming rcx addresses the CTR context
+ mov ctx, [8+ctrCtx] ; pointer stored at offset 8 of the CTR context becomes the round-key base
+
+ sub rsp, 3*16 ; room to preserve xmm6, xmm7 and xmm8
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+
+ push rbp
+ mov rbp, rsp ; rbp remembers rsp so that bail can undo the alignment below
+ sub rsp, 8*16 ; eight 16-byte slots for pre-computed counter blocks
+ and rsp, -16 ; 16-byte alignment, required by the movdqa stores below
+
+
+ movdqu xmm0, [16+ctrCtx] ; counter block stored at offset 16 of the CTR context
+ mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4] ; last 32-bit word of the counter block
+ bswap CTRSave
+ movdqu xmm1, [ctx + 0*16]
+
+ pxor xmm0, xmm1 ; counter block XOR round key 0
+
+ movdqa [rsp + 0*16], xmm0 ; all eight slots start as the same block; slots 1..7 get their last word patched below
+ movdqa [rsp + 1*16], xmm0
+ movdqa [rsp + 2*16], xmm0
+ movdqa [rsp + 3*16], xmm0
+ movdqa [rsp + 4*16], xmm0
+ movdqa [rsp + 5*16], xmm0
+ movdqa [rsp + 6*16], xmm0
+ movdqa [rsp + 7*16], xmm0
+
+ inc CTRSave ; counter + 1 for slot 1
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4] ; apply the matching word of round key 0
+ mov DWORD PTR [rsp + 1*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 2 for slot 2
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 2*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 3 for slot 3
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 3*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 4 for slot 4
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 4*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 5 for slot 5
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 5*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 6 for slot 6
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 6*16 + 3*4], CTR
+
+ inc CTRSave ; counter + 7 for slot 7
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 7*16 + 3*4], CTR
+
+
+loop8:
+ cmp inputLen, 8*16 ; fewer than eight blocks left: switch to the single-block loop
+ jb loop1
+
+ movdqu xmm0, [0*16 + rsp] ; load the eight prepared counter blocks (round key 0 already applied)
+ movdqu xmm1, [1*16 + rsp]
+ movdqu xmm2, [2*16 + rsp]
+ movdqu xmm3, [3*16 + rsp]
+ movdqu xmm4, [4*16 + rsp]
+ movdqu xmm5, [5*16 + rsp]
+ movdqu xmm6, [6*16 + rsp]
+ movdqu xmm7, [7*16 + rsp]
+
+ i = 1
+ WHILE i LE 8
+ aes_rnd i
+
+ inc CTRSave ; between rounds, refill stack slot i-1 with the next counter value
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
+
+ i = i+1
+ ENDM
+ WHILE i LT rnds
+ aes_rnd i
+ i = i+1
+ ENDM
+ aes_last_rnd rnds
+
+ movdqu xmm8, [0*16 + input] ; XOR the eight keystream blocks with the input blocks
+ pxor xmm0, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm1, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [7*16 + input]
+ pxor xmm7, xmm8
+
+ movdqu [0*16 + output], xmm0 ; store the eight result blocks
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+
+ lea input, [8*16 + input] ; advance both pointers by 128 bytes
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+
+
+loop1:
+ cmp inputLen, 1*16 ; less than one full block left: finish
+ jb bail
+
+ movdqu xmm0, [rsp] ; take the next prepared counter block ...
+ add rsp, 16 ; ... and drop its slot; no new counter is generated in this loop
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx] ; xmm7 serves as the round-key register in this loop
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu xmm7, [input]
+ pxor xmm0, xmm7 ; keystream XOR input
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+
+ movdqu xmm0, [rsp] ; next unused prepared counter block
+ movdqu xmm1, [ctx + 0*16]
+ pxor xmm0, xmm1 ; XOR with round key 0 again to recover the plain counter block
+ movdqu [16+ctrCtx], xmm0 ; write the counter back for the next call
+
+
+ xor rax, rax ; return value 0 (also discards CTRSave, which is eax)
+ mov rsp, rbp ; discard the counter-block area and the alignment padding
+ pop rbp
+
+ movdqu xmm6, [rsp + 0*16] ; restore the preserved xmm registers
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+
+ ret
+ENDM
+
+
+intel_aes_encrypt_ctr_128 PROC ; CTR mode, last round key index 10
+gen_aes_ctr_func 10
+intel_aes_encrypt_ctr_128 ENDP
+
+intel_aes_encrypt_ctr_192 PROC ; CTR mode, last round key index 12
+gen_aes_ctr_func 12
+intel_aes_encrypt_ctr_192 ENDP
+
+intel_aes_encrypt_ctr_256 PROC ; CTR mode, last round key index 14
+gen_aes_ctr_func 14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes-x86-masm.asm b/security/nss/lib/freebl/intel-aes-x86-masm.asm
new file mode 100644
index 0000000000..790c951e7c
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x86-masm.asm
@@ -0,0 +1,942 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C
+.XMM
+
+; Constant tables used by the key-schedule routines in this file.
+.DATA
+ALIGN 16
+; pshufb control loaded by intel_aes_encrypt_init_128. Each dword selects
+; source bytes 13,14,15,12, i.e. the top dword of the source rotated by one
+; byte, replicated into all four lanes.
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
+; pshufb control loaded by intel_aes_encrypt_init_192. Each dword selects
+; source bytes 5,6,7,4, i.e. dword 1 of the source rotated by one byte.
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
+; pshufb control loaded by intel_aes_encrypt_init_256; same value as Lmask.
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
+; Initial round constant (1 in every lane); the key-schedule loops double it
+; with pslld after each use.
+Lcon1 dd 1,1,1,1
+; Round constant 1bh, loaded by intel_aes_encrypt_init_128 once its loop has
+; consumed the eight constants 01h..80h; one more doubling yields 36h.
+Lcon2 dd 1bh,1bh,1bh,1bh
+
+.CODE
+
+; Register aliases shared by the ECB/CBC/CTR code generators below.
+; ctx - pointer to the round keys (and, for CBC, the IV at offset 252)
+; output - destination pointer
+; input - source pointer
+; inputLen - remaining byte count; edi is pushed on entry and popped before
+; ret by every generator that uses it
+ctx textequ <ecx>
+output textequ <edx>
+input textequ <eax>
+inputLen textequ <edi>
+
+
+; aes_rnd i
+; Applies one aesenc round with round key number i (16 bytes at
+; [i*16 + ctx]) to the seven blocks held in xmm0..xmm6. Clobbers xmm7.
+aes_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ aesenc xmm1, xmm7
+ aesenc xmm2, xmm7
+ aesenc xmm3, xmm7
+ aesenc xmm4, xmm7
+ aesenc xmm5, xmm7
+ aesenc xmm6, xmm7
+ ENDM
+
+; aes_last_rnd i
+; Applies the final encryption round (aesenclast) with round key number i
+; (16 bytes at [i*16 + ctx]) to the seven blocks in xmm0..xmm6.
+; Clobbers xmm7.
+aes_last_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesenclast xmm0, xmm7
+ aesenclast xmm1, xmm7
+ aesenclast xmm2, xmm7
+ aesenclast xmm3, xmm7
+ aesenclast xmm4, xmm7
+ aesenclast xmm5, xmm7
+ aesenclast xmm6, xmm7
+ ENDM
+
+; aes_dec_rnd i
+; Applies one aesdec round with round key number i (16 bytes at
+; [i*16 + ctx]) to the seven blocks held in xmm0..xmm6. Clobbers xmm7.
+aes_dec_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ aesdec xmm1, xmm7
+ aesdec xmm2, xmm7
+ aesdec xmm3, xmm7
+ aesdec xmm4, xmm7
+ aesdec xmm5, xmm7
+ aesdec xmm6, xmm7
+ ENDM
+
+; aes_dec_last_rnd i
+; Applies the final decryption round (aesdeclast) with round key number i
+; (16 bytes at [i*16 + ctx]) to the seven blocks in xmm0..xmm6.
+; Clobbers xmm7.
+aes_dec_last_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesdeclast xmm0, xmm7
+ aesdeclast xmm1, xmm7
+ aesdeclast xmm2, xmm7
+ aesdeclast xmm3, xmm7
+ aesdeclast xmm4, xmm7
+ aesdeclast xmm5, xmm7
+ aesdeclast xmm6, xmm7
+ ENDM
+
+
+; gen_aes_ecb_func enc, rnds
+; Expands to the complete body (including ret) of an ECB routine.
+; enc - 1 selects aesenc/aesenclast; any other value selects
+; aesdec/aesdeclast.
+; rnds - number of AES rounds, which is also the index of the last round key.
+; Stack arguments read: 0 -> ctx, 1 -> output, 4 -> input, 5 -> inputLen
+; (matching the prototypes in intel-aes.h); arguments 2, 3 and 6 are not read.
+; Seven blocks are processed per pass while at least 7*16 bytes remain, then
+; one block at a time. A tail shorter than 16 bytes is not processed.
+; Always returns 0 in eax.
+gen_aes_ecb_func MACRO enc, rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL bail
+
+ ; edi (inputLen) is preserved for the caller.
+ push inputLen
+
+ ; After the push: [esp] = saved edi, [esp+4] = return address, so
+ ; argument k lives at [esp + 2*4 + k*4].
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+
+loop7:
+ cmp inputLen, 7*16
+ jb loop1
+
+ ; Load seven input blocks.
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+
+ ; Initial AddRoundKey with round key 0.
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+ pxor xmm1, xmm7
+ pxor xmm2, xmm7
+ pxor xmm3, xmm7
+ pxor xmm4, xmm7
+ pxor xmm5, xmm7
+ pxor xmm6, xmm7
+
+; Assembly-time selection of the round macros (seven-block path) and the
+; raw instructions (single-block path) for the requested direction.
+IF enc eq 1
+ rnd textequ <aes_rnd>
+ lastrnd textequ <aes_last_rnd>
+ aesinst textequ <aesenc>
+ aeslastinst textequ <aesenclast>
+ELSE
+ rnd textequ <aes_dec_rnd>
+ lastrnd textequ <aes_dec_last_rnd>
+ aesinst textequ <aesdec>
+ aeslastinst textequ <aesdeclast>
+ENDIF
+
+ ; Rounds 1 .. rnds-1, unrolled at assembly time, then the final round.
+ i = 1
+ WHILE i LT rnds
+ rnd i
+ i = i+1
+ ENDM
+ lastrnd rnds
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+
+; Single-block path for the remaining (fewer than seven) whole blocks.
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ movdqu xmm0, [input]
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesinst xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aeslastinst xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ ; Return 0 and restore the caller's edi.
+ xor eax, eax
+ pop inputLen
+ ret
+
+ENDM
+
+; ECB encryption entry point: gen_aes_ecb_func with enc = 1, rnds = 10.
+ALIGN 16
+intel_aes_encrypt_ecb_128 PROC
+gen_aes_ecb_func 1, 10
+intel_aes_encrypt_ecb_128 ENDP
+
+; ECB encryption entry point: gen_aes_ecb_func with enc = 1, rnds = 12.
+ALIGN 16
+intel_aes_encrypt_ecb_192 PROC
+gen_aes_ecb_func 1, 12
+intel_aes_encrypt_ecb_192 ENDP
+
+; ECB encryption entry point: gen_aes_ecb_func with enc = 1, rnds = 14.
+ALIGN 16
+intel_aes_encrypt_ecb_256 PROC
+gen_aes_ecb_func 1, 14
+intel_aes_encrypt_ecb_256 ENDP
+
+; ECB decryption entry point: gen_aes_ecb_func with enc = 0, rnds = 10.
+ALIGN 16
+intel_aes_decrypt_ecb_128 PROC
+gen_aes_ecb_func 0, 10
+intel_aes_decrypt_ecb_128 ENDP
+
+; ECB decryption entry point: gen_aes_ecb_func with enc = 0, rnds = 12.
+ALIGN 16
+intel_aes_decrypt_ecb_192 PROC
+gen_aes_ecb_func 0, 12
+intel_aes_decrypt_ecb_192 ENDP
+
+; ECB decryption entry point: gen_aes_ecb_func with enc = 0, rnds = 14.
+ALIGN 16
+intel_aes_decrypt_ecb_256 PROC
+gen_aes_ecb_func 0, 14
+intel_aes_decrypt_ecb_256 ENDP
+
+
+; Register aliases for the key-schedule routines. They name the same
+; registers as ctx (ecx), output (edx) and input (eax) above.
+; KEY - pointer to the raw key bytes
+; KS - pointer into the key-schedule buffer being written
+; ITR - scratch: table address while loading constants, then loop counter
+KEY textequ <ecx>
+KS textequ <edx>
+ITR textequ <eax>
+
+; void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded)
+; Expands the 16 bytes at key into eleven 16-byte round keys written
+; consecutively at expanded (round key 0 is the raw key).
+; Each step computes, for the previous round key k held in xmm1:
+; t = aesenclast(pshufb(k, Lmask), rcon) - top dword rotated, run
+; through aesenclast with the round constant as its key, in all lanes
+; k' = k ^ (k << 32) ^ (k << 64) ^ (k << 96) ^ t
+; Clobbers eax, ecx, edx and xmm0..xmm4; nothing is pushed, so the two
+; arguments sit directly above the return address.
+ALIGN 16
+intel_aes_encrypt_init_128 PROC
+
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+
+ ; Round key 0 is the key itself; xmm2 is the working copy for pshufb.
+ movdqu xmm1, [KEY]
+ movdqu [KS], xmm1
+ movdqa xmm2, xmm1
+
+ ; xmm0 = round constant (starts at 1), xmm4 = shuffle mask.
+ lea ITR, Lcon1
+ movdqa xmm0, [ITR]
+ lea ITR, Lmask
+ movdqa xmm4, [ITR]
+
+ ; Eight loop iterations produce round keys 1..8 (constants 01h..80h).
+ mov ITR, 8
+
+Lenc_128_ks_loop:
+ lea KS, [16 + KS]
+ ; The jne at the bottom of the loop tests the flags set here; none of
+ ; the SSE/AES instructions in between modify EFLAGS.
+ dec ITR
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [KS], xmm1
+ movdqa xmm2, xmm1
+
+ jne Lenc_128_ks_loop
+
+ ; KS now addresses round key 8. Switch to the 1bh constant table.
+ lea ITR, Lcon2
+ movdqa xmm0, [ITR]
+
+ ; Round key 9 (constant 1bh); pslld then turns the constant into 36h.
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [16 + KS], xmm1
+ movdqa xmm2, xmm1
+
+ ; Round key 10 (constant 36h).
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [32 + KS], xmm1
+ movdqa xmm2, xmm1
+
+ ret
+intel_aes_encrypt_init_128 ENDP
+
+
+; void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded)
+; Builds the decryption key schedule for a 16-byte key:
+; 1. runs intel_aes_encrypt_init_128 to fill round keys 0..10,
+; 2. reverses the order of the eleven round keys in place,
+; 3. applies aesimc to every round key except the first and last.
+; The result is in the form consumed by the aesdec/aesdeclast code above.
+ALIGN 16
+intel_aes_decrypt_init_128 PROC
+
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+ ; Forward both arguments to the encrypt-side expansion (cdecl: last
+ ; argument pushed first).
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_128
+
+ ; The callee advanced KS (edx); popping the pushed arguments both
+ ; cleans the stack and restores the original pointers.
+ pop KEY
+ pop KS
+
+ ; Swap round keys 0 and 10 unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [10*16 + KS]
+ movdqu [10*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ ; Swap round keys i and 10-i for i = 1..4, applying aesimc to both.
+ i = 1
+ WHILE i LT 5
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(10-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(10-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ ; The middle round key stays in place and only gets aesimc.
+ movdqu xmm0, [5*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [5*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_128 ENDP
+
+
+; void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded)
+; Expands the 24 bytes at key into the AES-192 encryption key schedule at
+; expanded. The first 16 key bytes are stored as round key 0; each of the
+; four loop iterations then performs two expansion steps and writes 48 more
+; bytes of schedule.
+; Register roles inside the loop:
+; xmm1 - the most recent four schedule dwords
+; xmm3 - the two dwords that follow them (low 8 bytes significant)
+; xmm5 - copy of xmm3 carried over from the previous step
+; xmm0 - round constant, doubled after each use; xmm4 - Lmask192
+; Clobbers eax, ecx, edx and xmm0..xmm7.
+ALIGN 16
+intel_aes_encrypt_init_192 PROC
+
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+ ; xmm1 = key bytes 0..15; xmm3 = key bytes 16..23 in its low half,
+ ; upper half zero.
+ pxor xmm3, xmm3
+ movdqu xmm1, [KEY]
+ pinsrd xmm3, DWORD PTR [16 + KEY], 0
+ pinsrd xmm3, DWORD PTR [20 + KEY], 1
+
+ movdqu [KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR]
+ lea ITR, Lmask192
+ movdqu xmm4, [ITR]
+
+ mov ITR, 4
+
+Lenc_192_ks_loop:
+ ; First expansion step: t = aesenclast(rotated dword 1 of xmm3, rcon).
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+
+ ; Running XOR across the dwords of xmm1 (three shifts) and xmm3 (one
+ ; shift), then fold in t and chain the new top dword of xmm1 into xmm3.
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+
+ ; Repack the 8-byte and 16-byte pieces into two 16-byte round keys:
+ ; xmm5 = { old xmm5 low qword, xmm1 low qword }
+ ; xmm6 = { xmm1 high qword, xmm3 low qword }
+ movdqa xmm6, xmm1
+ shufpd xmm5, xmm1, 00h
+ shufpd xmm6, xmm3, 01h
+
+ movdqu [16 + KS], xmm5
+ movdqu [32 + KS], xmm6
+
+ ; Second expansion step, same computation with the next constant.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+
+ ; This step's xmm1 is already a whole round key; xmm3 is carried in
+ ; xmm5 to be paired up in the next iteration.
+ movdqu [48 + KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea KS, [48 + KS]
+
+ dec ITR
+ jnz Lenc_192_ks_loop
+
+ ; Store the carried xmm5 in the 16 bytes after the last round key
+ ; written by the loop.
+ movdqu [16 + KS], xmm5
+ret
+intel_aes_encrypt_init_192 ENDP
+
+; void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded)
+; Builds the decryption key schedule for a 24-byte key:
+; 1. runs intel_aes_encrypt_init_192 to fill round keys 0..12,
+; 2. reverses the order of the thirteen round keys in place,
+; 3. applies aesimc to every round key except the first and last.
+ALIGN 16
+intel_aes_decrypt_init_192 PROC
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+ ; Forward both arguments to the encrypt-side expansion.
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_192
+
+ ; Clean the stack and restore the pointers the callee changed.
+ pop KEY
+ pop KS
+
+ ; Swap round keys 0 and 12 unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [12*16 + KS]
+ movdqu [12*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ ; Swap round keys i and 12-i for i = 1..5, applying aesimc to both.
+ i = 1
+ WHILE i LT 6
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(12-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(12-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ ; The middle round key stays in place and only gets aesimc.
+ movdqu xmm0, [6*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [6*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_192 ENDP
+
+; void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded)
+; Expands the 32 bytes at key into fifteen 16-byte round keys at expanded.
+; Round keys 0 and 1 are the raw key halves. Each of the six loop iterations
+; derives one even and one odd round key; a final half-step after the loop
+; produces round key 14.
+; Register roles: xmm1 = latest even round key, xmm3 = latest odd round key,
+; xmm0 = round constant (doubled per iteration), xmm5 = Lmask256,
+; xmm6 = zero (used as the aesenclast key in the odd step).
+; Clobbers eax, ecx, edx and xmm0..xmm6.
+ALIGN 16
+intel_aes_encrypt_init_256 PROC
+
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+ movdqu xmm1, [16*0 + KEY]
+ movdqu xmm3, [16*1 + KEY]
+
+ movdqu [16*0 + KS], xmm1
+ movdqu [16*1 + KS], xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR]
+ lea ITR, Lmask256
+ movdqu xmm5, [ITR]
+
+ pxor xmm6, xmm6
+
+ mov ITR, 6
+
+Lenc_256_ks_loop:
+
+ ; Even round key: rotate the top dword of xmm3, run it through
+ ; aesenclast with the round constant, and fold it into the running
+ ; XOR of xmm1's dwords.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+
+ ; Odd round key: broadcast the top dword of the new xmm1 (no rotation)
+ ; and run it through aesenclast with a zero key, so no constant is
+ ; added; fold into the running XOR of xmm3's dwords.
+ pshufd xmm2, xmm1, 0ffh
+ aesenclast xmm2, xmm6
+ movdqa xmm4, xmm3
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+ movdqu [16*3 + KS], xmm3
+
+ lea KS, [32 + KS]
+ dec ITR
+ jnz Lenc_256_ks_loop
+
+ ; Final even step: round key 14. KS has advanced by 6*32 bytes, so
+ ; [16*2 + KS] addresses slot 14 of the schedule.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+
+ ret
+intel_aes_encrypt_init_256 ENDP
+
+; void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded)
+; Builds the decryption key schedule for a 32-byte key:
+; 1. runs intel_aes_encrypt_init_256 to fill round keys 0..14,
+; 2. reverses the order of the fifteen round keys in place,
+; 3. applies aesimc to every round key except the first and last.
+ALIGN 16
+intel_aes_decrypt_init_256 PROC
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+ ; Forward both arguments to the encrypt-side expansion.
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_256
+
+ ; Clean the stack and restore the pointers the callee changed.
+ pop KEY
+ pop KS
+
+ ; Swap round keys 0 and 14 unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [14*16 + KS]
+ movdqu [14*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ ; Swap round keys i and 14-i for i = 1..6, applying aesimc to both.
+ i = 1
+ WHILE i LT 7
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(14-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(14-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ ; The middle round key stays in place and only gets aesimc.
+ movdqu xmm0, [7*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [7*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_256 ENDP
+
+
+
+; gen_aes_cbc_enc_func rnds
+; Expands to the complete body (including ret) of a CBC encryption routine
+; using rnds AES rounds.
+; Stack arguments read: 0 -> ctx, 1 -> output, 4 -> input, 5 -> inputLen.
+; The chaining value is read from and written back to the 16 bytes at
+; [252 + ctx]. CBC encryption is inherently serial, so blocks are processed
+; one at a time; a tail shorter than 16 bytes is not processed.
+; Always returns 0 in eax.
+gen_aes_cbc_enc_func MACRO rnds
+
+LOCAL loop1
+LOCAL bail
+
+ ; edi (inputLen) is preserved for the caller.
+ push inputLen
+
+ ; [esp] = saved edi, [esp+4] = return address, argument k at
+ ; [esp + 2*4 + k*4].
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+
+ ; xmm0 carries the chaining value (IV, then the previous ciphertext).
+ movdqu xmm0, [252+ctx]
+
+ ; Keep round keys 0..4 resident in xmm2..xmm6; the remaining round keys
+ ; are reloaded through xmm7 for every block.
+ movdqu xmm2, [0*16 + ctx]
+ movdqu xmm3, [1*16 + ctx]
+ movdqu xmm4, [2*16 + ctx]
+ movdqu xmm5, [3*16 + ctx]
+ movdqu xmm6, [4*16 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ ; xmm0 = chaining value ^ plaintext ^ round key 0.
+ movdqu xmm1, [input]
+ pxor xmm1, xmm2
+ pxor xmm0, xmm1
+
+ aesenc xmm0, xmm3
+ aesenc xmm0, xmm4
+ aesenc xmm0, xmm5
+ aesenc xmm0, xmm6
+
+ ; Rounds 5 .. rnds-1, then the final round.
+ i = 5
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ ; Save the last ciphertext block (or the unchanged IV if no block was
+ ; processed) as the chaining value for the next call.
+ movdqu [252+ctx], xmm0
+
+ xor eax, eax
+ pop inputLen
+ ret
+
+ENDM
+
+; gen_aes_cbc_dec_func rnds
+; Expands to the complete body (including ret) of a CBC decryption routine
+; using rnds AES rounds.
+; Stack arguments read: 0 -> ctx, 1 -> output, 4 -> input, 5 -> inputLen.
+; The chaining value lives in the 16 bytes at [252 + ctx] and is updated to
+; the last ciphertext block consumed. Seven blocks are decrypted per pass
+; while at least 7*16 bytes remain, then one block at a time; a tail shorter
+; than 16 bytes is not processed. Ciphertext needed for chaining is always
+; read before the corresponding output is stored.
+; Always returns 0 in eax.
+gen_aes_cbc_dec_func MACRO rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL dec1
+LOCAL bail
+
+ ; edi (inputLen) is preserved for the caller.
+ push inputLen
+
+ ; [esp] = saved edi, [esp+4] = return address, argument k at
+ ; [esp + 2*4 + k*4].
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+
+loop7:
+ cmp inputLen, 7*16
+ jb dec1
+
+ ; Load seven ciphertext blocks.
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+
+ ; Initial AddRoundKey with round key 0.
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+ pxor xmm1, xmm7
+ pxor xmm2, xmm7
+ pxor xmm3, xmm7
+ pxor xmm4, xmm7
+ pxor xmm5, xmm7
+ pxor xmm6, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ aes_dec_rnd i
+ i = i+1
+ ENDM
+ aes_dec_last_rnd rnds
+
+ ; Undo the chaining: block 0 is XORed with the stored chaining value,
+ ; block k with ciphertext block k-1 (re-read from input).
+ movdqu xmm7, [252 + ctx]
+ pxor xmm0, xmm7
+ movdqu xmm7, [0*16 + input]
+ pxor xmm1, xmm7
+ movdqu xmm7, [1*16 + input]
+ pxor xmm2, xmm7
+ movdqu xmm7, [2*16 + input]
+ pxor xmm3, xmm7
+ movdqu xmm7, [3*16 + input]
+ pxor xmm4, xmm7
+ movdqu xmm7, [4*16 + input]
+ pxor xmm5, xmm7
+ movdqu xmm7, [5*16 + input]
+ pxor xmm6, xmm7
+ ; The last ciphertext block of this pass becomes the new chaining
+ ; value; it is fetched before any output is written.
+ movdqu xmm7, [6*16 + input]
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [252 + ctx], xmm7
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+dec1:
+
+ ; Single-block path: xmm3 carries the chaining value.
+ movdqu xmm3, [252 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ ; Keep a copy of the ciphertext in xmm4; it is the next chaining value.
+ movdqu xmm0, [input]
+ movdqa xmm4, xmm0
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesdeclast xmm0, xmm7
+ ; plaintext = decrypted block ^ chaining value
+ pxor xmm3, xmm0
+
+ movdqu [output], xmm3
+ movdqa xmm3, xmm4
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ ; Persist the chaining value for the next call, return 0.
+ movdqu [252 + ctx], xmm3
+ xor eax, eax
+ pop inputLen
+ ret
+ENDM
+
+; CBC encryption entry point: gen_aes_cbc_enc_func with rnds = 10.
+ALIGN 16
+intel_aes_encrypt_cbc_128 PROC
+gen_aes_cbc_enc_func 10
+intel_aes_encrypt_cbc_128 ENDP
+
+; CBC encryption entry point: gen_aes_cbc_enc_func with rnds = 12.
+ALIGN 16
+intel_aes_encrypt_cbc_192 PROC
+gen_aes_cbc_enc_func 12
+intel_aes_encrypt_cbc_192 ENDP
+
+; CBC encryption entry point: gen_aes_cbc_enc_func with rnds = 14.
+ALIGN 16
+intel_aes_encrypt_cbc_256 PROC
+gen_aes_cbc_enc_func 14
+intel_aes_encrypt_cbc_256 ENDP
+
+; CBC decryption entry point: gen_aes_cbc_dec_func with rnds = 10.
+ALIGN 16
+intel_aes_decrypt_cbc_128 PROC
+gen_aes_cbc_dec_func 10
+intel_aes_decrypt_cbc_128 ENDP
+
+; CBC decryption entry point: gen_aes_cbc_dec_func with rnds = 12.
+ALIGN 16
+intel_aes_decrypt_cbc_192 PROC
+gen_aes_cbc_dec_func 12
+intel_aes_decrypt_cbc_192 ENDP
+
+; CBC decryption entry point: gen_aes_cbc_dec_func with rnds = 14.
+ALIGN 16
+intel_aes_decrypt_cbc_256 PROC
+gen_aes_cbc_dec_func 14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+; Extra register aliases for gen_aes_ctr_func; both registers are pushed on
+; entry and popped before ret.
+; ctrCtx - first the CTR context pointer (argument 0), later reused to hold
+; the running 32-bit counter value in host byte order
+; CTR - scratch for the byte-swapped, pre-whitened counter dword
+ctrCtx textequ <esi>
+CTR textequ <ebx>
+
+; gen_aes_ctr_func rnds
+; Expands to the complete body (including ret) of a CTR-mode routine using
+; rnds AES rounds.
+; Stack arguments read: 0 -> CTR context, 1 -> output, 4 -> input,
+; 5 -> inputLen. From the CTR context the code reads a pointer at offset 4
+; (used as the round-key pointer ctx) and the 16-byte counter block at
+; offset 8; the counter block is written back on exit.
+; Method: a 16-byte-aligned scratch area on the stack holds seven counter
+; blocks that are already XORed with round key 0. Only the last dword of the
+; counter block is incremented (treated as a big-endian 32-bit value); no
+; carry is propagated into the other twelve bytes.
+; Seven blocks are processed per pass while at least 7*16 bytes remain, then
+; one block at a time; a tail shorter than 16 bytes is not processed.
+; Always returns 0 in eax.
+gen_aes_ctr_func MACRO rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL enc1
+LOCAL bail
+
+ ; Preserve edi, esi, ebx and ebp for the caller.
+ push inputLen
+ push ctrCtx
+ push CTR
+ push ebp
+
+ ; Four saved registers plus the return address: argument k is at
+ ; [esp + 4*5 + k*4].
+ mov ctrCtx, [esp + 4*5 + 0*4]
+ mov output, [esp + 4*5 + 1*4]
+ mov input, [esp + 4*5 + 4*4]
+ mov inputLen, [esp + 4*5 + 5*4]
+
+ ; Round-key pointer stored at offset 4 of the CTR context.
+ mov ctx, [4+ctrCtx]
+
+ ; ebp remembers the frame; esp becomes a 16-byte-aligned scratch area
+ ; with room for seven counter blocks.
+ mov ebp, esp
+ sub esp, 7*16
+ and esp, -16
+
+ ; xmm0 = counter block; ctrCtx is then reused for the counter's last
+ ; dword converted to host byte order.
+ movdqu xmm0, [8+ctrCtx]
+ mov ctrCtx, [ctrCtx + 8 + 3*4]
+ bswap ctrCtx
+ movdqu xmm1, [ctx + 0*16]
+
+ ; Pre-apply round key 0 to the counter block.
+ pxor xmm0, xmm1
+
+ ; Fill all seven scratch slots with the whitened block; slots 1..6 get
+ ; their last dword patched below.
+ movdqa [esp + 0*16], xmm0
+ movdqa [esp + 1*16], xmm0
+ movdqa [esp + 2*16], xmm0
+ movdqa [esp + 3*16], xmm0
+ movdqa [esp + 4*16], xmm0
+ movdqa [esp + 5*16], xmm0
+ movdqa [esp + 6*16], xmm0
+
+ ; Slot k (k = 1..6): last dword = bswap(counter + k) ^ last dword of
+ ; round key 0.
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 1*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 2*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 3*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 4*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 5*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 6*16 + 3*4], CTR
+
+
+loop7:
+ cmp inputLen, 7*16
+ jb loop1
+
+ ; Load the seven prepared counter blocks.
+ movdqu xmm0, [0*16 + esp]
+ movdqu xmm1, [1*16 + esp]
+ movdqu xmm2, [2*16 + esp]
+ movdqu xmm3, [3*16 + esp]
+ movdqu xmm4, [4*16 + esp]
+ movdqu xmm5, [5*16 + esp]
+ movdqu xmm6, [6*16 + esp]
+
+ ; Rounds 1..7, interleaved with refreshing scratch slots 0..6 with the
+ ; next seven counter values for the following pass.
+ i = 1
+ WHILE i LE 7
+ aes_rnd i
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + (i-1)*16 + 3*4], CTR
+
+ i = i+1
+ ENDM
+ ; Remaining rounds 8 .. rnds-1, then the final round.
+ WHILE i LT rnds
+ aes_rnd i
+ i = i+1
+ ENDM
+ aes_last_rnd rnds
+
+ ; XOR the keystream with the input.
+ movdqu xmm7, [0*16 + input]
+ pxor xmm0, xmm7
+ movdqu xmm7, [1*16 + input]
+ pxor xmm1, xmm7
+ movdqu xmm7, [2*16 + input]
+ pxor xmm2, xmm7
+ movdqu xmm7, [3*16 + input]
+ pxor xmm3, xmm7
+ movdqu xmm7, [4*16 + input]
+ pxor xmm4, xmm7
+ movdqu xmm7, [5*16 + input]
+ pxor xmm5, xmm7
+ movdqu xmm7, [6*16 + input]
+ pxor xmm6, xmm7
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+
+
+; Single-block path. Fewer than seven whole blocks remain here, so it
+; consumes at most six of the seven prepared scratch slots.
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ ; Take the next prepared counter block and advance esp past it.
+ movdqu xmm0, [esp]
+ add esp, 16
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu xmm7, [input]
+ pxor xmm0, xmm7
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+
+ ; Reload the CTR context pointer (ctrCtx was reused as the counter).
+ ; [esp] is the first unused prepared block; XORing it with round key 0
+ ; removes the whitening and yields the next counter block to store.
+ mov ctrCtx, [ebp + 4*5 + 0*4]
+ movdqu xmm0, [esp]
+ movdqu xmm1, [ctx + 0*16]
+ pxor xmm0, xmm1
+ movdqu [8+ctrCtx], xmm0
+
+
+ ; Return 0, drop the scratch area and restore the saved registers.
+ xor eax, eax
+ mov esp, ebp
+ pop ebp
+ pop CTR
+ pop ctrCtx
+ pop inputLen
+ ret
+ENDM
+
+
+; CTR-mode entry point: gen_aes_ctr_func with rnds = 10.
+ALIGN 16
+intel_aes_encrypt_ctr_128 PROC
+gen_aes_ctr_func 10
+intel_aes_encrypt_ctr_128 ENDP
+
+; CTR-mode entry point: gen_aes_ctr_func with rnds = 12.
+ALIGN 16
+intel_aes_encrypt_ctr_192 PROC
+gen_aes_ctr_func 12
+intel_aes_encrypt_ctr_192 ENDP
+
+; CTR-mode entry point: gen_aes_ctr_func with rnds = 14.
+ALIGN 16
+intel_aes_encrypt_ctr_256 PROC
+gen_aes_ctr_func 14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes.h b/security/nss/lib/freebl/intel-aes.h
new file mode 100644
index 0000000000..e7fe1a03ba
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.h
@@ -0,0 +1,143 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Prototypes of the functions defined in the assembler file. */
+
+/*
+ * Key-schedule routines.
+ * key: raw AES key (16, 24 or 32 bytes for the _128, _192 and _256
+ * variants respectively).
+ * expanded: buffer that receives the expanded round keys.
+ * The decrypt variants additionally apply aesimc to the inner round keys so
+ * the schedule can be used with the aesdec-based routines.
+ */
+void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+/*
+ * Cipher routines for 128-bit keys (ECB, CBC and CTR modes).
+ * All share one signature. Per the parameter notes in intel-aes.s:
+ * cx - cipher context (CTRContext for the CTR routine)
+ * output - output buffer
+ * outputLen - pointer to the output length, already filled in by caller
+ * maxOutputLen - size of the output buffer, already checked by caller
+ * input - input buffer
+ * inputLen - number of input bytes
+ * blocksize - AES block size (always 16, unused)
+ * NOTE(review): the MASM implementations return 0 in eax; presumably that
+ * is SECSuccess - confirm against the SECStatus definition.
+ */
+SECStatus intel_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_128(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+/*
+ * Cipher routines for 192-bit keys (ECB, CBC and CTR modes). Same signature
+ * as the 128-bit routines: context, output buffer, output-length pointer,
+ * output buffer size, input buffer, input length, block size.
+ */
+SECStatus intel_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_192(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+/*
+ * Cipher routines for 256-bit keys (ECB, CBC and CTR modes). Same signature
+ * as the 128-bit routines: context, output buffer, output-length pointer,
+ * output buffer size, input buffer, input length, block size.
+ */
+SECStatus intel_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+
+/*
+ * Yields the AES-NI ECB routine for a direction and key size.
+ * encrypt: nonzero selects an intel_aes_encrypt_ecb_* routine, zero the
+ * matching intel_aes_decrypt_ecb_* routine.
+ * keysize: key length in bytes; 16 -> _128, 24 -> _192, otherwise _256.
+ * keysize may be evaluated twice, so pass side-effect-free expressions.
+ */
+#define native_aes_ecb_worker(encrypt, keysize) \
+ (!(encrypt) \
+ ? ((keysize) == 16 \
+ ? intel_aes_decrypt_ecb_128 \
+ : ((keysize) == 24 ? intel_aes_decrypt_ecb_192 \
+ : intel_aes_decrypt_ecb_256)) \
+ : ((keysize) == 16 \
+ ? intel_aes_encrypt_ecb_128 \
+ : ((keysize) == 24 ? intel_aes_encrypt_ecb_192 \
+ : intel_aes_encrypt_ecb_256)))
+
+/*
+ * Yields the AES-NI CBC routine for a direction and key size.
+ * encrypt: nonzero selects an intel_aes_encrypt_cbc_* routine, zero the
+ * matching intel_aes_decrypt_cbc_* routine.
+ * keysize: key length in bytes; 16 -> _128, 24 -> _192, otherwise _256.
+ * keysize may be evaluated twice, so pass side-effect-free expressions.
+ */
+#define native_aes_cbc_worker(encrypt, keysize) \
+ (!(encrypt) \
+ ? ((keysize) == 16 \
+ ? intel_aes_decrypt_cbc_128 \
+ : ((keysize) == 24 ? intel_aes_decrypt_cbc_192 \
+ : intel_aes_decrypt_cbc_256)) \
+ : ((keysize) == 16 \
+ ? intel_aes_encrypt_cbc_128 \
+ : ((keysize) == 24 ? intel_aes_encrypt_cbc_192 \
+ : intel_aes_encrypt_cbc_256)))
+
+/*
+ * Yields the AES-NI CTR routine for a round count.
+ * nr: number of AES rounds; 10 -> _128, 12 -> _192, otherwise _256.
+ * nr may be evaluated twice, so pass a side-effect-free expression.
+ */
+#define intel_aes_ctr_worker(nr) \
+ ((nr) != 10 \
+ ? ((nr) != 12 ? intel_aes_encrypt_ctr_256 : intel_aes_encrypt_ctr_192) \
+ : intel_aes_encrypt_ctr_128)
+
+/*
+ * Expands the AES key schedule with the AES-NI assembler routines.
+ * encrypt: nonzero builds an encryption schedule, zero a decryption one.
+ * keysize: key length in bytes; 16 -> _128, 24 -> _192, otherwise _256.
+ * The macro also uses two names from the expansion site that are not
+ * parameters: `key` (the raw key bytes) and `cx` (a context whose
+ * k.expandedKey receives the schedule).
+ * keysize is parenthesized at each use so that an argument containing
+ * lower-precedence operators (for example `a ? 16 : 32`) is compared as a
+ * whole. It may still be evaluated twice, so pass a side-effect-free
+ * expression.
+ */
+#define native_aes_init(encrypt, keysize) \
+ do { \
+ if (encrypt) { \
+ if ((keysize) == 16) \
+ intel_aes_encrypt_init_128(key, cx->k.expandedKey); \
+ else if ((keysize) == 24) \
+ intel_aes_encrypt_init_192(key, cx->k.expandedKey); \
+ else \
+ intel_aes_encrypt_init_256(key, cx->k.expandedKey); \
+ } else { \
+ if ((keysize) == 16) \
+ intel_aes_decrypt_init_128(key, cx->k.expandedKey); \
+ else if ((keysize) == 24) \
+ intel_aes_decrypt_init_192(key, cx->k.expandedKey); \
+ else \
+ intel_aes_decrypt_init_256(key, cx->k.expandedKey); \
+ } \
+ } while (0)
diff --git a/security/nss/lib/freebl/intel-aes.s b/security/nss/lib/freebl/intel-aes.s
new file mode 100644
index 0000000000..b242d233fe
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.s
@@ -0,0 +1,2485 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+ .text
+
+/* NOTE(review): presumably the byte offset of the IV inside the AES context
+ * on x86-64; its uses are outside this part of the file - confirm against
+ * the AESContext layout. */
+#define IV_OFFSET 256
+
+/*
+ * Warning: the length values used in this module are "unsigned int"
+ * in C, which is 32-bit. When they're passed in registers, use only
+ * the low 32 bits, because the top half is unspecified.
+ *
+ * This is called from C code, so the contents of those bits can
+ * depend on the C compiler's optimization decisions. This means that
+ * mistakes might not be obvious in testing if those bits happen to be
+ * zero in your build.
+ *
+ * Exception: 32-bit lea instructions use a 64-bit address because the
+ * address size doesn't affect the result, and that form is more
+ * compactly encoded and preferred by compilers over a 32-bit address.
+ */
+
+/* Expand a 16-byte key into eleven round keys.
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+ Round key 0 is the key itself. Each of the ten following round keys is
+ produced by one aeskeygenassist (emitted as raw bytes, with the round
+ constant as its immediate) followed by a call to key_expansion128, which
+ stores the new key at (%rsi) and advances %rsi by 16.
+ %eax is zeroed here because key_expansion128 uses it to clear %xmm3.
+*/
+ .type intel_aes_encrypt_init_128,@function
+ .globl intel_aes_encrypt_init_128
+ .align 16
+intel_aes_encrypt_init_128:
+ movups (%rdi), %xmm1
+ movups %xmm1, (%rsi)
+ leaq 16(%rsi), %rsi
+ xorl %eax, %eax
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
+ call key_expansion128
+
+ ret
+ .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
+
+
+/* Expand a 16-byte key into eleven round keys for decryption.
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+ Same expansion as intel_aes_encrypt_init_128, except that after each of
+ round keys 1..9 is stored, aesimc of that key (%xmm1 -> %xmm2) overwrites
+ the stored copy at -16(%rsi). %xmm1 itself keeps the untransformed key so
+ the expansion can continue. Round keys 0 and 10 are stored unmodified,
+ and the keys stay in expansion order (no reversal here).
+*/
+ .type intel_aes_decrypt_init_128,@function
+ .globl intel_aes_decrypt_init_128
+ .align 16
+intel_aes_decrypt_init_128:
+ movups (%rdi), %xmm1
+ movups %xmm1, (%rsi)
+ leaq 16(%rsi), %rsi
+ xorl %eax, %eax
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
+ call key_expansion128
+
+ ret
+ .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
+
+
+/* File-local helper (no .globl): derive the next AES-128 round key.
+ in %xmm1 : previous round key
+ in %xmm2 : aeskeygenassist result for that key
+ in %eax : must be 0 (the callers zero it); it seeds %xmm3
+ in %rsi : where to store the new round key
+ out %xmm1 : new round key, also stored at (%rsi)
+ out %rsi : advanced by 16
+ Clobbers %xmm2 and %xmm3.
+*/
+ .type key_expansion128,@function
+ .align 16
+key_expansion128:
+ movd %eax, %xmm3 /* %xmm3 = 0 */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the assist result */
+ /* The two shufps/pxor pairs build the running XOR of the previous
+ key's dwords (dword n ends up as k0 ^ ... ^ kn). */
+ shufps $0x10, %xmm1, %xmm3
+ pxor %xmm3, %xmm1
+ shufps $0x8c, %xmm1, %xmm3
+ pxor %xmm2, %xmm1
+ pxor %xmm3, %xmm1
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ ret
+ .size key_expansion128, .-key_expansion128
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_ecb_128,@function
+ .globl intel_aes_encrypt_ecb_128
+ .align 16
+intel_aes_encrypt_ecb_128:
+ movdqu (%rdi), %xmm2
+ movdqu 160(%rdi), %xmm12
+ xor %eax, %eax
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// complete loop unrolling
+ movdqu 16(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b
+1: cmpl %eax, %r9d
+ je 5f
+
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm2, %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: xor %eax, %eax
+ ret
+ .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
+
+
+/* intel_aes_decrypt_ecb_128: ECB decryption with an 11-entry round-key
+ schedule (AES-128), using the AES-NI aesdec and aesdeclast instructions.
+
+ Round keys are read from offsets 0..160 of the context. The key at
+ offset 160 is XORed into the input first, the keys at 144 down to 16
+ feed aesdec, and the key at offset 0 feeds aesdeclast, so the schedule
+ is walked backwards. This presumably expects a schedule prepared for
+ decryption by the matching init routine (not visible here) - confirm.
+
+ While at least 128 bytes remain, 8 blocks are decrypted per iteration;
+ the remainder is handled one 16-byte block at a time. The tail loop
+ stops only when the offset equals inputLen exactly, so inputLen has to
+ be a multiple of 16.
+
+ %rdx and %ecx are not read. Always returns 0 in %eax.
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_ecb_128,@function
+ .globl intel_aes_decrypt_ecb_128
+ .align 16
+intel_aes_decrypt_ecb_128:
+ movdqu (%rdi), %xmm2 /* round key at offset 0, used by aesdeclast */
+ movdqu 160(%rdi), %xmm12 /* round key at offset 160, XORed into the input */
+ xorl %eax, %eax /* %rax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: skip the 8-way loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* last offset at which 8 full blocks remain */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm12, %xmm3
+ pxor %xmm12, %xmm4
+ pxor %xmm12, %xmm5
+ pxor %xmm12, %xmm6
+ pxor %xmm12, %xmm7
+ pxor %xmm12, %xmm8
+ pxor %xmm12, %xmm9
+ pxor %xmm12, %xmm10
+
+// complete loop unrolling
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* another 8 full blocks are available */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep the remaining round keys in registers */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm12, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
+
+
+/* intel_aes_encrypt_cbc_128: CBC encryption with an 11-entry round-key
+ schedule (AES-128), using the AES-NI aesenc and aesenclast instructions.
+
+ Round keys are read from offsets 0..160 of the context. The chaining
+ value is read from offset 256 of the context (IV_OFFSET in the
+ commented-out lea below); after the last block the final ciphertext
+ block is written back to that location.
+
+ Blocks are processed one at a time, each one XORed with the previous
+ ciphertext block before encryption. The loop stops only when the offset
+ equals inputLen exactly, so a non-zero inputLen has to be a multiple of
+ 16. When inputLen is 0 the routine returns at once and leaves the
+ context untouched.
+
+ %rdx is overwritten with the address of the chaining value, so the
+ incoming outputLen pointer is not used. Always returns 0 in %eax.
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_cbc_128,@function
+ .globl intel_aes_encrypt_cbc_128
+ .align 16
+intel_aes_encrypt_cbc_128:
+ testl %r9d, %r9d
+ je 2f /* empty input: nothing to do */
+
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx /* %rdx = address of the chaining value */
+
+ movdqu (%rdx), %xmm0 /* chaining value */
+ movdqu (%rdi), %xmm2 /* round keys at offsets 0..160 in %xmm2..%xmm12 */
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+
+ xorl %eax, %eax /* %rax = byte offset into input and output */
+1: movdqu (%r8, %rax), %xmm1
+ pxor %xmm0, %xmm1 /* XOR with the previous ciphertext block (or IV) */
+ pxor %xmm2, %xmm1 /* XOR with the round key at offset 0 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b /* exits only on exact equality with inputLen */
+
+ movdqu %xmm0, (%rdx) /* save the last ciphertext block as the new chaining value */
+
+2: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
+
+
+/* intel_aes_decrypt_cbc_128: CBC decryption with an 11-entry round-key
+ schedule (AES-128), using the AES-NI aesdec and aesdeclast instructions.
+
+ Round keys are read from offsets 0..160 of the context and applied from
+ offset 160 down to offset 0. This presumably expects a schedule
+ prepared for decryption by the matching init routine (not visible
+ here) - confirm. The chaining value is read from offset 256 of the
+ context (IV_OFFSET in the commented-out lea below) and the last
+ ciphertext block consumed is written back there on exit.
+
+ While at least 128 bytes remain, 8 blocks are decrypted per iteration
+ and each result is XORed with the preceding ciphertext block, which is
+ re-read from the input before any output is stored. The remainder is
+ handled one 16-byte block at a time, using %xmm13 to hold the
+ ciphertext across the decryption. The tail loop stops only when the
+ offset equals inputLen exactly, so inputLen has to be a multiple of 16.
+
+ %rdx is overwritten with the address of the chaining value, so the
+ incoming outputLen pointer is not used. Always returns 0 in %eax.
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_cbc_128,@function
+ .globl intel_aes_decrypt_cbc_128
+ .align 16
+intel_aes_decrypt_cbc_128:
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx
+
+ movdqu (%rdx), %xmm0 /* iv */
+ movdqu (%rdi), %xmm2 /* first key block */
+ movdqu 160(%rdi), %xmm12 /* last key block */
+ xorl %eax, %eax
+ cmpl $128, %r9d
+ jb 1f
+ leal -128(%r9), %r11d
+2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
+ movdqu 16(%r8, %rax), %xmm4 /* 2d data block */
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm12, %xmm3
+ pxor %xmm12, %xmm4
+ pxor %xmm12, %xmm5
+ pxor %xmm12, %xmm6
+ pxor %xmm12, %xmm7
+ pxor %xmm12, %xmm8
+ pxor %xmm12, %xmm9
+ pxor %xmm12, %xmm10
+
+// complete loop unrolling
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3 /* first block: XOR with the incoming chaining value */
+ movdqu (%r8, %rax), %xmm0 /* re-read ciphertext block 0 to chain into block 1 */
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0 /* last ciphertext block becomes the next chaining value */
+ movdqu %xmm3, (%rsi, %rax) /* stores happen only after all input re-reads above */
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* another 8 full blocks are available */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep the remaining round keys in registers */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ movdqa %xmm1, %xmm13 /* keep the ciphertext; it is the next chaining value */
+ pxor %xmm12, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ pxor %xmm0, %xmm1 /* XOR with the previous ciphertext block (or IV) */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm13, %xmm0
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: movdqu %xmm0, (%rdx) /* write the chaining value back into the context */
+
+ xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
+
+/* intel_aes_encrypt_init_192: expand a 24-byte (192-bit) key into the
+ encryption key schedule.
+
+ The 24 key bytes are copied to the start of the buffer, then
+ key_expansion192 is called 8 times, once per aeskeygenassist round
+ constant 0x01..0x80. Each call appends 24 bytes, so 24 + 8*24 = 216
+ bytes are written in total and the buffer must have room for them.
+
+ %rsi is advanced past the written data. %eax, %xmm1..%xmm5 are
+ clobbered (partly by key_expansion192).
+
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+*/
+ .type intel_aes_encrypt_init_192,@function
+ .globl intel_aes_encrypt_init_192
+ .align 16
+intel_aes_encrypt_init_192:
+ movdqu (%rdi), %xmm1 /* key bytes 0..15 */
+ movq 16(%rdi), %xmm3 /* key bytes 16..23 in the low half of %xmm3 */
+ movdqu %xmm1, (%rsi)
+ movq %xmm3, 16(%rsi)
+ leaq 24(%rsi), %rsi /* %rsi = next write position in the schedule */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
+ call key_expansion192
+
+ ret
+ .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
+
+
+/* intel_aes_decrypt_init_192: expand a 24-byte (192-bit) key into the
+ key schedule used for decryption.
+
+ The expansion is the same as in intel_aes_encrypt_init_192: the 24 key
+ bytes are copied, then key_expansion192 is called 8 times and appends
+ 24 bytes per call (216 bytes written in total). In between, aesimc is
+ applied in place to schedule bytes 16..191, i.e. to every 16-byte
+ round key except the first (offset 0) and the last (offset 192).
+ Because each call produces 24 bytes, the fix-ups alternate between two
+ round keys (32 bytes) after odd-numbered calls and one round key
+ (16 bytes) after even-numbered calls.
+
+ %rsi is advanced past the written data. %eax, %xmm1..%xmm5 are
+ clobbered (partly by key_expansion192).
+
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+*/
+ .type intel_aes_decrypt_init_192,@function
+ .globl intel_aes_decrypt_init_192
+ .align 16
+intel_aes_decrypt_init_192:
+ movdqu (%rdi), %xmm1 /* key bytes 0..15 */
+ movq 16(%rdi), %xmm3 /* key bytes 16..23 in the low half of %xmm3 */
+ movdqu %xmm1, (%rsi)
+ movq %xmm3, 16(%rsi)
+ leaq 24(%rsi), %rsi /* %rsi = next write position in the schedule */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2 /* the two complete round keys ending 16 bytes back */
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi) /* replace the 16 bytes just stored from %xmm1 with their aesimc form */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
+ call key_expansion192 /* output of this last call is left without aesimc */
+
+ ret
+ .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
+
+
+ .type key_expansion192,@function /* file-local helper (no .globl): appends 24 bytes of 192-bit key schedule at (%rsi) */
+ .align 16
+key_expansion192: /* in: %xmm1 = previous 16 schedule bytes, %xmm3 low half = previous 8 bytes, %xmm2 = aeskeygenassist result, %rsi = write position; out: %xmm1 and %xmm3 updated, %rsi advanced by 24; clobbers %eax (set to 0), %xmm2, %xmm4, %xmm5 */
+ pshufd $0x55, %xmm2, %xmm2 /* broadcast dword 1 of the aeskeygenassist result to all four lanes */
+ xor %eax, %eax
+ movd %eax, %xmm4 /* %xmm4 = 0 */
+ shufps $0x10, %xmm1, %xmm4 /* %xmm4 = (0, 0, k1, k0), where k0..k3 are the dwords of %xmm1 */
+ pxor %xmm4, %xmm1
+ shufps $0x8c, %xmm1, %xmm4 /* %xmm4 = (0, k0, k0, k1^k2) */
+ pxor %xmm2, %xmm1
+ pxor %xmm4, %xmm1 /* each dword of %xmm1 is now XORed with all lower dwords and the broadcast word */
+ movdqu %xmm1, (%rsi) /* emit 16 new schedule bytes */
+ addq $16, %rsi
+
+ pshufd $0xff, %xmm1, %xmm4 /* broadcast dword 3 of the new %xmm1 */
+ movd %eax, %xmm5 /* %xmm5 = 0 */
+ shufps $0x00, %xmm3, %xmm5 /* %xmm5 = (0, 0, a, a), where a is dword 0 of %xmm3 */
+ shufps $0x08, %xmm3, %xmm5 /* %xmm5 = (0, a, a, a) */
+ pxor %xmm4, %xmm3
+ pxor %xmm5, %xmm3 /* dword 0 ^= broadcast word; dword 1 ^= broadcast word ^ a */
+ movq %xmm3, (%rsi) /* emit only the low 8 bytes */
+ addq $8, %rsi
+ ret
+ .size key_expansion192, .-key_expansion192
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_ecb_192,@function
+ .globl intel_aes_encrypt_ecb_192
+ .align 16
+intel_aes_encrypt_ecb_192:
+ movdqu (%rdi), %xmm2
+ movdqu 192(%rdi), %xmm14
+ xorl %eax, %eax
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// complete loop unrolling
+ movdqu 16(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b
+1: cmpl %eax, %r9d
+ je 5f
+
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm2, %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: xor %eax, %eax
+ ret
+ .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
+
+
+/* intel_aes_decrypt_ecb_192
+ Decrypts inputLen bytes from input to output in ECB mode with a
+ 192-bit key, using AES-NI instructions emitted as raw .byte opcodes
+ (the mnemonic each sequence encodes is given in the comment beside it).
+ Thirteen 16-byte round keys are read from cx at offsets 0..192 and
+ applied in descending order: the key at 192 is XORed into the block,
+ the keys at 176..16 drive eleven aesdec rounds, and the key at 0
+ drives aesdeclast.
+ NOTE(review): aesdec presumably needs the keys at 16..176 to have been
+ stored in aesimc-transformed form by the decrypt key setup, which is
+ not part of this routine - confirm.
+ Eight blocks are processed per iteration while at least 128 bytes
+ remain; the rest is handled one block at a time. The single-block
+ loop stops only when the offset equals inputLen exactly, so inputLen
+ has to be a multiple of 16.
+ %rdx and %ecx are not read. Returns 0 in %eax.
+ Uses %rax, %r11 and %xmm1-%xmm14 as scratch.
+ Arguments:
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_ecb_192,@function
+ .globl intel_aes_decrypt_ecb_192
+ .align 16
+intel_aes_decrypt_ecb_192:
+ movdqu (%rdi), %xmm2 /* round key at offset 0, applied last (aesdeclast) */
+ movdqu 192(%rdi), %xmm14 /* round key at offset 192, applied first (pxor) */
+ xorl %eax, %eax /* %rax = running byte offset into input/output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: skip the unrolled loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* largest offset at which 8 whole blocks remain */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm14, %xmm3
+ pxor %xmm14, %xmm4
+ pxor %xmm14, %xmm5
+ pxor %xmm14, %xmm6
+ pxor %xmm14, %xmm7
+ pxor %xmm14, %xmm8
+ pxor %xmm14, %xmm9
+ pxor %xmm14, %xmm10
+
+// complete loop unrolling: each round key is applied to all 8 blocks before the next key is loaded
+ movdqu 176(%rdi), %xmm1 /* round key at offset 176 */
+ movdqu 160(%rdi), %xmm11 /* round key at offset 160 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1 /* round key at offset 144 */
+ movdqu 128(%rdi), %xmm11 /* round key at offset 128 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1 /* round key at offset 112 */
+ movdqu 96(%rdi), %xmm11 /* round key at offset 96 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1 /* round key at offset 80 */
+ movdqu 64(%rdi), %xmm11 /* round key at offset 64 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1 /* round key at offset 48 */
+ movdqu 32(%rdi), %xmm11 /* round key at offset 32 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1 /* round key at offset 16: last aesdec round */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
+1: cmpl %eax, %r9d
+ je 5f /* every byte already handled by the 8-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep round keys 16..176 in %xmm3-%xmm13 for the single-block loop */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm14, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
+
+
+/* intel_aes_encrypt_cbc_192
+ Encrypts inputLen bytes from input to output in CBC mode with a
+ 192-bit key, using AES-NI instructions emitted as raw .byte opcodes.
+ The chaining value (IV) is read from offset 256 of cx (IV_OFFSET) and
+ the last ciphertext block produced is written back to that location
+ before returning.
+ All thirteen round keys (cx offsets 0..192) are held in %xmm2-%xmm14
+ and blocks are processed one at a time, because every plaintext block
+ is XORed with the preceding ciphertext block before it is encrypted.
+ When inputLen is 0 the routine returns at once and leaves the stored
+ IV untouched. The loop stops only when the offset equals inputLen
+ exactly, so inputLen has to be a multiple of 16.
+ %rdx is overwritten with the address of the IV, so the outputLen
+ pointer passed in is not used; %ecx is not read. Returns 0 in %eax.
+ Arguments:
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_cbc_192,@function
+ .globl intel_aes_encrypt_cbc_192
+ .align 16
+intel_aes_encrypt_cbc_192:
+ testl %r9d, %r9d
+ je 2f /* nothing to encrypt */
+
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx /* %rdx = address of the IV inside the context */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = chaining value */
+ movdqu (%rdi), %xmm2 /* round keys 0..192 -> %xmm2..%xmm14 */
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+ movdqu 192(%rdi), %xmm14
+
+ xorl %eax, %eax /* %rax = running byte offset */
+1: movdqu (%r8, %rax), %xmm1
+ pxor %xmm0, %xmm1 /* CBC: XOR plaintext with the chaining value */
+ pxor %xmm2, %xmm1 /* initial AddRoundKey with the key at offset 0 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b /* exits only on exact equality with inputLen */
+
+ movdqu %xmm0, (%rdx) /* store the last ciphertext block as the new IV */
+
+2: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
+
+
+/* intel_aes_decrypt_cbc_192
+ Decrypts inputLen bytes from input to output in CBC mode with a
+ 192-bit key, using AES-NI instructions emitted as raw .byte opcodes.
+ The chaining value (IV) is read from offset 256 of cx (IV_OFFSET);
+ on return that location holds the last ciphertext block consumed (or
+ the unchanged IV when inputLen is 0).
+ Thirteen round keys are read from cx at offsets 0..192 and applied in
+ descending order: pxor with the key at 192, eleven aesdec rounds with
+ the keys at 176..16, then aesdeclast with the key at 0.
+ NOTE(review): aesdec presumably needs the keys at 16..176 to have been
+ stored in aesimc-transformed form by the decrypt key setup, which is
+ not part of this routine - confirm.
+ Eight blocks are decrypted per iteration while at least 128 bytes
+ remain; the rest is handled one block at a time. In the 8-block path
+ the ciphertext needed for chaining is re-read from input before any
+ output block is stored (presumably so output may alias input -
+ confirm). The single-block loop stops only when the offset equals
+ inputLen exactly, so inputLen has to be a multiple of 16.
+ %rdx is overwritten with the address of the IV, so the outputLen
+ pointer passed in is not used; %ecx is not read. Returns 0 in %eax.
+ Uses %rax, %r11 and %xmm0-%xmm15 as scratch.
+ Arguments:
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_cbc_192,@function
+ .globl intel_aes_decrypt_cbc_192
+ .align 16
+intel_aes_decrypt_cbc_192:
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx /* %rdx = address of the IV inside the context */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = chaining value */
+ movdqu (%rdi), %xmm2 /* round key at offset 0, applied last (aesdeclast) */
+ movdqu 192(%rdi), %xmm14 /* round key at offset 192, applied first (pxor) */
+ xorl %eax, %eax /* %rax = running byte offset */
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: skip the unrolled loop */
+ leal -128(%r9), %r11d /* largest offset at which 8 whole blocks remain */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm14, %xmm3
+ pxor %xmm14, %xmm4
+ pxor %xmm14, %xmm5
+ pxor %xmm14, %xmm6
+ pxor %xmm14, %xmm7
+ pxor %xmm14, %xmm8
+ pxor %xmm14, %xmm9
+ pxor %xmm14, %xmm10
+
+// complete loop unrolling: each round key is applied to all 8 blocks before the next key is loaded
+ movdqu 176(%rdi), %xmm1 /* round key at offset 176 */
+ movdqu 160(%rdi), %xmm11 /* round key at offset 160 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1 /* round key at offset 144 */
+ movdqu 128(%rdi), %xmm11 /* round key at offset 128 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1 /* round key at offset 112 */
+ movdqu 96(%rdi), %xmm11 /* round key at offset 96 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1 /* round key at offset 80 */
+ movdqu 64(%rdi), %xmm11 /* round key at offset 64 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1 /* round key at offset 48 */
+ movdqu 32(%rdi), %xmm11 /* round key at offset 32 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1 /* round key at offset 16: last aesdec round */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3 /* block 0 ^= chaining value carried in from before */
+ movdqu (%r8, %rax), %xmm0 /* re-read ciphertext block 0 to chain into block 1 */
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0 /* ciphertext block 7 is the next chaining value */
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
+1: cmpl %eax, %r9d
+ je 5f /* every byte already handled by the 8-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep round keys 16..176 in %xmm3-%xmm13 for the single-block loop */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ movdqa %xmm1, %xmm15 /* keep the ciphertext block for chaining */
+ pxor %xmm14, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ pxor %xmm0, %xmm1 /* CBC: XOR with the previous ciphertext block (or IV) */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm15, %xmm0 /* saved ciphertext becomes the next chaining value */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: movdqu %xmm0, (%rdx) /* store the final chaining value back into the context */
+
+ xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
+
+/* intel_aes_encrypt_init_256
+ Expands a 256-bit key into the AES-256 encryption key schedule.
+ The 32 key bytes are copied to the first 32 bytes of the buffer; six
+ calls to key_expansion256 (round constants 0x01..0x20) each append two
+ more 16-byte round keys, and a final inline step (round constant 0x40)
+ appends one more. In total 240 bytes (15 round keys) are written.
+ %eax is cleared before the first call because key_expansion256 builds
+ its zero register from %eax; it is still 0 on return.
+ %rsi is advanced while writing and is not restored.
+ Uses %xmm1, %xmm2, %xmm3, %xmm4 and %xmm6 as scratch.
+ Arguments:
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+*/
+ .type intel_aes_encrypt_init_256,@function
+ .globl intel_aes_encrypt_init_256
+ .align 16
+intel_aes_encrypt_init_256:
+ movdqu (%rdi), %xmm1 /* low 16 bytes of the key = round key 0 */
+ movdqu 16(%rdi), %xmm3 /* high 16 bytes of the key = round key 1 */
+ movdqu %xmm1, (%rsi)
+ movdqu %xmm3, 16(%rsi)
+ leaq 32(%rsi), %rsi /* %rsi = where the next round key goes */
+ xor %eax, %eax /* key_expansion256 zeroes %xmm6 from %eax */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ pxor %xmm6, %xmm6 /* last key only: same steps as the first half of key_expansion256, done inline */
+ pshufd $0xff, %xmm2, %xmm2
+ shufps $0x10, %xmm1, %xmm6
+ pxor %xmm6, %xmm1
+ shufps $0x8c, %xmm1, %xmm6
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1
+ movdqu %xmm1, (%rsi) /* 15th round key, at offset 224 of the buffer */
+
+ ret
+ .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
+
+
+/* intel_aes_decrypt_init_256
+ Expands a 256-bit key into the AES-256 decryption key schedule.
+ The expansion is the same as in intel_aes_encrypt_init_256 (six calls
+ to key_expansion256 plus one inline final step, 240 bytes in total),
+ but the round keys at buffer offsets 16..208 are stored after being
+ passed through aesimc. The keys at offsets 0 and 224 are stored
+ unmodified.
+ %xmm1 and %xmm3 always keep the untransformed keys so the expansion
+ can continue; the aesimc results go through %xmm4/%xmm5 and overwrite
+ the two slots key_expansion256 has just written.
+ %eax is cleared before the first call because key_expansion256 builds
+ its zero register from %eax; it is still 0 on return.
+ %rsi is advanced while writing and is not restored.
+ Uses %xmm1-%xmm6 as scratch.
+ Arguments:
+ in %rdi : the key
+ in %rsi : buffer for expanded key
+*/
+ .type intel_aes_decrypt_init_256,@function
+ .globl intel_aes_decrypt_init_256
+ .align 16
+intel_aes_decrypt_init_256:
+ movdqu (%rdi), %xmm1 /* low 16 bytes of the key = round key 0 */
+ movdqu 16(%rdi), %xmm3 /* high 16 bytes of the key = round key 1 */
+ movdqu %xmm1, (%rsi) /* round key 0 is stored as is */
+ .byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */
+ movdqu %xmm4, 16(%rsi)
+ leaq 32(%rsi), %rsi /* %rsi = where the next round key goes */
+ xor %eax, %eax /* key_expansion256 zeroes %xmm6 from %eax */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* %rsi was advanced by 32: overwrite the two keys just stored */
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi)
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi)
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi)
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi)
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi)
+ movdqu %xmm5, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ pxor %xmm6, %xmm6 /* last key only: same steps as the first half of key_expansion256, done inline */
+ pshufd $0xff, %xmm2, %xmm2
+ shufps $0x10, %xmm1, %xmm6
+ pxor %xmm6, %xmm1
+ shufps $0x8c, %xmm1, %xmm6
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1
+ movdqu %xmm1, (%rsi) /* 15th round key (offset 224), stored without aesimc */
+
+ ret
+ .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
+
+
+/* key_expansion256 (file-local helper: there is no .globl for it)
+ Derives the next two 16-byte round keys of an AES-256 key schedule,
+ stores them at (%rsi) and 16(%rsi), and advances %rsi by 32.
+ in %xmm1 : the earlier of the two most recent round keys
+ in %xmm3 : the later of the two most recent round keys
+ in %xmm2 : aeskeygenassist result computed by the caller from %xmm3
+ with the round constant for this step
+ in %eax : must be 0; it is moved into %xmm6, and the shufps steps
+ below rely on dword 0 of %xmm6 being zero
+ in %rsi : destination for the two new round keys
+ out %xmm1, %xmm3 : the two new round keys (same values as stored)
+ Clobbers %xmm2, %xmm4 and %xmm6.
+ In each half, the two shufps/pxor pairs XOR every dword of the old
+ key with all lower-numbered dwords of that key, and the broadcast
+ aeskeygenassist dword is XORed into all four lanes.
+*/
+ .type key_expansion256,@function
+ .align 16
+key_expansion256:
+ movd %eax, %xmm6 /* %xmm6 = 0 (callers pass %eax = 0) */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the aeskeygenassist result */
+ shufps $0x10, %xmm1, %xmm6
+ pxor %xmm6, %xmm1
+ shufps $0x8c, %xmm1, %xmm6
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1
+ movdqu %xmm1, (%rsi) /* first new round key */
+
+ addq $16, %rsi
+ .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */
+ pshufd $0xaa, %xmm4, %xmm4 /* broadcast dword 2 of the aeskeygenassist result */
+ shufps $0x10, %xmm3, %xmm6 /* dword 0 of %xmm6 is still zero here */
+ pxor %xmm6, %xmm3
+ shufps $0x8c, %xmm3, %xmm6
+ pxor %xmm4, %xmm3
+ pxor %xmm6, %xmm3
+ movdqu %xmm3, (%rsi) /* second new round key */
+ addq $16, %rsi
+ ret
+ .size key_expansion256, .-key_expansion256
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_ecb_256,@function
+ .globl intel_aes_encrypt_ecb_256
+ .align 16
+intel_aes_encrypt_ecb_256:
+ movdqu (%rdi), %xmm2
+ movdqu 224(%rdi), %xmm15
+ xorl %eax, %eax
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// complete loop unrolling
+ movdqu 16(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ movdqu 192(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 208(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b
+1: cmpl %eax, %r9d
+ je 5f
+
+ movdqu (%rdi), %xmm8
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 128(%rdi), %xmm9
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm8, %xmm1
+ movdqu 112(%rdi), %xmm8
+ .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: xor %eax, %eax
+ ret
+ .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
+
+
+/* AES-256 ECB decryption of inputLen bytes with AES-NI; always returns 0 in %eax.
+ Handles 8 blocks per iteration while at least 128 bytes remain, then 1 block at a time.
+ in %rdi : cx - context (round keys are read from offsets 0..224, 16 bytes each)
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_ecb_256,@function
+ .globl intel_aes_decrypt_ecb_256
+ .align 16
+intel_aes_decrypt_ecb_256:
+ movdqu (%rdi), %xmm2 /* key at cx+0: consumed by aesdeclast in the 8-block loop */
+ movdqu 224(%rdi), %xmm15 /* key at cx+224: XORed into every block before the first aesdec */
+ xorl %eax, %eax /* %eax = running byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: skip the 8-block loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* %r11d = inputLen - 128, the last offset with 8 whole blocks left */
+2: movdqu (%r8, %rax), %xmm3 /* load 8 input blocks into %xmm3..%xmm10 */
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm15, %xmm3 /* XOR the cx+224 key into all 8 blocks */
+ pxor %xmm15, %xmm4
+ pxor %xmm15, %xmm5
+ pxor %xmm15, %xmm6
+ pxor %xmm15, %xmm7
+ pxor %xmm15, %xmm8
+ pxor %xmm15, %xmm9
+ pxor %xmm15, %xmm10
+
+// complete loop unrolling: 13 aesdec steps with the keys at cx+208 down to cx+16, two keys per group
+ movdqu 208(%rdi), %xmm1 /* %xmm1 and %xmm11 are scratch registers for the current pair of keys */
+ movdqu 192(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1 /* last aesdec key; the final step below uses the cx+0 key held in %xmm2 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax) /* store the 8 decrypted blocks at the same offset in output */
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
+1: cmpl %eax, %r9d
+ je 5f /* everything consumed: no single-block tail */
+
+ movdqu 16(%rdi), %xmm2 /* keys cx+16..cx+208 go to %xmm2..%xmm14; this overwrites the cx+0 copy in %xmm2 */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 112(%rdi), %xmm8
+ movdqu 128(%rdi), %xmm9
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1 /* single-block loop: %xmm1 = next input block */
+ pxor %xmm15, %xmm1 /* XOR in the cx+224 key */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* cx+112 key is spent: reuse %xmm8 for the cx+0 key needed by aesdeclast */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
+ movdqu 112(%rdi), %xmm8 /* put the cx+112 key back for the next iteration */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* leaves only when offset == inputLen; NOTE(review): presumably callers pass a multiple of 16 - confirm */
+
+5: xor %eax, %eax /* return value is always 0 */
+ ret
+ .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
+
+
+/* AES-256 CBC encryption of inputLen bytes with AES-NI, one block at a time; always returns 0 in %eax.
+ The chaining value is read from cx+256 and the last ciphertext block is written back there.
+ in %rdi : cx - context (round keys are read from offsets 0..224, 16 bytes each)
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_encrypt_cbc_256,@function
+ .globl intel_aes_encrypt_cbc_256
+ .align 16
+intel_aes_encrypt_cbc_256:
+ testl %r9d, %r9d
+ je 2f /* empty input: return 0 without reading or writing cx+256 */
+
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx /* %rdx = cx+256 (IV_OFFSET in the line above); the outputLen argument is dropped */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = chaining value for the first block */
+ movdqu (%rdi), %xmm8 /* cx+0 key; %xmm8 alternates with the cx+112 key inside the loop */
+ movdqu 16(%rdi), %xmm2 /* keys cx+16..cx+96 stay resident in %xmm2..%xmm7 */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 128(%rdi), %xmm9 /* keys cx+128..cx+224 stay resident in %xmm9..%xmm15 */
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+ movdqu 224(%rdi), %xmm15
+
+ xorl %eax, %eax /* %eax = running byte offset into input and output */
+1: movdqu (%r8, %rax), %xmm1 /* %xmm1 = next input block */
+ pxor %xmm0, %xmm1 /* CBC chaining: XOR with the previous ciphertext block (or the stored value) */
+ pxor %xmm8, %xmm1 /* XOR in the cx+0 key */
+ movdqu 112(%rdi), %xmm8 /* cx+0 key is spent: reuse %xmm8 for the cx+112 key */
+ .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* put the cx+0 key back for the next iteration */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block is the chaining value for the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b /* leaves only when offset == inputLen; NOTE(review): presumably callers pass a multiple of 16 - confirm */
+
+ movdqu %xmm0, (%rdx) /* save the last ciphertext block to cx+256 for a following call */
+
+2: xor %eax, %eax /* return value is always 0 */
+ ret
+ .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
+
+
+/* AES-256 CBC decryption of inputLen bytes with AES-NI; always returns 0 in %eax.
+ Handles 8 blocks per iteration while at least 128 bytes remain, then 1 block at a time.
+ The chaining value is read from cx+256 and the last input block is written back there.
+ in %rdi : cx - context (round keys are read from offsets 0..224, 16 bytes each)
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to output length (filled in by caller); in %ecx : maxOutputLen (checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_cbc_256,@function
+ .globl intel_aes_decrypt_cbc_256
+ .align 16
+intel_aes_decrypt_cbc_256:
+// leaq IV_OFFSET(%rdi), %rdx
+ leaq 256(%rdi), %rdx /* %rdx = cx+256 (IV_OFFSET in the line above); the outputLen argument is dropped */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = chaining value for the first block */
+ movdqu (%rdi), %xmm2 /* key at cx+0: consumed by aesdeclast in the 8-block loop */
+ movdqu 224(%rdi), %xmm15 /* key at cx+224: XORed into every block before the first aesdec */
+ xorl %eax, %eax /* %eax = running byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: skip the 8-block loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* %r11d = inputLen - 128, the last offset with 8 whole blocks left */
+2: movdqu (%r8, %rax), %xmm3 /* load 8 input blocks into %xmm3..%xmm10 */
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm15, %xmm3 /* XOR the cx+224 key into all 8 blocks */
+ pxor %xmm15, %xmm4
+ pxor %xmm15, %xmm5
+ pxor %xmm15, %xmm6
+ pxor %xmm15, %xmm7
+ pxor %xmm15, %xmm8
+ pxor %xmm15, %xmm9
+ pxor %xmm15, %xmm10
+
+// complete loop unrolling: 13 aesdec steps with the keys at cx+208 down to cx+16, two keys per group
+ movdqu 208(%rdi), %xmm1 /* %xmm1 and %xmm11 are scratch registers for the current pair of keys */
+ movdqu 192(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1 /* last aesdec key; the final step below uses the cx+0 key held in %xmm2 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3 /* block 0: XOR with the chaining value carried in %xmm0 */
+ movdqu (%r8, %rax), %xmm0 /* re-read input block 0; it chains into block 1, and so on below */
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0 /* last input block of this batch: chaining value for whatever follows */
+ movdqu %xmm3, (%rsi, %rax) /* every input re-read above precedes these stores (matters if input buf = output buf) */
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned: repeat while offset <= inputLen - 128 */
+1: cmpl %eax, %r9d
+ je 5f /* everything consumed: no single-block tail */
+
+ movdqu 16(%rdi), %xmm2 /* keys cx+16..cx+208 go to %xmm2..%xmm14; this overwrites the cx+0 copy in %xmm2 */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 112(%rdi), %xmm8
+ movdqu 128(%rdi), %xmm9
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1 /* single-block loop: %xmm1 = next input block */
+ pxor %xmm15, %xmm1 /* XOR in the cx+224 key */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* cx+112 key is spent: reuse %xmm8 for the cx+0 key needed by aesdeclast */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
+ movdqu 112(%rdi), %xmm8 /* put the cx+112 key back for the next iteration */
+ pxor %xmm0, %xmm1 /* CBC chaining: XOR with the previous input block (or the stored value) */
+ movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
+ movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* leaves only when offset == inputLen; NOTE(review): presumably callers pass a multiple of 16 - confirm */
+
+5: movdqu %xmm0, (%rdx) /* write the chaining value back to cx+256 (same value it held if inputLen was 0) */
+
+ xor %eax, %eax /* return value is always 0 */
+ ret
+ .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
diff --git a/security/nss/lib/freebl/intel-gcm-wrap.c b/security/nss/lib/freebl/intel-gcm-wrap.c
new file mode 100644
index 0000000000..5adbd81f74
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-wrap.c
@@ -0,0 +1,475 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Copyright(c) 2013, Intel Corp. */
+
+/* Wrapper functions for Intel optimized implementation of AES-GCM */
+
+#ifdef USE_HW_AES
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+#include "intel-gcm.h"
+#include "rijndael.h"
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+
+/*
+ * Per-key / per-message state for the Intel (AES-NI + PCLMULQDQ) AES-GCM
+ * implementation.
+ *
+ * NOTE: the leading members are addressed by fixed byte offsets from the
+ * assembly routines (e.g. intel_aes_gcmENC in intel-gcm-x64-masm.asm loads
+ * T from 16*16 + 1*16, CTR from 16*16 + 2*16 and aes_context from
+ * 16*16 + 3*16).  Do not reorder or resize Htbl, X0, T, CTR or aes_context
+ * without updating the assembly.
+ */
+struct intel_AES_GCMContextStr {
+ /* GHASH key table filled in by intel_aes_gcmINIT(): eight 16-byte
+  * entries followed by eight 16-byte entries holding the XOR of the two
+  * 64-bit halves of the corresponding entry. */
+ unsigned char Htbl[16 * AES_BLOCK_SIZE];
+ /* Encrypted initial counter block; XORed into the tag by
+  * intel_aes_gcmTAG(). */
+ unsigned char X0[AES_BLOCK_SIZE];
+ /* Running GHASH accumulator. */
+ unsigned char T[AES_BLOCK_SIZE];
+ /* Current counter block. */
+ unsigned char CTR[AES_BLOCK_SIZE];
+ /* Keyed AES context supplied by the caller of CreateContext; the pointer
+  * is stored, not copied. */
+ AESContext *aes_context;
+ /* Tag length in bits, validated by intel_aes_gcmInitCounter(). */
+ unsigned long tagBits;
+ /* Number of AAD bytes hashed so far. */
+ unsigned long Alen;
+ /* Number of message bytes processed so far. */
+ unsigned long Mlen;
+ /* AES block-encrypt routine used to encrypt the initial counter. */
+ freeblCipherFunc cipher;
+ /* PR_TRUE when the counter was initialized at context creation
+  * (Update-style use); PR_FALSE for message-style (AEAD) contexts. */
+ PRBool ctr_context_init;
+ /* IV generator state used by gcm_GenerateIV() in the AEAD encrypt path. */
+ gcmIVContext gcm_iv;
+};
+
+/* Forward declaration: the definition appears after
+ * intel_AES_GCM_CreateContext(), which is its first caller in this file. */
+SECStatus intel_aes_gcmInitCounter(intel_AES_GCMContext *gcm,
+ const unsigned char *iv,
+ unsigned long ivLen, unsigned long tagBits,
+ const unsigned char *aad, unsigned long aadLen);
+
+/*
+ * Allocate and initialize an Intel-accelerated AES-GCM context.
+ *
+ * context - an already-keyed AESContext; its expanded key schedule and
+ *           round count are used to derive the GHASH table.  The pointer
+ *           is stored in the new context, not copied.
+ * cipher  - AES encrypt routine, later used to encrypt the initial counter
+ *           block.
+ * params  - a CK_NSS_GCM_PARAMS (IV, AAD, tag size), or NULL to create a
+ *           PKCS #11 message-style context whose IV/AAD are supplied on
+ *           every AEAD call.
+ *
+ * Returns the new context, or NULL on allocation failure or when
+ * intel_aes_gcmInitCounter() rejects the supplied parameters.
+ */
+intel_AES_GCMContext *
+intel_AES_GCM_CreateContext(void *context,
+                            freeblCipherFunc cipher,
+                            const unsigned char *params)
+{
+    intel_AES_GCMContext *gcm = NULL;
+    AESContext *aes = (AESContext *)context;
+    const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params;
+    SECStatus rv;
+
+    gcm = PORT_ZNew(intel_AES_GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+
+    /* initialize context fields */
+    gcm->aes_context = aes;
+    gcm->cipher = cipher;
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+    gcm->ctr_context_init = PR_FALSE;
+
+    /* first prepare H and its derivatives for ghash */
+    intel_aes_gcmINIT(gcm->Htbl, (unsigned char *)aes->k.expandedKey, aes->Nr);
+
+    gcm_InitIVContext(&gcm->gcm_iv);
+
+    /* If gcmParams is NULL, then we are creating a PKCS #11 MESSAGE
+     * style context, in which we initialize the key once, then do separate
+     * iv/aad's for each message. If we are doing that kind of operation,
+     * we've finished with init here. We'll init the Counter in each AEAD
+     * call. */
+    if (gcmParams == NULL) {
+        return gcm;
+    }
+
+    rv = intel_aes_gcmInitCounter(gcm, gcmParams->pIv,
+                                  gcmParams->ulIvLen, gcmParams->ulTagBits,
+                                  gcmParams->pAAD, gcmParams->ulAADLen);
+    if (rv != SECSuccess) {
+        /* Htbl (and possibly X0) already hold key-derived material: scrub
+         * the context before handing the memory back to the allocator, as
+         * intel_AES_GCM_DestroyContext() does on the normal path. */
+        PORT_Memset(gcm, 0, sizeof(intel_AES_GCMContext));
+        PORT_Free(gcm);
+        return NULL;
+    }
+    gcm->ctr_context_init = PR_TRUE;
+
+    return gcm;
+}
+
+/*
+ * (Re)initialize the per-message GCM state of an existing context.
+ *
+ * Validates the IV and tag size, resets the AAD/message length counters and
+ * the GHASH accumulator, derives the initial counter block from the IV,
+ * stores its encryption in gcm->X0, advances gcm->CTR by one and finally
+ * hashes the AAD into gcm->T.
+ *
+ * gcm     - context whose Htbl, cipher and aes_context are already set.
+ * iv      - initialization vector, ivLen bytes (must be non-empty).
+ * tagBits - tag size in bits: 128, 120, 112, 104, 96, 64 or 32.
+ * aad     - additional authenticated data, aadLen bytes (may be NULL only
+ *           when aadLen is 0).
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS /
+ * SEC_ERROR_INPUT_LEN set, or SECFailure when the block cipher call fails.
+ */
+SECStatus
+intel_aes_gcmInitCounter(intel_AES_GCMContext *gcm,
+                         const unsigned char *iv, unsigned long ivLen,
+                         unsigned long tagBits,
+                         const unsigned char *aad, unsigned long aadLen)
+{
+    unsigned char buff[AES_BLOCK_SIZE]; /* zero-padded partial block */
+    unsigned long IV_whole_len = ivLen & (~0xful);
+    unsigned int IV_remainder_len = ivLen & 0xful;
+    unsigned long AAD_whole_len = aadLen & (~0xful);
+    unsigned int AAD_remainder_len = aadLen & 0xful;
+    unsigned int cipherOutLen;
+    __m128i BSWAP_MASK = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    __m128i ONE = _mm_set_epi32(0, 0, 0, 1);
+    __m128i ctr;
+    SECStatus rv;
+
+    /* Reject missing buffers up front instead of faulting further down. */
+    if (iv == NULL || ivLen == 0 || (aad == NULL && aadLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (tagBits != 128 && tagBits != 120 && tagBits != 112 &&
+        tagBits != 104 && tagBits != 96 && tagBits != 64 &&
+        tagBits != 32) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    gcm->tagBits = tagBits;
+
+    /* reset the aad and message length counters */
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+
+    /* Limit AADLen in accordance with SP800-38D */
+    if (sizeof(AAD_whole_len) >= 8 && AAD_whole_len > (1ULL << 61) - 1) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    /* Initial TAG value is zero */
+    _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)gcm->X0, _mm_setzero_si128());
+
+    /* Init the counter */
+    if (ivLen == 12) {
+        /* 96-bit IV: counter block is IV || 0x00000001.  Built bytewise so
+         * that iv is never read through a wider, possibly misaligned,
+         * pointer type. */
+        PORT_Memcpy(gcm->CTR, iv, 12);
+        gcm->CTR[12] = 0;
+        gcm->CTR[13] = 0;
+        gcm->CTR[14] = 0;
+        gcm->CTR[15] = 1;
+    } else {
+        /* If IV size is not 96 bits, then the initial counter value is GHASH
+         * of the IV */
+        intel_aes_gcmAAD(gcm->Htbl, (unsigned char *)iv, IV_whole_len, gcm->T);
+
+        /* Partial block */
+        if (IV_remainder_len) {
+            PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+            PORT_Memcpy(buff, iv + IV_whole_len, IV_remainder_len);
+            intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+        }
+
+        /* X0 is still all-zero here, so the "tag" written to CTR is the
+         * plain GHASH of the IV and its length. */
+        intel_aes_gcmTAG(
+            gcm->Htbl,
+            gcm->T,
+            ivLen,
+            0,
+            gcm->X0,
+            gcm->CTR);
+
+        /* TAG should be zero again */
+        _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    }
+
+    /* Encrypt the initial counter, will be used to encrypt the GHASH value,
+     * in the end */
+    rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &cipherOutLen,
+                        AES_BLOCK_SIZE, gcm->CTR,
+                        AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Promote the counter by 1: byte-reverse the block, add one to the
+     * lowest 32-bit lane, and byte-reverse it back. */
+    ctr = _mm_loadu_si128((__m128i *)gcm->CTR);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    ctr = _mm_add_epi32(ONE, ctr);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    _mm_storeu_si128((__m128i *)gcm->CTR, ctr);
+
+    /* Now hash AAD - it would actually make sense to separate the context
+     * creation from the AAD, because that would allow to reuse the H, which
+     * only changes when the AES key changes, and not every package, like the
+     * IV and AAD */
+    intel_aes_gcmAAD(gcm->Htbl, (unsigned char *)aad, AAD_whole_len, gcm->T);
+    if (AAD_remainder_len) {
+        PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+        PORT_Memcpy(buff, aad + AAD_whole_len, AAD_remainder_len);
+        intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+    }
+    gcm->Alen += aadLen;
+    return SECSuccess;
+}
+
+/*
+ * Wipe a GCM context and optionally release it.
+ *
+ * gcm    - context to destroy; NULL is accepted and ignored.
+ * freeit - when true the context memory itself is freed after being zeroed.
+ *
+ * Only the GCM context structure is cleared; the AES context it points to
+ * is not touched here.
+ */
+void
+intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit)
+{
+    if (gcm == NULL) {
+        return;
+    }
+    PORT_Memset(gcm, 0, sizeof(intel_AES_GCMContext));
+    if (freeit) {
+        PORT_Free(gcm);
+    }
+}
+
+/*
+ * Encrypt inlen bytes from inbuf into outbuf and append the authentication
+ * tag directly after the ciphertext.
+ *
+ * The context must have been created with GCM parameters (counter already
+ * initialized).  outbuf needs room for inlen plus the tag; when maxout is
+ * too small, *outlen is set to the required size and SEC_ERROR_OUTPUT_LEN
+ * is raised.  blocksize is not used.
+ *
+ * Returns SECSuccess and sets *outlen to inlen + tag length, or SECFailure
+ * with the NSS error code set.
+ */
+SECStatus
+intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned char tag[AES_BLOCK_SIZE];
+    unsigned int tagLen;
+    unsigned int required;
+
+    /* GCM uses 16-octet blocks and a 32-bit block counter, which bounds
+     * the message size (SP800-38D).  Only reachable when unsigned int is
+     * wider than 32 bits. */
+    if (sizeof(inlen) > 4) {
+        unsigned long long inlen_ull = inlen;
+        if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    /* round the tag size up to whole bytes */
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* inlen + tagLen must not wrap around */
+    if (inlen > UINT_MAX - tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    required = inlen + tagLen;
+    if (required > maxout) {
+        *outlen = required;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmENC(inbuf, outbuf, gcm, inlen);
+    gcm->Mlen += inlen;
+
+    intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, tag);
+
+    *outlen = required;
+
+    /* the (possibly truncated) tag follows the ciphertext */
+    PORT_Memcpy(outbuf + inlen, tag, tagLen);
+    return SECSuccess;
+}
+
+/*
+ * Decrypt a buffer that consists of ciphertext immediately followed by the
+ * authentication tag, and verify that tag.
+ *
+ * The context must have been created with GCM parameters (counter already
+ * initialized).  inlen covers ciphertext plus tag.  When maxout is smaller
+ * than the ciphertext, *outlen is set to the required size and
+ * SEC_ERROR_OUTPUT_LEN is raised.  On a tag mismatch the produced plaintext
+ * is zeroed, *outlen is set to 0 and SEC_ERROR_BAD_DATA is raised.
+ * blocksize is not used.
+ *
+ * Returns SECSuccess and sets *outlen to the plaintext length, or
+ * SECFailure with the NSS error code set.
+ */
+SECStatus
+intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned char computedTag[AES_BLOCK_SIZE];
+    const unsigned char *receivedTag;
+    unsigned int tagLen;
+    unsigned int dataLen;
+
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    /* round the tag size up to whole bytes */
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* the input has to hold at least the tag */
+    if (inlen < tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* split the input into ciphertext and trailing tag */
+    dataLen = inlen - tagLen;
+    receivedTag = inbuf + dataLen;
+
+    /* GCM uses 16-octet blocks and a 32-bit block counter, which bounds
+     * the message size (SP800-38D).  Only reachable when unsigned int is
+     * wider than 32 bits. */
+    if (sizeof(dataLen) > 4) {
+        unsigned long long inlen_ull = dataLen;
+        if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+
+    if (dataLen > maxout) {
+        *outlen = dataLen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmDEC(inbuf, outbuf, gcm, dataLen);
+    gcm->Mlen += dataLen;
+
+    intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0,
+                     computedTag);
+
+    if (NSS_SecureMemcmp(computedTag, receivedTag, tagLen) != 0) {
+        /* do not hand unauthenticated plaintext back to the caller */
+        memset(outbuf, 0, dataLen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    *outlen = dataLen;
+    return SECSuccess;
+}
+
+/*
+ * One-shot (PKCS #11 message style) AES-GCM encryption.
+ *
+ * params must point to a CK_GCM_MESSAGE_PARAMS of exactly paramLen bytes.
+ * The IV is produced or validated by gcm_GenerateIV() according to those
+ * parameters, the counter is re-initialized with that IV and the given AAD,
+ * inlen bytes are encrypted into outbuf, and the tag is written to
+ * params->pTag (not appended to outbuf).
+ *
+ * Only valid on a context created without GCM parameters; otherwise
+ * SEC_ERROR_LIBRARY_FAILURE is raised.  blocksize is not used.
+ *
+ * Returns SECSuccess and sets *outlen to inlen, or SECFailure with the NSS
+ * error code set.
+ */
+SECStatus
+intel_AES_GCM_EncryptAEAD(intel_AES_GCMContext *gcm,
+                          unsigned char *outbuf,
+                          unsigned int *outlen, unsigned int maxout,
+                          const unsigned char *inbuf, unsigned int inlen,
+                          void *params, unsigned int paramLen,
+                          const unsigned char *aad, unsigned int aadLen,
+                          unsigned int blocksize)
+{
+    const CK_GCM_MESSAGE_PARAMS *msgParams =
+        (const CK_GCM_MESSAGE_PARAMS *)params;
+    unsigned char tag[AES_BLOCK_SIZE];
+    unsigned int tagLen;
+
+    /* GCM uses 16-octet blocks and a 32-bit block counter, which bounds
+     * the message size (SP800-38D).  Only reachable when unsigned int is
+     * wider than 32 bits. */
+    if (sizeof(inlen) > 4) {
+        unsigned long long inlen_ull = inlen;
+        if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+
+    /* paramLen is handed down unchanged from the application layer, so
+     * verify it before trusting the structure */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* a context set up through C_EncryptInit must not end up here */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    if (inlen > maxout) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (gcm_GenerateIV(&gcm->gcm_iv, msgParams->pIv, msgParams->ulIvLen,
+                       msgParams->ulIvFixedBits,
+                       msgParams->ivGenerator) != SECSuccess) {
+        return SECFailure;
+    }
+
+    if (intel_aes_gcmInitCounter(gcm, msgParams->pIv, msgParams->ulIvLen,
+                                 msgParams->ulTagBits,
+                                 aad, aadLen) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* round the tag size up to whole bytes */
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    intel_aes_gcmENC(inbuf, outbuf, gcm, inlen);
+    gcm->Mlen += inlen;
+
+    intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, tag);
+
+    *outlen = inlen;
+    /* the tag travels in the parameter block, not in outbuf */
+    PORT_Memcpy(msgParams->pTag, tag, tagLen);
+    return SECSuccess;
+}
+
+/*
+ * One-shot (PKCS #11 message style) AES-GCM decryption.
+ *
+ * params must point to a CK_GCM_MESSAGE_PARAMS of exactly paramLen bytes
+ * carrying the IV, the tag size and the tag to verify (pTag).  inbuf holds
+ * ciphertext only.  The counter is re-initialized with the IV and AAD,
+ * inlen bytes are decrypted into outbuf and the computed tag is compared
+ * against params->pTag.  On mismatch the plaintext is zeroed, *outlen is
+ * set to 0 and SEC_ERROR_BAD_DATA is raised.
+ *
+ * Only valid on a context created without GCM parameters; otherwise
+ * SEC_ERROR_LIBRARY_FAILURE is raised.  blocksize is not used.
+ *
+ * Returns SECSuccess and sets *outlen to inlen, or SECFailure with the NSS
+ * error code set.
+ */
+SECStatus
+intel_AES_GCM_DecryptAEAD(intel_AES_GCMContext *gcm,
+                          unsigned char *outbuf,
+                          unsigned int *outlen, unsigned int maxout,
+                          const unsigned char *inbuf, unsigned int inlen,
+                          void *params, unsigned int paramLen,
+                          const unsigned char *aad, unsigned int aadLen,
+                          unsigned int blocksize)
+{
+    const CK_GCM_MESSAGE_PARAMS *msgParams =
+        (const CK_GCM_MESSAGE_PARAMS *)params;
+    unsigned char computedTag[AES_BLOCK_SIZE];
+    const unsigned char *receivedTag;
+    unsigned int tagLen;
+
+    /* paramLen is handed down unchanged from the application layer, so
+     * verify it before trusting the structure */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* a context set up through C_DecryptInit must not end up here */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* GCM uses 16-octet blocks and a 32-bit block counter, which bounds
+     * the message size (SP800-38D).  Only reachable when unsigned int is
+     * wider than 32 bits. */
+    if (sizeof(inlen) > 4) {
+        unsigned long long inlen_ull = inlen;
+        if (inlen_ull >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+            PORT_SetError(SEC_ERROR_INPUT_LEN);
+            return SECFailure;
+        }
+    }
+
+    if (inlen > maxout) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (intel_aes_gcmInitCounter(gcm, msgParams->pIv, msgParams->ulIvLen,
+                                 msgParams->ulTagBits,
+                                 aad, aadLen) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* round the tag size up to whole bytes */
+    tagLen = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    receivedTag = msgParams->pTag;
+    PORT_Assert(tagLen != 0);
+
+    intel_aes_gcmDEC(inbuf, outbuf, gcm, inlen);
+    gcm->Mlen += inlen;
+
+    intel_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0,
+                     computedTag);
+
+    if (NSS_SecureMemcmp(computedTag, receivedTag, tagLen) != 0) {
+        /* do not hand unauthenticated plaintext back to the caller */
+        memset(outbuf, 0, inlen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    *outlen = inlen;
+    return SECSuccess;
+}
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm-x64-masm.asm b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
new file mode 100644
index 0000000000..07ddefbc1e
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
@@ -0,0 +1,1294 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+Lone dq 1,0
+Ltwo dq 2,0
+Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh
+Lpoly dq 01h, 0c200000000000000h
+
+.CODE
+
+
+; GFMUL DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+;
+; Carry-less multiplies the 128-bit values in SRC1 and SRC2 and reduces the
+; 256-bit product with two folding steps against the constant at Lpoly,
+; leaving the 128-bit result in DST.
+;
+; The product is assembled from three vpclmulqdq multiplications
+; (low*low, high*high, and (high^low)*(high^low)), Karatsuba style.
+; DST is written only by the last instruction, so it may be the same
+; register as SRC1 or SRC2.  TMP1..TMP4 are clobbered and must not alias
+; SRC1/SRC2, which are still read after TMP1 and TMP4 have been written.
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+ ; TMP1 = low qword * low qword, TMP4 = high qword * high qword
+ vpclmulqdq TMP1, SRC2, SRC1, 0h
+ vpclmulqdq TMP4, SRC2, SRC1, 011h
+
+ ; vpshufd ..., 78 swaps the two 64-bit halves; the XOR leaves (high ^ low)
+ ; in both halves
+ vpshufd TMP2, SRC2, 78
+ vpshufd TMP3, SRC1, 78
+ vpxor TMP2, TMP2, SRC2
+ vpxor TMP3, TMP3, SRC1
+
+ ; middle term = (h2^l2)*(h1^l1) ^ low product ^ high product
+ vpclmulqdq TMP2, TMP2, TMP3, 0h
+ vpxor TMP2, TMP2, TMP1
+ vpxor TMP2, TMP2, TMP4
+
+ ; split the middle term across the low (TMP1) and high (TMP4) halves of
+ ; the 256-bit product
+ vpslldq TMP3, TMP2, 8
+ vpsrldq TMP2, TMP2, 8
+
+ vpxor TMP1, TMP1, TMP3
+ vpxor TMP4, TMP4, TMP2
+
+ ; first reduction fold of the low half
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ ; second reduction fold
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ ; combine the folded low half with the high half
+ vpxor DST, TMP1, TMP4
+
+ ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+; unsigned char *Tp,
+; unsigned int Mlen,
+; unsigned int Alen,
+; unsigned char *X0,
+; unsigned char *TAG);
+;
+; Loads the 16-byte hash state from Tp, XORs in a block holding Mlen*8 in
+; its low 64 bits and Alen*8 in its high 64 bits, multiplies by the first
+; Htbl entry (GFMUL), byte-reverses the result, XORs it with the 16 bytes
+; at X0 and stores the 16-byte result at TAG.  Tp itself is not written.
+;
+; Arguments arrive per the Microsoft x64 convention: rcx, rdx, r8, r9, then
+; X0 and TAG on the stack above the return address and the 32-byte shadow
+; area.
+;
+; NOTE(review): Mlen/Alen are shifted as full 64-bit registers (r8/r9)
+; although the prototype above declares them as 32-bit values; this relies
+; on the caller having zero-extended them - confirm against the C
+; declaration in intel-gcm.h.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC
+
+Htbl textequ <rcx>
+Tp textequ <rdx>
+Mlen textequ <r8>
+Alen textequ <r9>
+X0 textequ <r10>
+TAG textequ <r11>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ ; 5th and 6th arguments: skip return address (1*8) and shadow space (4*8)
+ mov X0, [rsp + 1*8 + 4*8]
+ mov TAG, [rsp + 1*8 + 5*8]
+
+ vzeroupper
+ vmovdqu T, XMMWORD PTR[Tp]
+ vpxor TMP0, TMP0, TMP0
+
+ ; convert byte counts to bit counts
+ shl Mlen, 3
+ shl Alen, 3
+
+ ;vpinsrq TMP0, TMP0, Mlen, 0
+ ;vpinsrq TMP0, TMP0, Alen, 1
+ ; workaround the ml64.exe vpinsrq issue
+ ; (insert each 64-bit length as two 32-bit halves instead)
+ vpinsrd TMP0, TMP0, r8d, 0
+ vpinsrd TMP0, TMP0, r9d, 2
+ shr Mlen, 32
+ shr Alen, 32
+ vpinsrd TMP0, TMP0, r8d, 1
+ vpinsrd TMP0, TMP0, r9d, 3
+
+ ; fold the length block into the hash and do the final multiplication
+ vpxor T, T, TMP0
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+
+ ; back to memory byte order, then mask with the block at X0
+ vpshufb T, T, [Lbswap_mask]
+ vpxor T, T, [X0]
+ vmovdqu XMMWORD PTR[TAG], T
+ vzeroupper
+
+ ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+; Encrypts the all-zero block with the key schedule at KS (NR rounds) to
+; obtain H, byte-reverses it, computes H` = GFMUL(H, 2), and fills Htbl:
+;   Htbl[0..7]  : H` and its successive GFMUL products with itself
+;   Htbl[8..15] : for each of those entries, the XOR of its two 64-bit
+;                 halves (used as the Karatsuba middle-term operand)
+; KS (rdx) and NR (r8d) are modified as scratch registers.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC
+
+Htbl textequ <rcx>
+KS textequ <rdx>
+NR textequ <r8d>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ vzeroupper
+ ; AES-ENC(0)
+ ; (zero XOR round key 0 is just round key 0, so load it directly)
+ vmovdqu T, XMMWORD PTR[KS]
+ lea KS, [16 + KS]
+ dec NR
+ ; NR-1 full rounds, followed by the final round below
+Lenc_loop:
+ vaesenc T, T, [KS]
+ lea KS, [16 + KS]
+ dec NR
+ jnz Lenc_loop
+
+ vaesenclast T, T, [KS]
+ vpshufb T, T, [Lbswap_mask]
+
+ ;Calculate H` = GFMUL(H, 2)
+ ; xmm5 = Lpoly if the top bit of T is set, else 0 (sign of the top dword
+ ; broadcast to all lanes, then masked)
+ vpsrad xmm3, T, 31
+ vpshufd xmm3, xmm3, 0ffh
+ vpand xmm5, xmm3, [Lpoly]
+ ; 128-bit shift left by one: per-dword shift plus carries moved up a lane
+ vpsrld xmm3, T, 31
+ vpslld xmm4, T, 1
+ vpslldq xmm3, xmm3, 4
+ vpxor T, xmm4, xmm3
+ vpxor T, T, xmm5
+
+ vmovdqu TMP0, T
+ vmovdqu XMMWORD PTR[Htbl + 0*16], T
+
+ ; (high ^ low) helper value for entry 0
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2
+
+ ; assembly-time unrolled loop: entries 1..7
+ i = 1
+ WHILE i LT 8
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+ vmovdqu XMMWORD PTR[Htbl + i*16], T
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+ i = i+1
+ ENDM
+ vzeroupper
+ ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+; Folds Alen bytes starting at AAD into the 16-byte hash state at Tp, using
+; the table produced by intel_aes_gcmINIT.  Returns immediately, leaving Tp
+; untouched, when Alen is 0.  The callers in intel-gcm-wrap.c always pass a
+; multiple of 16 bytes.
+;
+; Data is consumed in groups of eight 16-byte blocks; when the block count
+; is not a multiple of eight, the leading (count mod 8) blocks are hashed
+; first.  xmm6/xmm7 are saved on the stack and restored before returning.
+;
+; NOTE(review): the length is tested and masked as a full 64-bit register
+; (r8) although the prototype above declares a 32-bit value; this relies
+; on the caller having zero-extended it - confirm against the C
+; declaration in intel-gcm.h.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC
+
+Htbl textequ <rcx>
+inp textequ <rdx>
+len textequ <r8>
+Tp textequ <r9>
+hlp0 textequ <r10>
+
+DATA textequ <xmm0>
+T textequ <xmm1>
+TMP0 textequ <xmm2>
+TMP1 textequ <xmm3>
+TMP2 textequ <xmm4>
+TMP3 textequ <xmm5>
+TMP4 textequ <xmm6>
+Xhi textequ <xmm7>
+
+; Accumulate the three partial products of DATA with table entry i:
+; TMP0 ^= low*low, TMP1 ^= high*high, TMP2 ^= (high^low)*(precomputed
+; high^low of entry i, stored at Htbl + 8*16 + i*16).  Clobbers TMP3.
+KARATSUBA_AAD MACRO i
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+
+ ; nothing to hash
+ test len, len
+ jnz LbeginAAD
+ ret
+
+LbeginAAD:
+ vzeroupper
+
+ ; preserve xmm6/xmm7 (used as TMP4 and Xhi)
+ sub rsp, 2*16
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu T, XMMWORD PTR[Tp]
+ ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ ; hlp0 = bytes in the prefix (len mod 128); len is rounded down to a
+ ; multiple of 128
+ mov hlp0, len
+ and hlp0, 128-1
+ jz Lmod_loop
+
+ and len, -128
+ ; hlp0 now doubles as the Htbl byte offset for the current prefix block;
+ ; it counts down to 0 as the prefix is consumed
+ sub hlp0, 16
+
+ ; Prefix block
+ ; (the first block absorbs the incoming hash state T)
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+
+ vpclmulqdq TMP0, DATA, [Htbl + hlp0], 0h
+ vpclmulqdq TMP1, DATA, [Htbl + hlp0], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + hlp0], 0h
+
+ lea inp, [inp+16]
+ test hlp0, hlp0
+ jnz Lpre_loop
+ jmp Lred1
+
+ ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+ sub hlp0, 16
+
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP3, DATA, [Htbl + hlp0], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + hlp0], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + hlp0], 0h
+ vpxor TMP2, TMP2, TMP3
+
+ ; lea does not modify flags, so jnz still tests hlp0
+ test hlp0, hlp0
+ lea inp, [inp+16]
+ jnz Lpre_loop
+
+Lred1:
+
+ ; Karatsuba fixup for the prefix: split the middle term over the high
+ ; (Xhi) and low (T) halves of the unreduced product; the reduction itself
+ ; is deferred to Lmod_loop or Ldone
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+
+Lmod_loop:
+
+ ; main loop: eight blocks per iteration; exits once fewer than 128 bytes
+ ; remain.  The last block in memory is paired with table entry 0, the
+ ; first with entry 7.  The reduction of the previous iteration's T/Xhi is
+ ; interleaved with the multiplications.
+ sub len, 16*8
+ jb Ldone
+ ; Block #0
+ vmovdqu DATA, XMMWORD PTR[inp + 16*7]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP0, DATA, [Htbl + 0*16], 0h
+ vpclmulqdq TMP1, DATA, [Htbl + 0*16], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + 0*16], 0h
+
+ ; Block #1
+ vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 1
+
+ ; Block #2
+ vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 2
+
+ vpxor T, T, TMP4 ;reduction stage 1b
+
+ ; Block #3
+ vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 3
+ ; Block #4
+ vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 4
+
+ vpxor T, T, TMP4 ;reduction stage 2b
+ ; Block #5
+ vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 5
+
+ vpxor T, T, Xhi ;reduction finalize
+ ; Block #6
+ vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 6
+ ; Block #7
+ ; (first block in memory; absorbs the now fully reduced T)
+ vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+ KARATSUBA_AAD 7
+ ; Aggregated 8 blocks, now karatsuba fixup
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+ lea inp, [inp + 16*8]
+ jmp Lmod_loop
+
+Ldone:
+ ; final reduction of the pending T/Xhi pair, then write the state back
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpxor T, T, Xhi
+ vmovdqu XMMWORD PTR[Tp], T
+ vzeroupper
+
+ ; restore the saved xmm6/xmm7 and release the stack space
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ add rsp, 16*2
+
+ ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC
+
+PT textequ <rcx>
+CT textequ <rdx>
+Htbl textequ <r8>
+Gctx textequ <r8>
+len textequ <r9>
+KS textequ <r10>
+NR textequ <eax>
+
+aluCTR textequ <r11d>
+aluKSl textequ <r12d>
+aluTMP textequ <r13d>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+TMP1 textequ <xmm2>
+TMP2 textequ <xmm3>
+TMP3 textequ <xmm4>
+TMP4 textequ <xmm5>
+TMP5 textequ <xmm6>
+CTR0 textequ <xmm7>
+CTR1 textequ <xmm8>
+CTR2 textequ <xmm9>
+CTR3 textequ <xmm10>
+CTR4 textequ <xmm11>
+CTR5 textequ <xmm12>
+CTR6 textequ <xmm13>
+CTR7 textequ <xmm14>
+BSWAPMASK textequ <xmm15>
+
+ROUND MACRO i
+ vmovdqu TMP3, XMMWORD PTR[i*16 + KS]
+ vaesenc CTR0, CTR0, TMP3
+ vaesenc CTR1, CTR1, TMP3
+ vaesenc CTR2, CTR2, TMP3
+ vaesenc CTR3, CTR3, TMP3
+ vaesenc CTR4, CTR4, TMP3
+ vaesenc CTR5, CTR5, TMP3
+ vaesenc CTR6, CTR6, TMP3
+ vaesenc CTR7, CTR7, TMP3
+ENDM
+ROUNDMUL MACRO i
+ vmovdqu TMP3, XMMWORD PTR[i*16 + KS]
+
+ vaesenc CTR0, CTR0, TMP3
+ vaesenc CTR1, CTR1, TMP3
+ vaesenc CTR2, CTR2, TMP3
+ vaesenc CTR3, CTR3, TMP3
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+
+ vaesenc CTR4, CTR4, TMP3
+ vaesenc CTR5, CTR5, TMP3
+ vaesenc CTR6, CTR6, TMP3
+ vaesenc CTR7, CTR7, TMP3
+
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+KARATSUBA MACRO i
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl
+ bswap aluTMP
+ mov [3*4 + 8*16 + i*16 + rsp], aluTMP
+ENDM
+
+
+ test len, len
+ jnz LbeginENC
+ ret
+
+LbeginENC:
+
+ vzeroupper
+ push r11
+ push r12
+ push r13
+ push rbp
+ sub rsp, 10*16
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+ vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+ vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+ vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+ vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+ vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+ vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+ vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+ vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+
+ mov rbp, rsp
+ sub rsp, 16*16
+ and rsp, -16
+
+ vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx]
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+ mov KS, [16*16 + 3*16 + Gctx]
+ mov NR, [244 + KS]
+ lea KS, [KS]
+
+ vpshufb CTR0, CTR0, BSWAPMASK
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+ mov aluKSl, [3*4 + KS]
+ bswap aluCTR
+ bswap aluKSl
+
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[8*16 + 0*16 + rsp], TMP0
+
+ cmp len, 128
+ jb LEncDataSingles
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[8*16 + 1*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 2*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 3*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 4*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 5*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 6*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 7*16 + rsp], TMP0
+
+; Encrypt the initial 8 blocks
+ sub len, 128
+ vpaddd CTR1, CTR0, XMMWORD PTR[Lone]
+ vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo]
+ vpaddd CTR3, CTR2, XMMWORD PTR[Lone]
+ vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo]
+ vpaddd CTR5, CTR4, XMMWORD PTR[Lone]
+ vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo]
+ vpaddd CTR7, CTR6, XMMWORD PTR[Lone]
+
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vpshufb CTR7, CTR7, BSWAPMASK
+
+ vmovdqu TMP3, XMMWORD PTR[0*16 + KS]
+ vpxor CTR0, CTR0, TMP3
+ vpxor CTR1, CTR1, TMP3
+ vpxor CTR2, CTR2, TMP3
+ vpxor CTR3, CTR3, TMP3
+ vpxor CTR4, CTR4, TMP3
+ vpxor CTR5, CTR5, TMP3
+ vpxor CTR6, CTR6, TMP3
+ vpxor CTR7, CTR7, TMP3
+
+ ROUND 1
+
+ add aluCTR, 8
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl
+ bswap aluTMP
+ mov [8*16 + 0*16 + 3*4 + rsp], aluTMP
+
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+ ROUND 8
+ NEXTCTR 7
+ ROUND 9
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT]
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+ vaesenclast CTR7, CTR7, TMP3
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+ vpshufb TMP5, CTR7, BSWAPMASK
+
+ vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+ vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+ vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+ vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+ vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+ vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+ vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LEncDataOctets
+
+LEncDataOctets:
+ cmp len, 128
+ jb LEndEncOctets
+ sub len, 128
+
+ vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + rsp]
+ vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + rsp]
+ vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + rsp]
+ vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + rsp]
+ vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + rsp]
+ vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + rsp]
+ vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + rsp]
+ vmovdqa CTR7, XMMWORD PTR[8*16 + 7*16 + rsp]
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+ ROUNDMUL 1
+ NEXTCTR 0
+ vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+ ROUNDMUL 2
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+ ROUNDMUL 3
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+ ROUNDMUL 4
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+ ROUNDMUL 5
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+ ROUNDMUL 6
+ NEXTCTR 5
+ vpxor TMP5, T, XMMWORD PTR[7*16 + rsp]
+ ROUNDMUL 7
+ NEXTCTR 6
+
+ ROUND 8
+ NEXTCTR 7
+
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ ROUND 9
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT]
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+ vaesenclast CTR7, CTR7, TMP3
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+ vpshufb TMP5, CTR7, BSWAPMASK
+
+ vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+ vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+ vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+ vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+ vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+ vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+ vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+ vpxor T, T, TMP4
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LEncDataOctets
+
+LEndEncOctets:
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+ KARATSUBA 6
+ vpxor TMP5, T, XMMWORD PTR[7*16 + rsp]
+ KARATSUBA 7
+
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vpxor T, T, TMP4
+
+ sub aluCTR, 7
+
+LEncDataSingles:
+
+ cmp len, 16
+ jb LEncDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+ NEXTCTR 0
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+ vpxor TMP1, TMP1, XMMWORD PTR[PT]
+ vmovdqu XMMWORD PTR[CT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+
+ vpshufb TMP1, TMP1, BSWAPMASK
+ vpxor T, T, TMP1
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+ jmp LEncDataSingles
+
+LEncDataTail:
+
+ test len, len
+ jz LEncDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+; zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa XMMWORD PTR[rsp], TMP2
+; copy as many bytes as needed
+ xor KS, KS
+
+@@:
+ cmp len, KS
+ je @f
+ mov al, [PT + KS]
+ mov [rsp + KS], al
+ inc KS
+ jmp @b
+@@:
+ vpxor TMP1, TMP1, XMMWORD PTR[rsp]
+ vmovdqa XMMWORD PTR[rsp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov al, [rsp + KS]
+ mov [CT + KS], al
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[rsp + KS], 0
+ inc KS
+ jmp @b
+@@:
+BAIL:
+ vmovdqa TMP1, XMMWORD PTR[rsp]
+ vpshufb TMP1, TMP1, BSWAPMASK
+ vpxor T, T, TMP1
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+LEncDataEnd:
+
+ vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov rsp, rbp
+
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+ vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+ vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+ vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+ vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+ vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+ vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+ vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+ add rsp, 10*16
+ pop rbp
+ pop r13
+ pop r12
+ pop r11
+
+ vzeroupper
+
+ ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmDEC PROC
+; Decrypts len bytes from CT into PT with AES in counter mode and folds the ciphertext into the hash state T.
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl
+ bswap aluTMP
+ mov [3*4 + i*16 + rsp], aluTMP
+ENDM
+; NEXTCTR i: bump aluCTR and rewrite the last dword of stack counter slot i (kept XORed with round key 0).
+PT textequ <rdx>
+CT textequ <rcx>
+; Nothing to do for an empty input.
+ test len, len
+ jnz LbeginDEC
+ ret
+
+LbeginDEC:
+; Prologue: save r11-r13, rbp and xmm6-xmm15.
+ vzeroupper
+ push r11
+ push r12
+ push r13
+ push rbp
+ sub rsp, 10*16
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+ vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+ vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+ vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+ vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+ vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+ vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+ vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+ vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+; rbp keeps the frame; rsp drops 8*16 bytes and is aligned to 16 so the counter slots can use vmovdqa.
+ mov rbp, rsp
+ sub rsp, 8*16
+ and rsp, -16
+; Load hash state T, the counter block, the byte-swap mask, the key schedule pointer and the round count.
+ vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx]
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+ mov KS, [16*16 + 3*16 + Gctx]
+ mov NR, [244 + KS]
+
+ vpshufb CTR0, CTR0, BSWAPMASK
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+ mov aluKSl, [3*4 + KS]
+ bswap aluCTR
+ bswap aluKSl
+; Counter slot 0 = counter block XOR round key 0, i.e. the initial key addition is already applied.
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[0*16 + rsp], TMP0
+
+ cmp len, 128
+ jb LDecDataSingles
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[1*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[2*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[3*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[4*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[5*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[6*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[7*16 + rsp], TMP0
+
+ NEXTCTR 1
+ NEXTCTR 2
+ NEXTCTR 3
+ NEXTCTR 4
+ NEXTCTR 5
+ NEXTCTR 6
+ NEXTCTR 7
+
+LDecDataOctets:
+ cmp len, 128
+ jb LEndDecOctets
+ sub len, 128
+
+ vmovdqa CTR0, XMMWORD PTR[0*16 + rsp]
+ vmovdqa CTR1, XMMWORD PTR[1*16 + rsp]
+ vmovdqa CTR2, XMMWORD PTR[2*16 + rsp]
+ vmovdqa CTR3, XMMWORD PTR[3*16 + rsp]
+ vmovdqa CTR4, XMMWORD PTR[4*16 + rsp]
+ vmovdqa CTR5, XMMWORD PTR[5*16 + rsp]
+ vmovdqa CTR6, XMMWORD PTR[6*16 + rsp]
+ vmovdqa CTR7, XMMWORD PTR[7*16 + rsp]
+; Hash the 8 ciphertext blocks, last one first; ROUNDMUL/ROUND are defined outside this view (presumably AES rounds interleaved with the multiplies).
+ vmovdqu TMP5, XMMWORD PTR[7*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 1
+ NEXTCTR 0
+ vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 2
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 3
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 4
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 5
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 6
+ NEXTCTR 5
+ vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ vpxor TMP5, TMP5, T
+ ROUNDMUL 7
+ NEXTCTR 6
+
+ ROUND 8
+ NEXTCTR 7
+; Combine the partial products: T receives the low 128 bits, TMP4 the high 128 bits.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ ROUND 9
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+; Select the last round key from NR: index 10, 12, or otherwise 14.
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + CT] ; last round key XOR ciphertext, so vaesenclast yields plaintext directly
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + CT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + CT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + CT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + CT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + CT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + CT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + CT]
+ vaesenclast CTR7, CTR7, TMP3
+
+ vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+ vmovdqu XMMWORD PTR[7*16 + PT], CTR7
+
+ vpxor T, T, TMP4 ; add the high half to finish the reduction
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LDecDataOctets
+
+LEndDecOctets:
+; aluCTR ran 7 ahead of counter slot 0 (slots 1..7 were pre-filled); rewind it to match slot 0.
+ sub aluCTR, 7
+
+LDecDataSingles:
+; Remaining whole 16-byte blocks, one at a time.
+ cmp len, 16
+ jb LDecDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + rsp]
+ NEXTCTR 0
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+
+ vmovdqu TMP2, XMMWORD PTR[CT]
+ vpxor TMP1, TMP1, TMP2
+ vmovdqu XMMWORD PTR[PT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+
+ vpshufb TMP2, TMP2, BSWAPMASK
+ vpxor T, T, TMP2
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+ jmp LDecDataSingles
+
+LDecDataTail:
+; Final partial block (fewer than 16 bytes), if any.
+ test len, len
+ jz LDecDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + rsp]
+ inc aluCTR
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+; copy as many bytes as needed
+ xor KS, KS ; KS is reused as the byte index from here on
+@@:
+ cmp len, KS
+ je @f
+ mov al, [CT + KS]
+ mov [rsp + KS], al
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[rsp + KS], 0 ; zero-pad the staged ciphertext to 16 bytes
+ inc KS
+ jmp @b
+@@:
+ vmovdqa TMP2, XMMWORD PTR[rsp]
+ vpshufb TMP2, TMP2, BSWAPMASK
+ vpxor T, T, TMP2
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP5, TMP2, TMP3, TMP4
+; TMP5 serves as GFMUL scratch above so the keystream in TMP1 survives for the XOR below.
+
+ vpxor TMP1, TMP1, XMMWORD PTR[rsp]
+ vmovdqa XMMWORD PTR[rsp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov al, [rsp + KS]
+ mov [PT + KS], al
+ inc KS
+ jmp @b
+@@:
+
+LDecDataEnd:
+; Write back the hash state and the byte-swapped counter dword, then restore the saved registers.
+ vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov rsp, rbp
+
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+ vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+ vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+ vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+ vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+ vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+ vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+ vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+ add rsp, 10*16
+ pop rbp
+ pop r13
+ pop r12
+ pop r11
+
+ vzeroupper
+
+ ret
+
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm-x86-masm.asm b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
new file mode 100644
index 0000000000..32f4257884
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
@@ -0,0 +1,1207 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at http:
+; //mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C
+.XMM
+
+.DATA
+ALIGN 16
+Lone dq 1,0
+Ltwo dq 2,0
+Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh
+Lpoly dq 01h, 0c200000000000000h
+
+.CODE
+
+
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+ vpclmulqdq TMP1, SRC2, SRC1, 0h
+ vpclmulqdq TMP4, SRC2, SRC1, 011h
+; GFMUL: carry-less multiply of SRC1 by SRC2 followed by a two-step reduction with Lpoly; TMP1-TMP4 are clobbered.
+ vpshufd TMP2, SRC2, 78
+ vpshufd TMP3, SRC1, 78
+ vpxor TMP2, TMP2, SRC2
+ vpxor TMP3, TMP3, SRC1
+; Karatsuba middle term: product of the folded halves minus the low (TMP1) and high (TMP4) products.
+ vpclmulqdq TMP2, TMP2, TMP3, 0h
+ vpxor TMP2, TMP2, TMP1
+ vpxor TMP2, TMP2, TMP4
+; Split the middle term across the low and high halves of the 256-bit product.
+ vpslldq TMP3, TMP2, 8
+ vpsrldq TMP2, TMP2, 8
+
+ vpxor TMP1, TMP1, TMP3
+ vpxor TMP4, TMP4, TMP2
+; Two identical folding steps reduce the low half (TMP1) using Lpoly.
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+; DST is written only here, so it may name the same register as SRC1 or SRC2.
+ vpxor DST, TMP1, TMP4
+
+ ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+; unsigned char *Tp,
+; unsigned int Mlen,
+; unsigned int Alen,
+; unsigned char* X0,
+; unsigned char* TAG);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC
+; Final tag: T = GFMUL(T XOR length block, Htbl[0]), then byte-swap, XOR with the block at X0, store to TAG.
+Htbl textequ <eax>
+Tp textequ <ecx>
+X0 textequ <edx>
+TAG textequ <ebx>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ push ebx
+; After the push the arguments start at esp + 2*4 (saved ebx plus the return address).
+ mov Htbl, [esp + 2*4 + 0*4]
+ mov Tp, [esp + 2*4 + 1*4]
+ mov X0, [esp + 2*4 + 4*4]
+ mov TAG, [esp + 2*4 + 5*4]
+
+ vzeroupper
+ vmovdqu T, XMMWORD PTR[Tp]
+; Length block: third argument (Mlen) in dword 0, fourth argument (Alen) in dword 2, each shifted left by 3.
+ vpxor TMP0, TMP0, TMP0
+ vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0
+ vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2
+ vpsllq TMP0, TMP0, 3
+
+ vpxor T, T, TMP0
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+; Byte-swap the hash, mask it with the block at X0 and store the 16-byte result.
+ vpshufb T, T, [Lbswap_mask]
+ vpxor T, T, [X0]
+ vmovdqu XMMWORD PTR[TAG], T
+ vzeroupper
+
+ pop ebx
+
+ ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC
+; Encrypts an all-zero block with KS to get H, then fills Htbl: 8 entries at Htbl+0 and 8 folded entries at Htbl+8*16.
+Htbl textequ <eax>
+KS textequ <ecx>
+NR textequ <edx>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+; No registers are pushed, so the arguments start at esp + 4 (just above the return address).
+ mov Htbl, [esp + 4*1 + 0*4]
+ mov KS, [esp + 4*1 + 1*4]
+ mov NR, [esp + 4*1 + 2*4]
+
+ vzeroupper
+ ; AES-ENC(0)
+ vmovdqu T, XMMWORD PTR[KS] ; zero block XOR round key 0 is just round key 0
+ lea KS, [16 + KS]
+ dec NR ; the loop below runs NR-1 vaesenc rounds; the last round is vaesenclast
+Lenc_loop:
+ vaesenc T, T, [KS]
+ lea KS, [16 + KS]
+ dec NR
+ jnz Lenc_loop
+
+ vaesenclast T, T, [KS]
+ vpshufb T, T, [Lbswap_mask]
+
+ ;Calculate H` = GFMUL(H, 2)
+ vpsrad xmm3, T, 31
+ vpshufd xmm3, xmm3, 0ffh ; broadcast the sign of the top dword to all lanes
+ vpand xmm5, xmm3, [Lpoly] ; Lpoly is XORed in only when the top bit of T was set
+ vpsrld xmm3, T, 31
+ vpslld xmm4, T, 1
+ vpslldq xmm3, xmm3, 4 ; carry each dword's top bit into the next dword
+ vpxor T, xmm4, xmm3
+ vpxor T, T, xmm5
+; Entry 0 is H`; TMP0 keeps it as the fixed multiplier for the remaining entries.
+ vmovdqu TMP0, T
+ vmovdqu XMMWORD PTR[Htbl + 0*16], T
+; The second half of the table stores each entry XORed with its own qword-swapped copy (used for Karatsuba).
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2
+; Assembly-time loop: entry i = GFMUL(entry i-1, entry 0) for i = 1..7.
+ i = 1
+ WHILE i LT 8
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+ vmovdqu XMMWORD PTR[Htbl + i*16], T
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+ i = i+1
+ ENDM
+ vzeroupper
+ ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC
+; Folds len bytes at inp into the hash state at Tp using the table Htbl; the code assumes len is a multiple of 16 (TODO confirm with callers).
+Htbl textequ <eax>
+inp textequ <ecx>
+len textequ <edx>
+Tp textequ <ebx>
+hlp0 textequ <esi>
+
+DATA textequ <xmm0>
+T textequ <xmm1>
+TMP0 textequ <xmm2>
+TMP1 textequ <xmm3>
+TMP2 textequ <xmm4>
+TMP3 textequ <xmm5>
+TMP4 textequ <xmm6>
+Xhi textequ <xmm7>
+; KARATSUBA_AAD i: accumulate DATA * Htbl[i] into TMP0 (low), TMP1 (high) and TMP2 (middle); clobbers TMP3.
+KARATSUBA_AAD MACRO i
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+; Alen is the third argument: return address (1*4) plus two arguments (2*4) above esp. Return if it is zero.
+ cmp DWORD PTR[esp + 1*4 + 2*4], 0
+ jnz LbeginAAD
+ ret
+
+LbeginAAD:
+ push ebx
+ push esi
+; Two registers were pushed, so the arguments now start at esp + 4*3.
+ mov Htbl, [esp + 4*3 + 0*4]
+ mov inp, [esp + 4*3 + 1*4]
+ mov len, [esp + 4*3 + 2*4]
+ mov Tp, [esp + 4*3 + 3*4]
+
+ vzeroupper
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu T, XMMWORD PTR[Tp]
+ ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ mov hlp0, len
+ and hlp0, 128-1
+ jz Lmod_loop
+; NOTE(review): on the jump above T is still the caller's value and Xhi is zero; the loop applies its reduction steps to T - confirm callers pass T == 0 in that case.
+ and len, -128
+ sub hlp0, 16
+; hlp0 is now the byte offset of the Htbl entry for the first prefix block and steps down by 16 per block.
+ ; Prefix block
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+
+ vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+ vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+
+ lea inp, [inp+16]
+ test hlp0, hlp0
+ jnz Lpre_loop
+ jmp Lred1
+
+ ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+ sub hlp0, 16
+
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+ vpxor TMP2, TMP2, TMP3
+
+ test hlp0, hlp0
+ lea inp, [inp+16] ; lea leaves the flags from the test above untouched
+ jnz Lpre_loop
+
+Lred1:
+; Karatsuba fixup for the prefix: T gets the low 128 bits, Xhi the high 128 bits; reduction is deferred.
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+Lmod_loop:
+; Main loop: 8 blocks per pass, last block first; the reduction of the previous (Xhi:T) is interleaved with the multiplies.
+ sub len, 16*8
+ jb Ldone
+ ; Block #0
+ vmovdqu DATA, XMMWORD PTR[inp + 16*7]
+ vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask]
+
+ vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h
+ vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h
+
+ ; Block #1
+ vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 1
+
+ ; Block #2
+ vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 2
+
+ vpxor T, T, TMP4 ;reduction stage 1b
+
+ ; Block #3
+ vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 3
+ ; Block #4
+ vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 4
+
+ vpxor T, T, TMP4 ;reduction stage 2b
+ ; Block #5
+ vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 5
+
+ vpxor T, T, Xhi ;reduction finalize
+ ; Block #6
+ vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 6
+ ; Block #7
+ vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+ KARATSUBA_AAD 7
+ ; Aggregated 8 blocks, now karatsuba fixup
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+ lea inp, [inp + 16*8]
+ jmp Lmod_loop
+; Final reduction of the pending (Xhi:T) pair, then write the hash state back to Tp.
+Ldone:
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpxor T, T, Xhi
+ vmovdqu XMMWORD PTR[Tp], T
+ vzeroupper
+
+ pop esi
+ pop ebx
+ ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC
+; Encrypts len bytes from PT into CT with AES in counter mode and folds the produced ciphertext into the hash state T kept in Gctx.
+PT textequ <eax>
+CT textequ <ecx>
+Htbl textequ <edx>
+Gctx textequ <edx>
+len textequ <DWORD PTR[ebp + 5*4 + 3*4]>
+KS textequ <esi>
+NR textequ <DWORD PTR[244+KS]>
+
+aluCTR textequ <ebx>
+aluTMP textequ <edi>
+; T is a memory operand inside Gctx; CTR1-CTR6 share xmm1-xmm6 with TMP0-TMP5, so values are spilled to the stack between phases.
+T textequ <XMMWORD PTR[16*16 + 1*16 + Gctx]>
+TMP0 textequ <xmm1>
+TMP1 textequ <xmm2>
+TMP2 textequ <xmm3>
+TMP3 textequ <xmm4>
+TMP4 textequ <xmm5>
+TMP5 textequ <xmm6>
+
+CTR0 textequ <xmm0>
+CTR1 textequ <xmm1>
+CTR2 textequ <xmm2>
+CTR3 textequ <xmm3>
+CTR4 textequ <xmm4>
+CTR5 textequ <xmm5>
+CTR6 textequ <xmm6>
+; ROUND i: one vaesenc with round key i on all seven counter blocks; xmm7 holds the key.
+ROUND MACRO i
+ vmovdqu xmm7, XMMWORD PTR[i*16 + KS]
+ vaesenc CTR0, CTR0, xmm7
+ vaesenc CTR1, CTR1, xmm7
+ vaesenc CTR2, CTR2, xmm7
+ vaesenc CTR3, CTR3, xmm7
+ vaesenc CTR4, CTR4, xmm7
+ vaesenc CTR5, CTR5, xmm7
+ vaesenc CTR6, CTR6, xmm7
+ENDM
+; KARATSUBA i: accumulate TMP5 * Htbl[i] into TMP0 (middle), TMP1 (high) and TMP2 (low); clobbers TMP3 and TMP4.
+KARATSUBA MACRO i
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+; NEXTCTR i: bump aluCTR and rewrite the last dword of counter slot i at esp + 8*16 + i*16, XORed with round key 0.
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ bswap aluTMP
+ xor aluTMP, [3*4 + KS]
+ mov [3*4 + 8*16 + i*16 + esp], aluTMP
+ENDM
+; len is the fourth stack argument; return right away when it is zero.
+ cmp DWORD PTR[1*4 + 3*4 + esp], 0
+ jne LbeginENC
+ ret
+
+LbeginENC:
+
+ vzeroupper
+ push ebp
+ push ebx
+ push esi
+ push edi
+; ebp anchors the frame; esp drops 16*16 bytes and is aligned to 16: low slots hold byte-swapped ciphertext, slots from 8*16 hold counter blocks.
+ mov ebp, esp
+ sub esp, 16*16
+ and esp, -16
+
+ mov PT, [ebp + 5*4 + 0*4]
+ mov CT, [ebp + 5*4 + 1*4]
+ mov Gctx, [ebp + 5*4 + 2*4]
+
+ mov KS, [16*16 + 3*16 + Gctx]
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+ bswap aluCTR
+
+; Counter slot 0 = counter block XOR round key 0, i.e. the initial key addition is already applied.
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0
+
+ cmp len, 16*7
+ jb LEncDataSingles
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 6*16 + esp], TMP0
+
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+; Encrypt the initial 7 blocks
+ sub len, 16*7
+ vpaddd CTR1, CTR0, XMMWORD PTR[Lone]
+ vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo]
+ vpaddd CTR3, CTR2, XMMWORD PTR[Lone]
+ vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo]
+ vpaddd CTR5, CTR4, XMMWORD PTR[Lone]
+ vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo]
+
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask]
+
+ vmovdqu xmm7, XMMWORD PTR[0*16 + KS]
+ vpxor CTR0, CTR0, xmm7
+ vpxor CTR1, CTR1, xmm7
+ vpxor CTR2, CTR2, xmm7
+ vpxor CTR3, CTR3, xmm7
+ vpxor CTR4, CTR4, xmm7
+ vpxor CTR5, CTR5, xmm7
+ vpxor CTR6, CTR6, xmm7
+
+ ROUND 1
+; Slot 0 gets counter+7 for the next pass; the NEXTCTR 1..6 calls below fill slots 1..6 with counter+8..counter+13.
+ add aluCTR, 7
+ mov aluTMP, aluCTR
+ bswap aluTMP
+ xor aluTMP, [KS + 3*4]
+ mov [8*16 + 0*16 + 3*4 + esp], aluTMP
+
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+ ROUND 8
+ ROUND 9
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+; Keep the byte-swapped ciphertext for hashing: blocks 5..0 go to stack slots 1..6, block 6 stays in TMP5.
+ vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+ vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+ vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+ vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+ vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+ vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LEncData7
+; NOTE(review): the jmp above targets the label that immediately follows.
+LEncData7:
+ cmp len, 16*7
+ jb LEndEnc7
+ sub len, 16*7
+; Hash the 7 ciphertext blocks saved by the previous pass; T is folded into the oldest one (slot 6).
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+ vpxor TMP5, TMP5, T
+ KARATSUBA 6
+; Combine the partial products, reduce with Lpoly and store the result back into T.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5
+; Encrypt the next 7 counter blocks from the stack slots, refreshing each slot as it is consumed.
+ vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp]
+ vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + esp]
+ vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp]
+ vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp]
+ vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp]
+ vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp]
+ vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp]
+
+ ROUND 1
+ NEXTCTR 0
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+
+ ROUND 8
+ ROUND 9
+
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+
+ vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+ vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+ vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+ vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+ vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+ vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LEncData7
+
+LEndEnc7:
+; Hash the last 7 ciphertext blocks that were encrypted but not yet folded into T.
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+ vpxor TMP5, TMP5, T
+ KARATSUBA 6
+
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5
+; aluCTR ran 6 ahead of counter slot 0 (slots 1..6 were pre-filled); rewind it to match slot 0.
+ sub aluCTR, 6
+
+LEncDataSingles:
+; Remaining whole 16-byte blocks, one at a time.
+ cmp len, 16
+ jb LEncDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp]
+ NEXTCTR 0
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+ vpxor TMP1, TMP1, XMMWORD PTR[PT]
+ vmovdqu XMMWORD PTR[CT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+
+ jmp LEncDataSingles
+
+LEncDataTail:
+; Final partial block (fewer than 16 bytes), if any.
+ cmp len, 0
+ je LEncDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp]
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+; zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa XMMWORD PTR[esp], TMP2
+; copy as many bytes as needed
+ xor KS, KS ; KS is reused as the byte index from here on
+ mov aluTMP, edx ; dl is the byte temp below, so park edx (Gctx/Htbl) in aluTMP
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[PT + KS]
+ mov BYTE PTR[esp + KS], dl
+ inc KS
+ jmp @b
+@@:
+ vpxor TMP1, TMP1, XMMWORD PTR[esp]
+ vmovdqa XMMWORD PTR[esp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[esp + KS]
+ mov BYTE PTR[CT + KS], dl
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[esp + KS], 0 ; clear the bytes past len so only real ciphertext is hashed
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP ; restore Gctx/Htbl
+ vmovdqa TMP1, XMMWORD PTR[esp]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+; NOTE(review): aluCTR is incremented below on every exit, including when no partial block was processed - confirm intended.
+LEncDataEnd:
+ inc aluCTR
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov esp, ebp
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+
+ vzeroupper
+
+ ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+NEXTCTR MACRO i ; step the 32-bit counter and patch it into 16-byte stack slot i
+ add aluCTR, 1 ; aluCTR is held byte-swapped by the visible caller (it bswaps right after loading it)
+ mov aluTMP, aluCTR
+ bswap aluTMP ; back to the byte order used inside the counter block
+ xor aluTMP, [3*4 + KS] ; XOR with the last dword of the first round key at KS
+ mov [3*4 + i*16 + esp], aluTMP ; only the last dword of slot i is rewritten; the other 12 bytes are left as they are
+ENDM
+
+intel_aes_gcmDEC PROC ; decrypt len bytes from CT into PT while hashing the ciphertext into T
+; Stack arguments in the order read below: CT, PT, Gctx, len. NOTE(review): the register/memory aliases (CT, PT, len, T, KS, ...) and the KARATSUBA, ROUND and GFMUL macros are defined outside this view.
+ cmp DWORD PTR[1*4 + 3*4 + esp], 0 ; fourth stack argument (len): return at once when it is 0
+ jne LbeginDEC
+ ret
+
+LbeginDEC:
+
+ vzeroupper
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov ebp, esp ; ebp keeps the unaligned stack pointer for the epilogue
+ sub esp, 8*16 ; scratch area of eight 16-byte slots
+ and esp, -16 ; 16-byte aligned so vmovdqa can be used on the slots
+
+ mov CT, [ebp + 5*4 + 0*4] ; first argument (4 saved registers + return address precede it)
+ mov PT, [ebp + 5*4 + 1*4] ; second argument
+ mov Gctx, [ebp + 5*4 + 2*4] ; third argument
+
+ mov KS, [16*16 + 3*16 + Gctx] ; key-schedule pointer stored at context offset 304
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] ; last dword of the counter block at context offset 288
+ bswap aluCTR ; byte-swap so the counter can be stepped with add/inc
+
+
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] ; first round key XOR counter block
+ vmovdqu XMMWORD PTR[0*16 + esp], TMP0 ; slot 0 = pre-whitened counter block
+
+ cmp len, 16*7
+ jb LDecDataSingles ; fewer than 7 whole blocks: skip the 7-block loop
+ vmovdqu XMMWORD PTR[1*16 + esp], TMP0 ; replicate into slots 1..6; NEXTCTR later patches only the last dword of each
+ vmovdqu XMMWORD PTR[2*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[3*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[4*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[5*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[6*16 + esp], TMP0
+ dec aluCTR ; NEXTCTR adds 1 before writing, so step back once to make slot 0 get the current counter
+
+LDecData7:
+ cmp len, 16*7
+ jb LDecData7End
+ sub len, 16*7
+; Hash the 7 ciphertext blocks, refreshing counter slot i after each one.
+ vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP5, TMP5, T ; the running hash is folded into the first block only
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5 ; XOR of the block's two 64-bit halves, for the Karatsuba middle term
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h ; middle term, using entry 6 of the second half of the table
+ vmovdqu TMP4, XMMWORD PTR[6*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h ; high halves
+ vpclmulqdq TMP2, TMP5, TMP4, 000h ; low halves
+
+ NEXTCTR 0
+ vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 5 ; presumably accumulates TMP5 * table entry 5 into TMP0/TMP1/TMP2 - macro not in view
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 4
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 3
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 2
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 1
+ NEXTCTR 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 0
+ NEXTCTR 6
+; Combine the three partial products: TMP4 = high 128 bits, TMP5 = low 128 bits.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+; Two folding steps of the low half with Lpoly, then add the high half.
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5 ; store the updated running hash
+; Load the 7 pre-whitened counter blocks prepared by NEXTCTR.
+ vmovdqa CTR0, XMMWORD PTR[0*16 + esp]
+ vmovdqa CTR1, XMMWORD PTR[1*16 + esp]
+ vmovdqa CTR2, XMMWORD PTR[2*16 + esp]
+ vmovdqa CTR3, XMMWORD PTR[3*16 + esp]
+ vmovdqa CTR4, XMMWORD PTR[4*16 + esp]
+ vmovdqa CTR5, XMMWORD PTR[5*16 + esp]
+ vmovdqa CTR6, XMMWORD PTR[6*16 + esp]
+
+ ROUND 1
+ ROUND 2
+ ROUND 3
+ ROUND 4
+ ROUND 5
+ ROUND 6
+ ROUND 7
+ ROUND 8
+ ROUND 9
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS] ; last round key when NR == 10
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS] ; last round key when NR == 12
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS] ; otherwise 14 rounds
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+; keystream XOR ciphertext = plaintext
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + CT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + CT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + CT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + CT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + CT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + CT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + CT]
+
+ vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LDecData7
+
+LDecData7End:
+
+ NEXTCTR 0 ; slot 0 now holds the next unused counter, as the single-block path expects
+
+LDecDataSingles: ; one whole block per iteration
+
+ cmp len, 16
+ jb LDecDataTail
+ sub len, 16
+
+ vmovdqu TMP1, XMMWORD PTR[CT]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl] ; first table entry
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 ; presumably TMP1 = TMP1 * TMP0 in GF(2^128), other operands scratch - macro not in view
+ vmovdqu T, TMP1
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + esp] ; counter block for this ciphertext block
+ NEXTCTR 0 ; prepare slot 0 for the following block
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+ vpxor TMP1, TMP1, XMMWORD PTR[CT]
+ vmovdqu XMMWORD PTR[PT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+ jmp LDecDataSingles
+
+LDecDataTail: ; final partial block (1..15 bytes), if any
+
+ cmp len, 0
+ je LDecDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + esp]
+ inc aluCTR ; account for the counter block consumed by the partial block
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast xmm7, TMP1, TMP2 ; keystream block parked in xmm7 while the ciphertext is hashed
+
+; copy as many bytes as needed
+ xor KS, KS ; the key schedule is no longer needed: KS becomes the byte index
+ mov aluTMP, edx ; dl is the byte temporary below, so save edx first
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[CT + KS]
+ mov BYTE PTR[esp + KS], dl
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16 ; zero-pad the stack copy up to a full 16-byte block
+ je @f
+ mov BYTE PTR[esp + KS], 0
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP
+ vmovdqa TMP1, XMMWORD PTR[esp]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+
+ vpxor xmm7, xmm7, XMMWORD PTR[esp] ; keystream XOR zero-padded ciphertext
+ vmovdqa XMMWORD PTR[esp], xmm7
+ xor KS, KS
+ mov aluTMP, edx
+@@:
+ cmp len, KS ; only the first len bytes are written to PT
+ je @f
+ mov dl, BYTE PTR[esp + KS]
+ mov BYTE PTR[PT + KS], dl
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP
+
+LDecDataEnd:
+
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR ; write the advanced counter back into the context
+
+ mov esp, ebp
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ vzeroupper
+
+ ret
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm.h b/security/nss/lib/freebl/intel-gcm.h
new file mode 100644
index 0000000000..e0221159d7
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.h
@@ -0,0 +1,97 @@
+/******************************************************************************/
+/* LICENSE: */
+/* This submission to NSS is to be made available under the terms of the */
+/* Mozilla Public License, v. 2.0. You can obtain one at http: */
+/* //mozilla.org/MPL/2.0/. */
+/******************************************************************************/
+/* Copyright(c) 2013, Intel Corp. */
+/******************************************************************************/
+/* Reference: */
+/* [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication */
+/* Instruction and its Usage for Computing the GCM Mode (Rev. 2.01) */
+/* http://software.intel.com/sites/default/files/article/165685/clmul-wp-r*/
+/*ev-2.01-2012-09-21.pdf */
+/* [2] S. Gueron, M. E. Kounavis: Efficient Implementation of the Galois */
+/* Counter Mode Using a Carry-less Multiplier and a Fast Reduction */
+/* Algorithm. Information Processing Letters 110: 549-553 (2010). */
+/* [3] S. Gueron: AES Performance on the 2nd Generation Intel(R) Core(TM) */
+/* Processor Family (to be posted) (2012). */
+/* [4] S. Gueron: Fast GHASH computations for speeding up AES-GCM (to be */
+/* published) (2012). */
+
+#ifndef INTEL_GCM_H
+#define INTEL_GCM_H 1
+
+#include "blapii.h"
+
+typedef struct intel_AES_GCMContextStr intel_AES_GCMContext; /* opaque here: only the struct tag is declared in this header */
+/* NOTE(review): the notes on the prototypes below are inferred from names and signatures only; the implementations are not part of this header - confirm against them. */
+intel_AES_GCMContext *intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, /* presumably builds a GCM context around an existing cipher context */
+ const unsigned char *params);
+/* Releases a context; freeit presumably selects whether the structure itself is freed as well. */
+void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit);
+/* Encrypts inlen bytes from inbuf into outbuf (capacity maxout); *outlen presumably receives the number of bytes produced. */
+SECStatus intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+/* Decrypting counterpart of intel_AES_GCM_EncryptUpdate, with the same parameter list. */
+SECStatus intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+SECStatus intel_AES_GCM_EncryptAEAD(intel_AES_GCMContext *gcm, /* variant taking params and aad per call */
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+SECStatus intel_AES_GCM_DecryptAEAD(intel_AES_GCMContext *gcm, /* decrypting counterpart of intel_AES_GCM_EncryptAEAD */
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+
+/* Prototypes of functions in the assembler file for fast AES-GCM, using
+ Intel AES-NI and CLMUL-NI, as described in [1]
+ [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication
+ Instruction and its Usage for Computing the GCM Mode
+ The behaviour notes below describe the x86-64 GAS implementation
+ (intel-gcm.s). */
+
+/* Prepares the constants used in the aggregated reduction method.
+ Runs the all-zero block through the NR-round key schedule at KS to get H,
+ doubles it, and fills Htbl: the first 128 bytes hold eight successive
+ powers, the last 128 bytes hold, for each power, the XOR of its two 64-bit
+ halves (the Karatsuba middle-term operand). */
+void intel_aes_gcmINIT(unsigned char Htbl[16 * 16],
+ unsigned char *KS,
+ int NR);
+
+/* Produces the final GHASH value.
+ Reads the running hash from Tp, mixes in Mlen and Alen (both in bytes;
+ they are converted to bit counts), multiplies by the first entry of Htbl,
+ XORs the result with the 16 bytes at X0 and stores 16 bytes at TAG.
+ Tp itself is not written. */
+void intel_aes_gcmTAG(unsigned char Htbl[16 * 16],
+ unsigned char *Tp,
+ unsigned long Mlen,
+ unsigned long Alen,
+ unsigned char *X0,
+ unsigned char *TAG);
+
+/* Hashes the Additional Authenticated Data, should be used before enc/dec.
+ Operates on whole blocks only. Partial blocks should be padded externally.
+ Alen is in bytes; the 16-byte hash at Tp is read and updated in place, and
+ left untouched when Alen is 0. */
+void intel_aes_gcmAAD(unsigned char Htbl[16 * 16],
+ unsigned char *AAD,
+ unsigned long Alen,
+ unsigned char *Tp);
+
+/* Encrypts and hashes the Plaintext.
+ Operates on any length of data, however partial block should only be encrypted
+ at the last call, otherwise the result will be incorrect.
+ Gctx is used as the H table at offset 0, the running hash at offset 272,
+ the counter block at offset 288 and a pointer to the AES context at offset
+ 304; hash and counter are written back before returning. */
+void intel_aes_gcmENC(const unsigned char *PT,
+ unsigned char *CT,
+ void *Gctx,
+ unsigned long len);
+
+/* Similar to ENC, but decrypts the Ciphertext: CT is the input, PT the output. */
+void intel_aes_gcmDEC(const unsigned char *CT,
+ unsigned char *PT,
+ void *Gctx,
+ unsigned long len);
+
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm.s b/security/nss/lib/freebl/intel-gcm.s
new file mode 100644
index 0000000000..5b5cf5d4bb
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.s
@@ -0,0 +1,1340 @@
+# LICENSE:
+# This submission to NSS is to be made available under the terms of the
+# Mozilla Public License, v. 2.0. You can obtain one at http:
+# //mozilla.org/MPL/2.0/.
+################################################################################
+# Copyright(c) 2012, Intel Corp.
+
+.align 16
+.Lone: # 128-bit value 1; added with vpaddd to step a byte-swapped counter by one
+.quad 1,0
+.Ltwo: # 128-bit value 2; steps a counter by two
+.quad 2,0
+.Lbswap_mask: # vpshufb control that reverses the 16 bytes of a register
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+.Lshuff_mask: # vpshufb control that copies source byte 15 into every byte (used by intel_aes_gcmINIT)
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+.Lpoly: # reduction constant; the folding steps multiply by its high qword via vpclmulqdq $0x10
+.quad 0x1, 0xc200000000000000
+
+
+################################################################################
+# Generates the final GCM tag
+# void intel_aes_gcmTAG(uint8_t Htbl[16*16], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
+.type intel_aes_gcmTAG,@function
+.globl intel_aes_gcmTAG
+.align 16
+intel_aes_gcmTAG: # final step: mix the bit lengths into the hash, multiply once more, XOR with X0
+
+.set Htbl, %rdi
+.set Tp, %rsi
+.set Mlen, %rdx
+.set Alen, %rcx
+.set X0, %r8
+.set TAG, %r9
+
+.set T,%xmm0
+.set TMP0,%xmm1
+
+ vmovdqu (Tp), T # running hash, stored byte-reversed in memory
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpxor TMP0, TMP0, TMP0
+ shl $3, Mlen # byte counts -> bit counts
+ shl $3, Alen
+ vpinsrq $0, Mlen, TMP0, TMP0 # length block: message bits in the low qword,
+ vpinsrq $1, Alen, TMP0, TMP0 # AAD bits in the high qword
+ vpxor TMP0, T, T
+ vmovdqu (Htbl), TMP0 # first table entry
+ call GFMUL # defined outside this view; by its use here it presumably returns T * TMP0 in T
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpxor (X0), T, T
+ vmovdqu T, (TAG)
+
+ret
+.size intel_aes_gcmTAG, .-intel_aes_gcmTAG
+################################################################################
+# Generates the H table
+# void intel_aes_gcmINIT(uint8_t Htbl[16*16], uint8_t *KS, int NR);
+.type intel_aes_gcmINIT,@function
+.globl intel_aes_gcmINIT
+.align 16
+intel_aes_gcmINIT: # derive H from the key schedule and fill the 256-byte table at Htbl
+
+.set Htbl, %rdi
+.set KS, %rsi
+.set NR, %edx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+
+CALCULATE_POWERS_OF_H:
+ vmovdqu 16*0(KS), T # starting from round key 0 alone = whitening an all-zero block
+ vaesenc 16*1(KS), T, T
+ vaesenc 16*2(KS), T, T
+ vaesenc 16*3(KS), T, T
+ vaesenc 16*4(KS), T, T
+ vaesenc 16*5(KS), T, T
+ vaesenc 16*6(KS), T, T
+ vaesenc 16*7(KS), T, T
+ vaesenc 16*8(KS), T, T
+ vaesenc 16*9(KS), T, T
+ vmovdqu 16*10(KS), TMP0 # last round key when NR == 10
+ cmp $10, NR
+ je .LH0done
+ vaesenc 16*10(KS), T, T
+ vaesenc 16*11(KS), T, T
+ vmovdqu 16*12(KS), TMP0 # last round key when NR == 12
+ cmp $12, NR
+ je .LH0done
+ vaesenc 16*12(KS), T, T
+ vaesenc 16*13(KS), T, T
+ vmovdqu 16*14(KS), TMP0 # otherwise 14 rounds
+
+.LH0done:
+ vaesenclast TMP0, T, T # T = H
+
+ vpshufb .Lbswap_mask(%rip), T, T
+
+ vmovdqu T, TMP0 # this copy is overwritten by the second copy below before TMP0 is read
+ # Calculate H` = GFMUL(H, 2)
+ vpsrld $7 , T , %xmm3
+ vmovdqu .Lshuff_mask(%rip), %xmm4
+ vpshufb %xmm4, %xmm3 , %xmm3 # every byte of xmm3 = top bit of T (0 or 1)
+ movq $0xff00 , %rax
+ vmovq %rax, %xmm4
+ vpshufb %xmm3, %xmm4 , %xmm4 # look up byte 0 (0x00) or byte 1 (0xff): all-ones mask when the top bit is set
+ vmovdqu .Lpoly(%rip), %xmm5
+ vpand %xmm4, %xmm5, %xmm5 # reduction constant, or zero - selected without a branch
+ vpsrld $31, T, %xmm3 # carry bit out of each dword
+ vpslld $1, T, %xmm4
+ vpslldq $4, %xmm3, %xmm3 # move each carry into the next higher dword
+ vpxor %xmm3, %xmm4, T #T (xmm0) holds now p(x)<<1
+
+ #adding p(x)<<1 to xmm5
+ vpxor %xmm5, T , T
+ vmovdqu T, TMP0 # TMP0 keeps H * 2 as the fixed operand for the GFMUL calls below
+ vmovdqu T, (Htbl) # H * 2
+ call GFMUL # defined outside this view; presumably T = T * TMP0
+ vmovdqu T, 16(Htbl) # H^2 * 2
+ call GFMUL
+ vmovdqu T, 32(Htbl) # H^3 * 2
+ call GFMUL
+ vmovdqu T, 48(Htbl) # H^4 * 2
+ call GFMUL
+ vmovdqu T, 64(Htbl) # H^5 * 2
+ call GFMUL
+ vmovdqu T, 80(Htbl) # H^6 * 2
+ call GFMUL
+ vmovdqu T, 96(Htbl) # H^7 * 2
+ call GFMUL
+ vmovdqu T, 112(Htbl) # H^8 * 2
+
+ # Precalculations for the reduce 4 step
+ vpshufd $78, (Htbl), %xmm8 # $78 swaps the two 64-bit halves
+ vpshufd $78, 16(Htbl), %xmm9
+ vpshufd $78, 32(Htbl), %xmm10
+ vpshufd $78, 48(Htbl), %xmm11
+ vpshufd $78, 64(Htbl), %xmm12
+ vpshufd $78, 80(Htbl), %xmm13
+ vpshufd $78, 96(Htbl), %xmm14
+ vpshufd $78, 112(Htbl), %xmm15
+
+ vpxor (Htbl), %xmm8, %xmm8 # low qword = high half XOR low half of the entry
+ vpxor 16(Htbl), %xmm9, %xmm9
+ vpxor 32(Htbl), %xmm10, %xmm10
+ vpxor 48(Htbl), %xmm11, %xmm11
+ vpxor 64(Htbl), %xmm12, %xmm12
+ vpxor 80(Htbl), %xmm13, %xmm13
+ vpxor 96(Htbl), %xmm14, %xmm14
+ vpxor 112(Htbl), %xmm15, %xmm15
+
+ vmovdqu %xmm8, 128(Htbl) # second half of the table: Karatsuba middle-term operands
+ vmovdqu %xmm9, 144(Htbl)
+ vmovdqu %xmm10, 160(Htbl)
+ vmovdqu %xmm11, 176(Htbl)
+ vmovdqu %xmm12, 192(Htbl)
+ vmovdqu %xmm13, 208(Htbl)
+ vmovdqu %xmm14, 224(Htbl)
+ vmovdqu %xmm15, 240(Htbl)
+
+ ret
+.size intel_aes_gcmINIT, .-intel_aes_gcmINIT
+################################################################################
+# Authenticate only
+# void intel_aes_gcmAAD(uint8_t Htbl[16*16], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
+
+.globl intel_aes_gcmAAD
+.type intel_aes_gcmAAD,@function
+.align 16
+intel_aes_gcmAAD:
+# Folds len bytes at inp into the running GHASH value stored at Tp.
+#   Htbl (%rdi)  table written by intel_aes_gcmINIT
+#   inp  (%rsi)  data to hash; len must be a multiple of 16 (callers pad
+#                partial blocks themselves)
+#   len  (%rdx)  byte count; 0 returns immediately without touching Tp
+#   Tp   (%rcx)  16-byte hash, read on entry and rewritten on exit
+# Blocks are consumed eight at a time. When the block count is not a
+# multiple of eight, the first (count mod 8) blocks are hashed first.
+# Between multiplications the hash is carried unreduced: T holds the low
+# 128 bits and Xhi the high 128 bits of the product.
+
+.set DATA, %xmm0
+.set T, %xmm1
+.set BSWAP_MASK, %xmm2
+.set TMP0, %xmm3
+.set TMP1, %xmm4
+.set TMP2, %xmm5
+.set TMP3, %xmm6
+.set TMP4, %xmm7
+.set Xhi, %xmm9
+
+.set Htbl, %rdi
+.set inp, %rsi
+.set len, %rdx
+.set Tp, %rcx
+
+.set hlp0, %r11
+
+# Multiply DATA by table entry i and accumulate the three Karatsuba partial
+# products: TMP0 += low*low, TMP1 += high*high, TMP2 += middle term.
+.macro KARATSUBA_AAD i
+ vpclmulqdq $0x00, 16*\i(Htbl), DATA, TMP3
+ vpxor TMP3, TMP0, TMP0
+ vpclmulqdq $0x11, 16*\i(Htbl), DATA, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpshufd $78, DATA, TMP3
+ vpxor DATA, TMP3, TMP3
+ vpclmulqdq $0x00, 16*(\i+8)(Htbl), TMP3, TMP3
+ vpxor TMP3, TMP2, TMP2
+.endm
+
+ test len, len
+ jnz .LbeginAAD
+ ret
+
+.LbeginAAD:
+
+ push hlp0
+ vzeroupper
+
+ vmovdqa .Lbswap_mask(%rip), BSWAP_MASK
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu (Tp),T
+ vpshufb BSWAP_MASK,T,T
+
+ # we hash 8 blocks each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ mov len, hlp0
+ and $~-128, hlp0 # hlp0 = len mod 128 = bytes in the prefix blocks
+
+ jnz .LAADprefix
+
+ # No prefix blocks. .Lmod_loop treats T as the low half and Xhi as the
+ # high half of an unreduced product and folds T before mixing it into the
+ # first block. The incoming hash is already reduced, so hand it over as
+ # the high half with a zero low half: folding zero gives zero and the XOR
+ # with Xhi returns the hash unchanged. (Entering with T = hash, Xhi = 0
+ # would fold the hash a second time, which is harmless only when the
+ # incoming hash is zero.)
+ vmovdqa T, Xhi
+ vpxor T, T, T
+ jmp .Lmod_loop
+
+.LAADprefix:
+ sub hlp0, len # len = bytes left for the 8-block loop
+ sub $16, hlp0 # table offset of the entry for the first prefix block
+
+ #hash first prefix block
+ vmovdqu (inp), DATA
+ vpshufb BSWAP_MASK, DATA, DATA
+ vpxor T, DATA, DATA # the running hash is folded into the first block
+
+ vpclmulqdq $0x00, (Htbl, hlp0), DATA, TMP0
+ vpclmulqdq $0x11, (Htbl, hlp0), DATA, TMP1
+ vpshufd $78, DATA, TMP2
+ vpxor DATA, TMP2, TMP2
+ vpclmulqdq $0x00, 16*8(Htbl, hlp0), TMP2, TMP2
+
+ lea 16(inp), inp
+ test hlp0, hlp0
+ jnz .Lpre_loop
+ jmp .Lred1
+
+ #hash remaining prefix blocks (up to 7 total prefix blocks)
+.align 64
+.Lpre_loop:
+
+ sub $16, hlp0 # step down to the next lower table entry
+
+ vmovdqu (inp),DATA # next data block
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x00, (Htbl,hlp0), DATA, TMP3
+ vpxor TMP3, TMP0, TMP0
+ vpclmulqdq $0x11, (Htbl,hlp0), DATA, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpshufd $78, DATA, TMP3
+ vpxor DATA, TMP3, TMP3
+ vpclmulqdq $0x00, 16*8(Htbl,hlp0), TMP3, TMP3
+ vpxor TMP3, TMP2, TMP2
+
+ test hlp0, hlp0
+
+ lea 16(inp), inp # lea leaves the flags from the test above intact
+
+ jnz .Lpre_loop
+
+.Lred1:
+ # karatsuba fixup: split the middle term over the high and low halves
+ vpxor TMP0, TMP2, TMP2
+ vpxor TMP1, TMP2, TMP2
+ vpsrldq $8, TMP2, TMP3
+ vpslldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, Xhi
+ vpxor TMP2, TMP0, T
+
+.align 64
+.Lmod_loop:
+ sub $0x80, len
+ jb .Ldone
+
+ # The eight blocks are processed last-to-first so that the previous
+ # product (T, Xhi) can be reduced in between and mixed into block 0 only.
+ vmovdqu 16*7(inp),DATA # Ii
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x00, (Htbl), DATA, TMP0
+ vpclmulqdq $0x11, (Htbl), DATA, TMP1
+ vpshufd $78, DATA, TMP2
+ vpxor DATA, TMP2, TMP2
+ vpclmulqdq $0x00, 16*8(Htbl), TMP2, TMP2
+ #########################################################
+ vmovdqu 16*6(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+ KARATSUBA_AAD 1
+ #########################################################
+ vmovdqu 16*5(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 1a
+ vpalignr $8, T, T, T
+
+ KARATSUBA_AAD 2
+
+ vpxor TMP4, T, T #reduction stage 1b
+ #########################################################
+ vmovdqu 16*4(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 3
+ #########################################################
+ vmovdqu 16*3(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 2a
+ vpalignr $8, T, T, T
+
+ KARATSUBA_AAD 4
+
+ vpxor TMP4, T, T #reduction stage 2b
+ #########################################################
+ vmovdqu 16*2(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 5
+
+ vpxor Xhi, T, T #reduction finalize
+ #########################################################
+ vmovdqu 16*1(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 6
+ #########################################################
+ vmovdqu 16*0(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+ vpxor T,DATA,DATA # fold the reduced running hash into block 0
+
+ KARATSUBA_AAD 7
+ #########################################################
+ vpxor TMP0, TMP2, TMP2 # karatsuba fixup
+ vpxor TMP1, TMP2, TMP2
+ vpsrldq $8, TMP2, TMP3
+ vpslldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, Xhi
+ vpxor TMP2, TMP0, T
+
+ lea 16*8(inp), inp
+ jmp .Lmod_loop
+ #########################################################
+
+.Ldone:
+ # final reduction: fold the low half twice, then add the high half
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3
+ vpalignr $8, T, T, T
+ vpxor TMP3, T, T
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3
+ vpalignr $8, T, T, T
+ vpxor TMP3, T, T
+
+ vpxor Xhi, T, T
+
+.Lsave:
+ vpshufb BSWAP_MASK,T, T
+ vmovdqu T,(Tp)
+ vzeroupper
+
+ pop hlp0
+ ret
+.size intel_aes_gcmAAD,.-intel_aes_gcmAAD
+
+################################################################################
+# Encrypt and Authenticate
+# void intel_aes_gcmENC(uint8_t* PT, uint8_t* CT, void *Gctx,uint64_t len);
+.type intel_aes_gcmENC,@function
+.globl intel_aes_gcmENC
+.align 16
+intel_aes_gcmENC: # CTR-encrypt len bytes from PT into CT and fold the ciphertext into the hash kept in Gctx
+# Reads and writes back the hash at 272(Gctx) and the counter block at 288(Gctx); returns at once when len is 0.
+.set PT,%rdi
+.set CT,%rsi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+
+.set Gctx, %rdx # same register as Htbl: the table sits at offset 0 of the context
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+# ROUND i: one AES round with round key i on all eight counter blocks (clobbers TMP3).
+.macro ROUND i
+ vmovdqu \i*16(KS), TMP3
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+.endm
+# ROUNDMUL i: AES round i on the eight counter blocks, interleaved with multiplying stack slot i by table entry i.
+.macro ROUNDMUL i
+
+ vmovdqu \i*16(%rsp), TMP5 # saved ciphertext block from the previous pass
+ vmovdqu \i*16(KS), TMP3
+
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+
+ vpshufd $78, TMP5, TMP4
+ vpxor TMP5, TMP4, TMP4 # XOR of the two 64-bit halves, for the middle term
+
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP4, TMP3
+ vpxor TMP3, TMP0, TMP0 # TMP0 accumulates the middle terms
+ vmovdqa \i*16(Htbl), TMP4
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1 # TMP1 accumulates the high products
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2 # TMP2 accumulates the low products
+
+.endm
+# KARATSUBA i: multiply stack slot i by table entry i and accumulate into TMP0 (middle), TMP1 (high), TMP2 (low).
+.macro KARATSUBA i
+ vmovdqu \i*16(%rsp), TMP5
+
+ vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+.endm
+
+ test len, len
+ jnz .Lbegin
+ ret
+
+.Lbegin:
+
+ vzeroupper
+ push %rbp
+ push %rbx # saved and restored; rbx is not referenced by the code in this function
+
+ movq %rsp, %rbp # rbp keeps the unaligned stack pointer for the epilogue
+ sub $128, %rsp # scratch area of eight 16-byte slots
+ andq $-16, %rsp # 16-byte aligned so vmovdqa can be used on the slots
+
+ vmovdqu 288(Gctx), CTR
+ vmovdqu 272(Gctx), T
+ mov 304(Gctx), KS # AES context pointer; the round keys are read from its start
+# AESContext->Nr
+ mov 244(KS), NR
+
+ vpshufb .Lbswap_mask(%rip), CTR, CTR # byte-reverse so vpaddd can step the counter
+ vpshufb .Lbswap_mask(%rip), T, T
+
+ cmp $128, len
+ jb .LDataSingles
+
+# Encrypt the first eight blocks
+ sub $128, len
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR # CTR = first counter of the next group
+
+ vpshufb .Lbswap_mask(%rip), CTR0, CTR0
+ vpshufb .Lbswap_mask(%rip), CTR1, CTR1
+ vpshufb .Lbswap_mask(%rip), CTR2, CTR2
+ vpshufb .Lbswap_mask(%rip), CTR3, CTR3
+ vpshufb .Lbswap_mask(%rip), CTR4, CTR4
+ vpshufb .Lbswap_mask(%rip), CTR5, CTR5
+ vpshufb .Lbswap_mask(%rip), CTR6, CTR6
+ vpshufb .Lbswap_mask(%rip), CTR7, CTR7
+
+ vpxor (KS), CTR0, CTR0 # initial AddRoundKey with round key 0
+ vpxor (KS), CTR1, CTR1
+ vpxor (KS), CTR2, CTR2
+ vpxor (KS), CTR3, CTR3
+ vpxor (KS), CTR4, CTR4
+ vpxor (KS), CTR5, CTR5
+ vpxor (KS), CTR6, CTR6
+ vpxor (KS), CTR7, CTR7
+
+ ROUND 1
+ ROUND 2
+ ROUND 3
+ ROUND 4
+ ROUND 5
+ ROUND 6
+ ROUND 7
+ ROUND 8
+ ROUND 9
+
+ vmovdqu 160(KS), TMP5 # round key 10: the last one when NR < 12
+ cmp $12, NR
+ jb .LLast1
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5 # round key 12: the last one when NR < 14
+ cmp $14, NR
+ jb .LLast1
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5 # round key 14
+
+.LLast1:
+# The plaintext is XORed into the last round key, so vaesenclast yields the ciphertext directly.
+ vpxor (PT), TMP5, TMP3
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+ vmovdqu .Lbswap_mask(%rip), TMP3 # TMP3 stays the byte-reverse mask until the counters of the next pass are built
+# Store each ciphertext block, then keep a byte-reversed copy in the register for hashing in the next pass.
+ vmovdqu CTR0, (CT)
+ vpshufb TMP3, CTR0, CTR0
+ vmovdqu CTR1, 16(CT)
+ vpshufb TMP3, CTR1, CTR1
+ vmovdqu CTR2, 32(CT)
+ vpshufb TMP3, CTR2, CTR2
+ vmovdqu CTR3, 48(CT)
+ vpshufb TMP3, CTR3, CTR3
+ vmovdqu CTR4, 64(CT)
+ vpshufb TMP3, CTR4, CTR4
+ vmovdqu CTR5, 80(CT)
+ vpshufb TMP3, CTR5, CTR5
+ vmovdqu CTR6, 96(CT)
+ vpshufb TMP3, CTR6, CTR6
+ vmovdqu CTR7, 112(CT)
+ vpshufb TMP3, CTR7, CTR7
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDataOctets
+
+# Encrypt 8 blocks each time while hashing previous 8 blocks
+.align 64
+.LDataOctets:
+ cmp $128, len
+ jb .LEndOctets
+ sub $128, len
+# Park the previous eight ciphertext blocks: the newest in TMP5, the others in stack slots 1..7 (the oldest in slot 7).
+ vmovdqa CTR7, TMP5
+ vmovdqa CTR6, 1*16(%rsp)
+ vmovdqa CTR5, 2*16(%rsp)
+ vmovdqa CTR4, 3*16(%rsp)
+ vmovdqa CTR3, 4*16(%rsp)
+ vmovdqa CTR2, 5*16(%rsp)
+ vmovdqa CTR1, 6*16(%rsp)
+ vmovdqa CTR0, 7*16(%rsp)
+
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR
+
+ vmovdqu (KS), TMP4
+ vpshufb TMP3, CTR0, CTR0 # TMP3 still holds .Lbswap_mask from the end of the previous pass
+ vpxor TMP4, CTR0, CTR0
+ vpshufb TMP3, CTR1, CTR1
+ vpxor TMP4, CTR1, CTR1
+ vpshufb TMP3, CTR2, CTR2
+ vpxor TMP4, CTR2, CTR2
+ vpshufb TMP3, CTR3, CTR3
+ vpxor TMP4, CTR3, CTR3
+ vpshufb TMP3, CTR4, CTR4
+ vpxor TMP4, CTR4, CTR4
+ vpshufb TMP3, CTR5, CTR5
+ vpxor TMP4, CTR5, CTR5
+ vpshufb TMP3, CTR6, CTR6
+ vpxor TMP4, CTR6, CTR6
+ vpshufb TMP3, CTR7, CTR7
+ vpxor TMP4, CTR7, CTR7
+# Start the hash of the previous eight blocks: newest block times table entry 0 initialises TMP0/TMP1/TMP2.
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0
+
+ ROUNDMUL 1
+
+ ROUNDMUL 2
+
+ ROUNDMUL 3
+
+ ROUNDMUL 4
+
+ ROUNDMUL 5
+
+ ROUNDMUL 6
+# Round 7 is written out by hand because the oldest block (slot 7) must first be XORed with the running hash T.
+ vpxor 7*16(%rsp), T, TMP5
+ vmovdqu 7*16(KS), TMP3
+
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+
+ vpshufd $78, TMP5, TMP4
+ vpxor TMP5, TMP4, TMP4
+
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+
+ vpclmulqdq $0x11, 7*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, 7*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpclmulqdq $0x00, 128+7*16(Htbl), TMP4, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ ROUND 8
+ vmovdqa .Lpoly(%rip), TMP5
+# Combine the partial products: TMP4 = high 128 bits, T = low 128 bits.
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T
+# first folding step of the low half
+ vpclmulqdq $0x10, TMP5, T, TMP1
+ vpalignr $8, T, T, T
+ vpxor T, TMP1, T
+
+ ROUND 9
+# second folding step; the high half (TMP4) is added after the ciphertext stores below
+ vpclmulqdq $0x10, TMP5, T, TMP1
+ vpalignr $8, T, T, T
+ vpxor T, TMP1, T
+
+ vmovdqu 160(KS), TMP5
+ cmp $10, NR
+ jbe .LLast2
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5
+ cmp $12, NR
+ jbe .LLast2
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5
+
+.LLast2:
+
+ vpxor (PT), TMP5, TMP3
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+ vmovdqu .Lbswap_mask(%rip), TMP3 # reload the mask for the stores below and for the next pass
+
+ vmovdqu CTR0, (CT)
+ vpshufb TMP3, CTR0, CTR0
+ vmovdqu CTR1, 16(CT)
+ vpshufb TMP3, CTR1, CTR1
+ vmovdqu CTR2, 32(CT)
+ vpshufb TMP3, CTR2, CTR2
+ vmovdqu CTR3, 48(CT)
+ vpshufb TMP3, CTR3, CTR3
+ vmovdqu CTR4, 64(CT)
+ vpshufb TMP3, CTR4, CTR4
+ vmovdqu CTR5, 80(CT)
+ vpshufb TMP3, CTR5, CTR5
+ vmovdqu CTR6, 96(CT)
+ vpshufb TMP3, CTR6, CTR6
+ vmovdqu CTR7,112(CT)
+ vpshufb TMP3, CTR7, CTR7
+
+ vpxor TMP4, T, T # finish the reduction by adding the high half
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDataOctets
+
+.LEndOctets: # hash the last group of eight ciphertext blocks, with no further encryption interleaved
+
+ vmovdqa CTR7, TMP5
+ vmovdqa CTR6, 1*16(%rsp)
+ vmovdqa CTR5, 2*16(%rsp)
+ vmovdqa CTR4, 3*16(%rsp)
+ vmovdqa CTR3, 4*16(%rsp)
+ vmovdqa CTR2, 5*16(%rsp)
+ vmovdqa CTR1, 6*16(%rsp)
+ vmovdqa CTR0, 7*16(%rsp)
+
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0
+
+ KARATSUBA 1
+ KARATSUBA 2
+ KARATSUBA 3
+ KARATSUBA 4
+ KARATSUBA 5
+ KARATSUBA 6
+
+ vmovdqu 7*16(%rsp), TMP5
+ vpxor T, TMP5, TMP5 # the running hash is folded into the oldest block only
+ vmovdqu 16*7(Htbl), TMP4
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+7*16(Htbl), TMP4
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0
+
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T
+
+ vmovdqa .Lpoly(%rip), TMP2
+
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ vpxor TMP4, T, T
+
+#Here we encrypt any remaining whole block
+.LDataSingles:
+
+ cmp $16, len
+ jb .LDataTail
+ sub $16, len
+
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1 # counter block in memory byte order
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LLast3
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LLast3
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LLast3:
+ vaesenclast TMP2, TMP1, TMP1
+
+ vpxor (PT), TMP1, TMP1
+ vmovdqu TMP1, (CT)
+ addq $16, CT
+ addq $16, PT
+
+ vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+ vpxor TMP1, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL # defined outside this view; by its use here it presumably returns T * TMP0 in T
+
+ jmp .LDataSingles
+
+#Here we encrypt the final partial block, if there is one
+.LDataTail:
+
+ test len, len
+ jz DATA_END
+# First prepare the counter block
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LLast4
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LLast4
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LLast4:
+ vaesenclast TMP2, TMP1, TMP1
+#Zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa TMP2, (%rsp)
+
+# Copy the required bytes only (could probably use rep movsb)
+ xor KS, KS # the round keys are no longer needed: KS becomes the byte index
+.LEncCpy:
+ cmp KS, len
+ je .LEncCpyEnd
+ movb (PT, KS, 1), %r8b
+ movb %r8b, (%rsp, KS, 1)
+ inc KS
+ jmp .LEncCpy
+.LEncCpyEnd:
+# Xor with the counter block
+ vpxor (%rsp), TMP1, TMP0
+# Again, store at temp location
+ vmovdqa TMP0, (%rsp)
+# Copy only the required bytes to CT, and zero the rest for the hash
+ xor KS, KS
+.LEncCpy2:
+ cmp KS, len
+ je .LEncCpy3
+ movb (%rsp, KS, 1), %r8b
+ movb %r8b, (CT, KS, 1)
+ inc KS
+ jmp .LEncCpy2
+.LEncCpy3:
+ cmp $16, KS
+ je .LEndCpy3
+ movb $0, (%rsp, KS, 1) # bytes past len hold keystream; clear them so only ciphertext is hashed
+ inc KS
+ jmp .LEncCpy3
+.LEndCpy3:
+ vmovdqa (%rsp), TMP0
+
+ vpshufb .Lbswap_mask(%rip), TMP0, TMP0
+ vpxor TMP0, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+
+DATA_END:
+# Write the hash and the advanced counter back to the context in memory byte order.
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vmovdqu T, 272(Gctx)
+ vmovdqu CTR, 288(Gctx)
+
+ movq %rbp, %rsp
+
+ popq %rbx
+ popq %rbp
+ ret
+ .size intel_aes_gcmENC, .-intel_aes_gcmENC
+
+#########################
+# Decrypt and Authenticate
+# void intel_aes_gcmDEC(const uint8_t* CT, uint8_t* PT, void *Gctx, uint64_t len);
+.type intel_aes_gcmDEC,@function
+.globl intel_aes_gcmDEC
+.align 16
+intel_aes_gcmDEC:
+# parameter 1: CT # input
+# parameter 2: PT # output
+# parameter 3: %rdx # Gctx
+# parameter 4: %rcx # len
+#
+# Overview: the running hash T and the counter CTR are loaded from Gctx,
+# the input is consumed in three phases (8-block octets, single 16-byte
+# blocks, one final partial block), and T/CTR are written back to Gctx.
+# In every phase the hash is taken over the ciphertext read from CT.
+
+# DEC_KARATSUBA i: load ciphertext block (7-i) of the current octet,
+# byte-swap it, and accumulate its three carry-less products with the
+# table entries 16*i(Htbl) and 128+16*i(Htbl) into TMP1 (0x11 product),
+# TMP2 (0x00 product) and TMP0 (product of the xor-ed halves).
+# Clobbers TMP3 and TMP5.
+.macro DEC_KARATSUBA i
+ vmovdqu (7-\i)*16(CT), TMP5
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+
+ vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+.endm
+
+# Register map. Note CT is the first argument (%rdi) and PT the second
+# (%rsi); Htbl and Gctx are both %rdx, i.e. the table is addressed from
+# offset 0 of Gctx.
+.set PT,%rsi
+.set CT,%rdi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+
+.set Gctx, %rdx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+
+# Nothing to do for len == 0: return before anything is pushed.
+ test len, len
+ jnz .LbeginDec
+ ret
+
+.LbeginDec:
+
+# Prologue: save %rbp/%rbx, remember the stack pointer in %rbp and carve
+# out a 16-byte aligned scratch area (needed by the vmovdqa accesses to
+# (%rsp) in the tail code).
+ pushq %rbp
+ pushq %rbx
+ movq %rsp, %rbp
+ sub $128, %rsp
+ andq $-16, %rsp
+# Load counter (Gctx+288), hash state (Gctx+272) and the pointer stored
+# at Gctx+304, from which the round keys and Nr are read.
+ vmovdqu 288(Gctx), CTR
+ vmovdqu 272(Gctx), T
+ mov 304(Gctx), KS
+# AESContext->Nr
+ mov 244(KS), NR
+
+# CTR and T are kept byte-swapped while processing; they are swapped back
+# before being stored at .LDEC_END.
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vpshufb .Lbswap_mask(%rip), T, T
+
+# TMP3 holds the byte-swap mask on entry to every octet iteration.
+ vmovdqu .Lbswap_mask(%rip), TMP3
+ jmp .LDECOctets
+
+# Decrypt 8 blocks each time while hashing them at the same time
+.align 64
+.LDECOctets:
+
+ cmp $128, len
+ jb .LDECSingles
+ sub $128, len
+
+# Derive eight consecutive counter values CTR0..CTR7 and advance CTR by 8.
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR
+
+# Byte-swap the counter blocks (TMP3 = swap mask).
+ vpshufb TMP3, CTR0, CTR0
+ vpshufb TMP3, CTR1, CTR1
+ vpshufb TMP3, CTR2, CTR2
+ vpshufb TMP3, CTR3, CTR3
+ vpshufb TMP3, CTR4, CTR4
+ vpshufb TMP3, CTR5, CTR5
+ vpshufb TMP3, CTR6, CTR6
+ vpshufb TMP3, CTR7, CTR7
+
+# Initial AddRoundKey: xor the key at offset 0 of KS into all eight blocks
+# (this overwrites the swap mask in TMP3).
+ vmovdqu (KS), TMP3
+ vpxor TMP3, CTR0, CTR0
+ vpxor TMP3, CTR1, CTR1
+ vpxor TMP3, CTR2, CTR2
+ vpxor TMP3, CTR3, CTR3
+ vpxor TMP3, CTR4, CTR4
+ vpxor TMP3, CTR5, CTR5
+ vpxor TMP3, CTR6, CTR6
+ vpxor TMP3, CTR7, CTR7
+
+# Hash step for the last block of the octet (offset 7*16) against table
+# entry 0; this initialises the accumulators TMP1, TMP2 and TMP0.
+ vmovdqu 7*16(CT), TMP5
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0
+
+# AES rounds interleaved with the remaining hash multiplications.
+# ROUND is defined earlier in this file, outside this function; presumably
+# it applies one round key to CTR0..CTR7 - confirm against its definition.
+ ROUND 1
+ DEC_KARATSUBA 1
+
+ ROUND 2
+ DEC_KARATSUBA 2
+
+ ROUND 3
+ DEC_KARATSUBA 3
+
+ ROUND 4
+ DEC_KARATSUBA 4
+
+ ROUND 5
+ DEC_KARATSUBA 5
+
+ ROUND 6
+ DEC_KARATSUBA 6
+
+ ROUND 7
+
+# First block of the octet: the running hash T is xor-ed in before the
+# multiplication with table entry 7.
+ vmovdqu 0*16(CT), TMP5
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+ vpxor T, TMP5, TMP5
+ vmovdqu 16*7(Htbl), TMP4
+
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+7*16(Htbl), TMP4
+
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ ROUND 8
+
+# Combine the three accumulators: the middle term (TMP0 ^ TMP1 ^ TMP2) is
+# split and folded into the high part (kept in TMP4) and the low part (T).
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0
+
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T
+ vmovdqa .Lpoly(%rip), TMP2
+
+# Two reduction steps with the constant from .Lpoly, split around ROUND 9.
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ ROUND 9
+
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+# Select the final round key into TMP5 according to NR: offset 160 for
+# NR <= 10, offset 192 for NR <= 12, otherwise offset 224.
+ vmovdqu 160(KS), TMP5
+ cmp $10, NR
+
+ jbe .LDECLast1
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5
+ cmp $12, NR
+
+ jbe .LDECLast1
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5
+
+.LDECLast1:
+
+# The ciphertext is xor-ed into the last round key first, so vaesenclast
+# yields keystream ^ ciphertext (the plaintext) directly.
+ vpxor (CT), TMP5, TMP3
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+# Restore the swap mask in TMP3 for the next iteration.
+ vmovdqu .Lbswap_mask(%rip), TMP3
+
+ vmovdqu CTR0, (PT)
+ vmovdqu CTR1, 16(PT)
+ vmovdqu CTR2, 32(PT)
+ vmovdqu CTR3, 48(PT)
+ vmovdqu CTR4, 64(PT)
+ vmovdqu CTR5, 80(PT)
+ vmovdqu CTR6, 96(PT)
+ vmovdqu CTR7,112(PT)
+
+# Fold the saved high part (TMP4) into the reduced value to finish T.
+ vpxor TMP4, T, T
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDECOctets
+
+#Here we decrypt and hash any remaining whole block
+.LDECSingles:
+
+ cmp $16, len
+ jb .LDECTail
+ sub $16, len
+
+# Hash the ciphertext block: T = GFMUL(T ^ bswap(block), entry at (Htbl)).
+ vmovdqu (CT), TMP1
+ vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+ vpxor TMP1, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+
+
+# Encrypt one counter block into TMP1 and advance CTR by one.
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LDECLast2
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LDECLast2
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+.LDECLast2:
+ vaesenclast TMP2, TMP1, TMP1
+
+# plaintext = keystream ^ ciphertext
+ vpxor (CT), TMP1, TMP1
+ vmovdqu TMP1, (PT)
+ addq $16, CT
+ addq $16, PT
+ jmp .LDECSingles
+
+#Here we decrypt the final partial block, if there is one
+.LDECTail:
+ test len, len
+ jz .LDEC_END
+
+# Encrypt one more counter block into TMP1 and advance CTR by one.
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LDECLast3
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LDECLast3
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LDECLast3:
+ vaesenclast TMP2, TMP1, TMP1
+
+# Zero the 16-byte scratch slot at (%rsp).
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa TMP2, (%rsp)
+# Copy the required bytes only (could probably use rep movsb)
+# From here on KS is reused as a byte index; the key pointer is no longer
+# needed in this function.
+ xor KS, KS
+.LDecCpy:
+ cmp KS, len
+ je .LDecCpy2
+ movb (CT, KS, 1), %r8b
+ movb %r8b, (%rsp, KS, 1)
+ inc KS
+ jmp .LDecCpy
+# Zero bytes len..15 of the scratch slot (they are already zero from the
+# store above, so this only re-establishes the padding).
+.LDecCpy2:
+ cmp $16, KS
+ je .LDecCpyEnd
+ movb $0, (%rsp, KS, 1)
+ inc KS
+ jmp .LDecCpy2
+.LDecCpyEnd:
+# Xor with the counter block
+# TMP0 keeps the zero-padded ciphertext; it is what gets hashed below.
+ vmovdqa (%rsp), TMP0
+ vpxor TMP0, TMP1, TMP1
+# Again, store at temp location
+ vmovdqa TMP1, (%rsp)
+# Copy only the required bytes to PT, and zero the rest for the hash
+ xor KS, KS
+.LDecCpy3:
+ cmp KS, len
+ je .LDecCpyEnd3
+ movb (%rsp, KS, 1), %r8b
+ movb %r8b, (PT, KS, 1)
+ inc KS
+ jmp .LDecCpy3
+.LDecCpyEnd3:
+# Hash the zero-padded ciphertext block held in TMP0.
+ vpshufb .Lbswap_mask(%rip), TMP0, TMP0
+ vpxor TMP0, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+.LDEC_END:
+
+# Swap T and CTR back and store them in the context for the next call.
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vmovdqu T, 272(Gctx)
+ vmovdqu CTR, 288(Gctx)
+
+# Epilogue: drop the scratch area and restore the saved registers.
+ movq %rbp, %rsp
+
+ popq %rbx
+ popq %rbp
+ ret
+ .size intel_aes_gcmDEC, .-intel_aes_gcmDEC
+#########################
+# a = T
+# b = TMP0 - remains unchanged
+# res = T
+# uses also TMP1,TMP2,TMP3,TMP4
+# __m128i GFMUL(__m128i A, __m128i B);
+#
+# Carry-less multiply of T by TMP0 followed by a reduction with the
+# constant at .Lpoly. Three vpclmulqdq products are formed (low halves,
+# high halves, and the xor-ed halves of each operand) and recombined,
+# i.e. a Karatsuba-style 128x128 multiplication. Operands are taken from
+# and returned in fixed registers as listed above, so callers must use the
+# same .set register map.
+.type GFMUL,@function
+.globl GFMUL
+GFMUL:
+# TMP1 = low*low product, TMP4 = high*high product
+ vpclmulqdq $0x00, TMP0, T, TMP1
+ vpclmulqdq $0x11, TMP0, T, TMP4
+
+# vpshufd $78 swaps the two 64-bit halves; xor with the original leaves
+# (high ^ low) of each operand in the low half of TMP2 / TMP3.
+ vpshufd $78, T, TMP2
+ vpshufd $78, TMP0, TMP3
+ vpxor T, TMP2, TMP2
+ vpxor TMP0, TMP3, TMP3
+
+# Middle term = (a_hi^a_lo)*(b_hi^b_lo) ^ low product ^ high product
+ vpclmulqdq $0x00, TMP3, TMP2, TMP2
+ vpxor TMP1, TMP2, TMP2
+ vpxor TMP4, TMP2, TMP2
+
+# Split the middle term and fold it into the low (TMP1) and high (TMP4)
+# halves of the 256-bit product.
+ vpslldq $8, TMP2, TMP3
+ vpsrldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, TMP1
+ vpxor TMP2, TMP4, TMP4
+
+# Two identical reduction steps on the low half using .Lpoly.
+ vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2
+ vpshufd $78, TMP1, TMP3
+ vpxor TMP3, TMP2, TMP1
+
+ vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2
+ vpshufd $78, TMP1, TMP3
+ vpxor TMP3, TMP2, TMP1
+
+# Result = reduced low half ^ high half
+ vpxor TMP4, TMP1, T
+ ret
+.size GFMUL, .-GFMUL
+
diff --git a/security/nss/lib/freebl/jpake.c b/security/nss/lib/freebl/jpake.c
new file mode 100644
index 0000000000..741c7a8760
--- /dev/null
+++ b/security/nss/lib/freebl/jpake.c
@@ -0,0 +1,495 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secerr.h"
+#include "secitem.h"
+#include "secmpi.h"
+
+/* Feed one SECItem into the running hash as a two-byte length (most
+ * significant byte first) followed by the item's bytes. This framing is
+ * meant to match the OpenSSL J-PAKE implementation.
+ *
+ * Returns MP_BADARG, without touching the hash, when the item is 2^16
+ * bytes or longer; MP_OKAY otherwise.
+ */
+static mp_err
+hashSECItem(HASHContext *hash, const SECItem *it)
+{
+    unsigned char lenPrefix[2];
+
+    if (it->len > 0xffff) {
+        return MP_BADARG;
+    }
+
+    /* network byte order: high byte at index 0 */
+    lenPrefix[1] = (unsigned char)(it->len);
+    lenPrefix[0] = (unsigned char)(it->len >> 8);
+
+    hash->hashobj->update(hash->hash_context, lenPrefix, sizeof lenPrefix);
+    hash->hashobj->update(hash->hash_context, it->data, it->len);
+    return MP_OKAY;
+}
+
+/* Compute the Schnorr challenge h = H(g, gv, gx, signerID). Each input is
+ * fed to the hash through hashSECItem (length-prefixed) and the resulting
+ * digest is read into the mp_int h.
+ *
+ * Returns MP_BADARG when hashType has no raw hash object or its digest does
+ * not fit the local buffer, MP_MEM when no hash context can be created,
+ * otherwise the error of the first failing step or MP_OKAY.
+ */
+static mp_err
+hashPublicParams(HASH_HashType hashType, const SECItem *g,
+                 const SECItem *gv, const SECItem *gx,
+                 const SECItem *signerID, mp_int *h)
+{
+    mp_err err;
+    unsigned char digest[HASH_LENGTH_MAX];
+    SECItem digestItem;
+    HASHContext ctx;
+
+    ctx.hashobj = HASH_GetRawHashObject(hashType);
+    if (ctx.hashobj == NULL) {
+        return MP_BADARG;
+    }
+    if (ctx.hashobj->length > sizeof digest) {
+        return MP_BADARG;
+    }
+
+    ctx.hash_context = ctx.hashobj->create();
+    if (ctx.hash_context == NULL) {
+        return MP_MEM;
+    }
+
+    digestItem.len = ctx.hashobj->length;
+    digestItem.data = digest;
+
+    ctx.hashobj->begin(ctx.hash_context);
+    /* err and the cleanup label are used implicitly by these macros */
+    CHECK_MPI_OK(hashSECItem(&ctx, g));
+    CHECK_MPI_OK(hashSECItem(&ctx, gv));
+    CHECK_MPI_OK(hashSECItem(&ctx, gx));
+    CHECK_MPI_OK(hashSECItem(&ctx, signerID));
+    ctx.hashobj->end(ctx.hash_context, digestItem.data, &digestItem.len,
+                     sizeof digest);
+    SECITEM_TO_MPINT(digestItem, h);
+
+cleanup:
+    if (ctx.hash_context != NULL) {
+        ctx.hashobj->destroy(ctx.hash_context, PR_TRUE);
+    }
+
+    return err;
+}
+
+/* Generate a Schnorr signature for round 1 or round 2.
+ *
+ * pqg supplies p, q and g; x is the secret exponent being proven and
+ * signerID is hashed into the challenge. When testRandom is non-NULL it is
+ * used as the nonce v instead of calling DSA_NewRandom.
+ *
+ * Exactly one of gxIn / gxOut is used: with gxIn == NULL, g^x mod p is
+ * computed and stored in gxOut (which must arrive with data == NULL);
+ * otherwise gxIn is taken as g^x and gxOut must be NULL.
+ *
+ * Outputs gv and r must arrive with data == NULL and are filled through
+ * MPINT_TO_SECITEM using the arena passed in.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for rejected arguments,
+ * SECFailure when the random generation or an MPI step fails, and
+ * SECSuccess otherwise.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *x,
+ const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r)
+{
+ SECStatus rv = SECSuccess;
+ mp_err err;
+ mp_int p;
+ mp_int q;
+ mp_int g;
+ mp_int X;
+ mp_int GX;
+ mp_int V;
+ mp_int GV;
+ mp_int h;
+ mp_int tmp;
+ mp_int R;
+ SECItem v;
+
+ if (!arena ||
+ !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+ !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+ !pqg->base.data || pqg->base.len == 0 ||
+ !signerID || !signerID->data || signerID->len == 0 ||
+ !x || !x->data || x->len == 0 ||
+ (testRandom && (!testRandom->data || testRandom->len == 0)) ||
+ (gxIn == NULL && (!gxOut || gxOut->data != NULL)) ||
+ (gxIn != NULL && (!gxIn->data || gxIn->len == 0 || gxOut != NULL)) ||
+ !gv || gv->data != NULL ||
+ !r || r->data != NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ /* Zero the digit pointers first so the mp_clear calls at cleanup are
+ * applied to well-defined values even if an early mp_init fails. */
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_DIGITS(&X) = 0;
+ MP_DIGITS(&GX) = 0;
+ MP_DIGITS(&V) = 0;
+ MP_DIGITS(&GV) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&tmp) = 0;
+ MP_DIGITS(&R) = 0;
+
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&g));
+ CHECK_MPI_OK(mp_init(&X));
+ CHECK_MPI_OK(mp_init(&GX));
+ CHECK_MPI_OK(mp_init(&V));
+ CHECK_MPI_OK(mp_init(&GV));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&tmp));
+ CHECK_MPI_OK(mp_init(&R));
+
+ SECITEM_TO_MPINT(pqg->prime, &p);
+ SECITEM_TO_MPINT(pqg->subPrime, &q);
+ SECITEM_TO_MPINT(pqg->base, &g);
+ SECITEM_TO_MPINT(*x, &X);
+
+ /* gx = g^x */
+ if (gxIn == NULL) {
+ CHECK_MPI_OK(mp_exptmod(&g, &X, &p, &GX));
+ MPINT_TO_SECITEM(&GX, gxOut, arena);
+ /* from here on gxIn refers to the value that gets hashed */
+ gxIn = gxOut;
+ } else {
+ SECITEM_TO_MPINT(*gxIn, &GX);
+ }
+
+ /* v is a random value in the q subgroup */
+ if (testRandom == NULL) {
+ v.data = NULL;
+ rv = DSA_NewRandom(arena, &pqg->subPrime, &v);
+ if (rv != SECSuccess) {
+ goto cleanup;
+ }
+ } else {
+ v.data = testRandom->data;
+ v.len = testRandom->len;
+ }
+ SECITEM_TO_MPINT(v, &V);
+
+ /* gv = g^v (mod p), random v, 1 <= v < q */
+ CHECK_MPI_OK(mp_exptmod(&g, &V, &p, &GV));
+ MPINT_TO_SECITEM(&GV, gv, arena);
+
+ /* h = H(g, gv, gx, signerID) */
+ CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gxIn, signerID,
+ &h));
+
+ /* r = v - x*h (mod q) */
+ CHECK_MPI_OK(mp_mulmod(&X, &h, &q, &tmp));
+ CHECK_MPI_OK(mp_submod(&V, &tmp, &q, &R));
+ MPINT_TO_SECITEM(&R, r, arena);
+
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&g);
+ mp_clear(&X);
+ mp_clear(&GX);
+ mp_clear(&V);
+ mp_clear(&GV);
+ mp_clear(&h);
+ mp_clear(&tmp);
+ mp_clear(&R);
+
+ /* rv is already SECFailure when DSA_NewRandom failed; an MPI error is
+ * only translated when no other failure has been recorded. */
+ if (rv == SECSuccess && err != MP_OKAY) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/* Verify a Schnorr signature generated by the peer in round 1 or round 2.
+ *
+ * pqg supplies p, q and g; gx is the peer's public value, (gv, r) is the
+ * signature, and peerID is the identity hashed into the challenge.
+ * signerID is only used to reject the call when it equals peerID.
+ *
+ * The signature is accepted when gx passes the range and subgroup checks,
+ * r < q, and g^r * gx^h mod p encodes to the same bytes as gv, where
+ * h = H(g, gv, gx, peerID).
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for rejected arguments,
+ * SECFailure with SEC_ERROR_BAD_SIGNATURE when verification fails,
+ * SECFailure when an MPI step fails, and SECSuccess otherwise.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *peerID,
+ const SECItem *gx, const SECItem *gv, const SECItem *r)
+{
+ SECStatus rv = SECSuccess;
+ mp_err err;
+ mp_int p;
+ mp_int q;
+ mp_int g;
+ mp_int p_minus_1;
+ mp_int GX;
+ mp_int h;
+ mp_int one;
+ mp_int R;
+ mp_int gr;
+ mp_int gxh;
+ mp_int gr_gxh;
+ SECItem calculated;
+
+ if (!arena ||
+ !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+ !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+ !pqg->base.data || pqg->base.len == 0 ||
+ !signerID || !signerID->data || signerID->len == 0 ||
+ !peerID || !peerID->data || peerID->len == 0 ||
+ !gx || !gx->data || gx->len == 0 ||
+ !gv || !gv->data || gv->len == 0 ||
+ !r || !r->data || r->len == 0 ||
+ SECITEM_CompareItem(signerID, peerID) == SECEqual) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ /* Zero the digit pointers first so the mp_clear calls at cleanup are
+ * applied to well-defined values even if an early mp_init fails. */
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_DIGITS(&p_minus_1) = 0;
+ MP_DIGITS(&GX) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&one) = 0;
+ MP_DIGITS(&R) = 0;
+ MP_DIGITS(&gr) = 0;
+ MP_DIGITS(&gxh) = 0;
+ MP_DIGITS(&gr_gxh) = 0;
+ calculated.data = NULL;
+
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&g));
+ CHECK_MPI_OK(mp_init(&p_minus_1));
+ CHECK_MPI_OK(mp_init(&GX));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&one));
+ CHECK_MPI_OK(mp_init(&R));
+ CHECK_MPI_OK(mp_init(&gr));
+ CHECK_MPI_OK(mp_init(&gxh));
+ CHECK_MPI_OK(mp_init(&gr_gxh));
+
+ SECITEM_TO_MPINT(pqg->prime, &p);
+ SECITEM_TO_MPINT(pqg->subPrime, &q);
+ SECITEM_TO_MPINT(pqg->base, &g);
+ SECITEM_TO_MPINT(*gx, &GX);
+ SECITEM_TO_MPINT(*r, &R);
+
+ CHECK_MPI_OK(mp_sub_d(&p, 1, &p_minus_1));
+ /* "one" holds gx^q mod p, which must equal 1 for gx to be accepted */
+ CHECK_MPI_OK(mp_exptmod(&GX, &q, &p, &one));
+ /* Check g^x is in [1, p-2], R is in [0, q-1], and (g^x)^q mod p == 1 */
+ if (!(mp_cmp_z(&GX) > 0 &&
+ mp_cmp(&GX, &p_minus_1) < 0 &&
+ mp_cmp(&R, &q) < 0 &&
+ mp_cmp_d(&one, 1) == 0)) {
+ goto badSig;
+ }
+
+ /* h = H(g, gv, gx, peerID) */
+ CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gx, peerID,
+ &h));
+
+ /* Calculate g^v = g^r * g^x^h */
+ CHECK_MPI_OK(mp_exptmod(&g, &R, &p, &gr));
+ CHECK_MPI_OK(mp_exptmod(&GX, &h, &p, &gxh));
+ CHECK_MPI_OK(mp_mulmod(&gr, &gxh, &p, &gr_gxh));
+
+ /* Compare calculated g^v to given g^v */
+ MPINT_TO_SECITEM(&gr_gxh, &calculated, arena);
+ if (calculated.len == gv->len &&
+ NSS_SecureMemcmp(calculated.data, gv->data, calculated.len) == 0) {
+ rv = SECSuccess;
+ } else {
+ /* badSig lives inside the else body so the range-check failure above
+ * and a value mismatch share the same error reporting. */
+ badSig:
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure;
+ }
+
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&g);
+ mp_clear(&p_minus_1);
+ mp_clear(&GX);
+ mp_clear(&h);
+ mp_clear(&one);
+ mp_clear(&R);
+ mp_clear(&gr);
+ mp_clear(&gxh);
+ mp_clear(&gr_gxh);
+
+ /* A bad signature has already set rv; only translate MPI errors when
+ * no other failure has been recorded. */
+ if (rv == SECSuccess && err != MP_OKAY) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/* Calculate base = gx1*gx3*gx4 (mod p), i.e. g^(x1+x3+x4) (mod p).
+ *
+ * gx1, gx3, gx4: octet-string encodings of g^x1, g^x3 and g^x4, read with
+ *                SECITEM_TO_MPINT.
+ * p:             the modulus.
+ * base:          receives the product reduced mod p.
+ *
+ * Returns MP_OKAY on success, MP_BADARG when gx3 and gx4 are equal, or the
+ * error of the first failing MPI call. Every exit after the temporaries
+ * have been set up goes through cleanup so they are always released.
+ */
+static mp_err
+jpake_Round2Base(const SECItem *gx1, const SECItem *gx3,
+                 const SECItem *gx4, const mp_int *p, mp_int *base)
+{
+    mp_err err;
+    mp_int GX1;
+    mp_int GX3;
+    mp_int GX4;
+    mp_int tmp;
+
+    /* Zero the digit pointers so mp_clear at cleanup is applied to
+     * well-defined values even if an early mp_init fails. */
+    MP_DIGITS(&GX1) = 0;
+    MP_DIGITS(&GX3) = 0;
+    MP_DIGITS(&GX4) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    CHECK_MPI_OK(mp_init(&GX1));
+    CHECK_MPI_OK(mp_init(&GX3));
+    CHECK_MPI_OK(mp_init(&GX4));
+    CHECK_MPI_OK(mp_init(&tmp));
+
+    SECITEM_TO_MPINT(*gx1, &GX1);
+    SECITEM_TO_MPINT(*gx3, &GX3);
+    SECITEM_TO_MPINT(*gx4, &GX4);
+
+    /* In round 2, the peer/attacker sends us g^x3 and g^x4 and the protocol
+       requires that these values are distinct. */
+    if (mp_cmp(&GX3, &GX4) == 0) {
+        /* Go through cleanup instead of returning directly: a plain
+         * return here would leak the four initialised mp_ints. */
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_mul(&GX1, &GX3, &tmp));
+    CHECK_MPI_OK(mp_mul(&tmp, &GX4, &tmp));
+    CHECK_MPI_OK(mp_mod(&tmp, p, base));
+
+cleanup:
+    mp_clear(&GX1);
+    mp_clear(&GX3);
+    mp_clear(&GX4);
+    mp_clear(&tmp);
+    return err;
+}
+
+/* J-PAKE round 2 helper.
+ *
+ * Always computes base = gx1*gx3*gx4 (mod p) via jpake_Round2Base. When
+ * x2s is non-NULL it additionally computes x2s = x2*s (mod q), after
+ * checking that s lies in [1, q-1]; in that case x2 and s are required.
+ * Output items (base, x2s) must arrive with data == NULL and are filled
+ * through MPINT_TO_SECITEM using the arena passed in.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for rejected arguments,
+ * SECFailure with the translated MPI error on a computation failure, and
+ * SECSuccess otherwise.
+ */
+SECStatus
+JPAKE_Round2(PLArenaPool *arena,
+             const SECItem *p, const SECItem *q, const SECItem *gx1,
+             const SECItem *gx3, const SECItem *gx4, SECItem *base,
+             const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+    mp_err err;
+    mp_int modulus;
+    mp_int order;
+    mp_int secretX2;
+    mp_int secretS;
+    mp_int out;
+    SECStatus status = SECSuccess;
+    int argsOk;
+
+    argsOk = arena &&
+             p && p->data && p->len != 0 &&
+             q && q->data && q->len != 0 &&
+             gx1 && gx1->data && gx1->len != 0 &&
+             gx3 && gx3->data && gx3->len != 0 &&
+             gx4 && gx4->data && gx4->len != 0 &&
+             base && base->data == NULL &&
+             (x2s == NULL || (x2s->data == NULL &&
+                              x2 && x2->data && x2->len != 0 &&
+                              s && s->data && s->len != 0));
+    if (!argsOk) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Make every mp_int safe to hand to mp_clear before anything can fail */
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&order) = 0;
+    MP_DIGITS(&secretX2) = 0;
+    MP_DIGITS(&secretS) = 0;
+    MP_DIGITS(&out) = 0;
+
+    /* err and the cleanup label are used implicitly by the macros below */
+    CHECK_MPI_OK(mp_init(&modulus));
+    CHECK_MPI_OK(mp_init(&order));
+    CHECK_MPI_OK(mp_init(&out));
+
+    if (x2s != NULL) {
+        CHECK_MPI_OK(mp_init(&secretX2));
+        CHECK_MPI_OK(mp_init(&secretS));
+
+        SECITEM_TO_MPINT(*q, &order);
+        SECITEM_TO_MPINT(*x2, &secretX2);
+        SECITEM_TO_MPINT(*s, &secretS);
+
+        /* s must be in [1, q-1] */
+        if (!(mp_cmp_z(&secretS) > 0 && mp_cmp(&secretS, &order) < 0)) {
+            err = MP_BADARG;
+            goto cleanup;
+        }
+
+        CHECK_MPI_OK(mp_mulmod(&secretX2, &secretS, &order, &out));
+        MPINT_TO_SECITEM(&out, x2s, arena);
+    }
+
+    SECITEM_TO_MPINT(*p, &modulus);
+    CHECK_MPI_OK(jpake_Round2Base(gx1, gx3, gx4, &modulus, &out));
+    MPINT_TO_SECITEM(&out, base, arena);
+
+cleanup:
+    mp_clear(&modulus);
+    mp_clear(&order);
+    mp_clear(&secretX2);
+    mp_clear(&secretS);
+    mp_clear(&out);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        status = SECFailure;
+    }
+    return status;
+}
+
+/* J-PAKE final step: K = (B * gx4^(q - x2s))^x2 (mod p).
+ *
+ * Because q == 0 (mod q), raising gx4 to (q - x2s) divides by gx4^x2s
+ * without an explicit modular inverse. K must arrive with data == NULL
+ * and is filled through MPINT_TO_SECITEM using the arena passed in.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for rejected arguments,
+ * SECFailure with the translated MPI error on a computation failure, and
+ * SECSuccess otherwise.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    mp_err err;
+    mp_int modulus;
+    mp_int order;
+    mp_int scratch;
+    mp_int power;
+    mp_int inverse;
+    mp_int keyBase;
+    SECStatus status = SECSuccess;
+    int argsOk;
+
+    argsOk = arena &&
+             p && p->data && p->len != 0 &&
+             q && q->data && q->len != 0 &&
+             x2 && x2->data && x2->len != 0 &&
+             gx4 && gx4->data && gx4->len != 0 &&
+             x2s && x2s->data && x2s->len != 0 &&
+             B && B->data && B->len != 0 &&
+             K && K->data == NULL;
+    if (!argsOk) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Make every mp_int safe to hand to mp_clear before anything can fail */
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&order) = 0;
+    MP_DIGITS(&scratch) = 0;
+    MP_DIGITS(&power) = 0;
+    MP_DIGITS(&inverse) = 0;
+    MP_DIGITS(&keyBase) = 0;
+
+    /* err and the cleanup label are used implicitly by the macros below */
+    CHECK_MPI_OK(mp_init(&modulus));
+    CHECK_MPI_OK(mp_init(&order));
+    CHECK_MPI_OK(mp_init(&scratch));
+    CHECK_MPI_OK(mp_init(&power));
+    CHECK_MPI_OK(mp_init(&inverse));
+    CHECK_MPI_OK(mp_init(&keyBase));
+
+    /* power = q - x2s, which is -x2s (mod q) */
+    SECITEM_TO_MPINT(*q, &order);
+    SECITEM_TO_MPINT(*x2s, &scratch);
+    CHECK_MPI_OK(mp_sub(&order, &scratch, &power));
+
+    /* inverse = gx4^(-x2s) = 1/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*p, &modulus);
+    SECITEM_TO_MPINT(*gx4, &scratch);
+    CHECK_MPI_OK(mp_exptmod(&scratch, &power, &modulus, &inverse));
+
+    /* keyBase = B * inverse = B/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*B, &scratch);
+    CHECK_MPI_OK(mp_mulmod(&inverse, &scratch, &modulus, &keyBase));
+
+    /* scratch = keyBase^x2 (mod p); power is reused to hold x2 */
+    SECITEM_TO_MPINT(*x2, &power);
+    CHECK_MPI_OK(mp_exptmod(&keyBase, &power, &modulus, &scratch));
+
+    MPINT_TO_SECITEM(&scratch, K, arena);
+
+cleanup:
+    mp_clear(&modulus);
+    mp_clear(&order);
+    mp_clear(&scratch);
+    mp_clear(&power);
+    mp_clear(&inverse);
+    mp_clear(&keyBase);
+
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        status = SECFailure;
+    }
+    return status;
+}
diff --git a/security/nss/lib/freebl/kyber-pqcrystals-ref.c b/security/nss/lib/freebl/kyber-pqcrystals-ref.c
new file mode 100644
index 0000000000..883de299d8
--- /dev/null
+++ b/security/nss/lib/freebl/kyber-pqcrystals-ref.c
@@ -0,0 +1,2693 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * This file was generated from
+ * https://github.com/pq-crystals/kyber/commit/e0d1c6ff
+ *
+ * Files from that repository are listed here surrounded by
+ * "* begin: [file] *" and "* end: [file] *" comments.
+ *
+ * The following changes have been made:
+ * - include guards have been removed,
+ * - include directives have been removed,
+ * - "#ifdef KYBER90S" blocks have been evaluated with "KYBER90S" undefined,
+ * - functions outside of kem.c have been made static.
+*/
+
+/** begin: ref/LICENSE **
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
+** end: ref/LICENSE **/
+
+/** begin: ref/AUTHORS **
+Joppe Bos,
+Léo Ducas,
+Eike Kiltz,
+Tancrède Lepoint,
+Vadim Lyubashevsky,
+John Schanck,
+Peter Schwabe,
+Gregor Seiler,
+Damien Stehlé
+** end: ref/AUTHORS **/
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+
+// We need to provide an implementation of randombytes to avoid an unused
+// function warning. We don't use the randomized API in freebl, so we'll make
+// calling randombytes an error.
+//
+// out/outlen: buffer that the caller expects to be filled with random bytes.
+// The buffer is zero-filled and then assert(0) fires.
+// NOTE(review): assert is presumably compiled out when NDEBUG is defined, in
+// which case a caller would silently receive an all-zero buffer - confirm
+// that no code path can reach this function in such builds.
+static void
+randombytes(uint8_t *out, size_t outlen)
+{
+ // this memset is to avoid "maybe-uninitialized" warnings that gcc-11 issues
+ // for the (unused) crypto_kem_keypair and crypto_kem_enc functions.
+ memset(out, 0, outlen);
+ assert(0);
+}
+
+/*************************************************
+* Name: verify
+*
+* Description: Compare two arrays for equality in constant time.
+* Delegates to NSS_SecureMemcmp.
+*
+* Arguments: const uint8_t *a: pointer to first byte array
+* const uint8_t *b: pointer to second byte array
+* size_t len: length of the byte arrays
+*
+* Returns 0 if the byte arrays are equal, 1 otherwise
+*
+* NOTE(review): the value returned is whatever NSS_SecureMemcmp returns;
+* whether a mismatch is reported as exactly 1 or as some other non-zero
+* value depends on that function - confirm before passing the result to
+* code that requires a value in {0,1}.
+**************************************************/
+static int
+verify(const uint8_t *a, const uint8_t *b, size_t len)
+{
+ return NSS_SecureMemcmp(a, b, len);
+}
+
+/*************************************************
+* Name: cmov
+*
+* Description: Copy len bytes from x to r if b is 1;
+* don't modify r if b is 0. Requires b to be in {0,1};
+* assumes two's complement representation of negative integers.
+* Runs in constant time.
+* Delegates to NSS_SecureSelect with r as both the
+* destination and the first source, and x as the second.
+*
+* Arguments: uint8_t *r: pointer to output byte array
+* const uint8_t *x: pointer to input byte array
+* size_t len: Amount of bytes to be copied
+* uint8_t b: Condition bit; has to be in {0,1}
+**************************************************/
+static void
+cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
+{
+ /* presumably NSS_SecureSelect writes (b ? x : r) into r - confirm */
+ NSS_SecureSelect(r, r, x, len, b);
+}
+
+/** begin: ref/params.h **/
+/* KYBER_K selects the parameter set; it defaults to 3 unless the build
+ * defines it beforehand. */
+#ifndef KYBER_K
+#define KYBER_K 3 /* Change this for different security strengths */
+#endif
+
+//#define KYBER_90S /* Uncomment this if you want the 90S variant */
+
+/* Don't change parameters below this line */
+/* KYBER_NAMESPACE prefixes every internal symbol with the parameter set
+ * name (kyber512 / kyber768 / kyber1024 for KYBER_K = 2 / 3 / 4). */
+#if (KYBER_K == 2)
+#define KYBER_NAMESPACE(s) pqcrystals_kyber512_ref_##s
+#elif (KYBER_K == 3)
+#define KYBER_NAMESPACE(s) pqcrystals_kyber768_ref_##s
+#elif (KYBER_K == 4)
+#define KYBER_NAMESPACE(s) pqcrystals_kyber1024_ref_##s
+#else
+#error "KYBER_K must be in {2,3,4}"
+#endif
+
+#define KYBER_N 256
+#define KYBER_Q 3329
+
+#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define KYBER_SSBYTES 32 /* size in bytes of shared key */
+
+#define KYBER_POLYBYTES 384
+#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES)
+
+/* Per-parameter-set noise parameter and compressed sizes */
+#if KYBER_K == 2
+#define KYBER_ETA1 3
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 3
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 128
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320)
+#elif KYBER_K == 4
+#define KYBER_ETA1 2
+#define KYBER_POLYCOMPRESSEDBYTES 160
+#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 352)
+#endif
+
+#define KYBER_ETA2 2
+
+/* Derived sizes. With the default KYBER_K == 3 these evaluate to:
+ * KYBER_POLYVECBYTES = 1152, KYBER_INDCPA_PUBLICKEYBYTES = 1184,
+ * KYBER_INDCPA_BYTES = 1088 and KYBER_SECRETKEYBYTES = 2400. */
+#define KYBER_INDCPA_MSGBYTES (KYBER_SYMBYTES)
+#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES)
+#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES)
+#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES)
+
+#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES)
+/* 32 bytes of additional space to save H(pk) */
+/* NOTE(review): the formula adds 2 * KYBER_SYMBYTES; the second 32 bytes
+ * presumably hold a further per-key value used by the KEM - confirm
+ * against the key generation code. */
+#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2 * KYBER_SYMBYTES)
+#define KYBER_CIPHERTEXTBYTES (KYBER_INDCPA_BYTES)
+/** end: ref/params.h **/
+
+/** begin: ref/reduce.h **/
+#define MONT -1044 // 2^16 mod q
+#define QINV -3327 // q^-1 mod 2^16
+
+#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce)
+static int16_t montgomery_reduce(int32_t a);
+
+#define barrett_reduce KYBER_NAMESPACE(barrett_reduce)
+static int16_t barrett_reduce(int16_t a);
+/** end: ref/reduce.h **/
+
+/** begin: ref/ntt.h **/
+#define zetas KYBER_NAMESPACE(zetas)
+extern const int16_t zetas[128];
+
+#define ntt KYBER_NAMESPACE(ntt)
+static void ntt(int16_t poly[256]);
+
+#define invntt KYBER_NAMESPACE(invntt)
+static void invntt(int16_t poly[256]);
+
+#define basemul KYBER_NAMESPACE(basemul)
+static void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta);
+/** end: ref/ntt.h **/
+
+/** begin: ref/poly.h **/
+/*
+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
+ * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
+ */
+typedef struct {
+ int16_t coeffs[KYBER_N];
+} poly;
+
+#define poly_compress KYBER_NAMESPACE(poly_compress)
+static void poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a);
+#define poly_decompress KYBER_NAMESPACE(poly_decompress)
+static void poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES]);
+
+#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
+static void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
+#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
+static void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
+
+#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
+static void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
+#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
+static void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
+
+#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
+static void poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
+static void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
+
+#define poly_ntt KYBER_NAMESPACE(poly_ntt)
+static void poly_ntt(poly *r);
+#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
+static void poly_invntt_tomont(poly *r);
+#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
+static void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
+#define poly_tomont KYBER_NAMESPACE(poly_tomont)
+static void poly_tomont(poly *r);
+
+#define poly_reduce KYBER_NAMESPACE(poly_reduce)
+static void poly_reduce(poly *r);
+
+#define poly_add KYBER_NAMESPACE(poly_add)
+static void poly_add(poly *r, const poly *a, const poly *b);
+#define poly_sub KYBER_NAMESPACE(poly_sub)
+static void poly_sub(poly *r, const poly *a, const poly *b);
+/** end: ref/poly.h **/
+
+/** begin: ref/cbd.h **/
+#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
+static void poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1 * KYBER_N / 4]);
+
+#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
+static void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2 * KYBER_N / 4]);
+/** end: ref/cbd.h **/
+
+/** begin: ref/polyvec.h **/
+typedef struct {
+ poly vec[KYBER_K];
+} polyvec;
+
+#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
+static void polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a);
+#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
+static void polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES]);
+
+#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
+static void polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a);
+#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
+static void polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES]);
+
+#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
+static void polyvec_ntt(polyvec *r);
+#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
+static void polyvec_invntt_tomont(polyvec *r);
+
+#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
+static void polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b);
+
+#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
+static void polyvec_reduce(polyvec *r);
+
+#define polyvec_add KYBER_NAMESPACE(polyvec_add)
+static void polyvec_add(polyvec *r, const polyvec *a, const polyvec *b);
+/** end: ref/polyvec.h **/
+
+/** begin: ref/indcpa.h **/
+#define gen_matrix KYBER_NAMESPACE(gen_matrix)
+static void gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed);
+
+#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
+static void indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
+static void indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES]);
+
+#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
+static void indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES]);
+/** end: ref/indcpa.h **/
+
+/** begin: ref/fips202.h **/
+#define SHAKE128_RATE 168 /* bytes; equals (1600 - 2*128) / 8 for a 1600-bit Keccak state */
+#define SHAKE256_RATE 136 /* bytes; equals (1600 - 2*256) / 8 */
+#define SHA3_256_RATE 136 /* bytes; equals (1600 - 2*256) / 8 */
+#define SHA3_512_RATE 72 /* bytes; equals (1600 - 2*512) / 8 */
+
+#define FIPS202_NAMESPACE(s) pqcrystals_kyber_fips202_ref_##s /* prefix applied to every fips202 symbol declared below */
+
+typedef struct {
+ uint64_t s[25]; /* 25 x 64 bits = 1600 bits of state */
+ unsigned int pos; /* NOTE(review): presumably the byte position inside the current rate block - confirm in fips202.c */
+} keccak_state;
+
+#define shake128_init FIPS202_NAMESPACE(shake128_init)
+void shake128_init(keccak_state *state);
+#define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
+void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
+#define shake128_finalize FIPS202_NAMESPACE(shake128_finalize)
+void shake128_finalize(keccak_state *state);
+#define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze)
+void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state); /* outlen is a byte count */
+#define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once)
+void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
+#define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
+void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); /* length is given in blocks, not bytes */
+
+#define shake256_init FIPS202_NAMESPACE(shake256_init)
+void shake256_init(keccak_state *state);
+#define shake256_absorb FIPS202_NAMESPACE(shake256_absorb)
+void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
+#define shake256_finalize FIPS202_NAMESPACE(shake256_finalize)
+void shake256_finalize(keccak_state *state);
+#define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze)
+void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state); /* outlen is a byte count */
+#define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once)
+void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
+#define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks)
+void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state); /* length is given in blocks, not bytes */
+
+#define shake128 FIPS202_NAMESPACE(shake128)
+void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); /* one-shot form: no caller-visible state */
+#define shake256 FIPS202_NAMESPACE(shake256)
+void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen); /* one-shot form: no caller-visible state */
+#define sha3_256 FIPS202_NAMESPACE(sha3_256)
+void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen); /* fixed 32-byte digest buffer */
+#define sha3_512 FIPS202_NAMESPACE(sha3_512)
+void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen); /* fixed 64-byte digest buffer */
+/** end: ref/fips202.h **/
+
+/** begin: ref/symmetric.h **/
+typedef keccak_state xof_state; /* the XOF state is the Keccak state itself */
+
+#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
+static void kyber_shake128_absorb(keccak_state *s,
+ const uint8_t seed[KYBER_SYMBYTES],
+ uint8_t x,
+ uint8_t y); /* forward declaration; reached through xof_absorb() below */
+
+#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf)
+static void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce); /* forward declaration; reached through prf() below */
+
+#define XOF_BLOCKBYTES SHAKE128_RATE /* one XOF block is one SHAKE128 rate block */
+
+#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) /* hash_h maps to SHA3-256 */
+#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) /* hash_g maps to SHA3-512 */
+#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y) /* XOF is SHAKE128-based */
+#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) /* output length counted in XOF_BLOCKBYTES blocks */
+#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE) /* PRF is SHAKE256-based */
+#define kdf(OUT, IN, INBYTES) shake256(OUT, KYBER_SSBYTES, IN, INBYTES) /* output length is fixed to KYBER_SSBYTES */
+/** end: ref/symmetric.h **/
+
+/** begin: ref/kem.h **/
+#define CRYPTO_SECRETKEYBYTES KYBER_SECRETKEYBYTES /* the CRYPTO_* sizes are aliases for the KYBER_* sizes */
+#define CRYPTO_PUBLICKEYBYTES KYBER_PUBLICKEYBYTES
+#define CRYPTO_CIPHERTEXTBYTES KYBER_CIPHERTEXTBYTES
+#define CRYPTO_BYTES KYBER_SSBYTES /* shared-secret length */
+
+#if (KYBER_K == 2) /* CRYPTO_ALGNAME stays undefined for any KYBER_K other than 2, 3 or 4 */
+#define CRYPTO_ALGNAME "Kyber512"
+#elif (KYBER_K == 3)
+#define CRYPTO_ALGNAME "Kyber768"
+#elif (KYBER_K == 4)
+#define CRYPTO_ALGNAME "Kyber1024"
+#endif
+
+#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand) /* exported symbol drops the crypto_kem_ prefix */
+int crypto_kem_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins); /* NOTE(review): presumably deterministic given coins - confirm in kem.c */
+
+#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
+int crypto_kem_keypair(uint8_t *pk, uint8_t *sk); /* pk and sk are writable buffers */
+
+#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
+int crypto_kem_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins); /* NOTE(review): presumably deterministic given coins - confirm in kem.c */
+
+#define crypto_kem_enc KYBER_NAMESPACE(enc)
+int crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk); /* ct and ss are writable, pk is read-only */
+
+#define crypto_kem_dec KYBER_NAMESPACE(dec)
+int crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk); /* ss is writable, ct and sk are read-only */
+/** end: ref/kem.h **/
+
+/** begin: ref/reduce.c **/
+/*************************************************
+* Name:        montgomery_reduce
+*
+* Description: Montgomery reduction with R = 2^16: turns a 32-bit value a
+*              into a 16-bit value congruent to a * R^-1 modulo q.
+*
+* Arguments:   - int32_t a: value to reduce;
+*                must lie in {-q2^15,...,q2^15-1}
+*
+* Returns:     value in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
+**************************************************/
+static int16_t
+montgomery_reduce(int32_t a)
+{
+    /* low 16 bits of a * QINV; the store truncates to int16_t */
+    const int16_t m = (int16_t)a * QINV;
+
+    /* subtract m*q and keep the upper half of the 32-bit result */
+    return (a - (int32_t)m * KYBER_Q) >> 16;
+}
+
+/*************************************************
+* Name:        barrett_reduce
+*
+* Description: Barrett reduction of a 16-bit value to the centered
+*              representative modulo q.
+*
+* Arguments:   - int16_t a: value to reduce
+*
+* Returns:     value in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
+**************************************************/
+static int16_t
+barrett_reduce(int16_t a)
+{
+    /* rounded 2^26 / q */
+    const int16_t approx_inv = ((1 << 26) + KYBER_Q / 2) / KYBER_Q;
+    int16_t multiple;
+
+    /* rounded quotient a / q, then scaled back up to a multiple of q */
+    multiple = ((int32_t)approx_inv * a + (1 << 25)) >> 26;
+    multiple *= KYBER_Q;
+
+    return a - multiple;
+}
+/** end: ref/reduce.c **/
+
+/** begin: ref/cbd.c **/
+/*************************************************
+* Name:        load32_littleendian
+*
+* Description: Assemble a 32-bit unsigned integer from 4 bytes,
+*              least significant byte first.
+*
+* Arguments:   - const uint8_t *x: pointer to the 4 input bytes
+*
+* Returns 32-bit unsigned integer read from x
+**************************************************/
+static uint32_t
+load32_littleendian(const uint8_t x[4])
+{
+    uint32_t word = 0;
+    unsigned int k;
+
+    /* byte k lands in bits 8k..8k+7 */
+    for (k = 0; k < 4; k++)
+        word |= (uint32_t)x[k] << (8 * k);
+
+    return word;
+}
+
+/*************************************************
+* Name:        load24_littleendian
+*
+* Description: Assemble a 32-bit unsigned integer from 3 bytes,
+*              least significant byte first.
+*              Only compiled when KYBER_ETA1 == 3 (needed for Kyber-512).
+*
+* Arguments:   - const uint8_t *x: pointer to the 3 input bytes
+*
+* Returns 32-bit unsigned integer read from x (top byte is zero)
+**************************************************/
+#if KYBER_ETA1 == 3
+static uint32_t
+load24_littleendian(const uint8_t x[3])
+{
+    uint32_t word = 0;
+    unsigned int k;
+
+    /* byte k lands in bits 8k..8k+7; bits 24..31 stay clear */
+    for (k = 0; k < 3; k++)
+        word |= (uint32_t)x[k] << (8 * k);
+
+    return word;
+}
+#endif
+
+/*************************************************
+* Name:        cbd2
+*
+* Description: Turn an array of uniformly random bytes into a polynomial
+*              whose coefficients follow a centered binomial
+*              distribution with parameter eta=2.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+**************************************************/
+static void
+cbd2(poly *r, const uint8_t buf[2 * KYBER_N / 4])
+{
+    unsigned int word, lane;
+
+    for (word = 0; word < KYBER_N / 8; word++) {
+        const uint32_t bits = load32_littleendian(buf + 4 * word);
+        /* every 2-bit field now holds the sum of one pair of input bits */
+        const uint32_t sums = (bits & 0x55555555) + ((bits >> 1) & 0x55555555);
+
+        /* one 32-bit word yields 8 coefficients, 4 bits of sums each */
+        for (lane = 0; lane < 8; lane++) {
+            const int16_t a = (sums >> (4 * lane + 0)) & 0x3;
+            const int16_t b = (sums >> (4 * lane + 2)) & 0x3;
+
+            r->coeffs[8 * word + lane] = a - b;
+        }
+    }
+}
+
+/*************************************************
+* Name:        cbd3
+*
+* Description: Turn an array of uniformly random bytes into a polynomial
+*              whose coefficients follow a centered binomial
+*              distribution with parameter eta=3.
+*              Only compiled when KYBER_ETA1 == 3 (needed for Kyber-512).
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *buf: pointer to input byte array
+**************************************************/
+#if KYBER_ETA1 == 3
+static void
+cbd3(poly *r, const uint8_t buf[3 * KYBER_N / 4])
+{
+    unsigned int group, lane;
+
+    for (group = 0; group < KYBER_N / 4; group++) {
+        const uint32_t bits = load24_littleendian(buf + 3 * group);
+        /* every 3-bit field now holds the sum of one triple of input bits */
+        const uint32_t sums = (bits & 0x00249249) +
+                              ((bits >> 1) & 0x00249249) +
+                              ((bits >> 2) & 0x00249249);
+
+        /* 24 input bits yield 4 coefficients, 6 bits of sums each */
+        for (lane = 0; lane < 4; lane++) {
+            const int16_t a = (sums >> (6 * lane + 0)) & 0x7;
+            const int16_t b = (sums >> (6 * lane + 3)) & 0x7;
+
+            r->coeffs[4 * group + lane] = a - b;
+        }
+    }
+}
+#endif
+
+/* Dispatch to the sampler matching KYBER_ETA1 (cbd2 or cbd3); any other
+ * value of KYBER_ETA1 is rejected at compile time. */
+static void
+poly_cbd_eta1(poly *r, const uint8_t buf[KYBER_ETA1 * KYBER_N / 4])
+{
+#if KYBER_ETA1 == 3
+    cbd3(r, buf);
+#elif KYBER_ETA1 == 2
+    cbd2(r, buf);
+#else
+#error "This implementation requires eta1 in {2,3}"
+#endif
+}
+
+/* Sample with parameter KYBER_ETA2; only eta2 = 2 is supported, so this
+ * always forwards to cbd2 and anything else is rejected at compile time. */
+static void
+poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2 * KYBER_N / 4])
+{
+#if KYBER_ETA2 != 2
+#error "This implementation requires eta2 = 2"
+#else
+    cbd2(r, buf);
+#endif
+}
+/** end: ref/cbd.c **/
+
+/** begin: ref/ntt.c **/
+/* Code to generate the zetas table used in the number-theoretic transform:
+
+#define KYBER_ROOT_OF_UNITY 17
+
+static const uint8_t tree[128] = {
+ 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
+ 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
+ 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
+ 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
+ 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
+ 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
+ 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
+ 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
+};
+
+static void init_ntt() {
+ unsigned int i;
+ int16_t tmp[128];
+
+ tmp[0] = MONT;
+ for(i=1;i<128;i++)
+ tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q);
+
+ for(i=0;i<128;i++) {
+ zetas[i] = tmp[tree[i]];
+ if(zetas[i] > KYBER_Q/2)
+ zetas[i] -= KYBER_Q;
+ if(zetas[i] < -KYBER_Q/2)
+ zetas[i] += KYBER_Q;
+ }
+}
+*/
+
+const int16_t zetas[128] = { /* precomputed table (see the commented-out init_ntt above); read by ntt(), invntt() and poly_basemul_montgomery() */
+ -1044, -758, -359, -1517, 1493, 1422, 287, 202,
+ -171, 622, 1577, 182, 962, -1202, -1474, 1468,
+ 573, -1325, 264, 383, -829, 1458, -1602, -130,
+ -681, 1017, 732, 608, -1542, 411, -205, -1571,
+ 1223, 652, -552, 1015, -1293, 1491, -282, -1544,
+ 516, -8, -320, -666, -1618, -1162, 126, 1469,
+ -853, -90, -271, 830, 107, -1421, -247, -951,
+ -398, 961, -1508, -725, 448, -1065, 677, -1275,
+ -1103, 430, 555, 843, -1251, 871, 1550, 105,
+ 422, 587, 177, -235, -291, -460, 1574, 1653,
+ -246, 778, 1159, -147, -777, 1483, -602, 1119,
+ -1590, 644, -872, 349, 418, 329, -156, -75,
+ 817, 1097, 603, 610, 1322, -1285, -1465, 384,
+ -1215, -136, 1218, -1335, -874, 220, -1187, -1659,
+ -1185, -1530, -1278, 794, -1510, -854, -870, 478,
+ -108, -308, 996, 991, 958, -1460, 1522, 1628
+};
+
+/*************************************************
+* Name:        fqmul
+*
+* Description: Multiply two 16-bit values and Montgomery-reduce the
+*              32-bit product.
+*
+* Arguments:   - int16_t a: first factor
+*              - int16_t b: second factor
+*
+* Returns 16-bit integer congruent to a*b*R^{-1} mod q
+**************************************************/
+static int16_t
+fqmul(int16_t a, int16_t b)
+{
+    /* widen before multiplying so the full product is kept */
+    const int32_t product = (int32_t)a * b;
+
+    return montgomery_reduce(product);
+}
+
+/*************************************************
+* Name:        ntt
+*
+* Description: In-place number-theoretic transform (NTT) in Rq.
+*              Input is in standard order, output is in bitreversed order.
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+static void
+ntt(int16_t r[256])
+{
+    unsigned int span, base, idx;
+    unsigned int zi = 1; /* next entry of zetas[] to consume */
+
+    /* seven layers: butterfly distance 128, 64, ..., 2 */
+    for (span = 128; span >= 2; span >>= 1) {
+        /* each group covers 2*span consecutive coefficients */
+        for (base = 0; base < 256; base += 2 * span) {
+            const int16_t zeta = zetas[zi++];
+
+            for (idx = base; idx < base + span; idx++) {
+                const int16_t t = fqmul(zeta, r[idx + span]);
+
+                r[idx + span] = r[idx] - t;
+                r[idx] = r[idx] + t;
+            }
+        }
+    }
+}
+
+/*************************************************
+* Name:        invntt
+*
+* Description: In-place inverse number-theoretic transform in Rq,
+*              followed by multiplication by Montgomery factor 2^16.
+*              Input is in bitreversed order, output is in standard order.
+*              (Called by poly_invntt_tomont.)
+*
+* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
+**************************************************/
+static void
+invntt(int16_t r[256])
+{
+    const int16_t f = 1441; // mont^2/128
+    unsigned int span, base, idx;
+    unsigned int zi = 127; /* zetas[] is walked backwards */
+
+    /* seven layers: butterfly distance 2, 4, ..., 128 */
+    for (span = 2; span <= 128; span <<= 1) {
+        for (base = 0; base < 256; base += 2 * span) {
+            const int16_t zeta = zetas[zi--];
+
+            for (idx = base; idx < base + span; idx++) {
+                const int16_t lo = r[idx];
+                const int16_t hi = r[idx + span];
+
+                r[idx] = barrett_reduce(lo + hi);
+                r[idx + span] = fqmul(zeta, (int16_t)(hi - lo));
+            }
+        }
+    }
+
+    /* final scaling of every coefficient by f */
+    for (idx = 0; idx < 256; idx++)
+        r[idx] = fqmul(r[idx], f);
+}
+
+/*************************************************
+* Name:        basemul
+*
+* Description: Product of two degree-1 polynomials in Zq[X]/(X^2-zeta);
+*              the building block for multiplying elements of Rq in
+*              NTT domain.
+*
+* Arguments:   - int16_t r[2]: pointer to the output polynomial
+*              - const int16_t a[2]: pointer to the first factor
+*              - const int16_t b[2]: pointer to the second factor
+*              - int16_t zeta: integer defining the reduction polynomial
+**************************************************/
+static void
+basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta)
+{
+    /* constant term: a1*b1*zeta + a0*b0 */
+    r[0] = fqmul(fqmul(a[1], b[1]), zeta);
+    r[0] += fqmul(a[0], b[0]);
+
+    /* linear term: a0*b1 + a1*b0 */
+    r[1] = fqmul(a[0], b[1]);
+    r[1] += fqmul(a[1], b[0]);
+}
+/** end: ref/ntt.c **/
+
+/** begin: ref/poly.c **/
+/*************************************************
+* Name: poly_compress
+*
+* Description: Compression and subsequent serialization of a polynomial
+* Each coefficient is mapped to a non-negative representative and
+* scaled down to 4 bits (128-byte output) or 5 bits (160-byte output);
+* eight coefficients are packed into 4 or 5 output bytes per iteration.
+* The division by KYBER_Q shown in the commented formulas is carried
+* out as a multiply-and-shift, so no division instruction operates
+* on coefficient data.
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (of length KYBER_POLYCOMPRESSEDBYTES)
+* - const poly *a: pointer to input polynomial
+**************************************************/
+static void
+poly_compress(uint8_t r[KYBER_POLYCOMPRESSEDBYTES], const poly *a)
+{
+ unsigned int i, j;
+ int16_t u;
+ uint32_t d0;
+ uint8_t t[8];
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+ for (i = 0; i < KYBER_N / 8; i++) {
+ for (j = 0; j < 8; j++) {
+ // map to positive standard representatives
+ u = a->coeffs[8 * i + j];
+ u += (u >> 15) & KYBER_Q; /* u >> 15 is the sign mask: adds q only when u is negative */
+ /* t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15; */
+ d0 = u << 4;
+ d0 += 1665; /* rounding offset (about q/2, assuming KYBER_Q == 3329) */
+ d0 *= 80635; /* about 2^28 / q, assuming KYBER_Q == 3329 */
+ d0 >>= 28;
+ t[j] = d0 & 0xf;
+ }
+
+ r[0] = t[0] | (t[1] << 4); /* two 4-bit values per byte */
+ r[1] = t[2] | (t[3] << 4);
+ r[2] = t[4] | (t[5] << 4);
+ r[3] = t[6] | (t[7] << 4);
+ r += 4;
+ }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+ for (i = 0; i < KYBER_N / 8; i++) {
+ for (j = 0; j < 8; j++) {
+ // map to positive standard representatives
+ u = a->coeffs[8 * i + j];
+ u += (u >> 15) & KYBER_Q; /* u >> 15 is the sign mask: adds q only when u is negative */
+ /* t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31; */
+ d0 = u << 5;
+ d0 += 1664; /* rounding offset (about q/2, assuming KYBER_Q == 3329) */
+ d0 *= 40318; /* about 2^27 / q, assuming KYBER_Q == 3329 */
+ d0 >>= 27;
+ t[j] = d0 & 0x1f;
+ }
+
+ r[0] = (t[0] >> 0) | (t[1] << 5); /* eight 5-bit values spread over five bytes */
+ r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7);
+ r[2] = (t[3] >> 1) | (t[4] << 4);
+ r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6);
+ r[4] = (t[6] >> 2) | (t[7] << 3);
+ r += 5;
+ }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name: poly_decompress
+*
+* Description: De-serialization and subsequent decompression of a polynomial;
+* approximate inverse of poly_compress
+* Unpacks 4-bit values (128-byte input) or 5-bit values (160-byte
+* input) and scales each one back up by KYBER_Q with rounding.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *a: pointer to input byte array
+* (of length KYBER_POLYCOMPRESSEDBYTES bytes)
+**************************************************/
+static void
+poly_decompress(poly *r, const uint8_t a[KYBER_POLYCOMPRESSEDBYTES])
+{
+ unsigned int i;
+
+#if (KYBER_POLYCOMPRESSEDBYTES == 128)
+ for (i = 0; i < KYBER_N / 2; i++) {
+ r->coeffs[2 * i + 0] = (((uint16_t)(a[0] & 15) * KYBER_Q) + 8) >> 4; /* low nibble; +8 rounds before the divide by 16 */
+ r->coeffs[2 * i + 1] = (((uint16_t)(a[0] >> 4) * KYBER_Q) + 8) >> 4; /* high nibble */
+ a += 1;
+ }
+#elif (KYBER_POLYCOMPRESSEDBYTES == 160)
+ unsigned int j;
+ uint8_t t[8];
+ for (i = 0; i < KYBER_N / 8; i++) {
+ t[0] = (a[0] >> 0); /* only the low 5 bits of each t[] are meaningful; they are masked below */
+ t[1] = (a[0] >> 5) | (a[1] << 3);
+ t[2] = (a[1] >> 2);
+ t[3] = (a[1] >> 7) | (a[2] << 1);
+ t[4] = (a[2] >> 4) | (a[3] << 4);
+ t[5] = (a[3] >> 1);
+ t[6] = (a[3] >> 6) | (a[4] << 2);
+ t[7] = (a[4] >> 3);
+ a += 5;
+
+ for (j = 0; j < 8; j++)
+ r->coeffs[8 * i + j] = ((uint32_t)(t[j] & 31) * KYBER_Q + 16) >> 5; /* +16 rounds before the divide by 32 */
+ }
+#else
+#error "KYBER_POLYCOMPRESSEDBYTES needs to be in {128, 160}"
+#endif
+}
+
+/*************************************************
+* Name:        poly_tobytes
+*
+* Description: Serialize a polynomial: every pair of coefficients is
+*              brought to a non-negative representative and packed
+*              as two 12-bit values into three bytes.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYBYTES bytes)
+*              - const poly *a: pointer to input polynomial
+**************************************************/
+static void
+poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
+{
+    unsigned int pair;
+
+    for (pair = 0; pair < KYBER_N / 2; pair++) {
+        uint8_t *out = r + 3 * pair;
+        uint16_t lo = a->coeffs[2 * pair];
+        uint16_t hi = a->coeffs[2 * pair + 1];
+
+        /* sign mask is all-ones for negative values, so q is added only then */
+        lo += ((int16_t)lo >> 15) & KYBER_Q;
+        hi += ((int16_t)hi >> 15) & KYBER_Q;
+
+        out[0] = (lo >> 0);
+        out[1] = (lo >> 8) | (hi << 4);
+        out[2] = (hi >> 4);
+    }
+}
+
+/*************************************************
+* Name:        poly_frombytes
+*
+* Description: De-serialize a polynomial; inverse of poly_tobytes.
+*              Every three input bytes yield two 12-bit coefficients.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of KYBER_POLYBYTES bytes)
+**************************************************/
+static void
+poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
+{
+    unsigned int pair;
+
+    for (pair = 0; pair < KYBER_N / 2; pair++) {
+        const uint8_t *in = a + 3 * pair;
+
+        /* the 0xFFF mask keeps exactly 12 bits per coefficient */
+        r->coeffs[2 * pair] = ((in[0] >> 0) | ((uint16_t)in[1] << 8)) & 0xFFF;
+        r->coeffs[2 * pair + 1] = ((in[1] >> 4) | ((uint16_t)in[2] << 4)) & 0xFFF;
+    }
+}
+
+/*************************************************
+* Name: poly_frommsg
+*
+* Description: Convert 32-byte message to polynomial
+* Each message bit becomes one coefficient: 0 for a clear bit,
+* (KYBER_Q + 1) / 2 for a set bit. The selection is written as a
+* mask-and-AND so the source contains no branch on message bits.
+* NOTE(review): upstream pq-crystals later replaced this masking with
+* a dedicated conditional-move helper after some compilers were
+* reported to turn it into a secret-dependent branch - confirm
+* whether this copy needs the same change.
+*
+* Arguments: - poly *r: pointer to output polynomial
+* - const uint8_t *msg: pointer to input message
+**************************************************/
+static void
+poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
+{
+ unsigned int i, j;
+ int16_t mask;
+
+#if (KYBER_INDCPA_MSGBYTES != KYBER_N / 8)
+#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!"
+#endif
+
+ for (i = 0; i < KYBER_N / 8; i++) {
+ for (j = 0; j < 8; j++) {
+ mask = -(int16_t)((msg[i] >> j) & 1); /* 0 for a clear bit, -1 (all ones) for a set bit */
+ r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2);
+ }
+ }
+}
+
+/*************************************************
+* Name: poly_tomsg
+*
+* Description: Convert polynomial to 32-byte message
+* Each coefficient is rounded to a single bit; eight bits are packed
+* per output byte, least significant bit first. The division by
+* KYBER_Q from the commented formula is carried out as a
+* multiply-and-shift.
+* NOTE(review): the sign-normalisation line is commented out, so
+* coefficients are presumably expected to be non-negative on entry -
+* confirm against callers.
+*
+* Arguments: - uint8_t *msg: pointer to output message
+* - const poly *a: pointer to input polynomial
+**************************************************/
+static void
+poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a)
+{
+ unsigned int i, j;
+ uint32_t t;
+
+ for (i = 0; i < KYBER_N / 8; i++) {
+ msg[i] = 0;
+ for (j = 0; j < 8; j++) {
+ t = a->coeffs[8 * i + j];
+ // t += ((int16_t)t >> 15) & KYBER_Q;
+ // t = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
+ t <<= 1;
+ t += 1665; /* rounding offset (about q/2, assuming KYBER_Q == 3329) */
+ t *= 80635; /* about 2^28 / q, assuming KYBER_Q == 3329 */
+ t >>= 28;
+ t &= 1;
+ msg[i] |= t << j;
+ }
+ }
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta1
+*
+* Description: Deterministically sample a polynomial from a seed and a
+*              nonce: the PRF output is fed to the centered binomial
+*              sampler with parameter KYBER_ETA1.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+static void
+poly_getnoise_eta1(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+    /* exactly as many PRF bytes as poly_cbd_eta1 consumes */
+    uint8_t prf_out[KYBER_ETA1 * KYBER_N / 4];
+
+    prf(prf_out, sizeof prf_out, seed, nonce);
+    poly_cbd_eta1(r, prf_out);
+}
+
+/*************************************************
+* Name:        poly_getnoise_eta2
+*
+* Description: Deterministically sample a polynomial from a seed and a
+*              nonce: the PRF output is fed to the centered binomial
+*              sampler with parameter KYBER_ETA2.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const uint8_t *seed: pointer to input seed
+*                                     (of length KYBER_SYMBYTES bytes)
+*              - uint8_t nonce: one-byte input nonce
+**************************************************/
+static void
+poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
+{
+    /* exactly as many PRF bytes as poly_cbd_eta2 consumes */
+    uint8_t prf_out[KYBER_ETA2 * KYBER_N / 4];
+
+    prf(prf_out, sizeof prf_out, seed, nonce);
+    poly_cbd_eta2(r, prf_out);
+}
+
+/*************************************************
+* Name: poly_ntt
+*
+* Description: Computes negacyclic number-theoretic transform (NTT) of
+* a polynomial in place;
+* inputs assumed to be in normal order, output in bitreversed order
+* The coefficients are passed through poly_reduce (Barrett reduction)
+* after the transform.
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+static void
+poly_ntt(poly *r)
+{
+ ntt(r->coeffs);
+ poly_reduce(r);
+}
+
+/*************************************************
+* Name: poly_invntt_tomont
+*
+* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+* of a polynomial in place;
+* inputs assumed to be in bitreversed order, output in normal order
+* Thin wrapper that forwards the coefficient array to invntt().
+*
+* Arguments: - poly *r: pointer to in/output polynomial
+**************************************************/
+static void
+poly_invntt_tomont(poly *r)
+{
+ invntt(r->coeffs);
+}
+
+/*************************************************
+* Name:        poly_basemul_montgomery
+*
+* Description: Multiply two polynomials in NTT domain, four coefficients
+*              at a time: two basemul calls per group, the second one
+*              with the negated zeta.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+static void
+poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
+{
+    unsigned int group;
+
+    for (group = 0; group < KYBER_N / 4; group++) {
+        const unsigned int off = 4 * group;
+        /* the upper half of zetas[] supplies one value per group */
+        const int16_t zeta = zetas[64 + group];
+
+        basemul(&r->coeffs[off], &a->coeffs[off], &b->coeffs[off], zeta);
+        basemul(&r->coeffs[off + 2], &a->coeffs[off + 2], &b->coeffs[off + 2], -zeta);
+    }
+}
+
+/*************************************************
+* Name:        poly_tomont
+*
+* Description: In-place conversion of every coefficient of a polynomial
+*              from normal domain to Montgomery domain.
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+static void
+poly_tomont(poly *r)
+{
+    /* 2^32 mod q: Montgomery-reducing c * f leaves c * 2^16 mod q */
+    const int16_t f = (1ULL << 32) % KYBER_Q;
+    int16_t *c = r->coeffs;
+    int16_t *const end = c + KYBER_N;
+
+    for (; c != end; c++)
+        *c = montgomery_reduce((int32_t)*c * f);
+}
+
+/*************************************************
+* Name:        poly_reduce
+*
+* Description: Barrett-reduce every coefficient of a polynomial in place
+*              (see barrett_reduce for the reduction itself).
+*
+* Arguments:   - poly *r: pointer to input/output polynomial
+**************************************************/
+static void
+poly_reduce(poly *r)
+{
+    int16_t *c = r->coeffs;
+    int16_t *const end = c + KYBER_N;
+
+    while (c != end) {
+        *c = barrett_reduce(*c);
+        c++;
+    }
+}
+
+/*************************************************
+* Name:        poly_add
+*
+* Description: Coefficient-wise sum of two polynomials;
+*              no modular reduction is performed.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+static void
+poly_add(poly *r, const poly *a, const poly *b)
+{
+    unsigned int idx = 0;
+
+    while (idx < KYBER_N) {
+        const int16_t sum = a->coeffs[idx] + b->coeffs[idx];
+
+        r->coeffs[idx] = sum;
+        idx++;
+    }
+}
+
+/*************************************************
+* Name:        poly_sub
+*
+* Description: Coefficient-wise difference a - b of two polynomials;
+*              no modular reduction is performed.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const poly *a: pointer to first input polynomial
+*              - const poly *b: pointer to second input polynomial
+**************************************************/
+static void
+poly_sub(poly *r, const poly *a, const poly *b)
+{
+    unsigned int idx = 0;
+
+    while (idx < KYBER_N) {
+        const int16_t diff = a->coeffs[idx] - b->coeffs[idx];
+
+        r->coeffs[idx] = diff;
+        idx++;
+    }
+}
+/** end: ref/poly.c **/
+
+/** begin: ref/polyvec.c **/
+/*************************************************
+* Name: polyvec_compress
+*
+* Description: Compress and serialize vector of polynomials
+* Each coefficient is mapped to a non-negative representative and
+* scaled down to 11 bits (352 bytes per polynomial: 8 coefficients
+* into 11 bytes) or 10 bits (320 bytes per polynomial: 4 coefficients
+* into 5 bytes). The division by KYBER_Q shown in the commented
+* formulas is carried out as a multiply-and-shift.
+*
+* Arguments: - uint8_t *r: pointer to output byte array
+* (needs space for KYBER_POLYVECCOMPRESSEDBYTES)
+* - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+static void
+polyvec_compress(uint8_t r[KYBER_POLYVECCOMPRESSEDBYTES], const polyvec *a)
+{
+ unsigned int i, j, k;
+ uint64_t d0; /* 64-bit so the shifted value times the multiplier below fits */
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+ uint16_t t[8];
+ for (i = 0; i < KYBER_K; i++) {
+ for (j = 0; j < KYBER_N / 8; j++) {
+ for (k = 0; k < 8; k++) {
+ t[k] = a->vec[i].coeffs[8 * j + k];
+ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; /* adds q only when the coefficient is negative */
+ /* t[k] = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff; */
+ d0 = t[k];
+ d0 <<= 11;
+ d0 += 1664; /* rounding offset (about q/2, assuming KYBER_Q == 3329) */
+ d0 *= 645084; /* about 2^31 / q, assuming KYBER_Q == 3329 */
+ d0 >>= 31;
+ t[k] = d0 & 0x7ff;
+ }
+
+ r[0] = (t[0] >> 0); /* eight 11-bit values spread over eleven bytes */
+ r[1] = (t[0] >> 8) | (t[1] << 3);
+ r[2] = (t[1] >> 5) | (t[2] << 6);
+ r[3] = (t[2] >> 2);
+ r[4] = (t[2] >> 10) | (t[3] << 1);
+ r[5] = (t[3] >> 7) | (t[4] << 4);
+ r[6] = (t[4] >> 4) | (t[5] << 7);
+ r[7] = (t[5] >> 1);
+ r[8] = (t[5] >> 9) | (t[6] << 2);
+ r[9] = (t[6] >> 6) | (t[7] << 5);
+ r[10] = (t[7] >> 3);
+ r += 11;
+ }
+ }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+ uint16_t t[4];
+ for (i = 0; i < KYBER_K; i++) {
+ for (j = 0; j < KYBER_N / 4; j++) {
+ for (k = 0; k < 4; k++) {
+ t[k] = a->vec[i].coeffs[4 * j + k];
+ t[k] += ((int16_t)t[k] >> 15) & KYBER_Q; /* adds q only when the coefficient is negative */
+ /* t[k] = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff; */
+ d0 = t[k];
+ d0 <<= 10;
+ d0 += 1665; /* rounding offset (about q/2, assuming KYBER_Q == 3329) */
+ d0 *= 1290167; /* about 2^32 / q, assuming KYBER_Q == 3329 */
+ d0 >>= 32;
+ t[k] = d0 & 0x3ff;
+ }
+
+ r[0] = (t[0] >> 0); /* four 10-bit values spread over five bytes */
+ r[1] = (t[0] >> 8) | (t[1] << 2);
+ r[2] = (t[1] >> 6) | (t[2] << 4);
+ r[3] = (t[2] >> 4) | (t[3] << 6);
+ r[4] = (t[3] >> 2);
+ r += 5;
+ }
+ }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name: polyvec_decompress
+*
+* Description: De-serialize and decompress vector of polynomials;
+* approximate inverse of polyvec_compress
+* Unpacks 11-bit values (eleven bytes -> 8 coefficients) or 10-bit
+* values (five bytes -> 4 coefficients) and scales each one back up
+* by KYBER_Q with rounding.
+*
+* Arguments: - polyvec *r: pointer to output vector of polynomials
+* - const uint8_t *a: pointer to input byte array
+* (of length KYBER_POLYVECCOMPRESSEDBYTES)
+**************************************************/
+static void
+polyvec_decompress(polyvec *r, const uint8_t a[KYBER_POLYVECCOMPRESSEDBYTES])
+{
+ unsigned int i, j, k;
+
+#if (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 352))
+ uint16_t t[8];
+ for (i = 0; i < KYBER_K; i++) {
+ for (j = 0; j < KYBER_N / 8; j++) {
+ t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8); /* only the low 11 bits of each t[] are meaningful; they are masked below */
+ t[1] = (a[1] >> 3) | ((uint16_t)a[2] << 5);
+ t[2] = (a[2] >> 6) | ((uint16_t)a[3] << 2) | ((uint16_t)a[4] << 10);
+ t[3] = (a[4] >> 1) | ((uint16_t)a[5] << 7);
+ t[4] = (a[5] >> 4) | ((uint16_t)a[6] << 4);
+ t[5] = (a[6] >> 7) | ((uint16_t)a[7] << 1) | ((uint16_t)a[8] << 9);
+ t[6] = (a[8] >> 2) | ((uint16_t)a[9] << 6);
+ t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3);
+ a += 11;
+
+ for (k = 0; k < 8; k++)
+ r->vec[i].coeffs[8 * j + k] = ((uint32_t)(t[k] & 0x7FF) * KYBER_Q + 1024) >> 11; /* +1024 rounds before the divide by 2048 */
+ }
+ }
+#elif (KYBER_POLYVECCOMPRESSEDBYTES == (KYBER_K * 320))
+ uint16_t t[4];
+ for (i = 0; i < KYBER_K; i++) {
+ for (j = 0; j < KYBER_N / 4; j++) {
+ t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8); /* only the low 10 bits of each t[] are meaningful; they are masked below */
+ t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6);
+ t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4);
+ t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2);
+ a += 5;
+
+ for (k = 0; k < 4; k++)
+ r->vec[i].coeffs[4 * j + k] = ((uint32_t)(t[k] & 0x3FF) * KYBER_Q + 512) >> 10; /* +512 rounds before the divide by 1024 */
+ }
+ }
+#else
+#error "KYBER_POLYVECCOMPRESSEDBYTES needs to be in {320*KYBER_K, 352*KYBER_K}"
+#endif
+}
+
+/*************************************************
+* Name:        polyvec_tobytes
+*
+* Description: Serialize a vector of polynomials: the KYBER_K polynomials
+*              are written back to back, KYBER_POLYBYTES bytes each.
+*
+* Arguments:   - uint8_t *r: pointer to output byte array
+*                            (needs space for KYBER_POLYVECBYTES)
+*              - const polyvec *a: pointer to input vector of polynomials
+**************************************************/
+static void
+polyvec_tobytes(uint8_t r[KYBER_POLYVECBYTES], const polyvec *a)
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_tobytes(&r[row * KYBER_POLYBYTES], &a->vec[row]);
+        row++;
+    }
+}
+
+/*************************************************
+* Name:        polyvec_frombytes
+*
+* Description: De-serialize a vector of polynomials;
+*              inverse of polyvec_tobytes.
+*
+* Arguments:   - polyvec *r: pointer to output vector of polynomials
+*              - const uint8_t *a: pointer to input byte array
+*                                  (of length KYBER_POLYVECBYTES)
+**************************************************/
+static void
+polyvec_frombytes(polyvec *r, const uint8_t a[KYBER_POLYVECBYTES])
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_frombytes(&r->vec[row], &a[row * KYBER_POLYBYTES]);
+        row++;
+    }
+}
+
+/*************************************************
+* Name:        polyvec_ntt
+*
+* Description: Run poly_ntt (forward NTT) on every polynomial of a vector.
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+static void
+polyvec_ntt(polyvec *r)
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_ntt(&r->vec[row]);
+        row++;
+    }
+}
+
+/*************************************************
+* Name:        polyvec_invntt_tomont
+*
+* Description: Run poly_invntt_tomont (inverse NTT and multiplication by
+*              Montgomery factor 2^16) on every polynomial of a vector.
+*
+* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
+**************************************************/
+static void
+polyvec_invntt_tomont(polyvec *r)
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_invntt_tomont(&r->vec[row]);
+        row++;
+    }
+}
+
+/*************************************************
+* Name:        polyvec_basemul_acc_montgomery
+*
+* Description: Multiply the elements of a and b pairwise in NTT domain,
+*              accumulate the products into r, and multiply by 2^-16.
+*              The sum is Barrett-reduced once at the end.
+*
+* Arguments:   - poly *r: pointer to output polynomial
+*              - const polyvec *a: pointer to first input vector of polynomials
+*              - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+static void
+polyvec_basemul_acc_montgomery(poly *r, const polyvec *a, const polyvec *b)
+{
+    poly term;
+    unsigned int row;
+
+    /* the first product goes straight into r, the rest are added on top */
+    poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]);
+
+    row = 1;
+    while (row < KYBER_K) {
+        poly_basemul_montgomery(&term, &a->vec[row], &b->vec[row]);
+        poly_add(r, r, &term);
+        row++;
+    }
+
+    poly_reduce(r);
+}
+
+/*************************************************
+* Name:        polyvec_reduce
+*
+* Description: Run poly_reduce (Barrett reduction of each coefficient)
+*              on every polynomial of a vector.
+*
+* Arguments:   - polyvec *r: pointer to input/output vector of polynomials
+**************************************************/
+static void
+polyvec_reduce(polyvec *r)
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_reduce(&r->vec[row]);
+        row++;
+    }
+}
+
+/*************************************************
+* Name:        polyvec_add
+*
+* Description: Add two vectors of polynomials element by element
+*              using poly_add (which performs no modular reduction).
+*
+* Arguments:   - polyvec *r: pointer to output vector of polynomials
+*              - const polyvec *a: pointer to first input vector of polynomials
+*              - const polyvec *b: pointer to second input vector of polynomials
+**************************************************/
+static void
+polyvec_add(polyvec *r, const polyvec *a, const polyvec *b)
+{
+    unsigned int row = 0;
+
+    while (row < KYBER_K) {
+        poly_add(&r->vec[row], &a->vec[row], &b->vec[row]);
+        row++;
+    }
+}
+/** end: ref/polyvec.c **/
+
+/** begin: ref/indcpa.c **/
+/*************************************************
+* Name:        pack_pk
+*
+* Description: Serialize the public key: the serialized vector of
+*              polynomials pk comes first, followed by the public seed
+*              used to generate the matrix A.
+*
+* Arguments:   uint8_t *r: pointer to the output serialized public key
+*              polyvec *pk: pointer to the input public-key polyvec
+*              const uint8_t *seed: pointer to the input public seed
+**************************************************/
+static void
+pack_pk(uint8_t r[KYBER_INDCPA_PUBLICKEYBYTES],
+        polyvec *pk,
+        const uint8_t seed[KYBER_SYMBYTES])
+{
+    /* the seed sits right behind the KYBER_POLYVECBYTES of vector data */
+    uint8_t *seed_out = r + KYBER_POLYVECBYTES;
+    size_t n;
+
+    polyvec_tobytes(r, pk);
+    for (n = 0; n < KYBER_SYMBYTES; n++)
+        seed_out[n] = seed[n];
+}
+
+/*************************************************
+* Name:        unpack_pk
+*
+* Description: De-serialize a public key from a byte array;
+*              approximate inverse of pack_pk.
+*
+* Arguments:   - polyvec *pk: pointer to output public-key polynomial vector
+*              - uint8_t *seed: pointer to output seed to generate matrix A
+*              - const uint8_t *packedpk: pointer to input serialized public key
+**************************************************/
+static void
+unpack_pk(polyvec *pk,
+          uint8_t seed[KYBER_SYMBYTES],
+          const uint8_t packedpk[KYBER_INDCPA_PUBLICKEYBYTES])
+{
+    /* the seed sits right behind the KYBER_POLYVECBYTES of vector data */
+    const uint8_t *seed_in = packedpk + KYBER_POLYVECBYTES;
+    size_t n;
+
+    polyvec_frombytes(pk, packedpk);
+    for (n = 0; n < KYBER_SYMBYTES; n++)
+        seed[n] = seed_in[n];
+}
+
+/*************************************************
+* Name: pack_sk
+*
+* Description: Serialize the secret key
+*
+* Arguments: - uint8_t *r: pointer to output serialized secret key
+* - polyvec *sk: pointer to input vector of polynomials (secret key)
+**************************************************/
+static void
+pack_sk(uint8_t r[KYBER_INDCPA_SECRETKEYBYTES], polyvec *sk)
+{
+ polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+* Name: unpack_sk
+*
+* Description: De-serialize the secret key; inverse of pack_sk
+*
+* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key)
+* - const uint8_t *packedsk: pointer to input serialized secret key
+**************************************************/
+static void
+unpack_sk(polyvec *sk, const uint8_t packedsk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+* Name: pack_ciphertext
+*
+* Description: Serialize the ciphertext as concatenation of the
+* compressed and serialized vector of polynomials b
+* and the compressed and serialized polynomial v
+*
+* Arguments: uint8_t *r: pointer to the output serialized ciphertext
+* poly *pk: pointer to the input vector of polynomials b
+* poly *v: pointer to the input polynomial v
+**************************************************/
+static void
+pack_ciphertext(uint8_t r[KYBER_INDCPA_BYTES], polyvec *b, poly *v)
+{
+ polyvec_compress(r, b);
+ poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v);
+}
+
+/*************************************************
+* Name: unpack_ciphertext
+*
+* Description: De-serialize and decompress ciphertext from a byte array;
+* approximate inverse of pack_ciphertext
+*
+* Arguments: - polyvec *b: pointer to the output vector of polynomials b
+* - poly *v: pointer to the output polynomial v
+* - const uint8_t *c: pointer to the input serialized ciphertext
+**************************************************/
+static void
+unpack_ciphertext(polyvec *b, poly *v, const uint8_t c[KYBER_INDCPA_BYTES])
+{
+ polyvec_decompress(b, c);
+ poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES);
+}
+
+/*************************************************
+* Name: rej_uniform
+*
+* Description: Run rejection sampling on uniform random bytes to generate
+* uniform random integers mod q
+*
+* Arguments: - int16_t *r: pointer to output buffer
+* - unsigned int len: requested number of 16-bit integers (uniform mod q)
+* - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
+* - unsigned int buflen: length of input buffer in bytes
+*
+* Returns number of sampled 16-bit integers (at most len)
+**************************************************/
+static unsigned int
+rej_uniform(int16_t *r,
+ unsigned int len,
+ const uint8_t *buf,
+ unsigned int buflen)
+{
+ unsigned int ctr, pos;
+ uint16_t val0, val1;
+
+ ctr = pos = 0;
+ while (ctr < len && pos + 3 <= buflen) {
+ val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
+ val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
+ pos += 3;
+
+ if (val0 < KYBER_Q)
+ r[ctr++] = val0;
+ if (ctr < len && val1 < KYBER_Q)
+ r[ctr++] = val1;
+ }
+
+ return ctr;
+}
+
+#define gen_a(A, B) gen_matrix(A, B, 0)
+#define gen_at(A, B) gen_matrix(A, B, 1)
+
+/*************************************************
+* Name: gen_matrix
+*
+* Description: Deterministically generate matrix A (or the transpose of A)
+* from a seed. Entries of the matrix are polynomials that look
+* uniformly random. Performs rejection sampling on output of
+* a XOF
+*
+* Arguments: - polyvec *a: pointer to ouptput matrix A
+* - const uint8_t *seed: pointer to input seed
+* - int transposed: boolean deciding whether A or A^T is generated
+**************************************************/
+#define GEN_MATRIX_NBLOCKS ((12 * KYBER_N / 8 * (1 << 12) / KYBER_Q + XOF_BLOCKBYTES) / XOF_BLOCKBYTES)
+// Not static for benchmarking
+static void
+gen_matrix(polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
+{
+ unsigned int ctr, i, j, k;
+ unsigned int buflen, off;
+ uint8_t buf[GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES + 2];
+ xof_state state;
+
+ for (i = 0; i < KYBER_K; i++) {
+ for (j = 0; j < KYBER_K; j++) {
+ if (transposed)
+ xof_absorb(&state, seed, i, j);
+ else
+ xof_absorb(&state, seed, j, i);
+
+ xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
+ buflen = GEN_MATRIX_NBLOCKS * XOF_BLOCKBYTES;
+ ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, buflen);
+
+ while (ctr < KYBER_N) {
+ off = buflen % 3;
+ for (k = 0; k < off; k++)
+ buf[k] = buf[buflen - off + k];
+ xof_squeezeblocks(buf + off, 1, &state);
+ buflen = off + XOF_BLOCKBYTES;
+ ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen);
+ }
+ }
+ }
+}
+
+/*************************************************
+* Name: indcpa_keypair_derand
+*
+* Description: Generates public and private key for the CPA-secure
+* public-key encryption scheme underlying Kyber
+*
+* Arguments: - uint8_t *pk: pointer to output public key
+* (of length KYBER_INDCPA_PUBLICKEYBYTES bytes)
+* - uint8_t *sk: pointer to output private key
+* (of length KYBER_INDCPA_SECRETKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (of length KYBER_SYMBYTES bytes)
+**************************************************/
+static void
+indcpa_keypair_derand(uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES])
+{
+ unsigned int i;
+ uint8_t buf[2 * KYBER_SYMBYTES];
+ const uint8_t *publicseed = buf;
+ const uint8_t *noiseseed = buf + KYBER_SYMBYTES;
+ uint8_t nonce = 0;
+ polyvec a[KYBER_K], e, pkpv, skpv;
+
+ hash_g(buf, coins, KYBER_SYMBYTES);
+
+ gen_a(a, publicseed);
+
+ for (i = 0; i < KYBER_K; i++)
+ poly_getnoise_eta1(&skpv.vec[i], noiseseed, nonce++);
+ for (i = 0; i < KYBER_K; i++)
+ poly_getnoise_eta1(&e.vec[i], noiseseed, nonce++);
+
+ polyvec_ntt(&skpv);
+ polyvec_ntt(&e);
+
+ // matrix-vector multiplication
+ for (i = 0; i < KYBER_K; i++) {
+ polyvec_basemul_acc_montgomery(&pkpv.vec[i], &a[i], &skpv);
+ poly_tomont(&pkpv.vec[i]);
+ }
+
+ polyvec_add(&pkpv, &pkpv, &e);
+ polyvec_reduce(&pkpv);
+
+ pack_sk(sk, &skpv);
+ pack_pk(pk, &pkpv, publicseed);
+}
+
+/*************************************************
+* Name: indcpa_enc
+*
+* Description: Encryption function of the CPA-secure
+* public-key encryption scheme underlying Kyber.
+*
+* Arguments: - uint8_t *c: pointer to output ciphertext
+* (of length KYBER_INDCPA_BYTES bytes)
+* - const uint8_t *m: pointer to input message
+* (of length KYBER_INDCPA_MSGBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (of length KYBER_INDCPA_PUBLICKEYBYTES)
+* - const uint8_t *coins: pointer to input random coins used as seed
+* (of length KYBER_SYMBYTES) to deterministically
+* generate all randomness
+**************************************************/
+static void
+indcpa_enc(uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t pk[KYBER_INDCPA_PUBLICKEYBYTES],
+ const uint8_t coins[KYBER_SYMBYTES])
+{
+ unsigned int i;
+ uint8_t seed[KYBER_SYMBYTES];
+ uint8_t nonce = 0;
+ polyvec sp, pkpv, ep, at[KYBER_K], b;
+ poly v, k, epp;
+
+ unpack_pk(&pkpv, seed, pk);
+ poly_frommsg(&k, m);
+ gen_at(at, seed);
+
+ for (i = 0; i < KYBER_K; i++)
+ poly_getnoise_eta1(sp.vec + i, coins, nonce++);
+ for (i = 0; i < KYBER_K; i++)
+ poly_getnoise_eta2(ep.vec + i, coins, nonce++);
+ poly_getnoise_eta2(&epp, coins, nonce++);
+
+ polyvec_ntt(&sp);
+
+ // matrix-vector multiplication
+ for (i = 0; i < KYBER_K; i++)
+ polyvec_basemul_acc_montgomery(&b.vec[i], &at[i], &sp);
+
+ polyvec_basemul_acc_montgomery(&v, &pkpv, &sp);
+
+ polyvec_invntt_tomont(&b);
+ poly_invntt_tomont(&v);
+
+ polyvec_add(&b, &b, &ep);
+ poly_add(&v, &v, &epp);
+ poly_add(&v, &v, &k);
+ polyvec_reduce(&b);
+ poly_reduce(&v);
+
+ pack_ciphertext(c, &b, &v);
+}
+
+/*************************************************
+* Name: indcpa_dec
+*
+* Description: Decryption function of the CPA-secure
+* public-key encryption scheme underlying Kyber.
+*
+* Arguments: - uint8_t *m: pointer to output decrypted message
+* (of length KYBER_INDCPA_MSGBYTES)
+* - const uint8_t *c: pointer to input ciphertext
+* (of length KYBER_INDCPA_BYTES)
+* - const uint8_t *sk: pointer to input secret key
+* (of length KYBER_INDCPA_SECRETKEYBYTES)
+**************************************************/
+static void
+indcpa_dec(uint8_t m[KYBER_INDCPA_MSGBYTES],
+ const uint8_t c[KYBER_INDCPA_BYTES],
+ const uint8_t sk[KYBER_INDCPA_SECRETKEYBYTES])
+{
+ polyvec b, skpv;
+ poly v, mp;
+
+ unpack_ciphertext(&b, &v, c);
+ unpack_sk(&skpv, sk);
+
+ polyvec_ntt(&b);
+ polyvec_basemul_acc_montgomery(&mp, &skpv, &b);
+ poly_invntt_tomont(&mp);
+
+ poly_sub(&mp, &v, &mp);
+ poly_reduce(&mp);
+
+ poly_tomsg(m, &mp);
+}
+/** end: ref/indcpa.c **/
+
+/** begin: ref/fips202.c **/
+/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from
+ * http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202"
+ * implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein,
+ * and Peter Schwabe */
+
/* Number of rounds of the Keccak-f[1600] permutation. */
#define NROUNDS 24
/* Rotate the 64-bit unsigned value `a` left by `offset` bits.
 * `offset` must be in 1..63 (a shift by 64 would be undefined).
 * Both arguments are parenthesized so that compound expressions such as
 * ROL(x ^ y, n + 1) expand with the intended precedence. */
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset))))
+
/*************************************************
* Name:        load64
*
* Description: Assemble a uint64_t from 8 bytes in little-endian order
*              (x[0] is the least significant byte).
*
* Arguments:   - const uint8_t *x: pointer to input byte array (8 bytes)
*
* Returns the loaded 64-bit unsigned integer
**************************************************/
static uint64_t
load64(const uint8_t x[8])
{
    uint64_t word = 0;
    unsigned int k = 8;

    /* Walk from the most significant byte down, shifting as we go. */
    while (k-- > 0) {
        word = (word << 8) | (uint64_t)x[k];
    }

    return word;
}
+
/*************************************************
* Name:        store64
*
* Description: Write a 64-bit integer to an array of 8 bytes in
*              little-endian order (x[0] receives the least significant
*              byte).
*
* Arguments:   - uint8_t *x:  pointer to the output byte array (8 bytes,
*                             already allocated)
*              - uint64_t u:  input 64-bit unsigned integer
**************************************************/
static void
store64(uint8_t x[8], uint64_t u)
{
    unsigned int k;

    /* Emit the low byte, then shift the next one into place. */
    for (k = 0; k < 8; k++) {
        x[k] = (uint8_t)u;
        u >>= 8;
    }
}
+
+/* Keccak round constants */
+static const uint64_t KeccakF_RoundConstants[NROUNDS] = {
+ (uint64_t)0x0000000000000001ULL,
+ (uint64_t)0x0000000000008082ULL,
+ (uint64_t)0x800000000000808aULL,
+ (uint64_t)0x8000000080008000ULL,
+ (uint64_t)0x000000000000808bULL,
+ (uint64_t)0x0000000080000001ULL,
+ (uint64_t)0x8000000080008081ULL,
+ (uint64_t)0x8000000000008009ULL,
+ (uint64_t)0x000000000000008aULL,
+ (uint64_t)0x0000000000000088ULL,
+ (uint64_t)0x0000000080008009ULL,
+ (uint64_t)0x000000008000000aULL,
+ (uint64_t)0x000000008000808bULL,
+ (uint64_t)0x800000000000008bULL,
+ (uint64_t)0x8000000000008089ULL,
+ (uint64_t)0x8000000000008003ULL,
+ (uint64_t)0x8000000000008002ULL,
+ (uint64_t)0x8000000000000080ULL,
+ (uint64_t)0x000000000000800aULL,
+ (uint64_t)0x800000008000000aULL,
+ (uint64_t)0x8000000080008081ULL,
+ (uint64_t)0x8000000000008080ULL,
+ (uint64_t)0x0000000080000001ULL,
+ (uint64_t)0x8000000080008008ULL
+};
+
+/*************************************************
+* Name: KeccakF1600_StatePermute
+*
+* Description: The Keccak F1600 Permutation.
+* Applies NROUNDS rounds to the 25-lane state in place.
+* The lanes are first copied into the locals Aba..Asu, the
+* rounds are then processed two per loop iteration (the
+* first reads the A lanes and writes the E lanes using
+* KeccakF_RoundConstants[round], the second reads E and
+* writes A using KeccakF_RoundConstants[round + 1]), and
+* finally the A lanes are stored back into state.
+* Lane naming: the middle letter (b, g, k, m, s) selects the
+* group of five consecutive state indices, the last letter
+* (a, e, i, o, u) the position within that group.
+*
+* Arguments: - uint64_t *state: pointer to input/output Keccak state
+* (25 lanes of 64 bits each)
+**************************************************/
+static void
+KeccakF1600_StatePermute(uint64_t state[25])
+{
+ int round;
+
+ uint64_t Aba, Abe, Abi, Abo, Abu;
+ uint64_t Aga, Age, Agi, Ago, Agu;
+ uint64_t Aka, Ake, Aki, Ako, Aku;
+ uint64_t Ama, Ame, Ami, Amo, Amu;
+ uint64_t Asa, Ase, Asi, Aso, Asu;
+ uint64_t BCa, BCe, BCi, BCo, BCu;
+ uint64_t Da, De, Di, Do, Du;
+ uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
+ uint64_t Ega, Ege, Egi, Ego, Egu;
+ uint64_t Eka, Eke, Eki, Eko, Eku;
+ uint64_t Ema, Eme, Emi, Emo, Emu;
+ uint64_t Esa, Ese, Esi, Eso, Esu;
+
+ //copyFromState(A, state): load the 25 lanes into the locals Aba..Asu
+ Aba = state[0];
+ Abe = state[1];
+ Abi = state[2];
+ Abo = state[3];
+ Abu = state[4];
+ Aga = state[5];
+ Age = state[6];
+ Agi = state[7];
+ Ago = state[8];
+ Agu = state[9];
+ Aka = state[10];
+ Ake = state[11];
+ Aki = state[12];
+ Ako = state[13];
+ Aku = state[14];
+ Ama = state[15];
+ Ame = state[16];
+ Ami = state[17];
+ Amo = state[18];
+ Amu = state[19];
+ Asa = state[20];
+ Ase = state[21];
+ Asi = state[22];
+ Aso = state[23];
+ Asu = state[24];
+
+ for (round = 0; round < NROUNDS; round += 2) { /* two rounds per iteration */
+ // prepareTheta: XOR together the five A lanes that share a last letter
+ BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa;
+ BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase;
+ BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;
+ BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;
+ BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;
+
+ //thetaRhoPiChiIotaPrepareTheta(round, A, E): first round of the pair, reads A lanes and writes E lanes
+ Da = BCu ^ ROL(BCe, 1);
+ De = BCa ^ ROL(BCi, 1);
+ Di = BCe ^ ROL(BCo, 1);
+ Do = BCi ^ ROL(BCu, 1);
+ Du = BCo ^ ROL(BCa, 1);
+
+ Aba ^= Da;
+ BCa = Aba; /* the only lane used without a rotation */
+ Age ^= De;
+ BCe = ROL(Age, 44);
+ Aki ^= Di;
+ BCi = ROL(Aki, 43);
+ Amo ^= Do;
+ BCo = ROL(Amo, 21);
+ Asu ^= Du;
+ BCu = ROL(Asu, 14);
+ Eba = BCa ^ ((~BCe) & BCi);
+ Eba ^= (uint64_t)KeccakF_RoundConstants[round]; /* round constant goes into lane Eba only */
+ Ebe = BCe ^ ((~BCi) & BCo);
+ Ebi = BCi ^ ((~BCo) & BCu);
+ Ebo = BCo ^ ((~BCu) & BCa);
+ Ebu = BCu ^ ((~BCa) & BCe);
+
+ Abo ^= Do;
+ BCa = ROL(Abo, 28);
+ Agu ^= Du;
+ BCe = ROL(Agu, 20);
+ Aka ^= Da;
+ BCi = ROL(Aka, 3);
+ Ame ^= De;
+ BCo = ROL(Ame, 45);
+ Asi ^= Di;
+ BCu = ROL(Asi, 61);
+ Ega = BCa ^ ((~BCe) & BCi);
+ Ege = BCe ^ ((~BCi) & BCo);
+ Egi = BCi ^ ((~BCo) & BCu);
+ Ego = BCo ^ ((~BCu) & BCa);
+ Egu = BCu ^ ((~BCa) & BCe);
+
+ Abe ^= De;
+ BCa = ROL(Abe, 1);
+ Agi ^= Di;
+ BCe = ROL(Agi, 6);
+ Ako ^= Do;
+ BCi = ROL(Ako, 25);
+ Amu ^= Du;
+ BCo = ROL(Amu, 8);
+ Asa ^= Da;
+ BCu = ROL(Asa, 18);
+ Eka = BCa ^ ((~BCe) & BCi);
+ Eke = BCe ^ ((~BCi) & BCo);
+ Eki = BCi ^ ((~BCo) & BCu);
+ Eko = BCo ^ ((~BCu) & BCa);
+ Eku = BCu ^ ((~BCa) & BCe);
+
+ Abu ^= Du;
+ BCa = ROL(Abu, 27);
+ Aga ^= Da;
+ BCe = ROL(Aga, 36);
+ Ake ^= De;
+ BCi = ROL(Ake, 10);
+ Ami ^= Di;
+ BCo = ROL(Ami, 15);
+ Aso ^= Do;
+ BCu = ROL(Aso, 56);
+ Ema = BCa ^ ((~BCe) & BCi);
+ Eme = BCe ^ ((~BCi) & BCo);
+ Emi = BCi ^ ((~BCo) & BCu);
+ Emo = BCo ^ ((~BCu) & BCa);
+ Emu = BCu ^ ((~BCa) & BCe);
+
+ Abi ^= Di;
+ BCa = ROL(Abi, 62);
+ Ago ^= Do;
+ BCe = ROL(Ago, 55);
+ Aku ^= Du;
+ BCi = ROL(Aku, 39);
+ Ama ^= Da;
+ BCo = ROL(Ama, 41);
+ Ase ^= De;
+ BCu = ROL(Ase, 2);
+ Esa = BCa ^ ((~BCe) & BCi);
+ Ese = BCe ^ ((~BCi) & BCo);
+ Esi = BCi ^ ((~BCo) & BCu);
+ Eso = BCo ^ ((~BCu) & BCa);
+ Esu = BCu ^ ((~BCa) & BCe);
+
+ // prepareTheta: same XOR as above, now over the E lanes
+ BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa;
+ BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;
+ BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;
+ BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;
+ BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;
+
+ //thetaRhoPiChiIotaPrepareTheta(round+1, E, A): second round of the pair, reads E lanes and writes A lanes
+ Da = BCu ^ ROL(BCe, 1);
+ De = BCa ^ ROL(BCi, 1);
+ Di = BCe ^ ROL(BCo, 1);
+ Do = BCi ^ ROL(BCu, 1);
+ Du = BCo ^ ROL(BCa, 1);
+
+ Eba ^= Da;
+ BCa = Eba;
+ Ege ^= De;
+ BCe = ROL(Ege, 44);
+ Eki ^= Di;
+ BCi = ROL(Eki, 43);
+ Emo ^= Do;
+ BCo = ROL(Emo, 21);
+ Esu ^= Du;
+ BCu = ROL(Esu, 14);
+ Aba = BCa ^ ((~BCe) & BCi);
+ Aba ^= (uint64_t)KeccakF_RoundConstants[round + 1]; /* round constant goes into lane Aba only */
+ Abe = BCe ^ ((~BCi) & BCo);
+ Abi = BCi ^ ((~BCo) & BCu);
+ Abo = BCo ^ ((~BCu) & BCa);
+ Abu = BCu ^ ((~BCa) & BCe);
+
+ Ebo ^= Do;
+ BCa = ROL(Ebo, 28);
+ Egu ^= Du;
+ BCe = ROL(Egu, 20);
+ Eka ^= Da;
+ BCi = ROL(Eka, 3);
+ Eme ^= De;
+ BCo = ROL(Eme, 45);
+ Esi ^= Di;
+ BCu = ROL(Esi, 61);
+ Aga = BCa ^ ((~BCe) & BCi);
+ Age = BCe ^ ((~BCi) & BCo);
+ Agi = BCi ^ ((~BCo) & BCu);
+ Ago = BCo ^ ((~BCu) & BCa);
+ Agu = BCu ^ ((~BCa) & BCe);
+
+ Ebe ^= De;
+ BCa = ROL(Ebe, 1);
+ Egi ^= Di;
+ BCe = ROL(Egi, 6);
+ Eko ^= Do;
+ BCi = ROL(Eko, 25);
+ Emu ^= Du;
+ BCo = ROL(Emu, 8);
+ Esa ^= Da;
+ BCu = ROL(Esa, 18);
+ Aka = BCa ^ ((~BCe) & BCi);
+ Ake = BCe ^ ((~BCi) & BCo);
+ Aki = BCi ^ ((~BCo) & BCu);
+ Ako = BCo ^ ((~BCu) & BCa);
+ Aku = BCu ^ ((~BCa) & BCe);
+
+ Ebu ^= Du;
+ BCa = ROL(Ebu, 27);
+ Ega ^= Da;
+ BCe = ROL(Ega, 36);
+ Eke ^= De;
+ BCi = ROL(Eke, 10);
+ Emi ^= Di;
+ BCo = ROL(Emi, 15);
+ Eso ^= Do;
+ BCu = ROL(Eso, 56);
+ Ama = BCa ^ ((~BCe) & BCi);
+ Ame = BCe ^ ((~BCi) & BCo);
+ Ami = BCi ^ ((~BCo) & BCu);
+ Amo = BCo ^ ((~BCu) & BCa);
+ Amu = BCu ^ ((~BCa) & BCe);
+
+ Ebi ^= Di;
+ BCa = ROL(Ebi, 62);
+ Ego ^= Do;
+ BCe = ROL(Ego, 55);
+ Eku ^= Du;
+ BCi = ROL(Eku, 39);
+ Ema ^= Da;
+ BCo = ROL(Ema, 41);
+ Ese ^= De;
+ BCu = ROL(Ese, 2);
+ Asa = BCa ^ ((~BCe) & BCi);
+ Ase = BCe ^ ((~BCi) & BCo);
+ Asi = BCi ^ ((~BCo) & BCu);
+ Aso = BCo ^ ((~BCu) & BCa);
+ Asu = BCu ^ ((~BCa) & BCe);
+ }
+
+ //copyToState(state, A): store the locals Aba..Asu back into the 25 lanes
+ state[0] = Aba;
+ state[1] = Abe;
+ state[2] = Abi;
+ state[3] = Abo;
+ state[4] = Abu;
+ state[5] = Aga;
+ state[6] = Age;
+ state[7] = Agi;
+ state[8] = Ago;
+ state[9] = Agu;
+ state[10] = Aka;
+ state[11] = Ake;
+ state[12] = Aki;
+ state[13] = Ako;
+ state[14] = Aku;
+ state[15] = Ama;
+ state[16] = Ame;
+ state[17] = Ami;
+ state[18] = Amo;
+ state[19] = Amu;
+ state[20] = Asa;
+ state[21] = Ase;
+ state[22] = Asi;
+ state[23] = Aso;
+ state[24] = Asu;
+}
+
/*************************************************
* Name:        keccak_init
*
* Description: Initializes the Keccak state by setting all 25 lanes to zero.
*
* Arguments:   - uint64_t *s: pointer to Keccak state
**************************************************/
static void
keccak_init(uint64_t s[25])
{
    uint64_t *lane = s;
    uint64_t *const end = s + 25;

    while (lane != end) {
        *lane++ = 0;
    }
}
+
/*************************************************
* Name:        keccak_absorb
*
* Description: Absorb step of Keccak; incremental. Input bytes are XORed
*              into the state starting at byte position pos; every time a
*              block of r bytes has been filled the state is permuted and
*              absorption continues at position 0.
*
* Arguments:   - uint64_t *s:        pointer to Keccak state
*              - unsigned int pos:   position in current block to be absorbed
*              - unsigned int r:     rate in bytes (e.g., 168 for SHAKE128)
*              - const uint8_t *in:  pointer to input to be absorbed into s
*              - size_t inlen:       length of input in bytes
*
* Returns new position pos in current block
**************************************************/
static unsigned int
keccak_absorb(uint64_t s[25],
              unsigned int pos,
              unsigned int r,
              const uint8_t *in,
              size_t inlen)
{
    /* Complete (and permute) as many blocks as the input can fill. */
    while (pos + inlen >= r) {
        inlen -= r - pos;
        for (; pos < r; pos++) {
            s[pos / 8] ^= (uint64_t)*in++ << 8 * (pos % 8);
        }
        KeccakF1600_StatePermute(s);
        pos = 0;
    }

    /* Whatever is left fits into the current block without filling it. */
    for (; inlen != 0; inlen--, pos++) {
        s[pos / 8] ^= (uint64_t)*in++ << 8 * (pos % 8);
    }

    return pos;
}
+
/*************************************************
* Name:        keccak_finalize
*
* Description: Finalize absorb step: XOR the byte p into the state at byte
*              position pos and flip the top bit of the last lane of the
*              rate (lane r / 8 - 1).
*
* Arguments:   - uint64_t *s:       pointer to Keccak state
*              - unsigned int pos:  position in current block to be absorbed
*              - unsigned int r:    rate in bytes (e.g., 168 for SHAKE128)
*              - uint8_t p:         domain separation byte
**************************************************/
static void
keccak_finalize(uint64_t s[25], unsigned int pos, unsigned int r, uint8_t p)
{
    const unsigned int pad_lane = pos / 8;
    const unsigned int pad_shift = 8 * (pos % 8);
    const unsigned int last_lane = r / 8 - 1;

    s[pad_lane] ^= (uint64_t)p << pad_shift;
    s[last_lane] ^= 1ULL << 63;
}
+
/*************************************************
* Name:        keccak_squeeze
*
* Description: Squeeze step of Keccak. Squeezes arbitrarily many bytes.
*              Modifies the state. Can be called multiple times to keep
*              squeezing, i.e., is incremental. The state is permuted
*              whenever the current block is exhausted (pos == r) and
*              more output is still requested.
*
* Arguments:   - uint8_t *out:      pointer to output
*              - size_t outlen:     number of bytes to be squeezed (written to out)
*              - uint64_t *s:       pointer to input/output Keccak state
*              - unsigned int pos:  number of bytes in current block already squeezed
*              - unsigned int r:    rate in bytes (e.g., 168 for SHAKE128)
*
* Returns new position pos in current block
**************************************************/
static unsigned int
keccak_squeeze(uint8_t *out,
               size_t outlen,
               uint64_t s[25],
               unsigned int pos,
               unsigned int r)
{
    while (outlen != 0) {
        if (pos == r) {
            KeccakF1600_StatePermute(s);
            pos = 0;
        }
        /* Copy bytes until the block ends or the request is satisfied. */
        while (pos < r && outlen != 0) {
            *out++ = (uint8_t)(s[pos / 8] >> 8 * (pos % 8));
            pos++;
            outlen--;
        }
    }

    return pos;
}
+
/*************************************************
* Name:        keccak_absorb_once
*
* Description: Absorb step of Keccak; non-incremental. Zeroes the state,
*              absorbs all full blocks of r bytes (permuting after each),
*              absorbs the remaining bytes, then XORs in the byte p right
*              after the input and flips the top bit of lane (r - 1) / 8.
*
* Arguments:   - uint64_t *s:        pointer to (uninitialized) output Keccak state
*              - unsigned int r:     rate in bytes (e.g., 168 for SHAKE128)
*              - const uint8_t *in:  pointer to input to be absorbed into s
*              - size_t inlen:       length of input in bytes
*              - uint8_t p:          domain-separation byte for different
*                                    Keccak-derived functions
**************************************************/
static void
keccak_absorb_once(uint64_t s[25],
                   unsigned int r,
                   const uint8_t *in,
                   size_t inlen,
                   uint8_t p)
{
    const unsigned int rate_lanes = r / 8;
    unsigned int k;

    for (k = 0; k < 25; k++) {
        s[k] = 0;
    }

    /* Full blocks: XOR lane-wise, then permute. */
    for (; inlen >= r; in += r, inlen -= r) {
        for (k = 0; k < rate_lanes; k++) {
            s[k] ^= load64(in + 8 * k);
        }
        KeccakF1600_StatePermute(s);
    }

    /* Trailing partial block, byte by byte. */
    for (k = 0; k < inlen; k++) {
        s[k / 8] ^= (uint64_t)in[k] << 8 * (k % 8);
    }

    /* k now equals the number of trailing bytes: p goes right behind them. */
    s[k / 8] ^= (uint64_t)p << 8 * (k % 8);
    s[(r - 1) / 8] ^= 1ULL << 63;
}
+
/*************************************************
* Name:        keccak_squeezeblocks
*
* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes
*              each; the state is permuted before every block is written.
*              Modifies the state. Can be called multiple times to keep
*              squeezing, i.e., is incremental. Assumes zero bytes of the
*              current block have already been squeezed.
*
* Arguments:   - uint8_t *out:     pointer to output blocks
*              - size_t nblocks:   number of blocks to be squeezed (written to out)
*              - uint64_t *s:      pointer to input/output Keccak state
*              - unsigned int r:   rate in bytes (e.g., 168 for SHAKE128)
**************************************************/
static void
keccak_squeezeblocks(uint8_t *out,
                     size_t nblocks,
                     uint64_t s[25],
                     unsigned int r)
{
    unsigned int lane;

    for (; nblocks != 0; nblocks--, out += r) {
        KeccakF1600_StatePermute(s);
        for (lane = 0; lane < r / 8; lane++) {
            store64(out + 8 * lane, s[lane]);
        }
    }
}
+
+/*************************************************
+* Name: shake128_init
+*
+* Description: Initilizes Keccak state for use as SHAKE128 XOF
+*
+* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
+**************************************************/
+void
+shake128_init(keccak_state *state)
+{
+ keccak_init(state->s);
+ state->pos = 0;
+}
+
+/*************************************************
+* Name: shake128_absorb
+*
+* Description: Absorb step of the SHAKE128 XOF; incremental.
+*
+* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
+* - const uint8_t *in: pointer to input to be absorbed into s
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
+{
+ state->pos = keccak_absorb(state->s, state->pos, SHAKE128_RATE, in, inlen);
+}
+
+/*************************************************
+* Name: shake128_finalize
+*
+* Description: Finalize absorb step of the SHAKE128 XOF.
+*
+* Arguments: - keccak_state *state: pointer to Keccak state
+**************************************************/
+void
+shake128_finalize(keccak_state *state)
+{
+ keccak_finalize(state->s, state->pos, SHAKE128_RATE, 0x1F);
+ state->pos = SHAKE128_RATE;
+}
+
+/*************************************************
+* Name: shake128_squeeze
+*
+* Description: Squeeze step of SHAKE128 XOF. Squeezes arbitraily many
+* bytes. Can be called multiple times to keep squeezing.
+*
+* Arguments: - uint8_t *out: pointer to output blocks
+* - size_t outlen : number of bytes to be squeezed (written to output)
+* - keccak_state *s: pointer to input/output Keccak state
+**************************************************/
+void
+shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
+{
+ state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE128_RATE);
+}
+
+/*************************************************
+* Name: shake128_absorb_once
+*
+* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental.
+*
+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
+* - const uint8_t *in: pointer to input to be absorbed into s
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
+{
+ keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F);
+ state->pos = SHAKE128_RATE;
+}
+
+/*************************************************
+* Name: shake128_squeezeblocks
+*
+* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
+* SHAKE128_RATE bytes each. Can be called multiple times
+* to keep squeezing. Assumes new block has not yet been
+* started (state->pos = SHAKE128_RATE).
+*
+* Arguments: - uint8_t *out: pointer to output blocks
+* - size_t nblocks: number of blocks to be squeezed (written to output)
+* - keccak_state *s: pointer to input/output Keccak state
+**************************************************/
+void
+shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
+{
+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE);
+}
+
+/*************************************************
+* Name: shake256_init
+*
+* Description: Initilizes Keccak state for use as SHAKE256 XOF
+*
+* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
+**************************************************/
+void
+shake256_init(keccak_state *state)
+{
+ keccak_init(state->s);
+ state->pos = 0;
+}
+
+/*************************************************
+* Name: shake256_absorb
+*
+* Description: Absorb step of the SHAKE256 XOF; incremental.
+*
+* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
+* - const uint8_t *in: pointer to input to be absorbed into s
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
+{
+ state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen);
+}
+
+/*************************************************
+* Name: shake256_finalize
+*
+* Description: Finalize absorb step of the SHAKE256 XOF.
+*
+* Arguments: - keccak_state *state: pointer to Keccak state
+**************************************************/
+void
+shake256_finalize(keccak_state *state)
+{
+ keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F);
+ state->pos = SHAKE256_RATE;
+}
+
+/*************************************************
+* Name: shake256_squeeze
+*
+* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many
+* bytes. Can be called multiple times to keep squeezing.
+*
+* Arguments: - uint8_t *out: pointer to output blocks
+* - size_t outlen : number of bytes to be squeezed (written to output)
+* - keccak_state *s: pointer to input/output Keccak state
+**************************************************/
+void
+shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
+{
+ state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE);
+}
+
+/*************************************************
+* Name: shake256_absorb_once
+*
+* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental.
+*
+* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
+* - const uint8_t *in: pointer to input to be absorbed into s
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
+{
+ keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F);
+ state->pos = SHAKE256_RATE;
+}
+
+/*************************************************
+* Name: shake256_squeezeblocks
+*
+* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
+* SHAKE256_RATE bytes each. Can be called multiple times
+* to keep squeezing. Assumes next block has not yet been
+* started (state->pos = SHAKE256_RATE).
+*
+* Arguments: - uint8_t *out: pointer to output blocks
+* - size_t nblocks: number of blocks to be squeezed (written to output)
+* - keccak_state *s: pointer to input/output Keccak state
+**************************************************/
+void
+shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
+{
+ keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE);
+}
+
+/*************************************************
+* Name: shake128
+*
+* Description: SHAKE128 XOF with non-incremental API
+*
+* Arguments: - uint8_t *out: pointer to output
+* - size_t outlen: requested output length in bytes
+* - const uint8_t *in: pointer to input
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
+{
+ size_t nblocks;
+ keccak_state state;
+
+ shake128_absorb_once(&state, in, inlen);
+ nblocks = outlen / SHAKE128_RATE;
+ shake128_squeezeblocks(out, nblocks, &state);
+ outlen -= nblocks * SHAKE128_RATE;
+ out += nblocks * SHAKE128_RATE;
+ shake128_squeeze(out, outlen, &state);
+}
+
+/*************************************************
+* Name: shake256
+*
+* Description: SHAKE256 XOF with non-incremental API
+*
+* Arguments: - uint8_t *out: pointer to output
+* - size_t outlen: requested output length in bytes
+* - const uint8_t *in: pointer to input
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
+{
+ size_t nblocks;
+ keccak_state state;
+
+ shake256_absorb_once(&state, in, inlen);
+ nblocks = outlen / SHAKE256_RATE;
+ shake256_squeezeblocks(out, nblocks, &state);
+ outlen -= nblocks * SHAKE256_RATE;
+ out += nblocks * SHAKE256_RATE;
+ shake256_squeeze(out, outlen, &state);
+}
+
+/*************************************************
+* Name: sha3_256
+*
+* Description: SHA3-256 with non-incremental API
+*
+* Arguments: - uint8_t *h: pointer to output (32 bytes)
+* - const uint8_t *in: pointer to input
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen)
+{
+ unsigned int i;
+ uint64_t s[25];
+
+ keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06);
+ KeccakF1600_StatePermute(s);
+ for (i = 0; i < 4; i++)
+ store64(h + 8 * i, s[i]);
+}
+
+/*************************************************
+* Name: sha3_512
+*
+* Description: SHA3-512 with non-incremental API
+*
+* Arguments: - uint8_t *h: pointer to output (64 bytes)
+* - const uint8_t *in: pointer to input
+* - size_t inlen: length of input in bytes
+**************************************************/
+void
+sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen)
+{
+ unsigned int i;
+ uint64_t s[25];
+
+ keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06);
+ KeccakF1600_StatePermute(s);
+ for (i = 0; i < 8; i++)
+ store64(h + 8 * i, s[i]);
+}
+/** end: ref/fips202.c **/
+
+/** begin: ref/symmetric-shake.c **/
+/*************************************************
+* Name:        kyber_shake128_absorb
+*
+* Description: Absorb step of SHAKE128 as used by Kyber: the input that is
+*              absorbed is the byte string seed || x || y.
+*
+* Arguments:   - keccak_state *state: pointer to (uninitialized) output Keccak state
+*              - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state
+*              - uint8_t x: first additional byte of input
+*              - uint8_t y: second additional byte of input
+**************************************************/
+static void
+kyber_shake128_absorb(keccak_state *state,
+                      const uint8_t seed[KYBER_SYMBYTES],
+                      uint8_t x,
+                      uint8_t y)
+{
+    /* seed followed by the two extra bytes */
+    uint8_t input[KYBER_SYMBYTES + 2];
+    uint8_t *tail = input + KYBER_SYMBYTES;
+
+    memcpy(input, seed, KYBER_SYMBYTES);
+    tail[0] = x;
+    tail[1] = y;
+
+    shake128_absorb_once(state, input, sizeof(input));
+}
+
+/*************************************************
+* Name:        kyber_shake256_prf
+*
+* Description: SHAKE256 used as a PRF: hashes key || nonce and writes
+*              outlen bytes of SHAKE256 output.
+*
+* Arguments:   - uint8_t *out: pointer to output
+*              - size_t outlen: number of requested output bytes
+*              - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
+*              - uint8_t nonce: single-byte nonce (public PRF input)
+**************************************************/
+static void
+kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce)
+{
+    /* key followed by the one-byte nonce */
+    uint8_t input[KYBER_SYMBYTES + 1];
+
+    input[KYBER_SYMBYTES] = nonce;
+    memcpy(input, key, KYBER_SYMBYTES);
+
+    shake256(out, outlen, input, sizeof(input));
+}
+/** end: ref/symmetric-shake.c **/
+
+/** begin: ref/kem.c **/
+/*************************************************
+* Name:        crypto_kem_keypair_derand
+*
+* Description: Deterministically generates public and private key
+*              for CCA-secure Kyber key encapsulation mechanism.
+*              The private key written to sk is laid out as
+*                indcpa secret key || pk || hash_h(pk) || z
+*              where z is the second KYBER_SYMBYTES bytes of coins.
+*
+* Arguments:   - uint8_t *pk: pointer to output public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key
+*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*              - uint8_t *coins: pointer to input randomness
+*                (an already allocated array filled with 2*KYBER_SYMBYTES random bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int
+crypto_kem_keypair_derand(uint8_t *pk,
+                          uint8_t *sk,
+                          const uint8_t *coins)
+{
+    uint8_t *sk_pk = sk + KYBER_INDCPA_SECRETKEYBYTES;
+    uint8_t *sk_hpk = sk + KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES;
+    uint8_t *sk_z = sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES;
+    const uint8_t *z = coins + KYBER_SYMBYTES;
+    size_t n;
+
+    indcpa_keypair_derand(pk, sk, coins);
+
+    /* Append a copy of the public key to the indcpa secret key. */
+    for (n = 0; n < KYBER_INDCPA_PUBLICKEYBYTES; n++) {
+        sk_pk[n] = pk[n];
+    }
+
+    hash_h(sk_hpk, pk, KYBER_PUBLICKEYBYTES);
+
+    /* Value z for pseudo-random output on reject */
+    for (n = 0; n < KYBER_SYMBYTES; n++) {
+        sk_z[n] = z[n];
+    }
+    return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_keypair
+*
+* Description: Generates public and private key for CCA-secure Kyber key
+*              encapsulation mechanism. Draws 2*KYBER_SYMBYTES bytes from
+*              randombytes() (in two calls of KYBER_SYMBYTES each) and
+*              passes them to crypto_kem_keypair_derand().
+*
+* Arguments:   - uint8_t *pk: pointer to output public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*              - uint8_t *sk: pointer to output private key
+*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int
+crypto_kem_keypair(uint8_t *pk,
+                   uint8_t *sk)
+{
+    uint8_t coins[2 * KYBER_SYMBYTES];
+    uint8_t *second_half = coins + KYBER_SYMBYTES;
+
+    randombytes(coins, KYBER_SYMBYTES);
+    randombytes(second_half, KYBER_SYMBYTES);
+
+    (void)crypto_kem_keypair_derand(pk, sk, coins);
+    return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_enc_derand
+*
+* Description: Generates cipher text and shared
+* secret for given public key
+*
+* This is the deterministic variant: it calls no random source
+* itself, all randomness comes from the coins argument.
+*
+* Steps, as performed below:
+* 1. buf = hash_h(coins) || hash_h(pk)
+* 2. kr = hash_g(buf)
+* 3. ct = indcpa_enc(buf, pk, second half of kr)
+* 4. second half of kr is replaced by hash_h(ct)
+* 5. ss = kdf(kr)
+*
+* Arguments: - uint8_t *ct: pointer to output cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *pk: pointer to input public key
+* (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+* - const uint8_t *coins: pointer to input randomness
+* (an already allocated array filled with KYBER_SYMBYTES random bytes)
+**
+* Returns 0 (success)
+**************************************************/
+int
+crypto_kem_enc_derand(uint8_t *ct,
+ uint8_t *ss,
+ const uint8_t *pk,
+ const uint8_t *coins)
+{
+ uint8_t buf[2 * KYBER_SYMBYTES];
+ /* Will contain key, coins */
+ uint8_t kr[2 * KYBER_SYMBYTES];
+
+ /* Don't release system RNG output: the caller's coins are hashed
+ * into the first half of buf and are not used directly afterwards. */
+ hash_h(buf, coins, KYBER_SYMBYTES);
+
+ /* Multitarget countermeasure for coins + contributory KEM:
+ * the second half of buf is the hash of the public key. */
+ hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES);
+ hash_g(kr, buf, 2 * KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(ct, buf, pk, kr + KYBER_SYMBYTES);
+
+ /* overwrite coins in kr with H(c); this must come after indcpa_enc,
+ * which reads the coins from the same bytes. */
+ hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES);
+ /* hash concatenation of pre-k and H(c) to k */
+ kdf(ss, kr, 2 * KYBER_SYMBYTES);
+ return 0;
+}
+
+/*************************************************
+* Name:        crypto_kem_enc
+*
+* Description: Generates cipher text and shared secret for given public
+*              key. Draws KYBER_SYMBYTES bytes from randombytes() and
+*              passes them to crypto_kem_enc_derand().
+*
+* Arguments:   - uint8_t *ct: pointer to output cipher text
+*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+*              - uint8_t *ss: pointer to output shared secret
+*                (an already allocated array of KYBER_SSBYTES bytes)
+*              - const uint8_t *pk: pointer to input public key
+*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
+*
+* Returns 0 (success)
+**************************************************/
+int
+crypto_kem_enc(uint8_t *ct,
+               uint8_t *ss,
+               const uint8_t *pk)
+{
+    uint8_t seed[KYBER_SYMBYTES];
+
+    randombytes(seed, sizeof(seed));
+    (void)crypto_kem_enc_derand(ct, ss, pk, seed);
+    return 0;
+}
+
+/*************************************************
+* Name: crypto_kem_dec
+*
+* Description: Generates shared secret for given
+* cipher text and private key
+*
+* Steps, as performed below:
+* 1. buf (first half) = indcpa_dec(ct, sk)
+* 2. buf (second half) = the KYBER_SYMBYTES bytes stored at
+* sk + KYBER_SECRETKEYBYTES - 2*KYBER_SYMBYTES
+* 3. kr = hash_g(buf)
+* 4. cmp = indcpa_enc(buf, pk, second half of kr), where pk is
+* read from sk + KYBER_INDCPA_SECRETKEYBYTES
+* 5. fail = verify(ct, cmp)
+* 6. second half of kr is replaced by hash_h(ct)
+* 7. cmov() is called with fail to conditionally replace the
+* first half of kr with the last KYBER_SYMBYTES bytes of sk
+* 8. ss = kdf(kr)
+*
+* Arguments: - uint8_t *ss: pointer to output shared secret
+* (an already allocated array of KYBER_SSBYTES bytes)
+* - const uint8_t *ct: pointer to input cipher text
+* (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
+* - const uint8_t *sk: pointer to input private key
+* (an already allocated array of KYBER_SECRETKEYBYTES bytes)
+*
+* Returns 0.
+*
+* On failure, ss will contain a pseudo-random value.
+**************************************************/
+int
+crypto_kem_dec(uint8_t *ss,
+ const uint8_t *ct,
+ const uint8_t *sk)
+{
+ size_t i;
+ int fail;
+ uint8_t buf[2 * KYBER_SYMBYTES];
+ /* Will contain key, coins */
+ uint8_t kr[2 * KYBER_SYMBYTES];
+ uint8_t cmp[KYBER_CIPHERTEXTBYTES];
+ const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES;
+
+ indcpa_dec(buf, ct, sk);
+
+ /* Multitarget countermeasure for coins + contributory KEM */
+ for (i = 0; i < KYBER_SYMBYTES; i++)
+ buf[KYBER_SYMBYTES + i] = sk[KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES + i];
+ hash_g(kr, buf, 2 * KYBER_SYMBYTES);
+
+ /* coins are in kr+KYBER_SYMBYTES */
+ indcpa_enc(cmp, buf, pk, kr + KYBER_SYMBYTES);
+
+ /* NOTE(review): presumably verify() returns non-zero when the two
+ * buffers differ and runs in constant time -- confirm at its definition. */
+ fail = verify(ct, cmp, KYBER_CIPHERTEXTBYTES);
+
+ /* overwrite coins in kr with H(c) */
+ hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES);
+
+ /* Overwrite pre-k with z on re-encryption failure; there is no branch
+ * on fail in this function, the selection is delegated to cmov(). */
+ cmov(kr, sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES, fail);
+
+ /* hash concatenation of pre-k and H(c) to k */
+ kdf(ss, kr, 2 * KYBER_SYMBYTES);
+ return 0;
+}
+/** end: ref/kem.c **/
diff --git a/security/nss/lib/freebl/kyber-pqcrystals-ref.h b/security/nss/lib/freebl/kyber-pqcrystals-ref.h
new file mode 100644
index 0000000000..c666b1a543
--- /dev/null
+++ b/security/nss/lib/freebl/kyber-pqcrystals-ref.h
@@ -0,0 +1,144 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * This file was generated from
+ * https://github.com/pq-crystals/kyber/commit/e0d1c6ff
+ *
+ * Files from that repository are listed here surrounded by
+ * "* begin: [file] *" and "* end: [file] *" comments.
+ *
+ * The following changes have been made:
+ * - include guards have been removed,
+ * - include directives have been removed,
+ * - "#ifdef KYBER90S" blocks have been evaluated with "KYBER90S" undefined,
+ * - functions outside of kem.c have been made static.
+*/
+
+/** begin: ref/LICENSE **
+Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/);
+or Apache 2.0 License (https://www.apache.org/licenses/LICENSE-2.0.html).
+
+For Keccak and AES we are using public-domain
+code from sources and by authors listed in
+comments on top of the respective files.
+** end: ref/LICENSE **/
+
+/** begin: ref/AUTHORS **
+Joppe Bos,
+Léo Ducas,
+Eike Kiltz,
+Tancrède Lepoint,
+Vadim Lyubashevsky,
+John Schanck,
+Peter Schwabe,
+Gregor Seiler,
+Damien Stehlé
+** end: ref/AUTHORS **/
+
+#ifndef KYBER_PQCRYSTALS_REF_H
+#define KYBER_PQCRYSTALS_REF_H
+
+/** begin: ref/api.h **/
+#include <stdint.h>
+
+#define pqcrystals_kyber512_SECRETKEYBYTES 1632
+#define pqcrystals_kyber512_PUBLICKEYBYTES 800
+#define pqcrystals_kyber512_CIPHERTEXTBYTES 768
+#define pqcrystals_kyber512_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber512_ENCCOINBYTES 32
+#define pqcrystals_kyber512_BYTES 32
+
+#define pqcrystals_kyber512_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_ref_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber512_90s_ref_SECRETKEYBYTES pqcrystals_kyber512_SECRETKEYBYTES
+#define pqcrystals_kyber512_90s_ref_PUBLICKEYBYTES pqcrystals_kyber512_PUBLICKEYBYTES
+#define pqcrystals_kyber512_90s_ref_CIPHERTEXTBYTES pqcrystals_kyber512_CIPHERTEXTBYTES
+#define pqcrystals_kyber512_90s_ref_KEYPAIRCOINBYTES pqcrystals_kyber512_KEYPAIRCOINBYTES
+#define pqcrystals_kyber512_90s_ref_ENCCOINBYTES pqcrystals_kyber512_ENCCOINBYTES
+#define pqcrystals_kyber512_90s_ref_BYTES pqcrystals_kyber512_BYTES
+
+int pqcrystals_kyber512_90s_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber512_90s_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber512_90s_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber512_90s_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber512_90s_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber768_SECRETKEYBYTES 2400
+#define pqcrystals_kyber768_PUBLICKEYBYTES 1184
+#define pqcrystals_kyber768_CIPHERTEXTBYTES 1088
+#define pqcrystals_kyber768_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber768_ENCCOINBYTES 32
+#define pqcrystals_kyber768_BYTES 32
+
+#define pqcrystals_kyber768_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_ref_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber768_90s_ref_SECRETKEYBYTES pqcrystals_kyber768_SECRETKEYBYTES
+#define pqcrystals_kyber768_90s_ref_PUBLICKEYBYTES pqcrystals_kyber768_PUBLICKEYBYTES
+#define pqcrystals_kyber768_90s_ref_CIPHERTEXTBYTES pqcrystals_kyber768_CIPHERTEXTBYTES
+#define pqcrystals_kyber768_90s_ref_KEYPAIRCOINBYTES pqcrystals_kyber768_KEYPAIRCOINBYTES
+#define pqcrystals_kyber768_90s_ref_ENCCOINBYTES pqcrystals_kyber768_ENCCOINBYTES
+#define pqcrystals_kyber768_90s_ref_BYTES pqcrystals_kyber768_BYTES
+
+int pqcrystals_kyber768_90s_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber768_90s_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber768_90s_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber768_90s_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber768_90s_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber1024_SECRETKEYBYTES 3168
+#define pqcrystals_kyber1024_PUBLICKEYBYTES 1568
+#define pqcrystals_kyber1024_CIPHERTEXTBYTES 1568
+#define pqcrystals_kyber1024_KEYPAIRCOINBYTES 64
+#define pqcrystals_kyber1024_ENCCOINBYTES 32
+#define pqcrystals_kyber1024_BYTES 32
+
+#define pqcrystals_kyber1024_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_ref_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+
+#define pqcrystals_kyber1024_90s_ref_SECRETKEYBYTES pqcrystals_kyber1024_SECRETKEYBYTES
+#define pqcrystals_kyber1024_90s_ref_PUBLICKEYBYTES pqcrystals_kyber1024_PUBLICKEYBYTES
+#define pqcrystals_kyber1024_90s_ref_CIPHERTEXTBYTES pqcrystals_kyber1024_CIPHERTEXTBYTES
+#define pqcrystals_kyber1024_90s_ref_KEYPAIRCOINBYTES pqcrystals_kyber1024_KEYPAIRCOINBYTES
+#define pqcrystals_kyber1024_90s_ref_ENCCOINBYTES pqcrystals_kyber1024_ENCCOINBYTES
+#define pqcrystals_kyber1024_90s_ref_BYTES pqcrystals_kyber1024_BYTES
+
+int pqcrystals_kyber1024_90s_ref_keypair_derand(uint8_t *pk, uint8_t *sk, const uint8_t *coins);
+int pqcrystals_kyber1024_90s_ref_keypair(uint8_t *pk, uint8_t *sk);
+int pqcrystals_kyber1024_90s_ref_enc_derand(uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
+int pqcrystals_kyber1024_90s_ref_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
+int pqcrystals_kyber1024_90s_ref_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
+/** end: ref/api.h **/
+
+#endif // KYBER_PQCRYSTALS_REF_H
diff --git a/security/nss/lib/freebl/kyber.c b/security/nss/lib/freebl/kyber.c
new file mode 100644
index 0000000000..911512bf18
--- /dev/null
+++ b/security/nss/lib/freebl/kyber.c
@@ -0,0 +1,205 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <stdbool.h>
+
+#include "blapi.h"
+#include "secerr.h"
+#include "secitem.h"
+
+#include "kyber-pqcrystals-ref.h"
+#include "kyber.h"
+
+/* Consistency check between kyber-pqcrystals-ref.h and kyber.h.
+ *
+ * The validators in this file compare SECItem lengths against the kyber.h
+ * constants, and the buffers are then handed to the
+ * pqcrystals_kyber768_ref_* functions. These compile-time checks ensure the
+ * kyber.h constants equal the Kyber-768 sizes declared in
+ * kyber-pqcrystals-ref.h. */
+PR_STATIC_ASSERT(KYBER768_PUBLIC_KEY_BYTES == pqcrystals_kyber768_PUBLICKEYBYTES);
+PR_STATIC_ASSERT(KYBER768_PRIVATE_KEY_BYTES == pqcrystals_kyber768_SECRETKEYBYTES);
+PR_STATIC_ASSERT(KYBER768_CIPHERTEXT_BYTES == pqcrystals_kyber768_CIPHERTEXTBYTES);
+PR_STATIC_ASSERT(KYBER_SHARED_SECRET_BYTES == pqcrystals_kyber768_BYTES);
+PR_STATIC_ASSERT(KYBER_KEYPAIR_COIN_BYTES == pqcrystals_kyber768_KEYPAIRCOINBYTES);
+PR_STATIC_ASSERT(KYBER_ENC_COIN_BYTES == pqcrystals_kyber768_ENCCOINBYTES);
+
+/* Returns true iff |params| is one of the parameter sets handled in this
+ * file (Kyber-768 round 3, normal or test mode). */
+static bool
+valid_params(KyberParams params)
+{
+    return params == params_kyber768_round3 ||
+           params == params_kyber768_round3_test_mode;
+}
+
+/* Returns true iff |params| is a handled parameter set and |pubkey| is
+ * non-NULL with the Kyber-768 public key length. */
+static bool
+valid_pubkey(KyberParams params, const SECItem *pubkey)
+{
+    if (params != params_kyber768_round3 &&
+        params != params_kyber768_round3_test_mode) {
+        return false;
+    }
+    return pubkey && pubkey->len == KYBER768_PUBLIC_KEY_BYTES;
+}
+
+/* Returns true iff |params| is a handled parameter set and |privkey| is
+ * non-NULL with the Kyber-768 private key length. */
+static bool
+valid_privkey(KyberParams params, const SECItem *privkey)
+{
+    if (params != params_kyber768_round3 &&
+        params != params_kyber768_round3_test_mode) {
+        return false;
+    }
+    return privkey && privkey->len == KYBER768_PRIVATE_KEY_BYTES;
+}
+
+/* Returns true iff |params| is a handled parameter set and |ciphertext| is
+ * non-NULL with the Kyber-768 ciphertext length. */
+static bool
+valid_ciphertext(KyberParams params, const SECItem *ciphertext)
+{
+    if (params != params_kyber768_round3 &&
+        params != params_kyber768_round3_test_mode) {
+        return false;
+    }
+    return ciphertext && ciphertext->len == KYBER768_CIPHERTEXT_BYTES;
+}
+
+/* Returns true iff |params| is a handled parameter set and |secret| is
+ * non-NULL with the shared-secret length. */
+static bool
+valid_secret(KyberParams params, const SECItem *secret)
+{
+    if (params != params_kyber768_round3 &&
+        params != params_kyber768_round3_test_mode) {
+        return false;
+    }
+    return secret && secret->len == KYBER_SHARED_SECRET_BYTES;
+}
+
+/* Returns true iff |params| is a handled parameter set and |seed| is either
+ * absent (NULL) or has the key-pair coin length. */
+static bool
+valid_keypair_seed(KyberParams params, const SECItem *seed)
+{
+    if (params != params_kyber768_round3 &&
+        params != params_kyber768_round3_test_mode) {
+        return false;
+    }
+    if (!seed) {
+        /* A seed is optional for key generation. */
+        return true;
+    }
+    return seed->len == KYBER_KEYPAIR_COIN_BYTES;
+}
+
+/* Returns true iff |seed| is acceptable as encapsulation randomness for
+ * |params|:
+ *  - params_kyber768_round3: no seed may be supplied (seed must be NULL);
+ *  - params_kyber768_round3_test_mode: seed may be NULL, or must be exactly
+ *    KYBER_ENC_COIN_BYTES long, which is the number of bytes
+ *    Kyber_Encapsulate consumes from it;
+ *  - any other value: false. */
+static bool
+valid_enc_seed(KyberParams params, const SECItem *seed)
+{
+    switch (params) {
+        case params_kyber768_round3:
+            return !seed;
+        case params_kyber768_round3_test_mode:
+            /* Check against the coin length, not the shared-secret length:
+             * the two constants are asserted separately at the top of this
+             * file and only happen to share a value. */
+            return !seed || seed->len == KYBER_ENC_COIN_BYTES;
+        default:
+            return false;
+    }
+}
+
+/* Generate a Kyber key pair.
+ *
+ * params       - parameter set; must satisfy valid_params().
+ * keypair_seed - optional. When non-NULL it must be KYBER_KEYPAIR_COIN_BYTES
+ *                long and is used as the key-generation coins; when NULL the
+ *                coins are drawn from RNG_GenerateGlobalRandomBytes().
+ * privkey      - output; must already have len == KYBER768_PRIVATE_KEY_BYTES.
+ * pubkey       - output; must already have len == KYBER768_PUBLIC_KEY_BYTES.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set to
+ * SEC_ERROR_INVALID_ALGORITHM (bad params), SEC_ERROR_INVALID_ARGS (bad
+ * seed/key lengths or NULL key items) or SEC_ERROR_NEED_RANDOM (RNG failed).
+ */
+SECStatus
+Kyber_NewKey(KyberParams params, const SECItem *keypair_seed, SECItem *privkey, SECItem *pubkey)
+{
+    if (!valid_params(params)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (!(valid_keypair_seed(params, keypair_seed) && valid_privkey(params, privkey) && valid_pubkey(params, pubkey))) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    SECStatus rv = SECSuccess;
+    uint8_t randbuf[KYBER_KEYPAIR_COIN_BYTES];
+    uint8_t *coins;
+    if (keypair_seed) {
+        coins = keypair_seed->data;
+    } else {
+        if (RNG_GenerateGlobalRandomBytes(randbuf, sizeof randbuf) != SECSuccess) {
+            PORT_Memset(randbuf, 0, sizeof randbuf);
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            return SECFailure;
+        }
+        coins = randbuf;
+    }
+    NSS_CLASSIFY(coins, KYBER_KEYPAIR_COIN_BYTES);
+    if (params == params_kyber768_round3 || params == params_kyber768_round3_test_mode) {
+        pqcrystals_kyber768_ref_keypair_derand(pubkey->data, privkey->data, coins);
+        NSS_DECLASSIFY(pubkey->data, pubkey->len);
+    } else {
+        /* unreachable */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+    }
+
+    /* The coins determine the private key, so do not leave the locally
+     * generated copy behind on the stack. Best effort: a plain memset can be
+     * elided by an optimizing compiler. A caller-supplied seed is left
+     * untouched. */
+    PORT_Memset(randbuf, 0, sizeof randbuf);
+    return rv;
+}
+
+/* Encapsulate to |pubkey|, producing a ciphertext and a shared secret.
+ *
+ * params     - parameter set; must satisfy valid_params().
+ * enc_seed   - optional encapsulation coins. Must be NULL for
+ *              params_kyber768_round3; for the test-mode parameter set it may
+ *              be NULL or a seed accepted by valid_enc_seed(). When NULL the
+ *              coins are drawn from RNG_GenerateGlobalRandomBytes().
+ * pubkey     - input; len must be KYBER768_PUBLIC_KEY_BYTES.
+ * ciphertext - output; must already have len == KYBER768_CIPHERTEXT_BYTES.
+ * secret     - output; must already have len == KYBER_SHARED_SECRET_BYTES.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set to
+ * SEC_ERROR_INVALID_ALGORITHM (bad params), SEC_ERROR_INVALID_ARGS (bad
+ * seed/item lengths or NULL items) or SEC_ERROR_NEED_RANDOM (RNG failed).
+ */
+SECStatus
+Kyber_Encapsulate(KyberParams params, const SECItem *enc_seed, const SECItem *pubkey, SECItem *ciphertext, SECItem *secret)
+{
+    if (!valid_params(params)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (!(valid_enc_seed(params, enc_seed) && valid_pubkey(params, pubkey) && valid_ciphertext(params, ciphertext) && valid_secret(params, secret))) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    SECStatus rv = SECSuccess;
+    uint8_t randbuf[KYBER_ENC_COIN_BYTES];
+    uint8_t *coins;
+    if (enc_seed) {
+        coins = enc_seed->data;
+    } else {
+        if (RNG_GenerateGlobalRandomBytes(randbuf, sizeof randbuf) != SECSuccess) {
+            PORT_Memset(randbuf, 0, sizeof randbuf);
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            return SECFailure;
+        }
+        coins = randbuf;
+    }
+    NSS_CLASSIFY(coins, KYBER_ENC_COIN_BYTES);
+    if (params == params_kyber768_round3 || params == params_kyber768_round3_test_mode) {
+        pqcrystals_kyber768_ref_enc_derand(ciphertext->data, secret->data, pubkey->data, coins);
+    } else {
+        /* unreachable */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+    }
+
+    /* The coins determine the shared secret, so do not leave the locally
+     * generated copy behind on the stack. Best effort: a plain memset can be
+     * elided by an optimizing compiler. A caller-supplied seed is left
+     * untouched. */
+    PORT_Memset(randbuf, 0, sizeof randbuf);
+    return rv;
+}
+
+/* Decapsulate |ciphertext| with |privkey|, writing the shared secret into
+ * |secret|.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ALGORITHM for an unsupported
+ * parameter set, SECFailure with SEC_ERROR_INVALID_ARGS when any item is
+ * NULL or has the wrong length, and SECSuccess otherwise. */
+SECStatus
+Kyber_Decapsulate(KyberParams params, const SECItem *privkey, const SECItem *ciphertext, SECItem *secret)
+{
+    if (!valid_params(params)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (!valid_privkey(params, privkey) ||
+        !valid_ciphertext(params, ciphertext) ||
+        !valid_secret(params, secret)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (params != params_kyber768_round3 && params != params_kyber768_round3_test_mode) {
+        /* unreachable: valid_params() accepted the value above */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    pqcrystals_kyber768_ref_dec(secret->data, ciphertext->data, privkey->data);
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c
new file mode 100644
index 0000000000..14ecfcaa25
--- /dev/null
+++ b/security/nss/lib/freebl/ldvector.c
@@ -0,0 +1,493 @@
+/*
+ * ldvector.c - platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+extern int FREEBL_InitStubs(void);
+#endif
+
+#include "loader.h"
+#include "cmac.h"
+#include "alghmac.h"
+#include "hmacct.h"
+#include "blapii.h"
+#include "secerr.h"
+
+/* Always fails with SEC_ERROR_UNSUPPORTED_KEYALG. In this file it is cast to
+ * the RC2 and SEED function-pointer types to fill their vector slots when
+ * those algorithms are compiled out. */
+SECStatus
+FREEBL_Deprecated(void)
+{
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+    return SECFailure;
+}
+
+static const struct FREEBLVectorStr vector = {
+ /* Positional initializer for the freebl dispatch table returned by
+ * FREEBL_GetVector(). The entries must be listed in the order in which
+ * the members of struct FREEBLVectorStr are declared (the declaration
+ * is not part of this file -- presumably loader.h; confirm). The first
+ * two entries are the size of the table and its version. The
+ * "End of Version" markers suggest that entries are only ever appended
+ * -- TODO confirm before inserting or reordering anything. */
+ sizeof vector,
+ FREEBL_VERSION,
+
+ RSA_NewKey,
+ RSA_PublicKeyOp,
+ RSA_PrivateKeyOp,
+ DSA_NewKey,
+ DSA_SignDigest,
+ DSA_VerifyDigest,
+ DSA_NewKeyFromSeed,
+ DSA_SignDigestWithSeed,
+ DH_GenParam,
+ DH_NewKey,
+ DH_Derive,
+ KEA_Derive,
+ KEA_Verify,
+ RC4_CreateContext,
+ RC4_DestroyContext,
+ RC4_Encrypt,
+ RC4_Decrypt,
+#ifndef NSS_DISABLE_DEPRECATED_RC2
+ RC2_CreateContext,
+ RC2_DestroyContext,
+ RC2_Encrypt,
+ RC2_Decrypt,
+#else
+ (F_RC2_CreateContext)FREEBL_Deprecated,
+ (F_RC2_DestroyContext)FREEBL_Deprecated,
+ (F_RC2_Encrypt)FREEBL_Deprecated,
+ (F_RC2_Decrypt)FREEBL_Deprecated,
+#endif
+ RC5_CreateContext,
+ RC5_DestroyContext,
+ RC5_Encrypt,
+ RC5_Decrypt,
+ DES_CreateContext,
+ DES_DestroyContext,
+ DES_Encrypt,
+ DES_Decrypt,
+ AES_CreateContext,
+ AES_DestroyContext,
+ AES_Encrypt,
+ AES_Decrypt,
+ MD5_Hash,
+ MD5_HashBuf,
+ MD5_NewContext,
+ MD5_DestroyContext,
+ MD5_Begin,
+ MD5_Update,
+ MD5_End,
+ MD5_FlattenSize,
+ MD5_Flatten,
+ MD5_Resurrect,
+ MD5_TraceState,
+ MD2_Hash,
+ MD2_NewContext,
+ MD2_DestroyContext,
+ MD2_Begin,
+ MD2_Update,
+ MD2_End,
+ MD2_FlattenSize,
+ MD2_Flatten,
+ MD2_Resurrect,
+ SHA1_Hash,
+ SHA1_HashBuf,
+ SHA1_NewContext,
+ SHA1_DestroyContext,
+ SHA1_Begin,
+ SHA1_Update,
+ SHA1_End,
+ SHA1_TraceState,
+ SHA1_FlattenSize,
+ SHA1_Flatten,
+ SHA1_Resurrect,
+ RNG_RNGInit,
+ RNG_RandomUpdate,
+ RNG_GenerateGlobalRandomBytes,
+ RNG_RNGShutdown,
+ PQG_ParamGen,
+ PQG_ParamGenSeedLen,
+ PQG_VerifyParams,
+
+ /* End of Version 3.001. */
+
+ RSA_PrivateKeyOpDoubleChecked,
+ RSA_PrivateKeyCheck,
+ BL_Cleanup,
+
+ /* End of Version 3.002. */
+
+ SHA256_NewContext,
+ SHA256_DestroyContext,
+ SHA256_Begin,
+ SHA256_Update,
+ SHA256_End,
+ SHA256_HashBuf,
+ SHA256_Hash,
+ SHA256_TraceState,
+ SHA256_FlattenSize,
+ SHA256_Flatten,
+ SHA256_Resurrect,
+
+ SHA512_NewContext,
+ SHA512_DestroyContext,
+ SHA512_Begin,
+ SHA512_Update,
+ SHA512_End,
+ SHA512_HashBuf,
+ SHA512_Hash,
+ SHA512_TraceState,
+ SHA512_FlattenSize,
+ SHA512_Flatten,
+ SHA512_Resurrect,
+
+ SHA384_NewContext,
+ SHA384_DestroyContext,
+ SHA384_Begin,
+ SHA384_Update,
+ SHA384_End,
+ SHA384_HashBuf,
+ SHA384_Hash,
+ SHA384_TraceState,
+ SHA384_FlattenSize,
+ SHA384_Flatten,
+ SHA384_Resurrect,
+
+ /* End of Version 3.003. */
+
+ AESKeyWrap_CreateContext,
+ AESKeyWrap_DestroyContext,
+ AESKeyWrap_Encrypt,
+ AESKeyWrap_Decrypt,
+
+ /* End of Version 3.004. */
+
+ BLAPI_SHVerify,
+ BLAPI_VerifySelf,
+
+ /* End of Version 3.005. */
+
+ EC_NewKey,
+ EC_NewKeyFromSeed,
+ EC_ValidatePublicKey,
+ ECDH_Derive,
+ ECDSA_SignDigest,
+ ECDSA_VerifyDigest,
+ ECDSA_SignDigestWithSeed,
+
+ /* End of Version 3.006. */
+ /* End of Version 3.007. */
+
+ AES_InitContext,
+ AESKeyWrap_InitContext,
+ DES_InitContext,
+#ifndef NSS_DISABLE_DEPRECATED_RC2
+ RC2_InitContext,
+#else
+ (F_RC2_InitContext)FREEBL_Deprecated,
+#endif
+ RC4_InitContext,
+
+ AES_AllocateContext,
+ AESKeyWrap_AllocateContext,
+ DES_AllocateContext,
+#ifndef NSS_DISABLE_DEPRECATED_RC2
+ RC2_AllocateContext,
+#else
+ (F_RC2_AllocateContext)FREEBL_Deprecated,
+#endif
+ RC4_AllocateContext,
+
+ MD2_Clone,
+ MD5_Clone,
+ SHA1_Clone,
+ SHA256_Clone,
+ SHA384_Clone,
+ SHA512_Clone,
+
+ TLS_PRF,
+ HASH_GetRawHashObject,
+
+ HMAC_Create,
+ HMAC_Init,
+ HMAC_Begin,
+ HMAC_Update,
+ HMAC_Clone,
+ HMAC_Finish,
+ HMAC_Destroy,
+
+ RNG_SystemInfoForRNG,
+
+ /* End of Version 3.008. */
+
+ FIPS186Change_GenerateX,
+ FIPS186Change_ReduceModQForDSA,
+
+ /* End of Version 3.009. */
+ Camellia_InitContext,
+ Camellia_AllocateContext,
+ Camellia_CreateContext,
+ Camellia_DestroyContext,
+ Camellia_Encrypt,
+ Camellia_Decrypt,
+
+ PQG_DestroyParams,
+ PQG_DestroyVerify,
+
+/* End of Version 3.010. */
+
+#ifndef NSS_DISABLE_DEPRECATED_SEED
+ SEED_InitContext,
+ SEED_AllocateContext,
+ SEED_CreateContext,
+ SEED_DestroyContext,
+ SEED_Encrypt,
+ SEED_Decrypt,
+#else
+ (F_SEED_InitContext)FREEBL_Deprecated,
+ (F_SEED_AllocateContext)FREEBL_Deprecated,
+ (F_SEED_CreateContext)FREEBL_Deprecated,
+ (F_SEED_DestroyContext)FREEBL_Deprecated,
+ (F_SEED_Encrypt)FREEBL_Deprecated,
+ (F_SEED_Decrypt)FREEBL_Deprecated,
+#endif /* NSS_DISABLE_DEPRECATED_SEED */
+
+ BL_Init,
+ BL_SetForkState,
+
+ PRNGTEST_Instantiate,
+ PRNGTEST_Reseed,
+ PRNGTEST_Generate,
+
+ PRNGTEST_Uninstantiate,
+
+ /* End of Version 3.011. */
+
+ RSA_PopulatePrivateKey,
+
+ DSA_NewRandom,
+
+ JPAKE_Sign,
+ JPAKE_Verify,
+ JPAKE_Round2,
+ JPAKE_Final,
+
+ /* End of Version 3.012 */
+
+ TLS_P_hash,
+ SHA224_NewContext,
+ SHA224_DestroyContext,
+ SHA224_Begin,
+ SHA224_Update,
+ SHA224_End,
+ SHA224_HashBuf,
+ SHA224_Hash,
+ SHA224_TraceState,
+ SHA224_FlattenSize,
+ SHA224_Flatten,
+ SHA224_Resurrect,
+ SHA224_Clone,
+ BLAPI_SHVerifyFile,
+
+ /* End of Version 3.013 */
+
+ PQG_ParamGenV2,
+ PRNGTEST_RunHealthTests,
+
+ /* End of Version 3.014 */
+
+ HMAC_ConstantTime,
+ SSLv3_MAC_ConstantTime,
+
+ /* End of Version 3.015 */
+
+ RSA_SignRaw,
+ RSA_CheckSignRaw,
+ RSA_CheckSignRecoverRaw,
+ RSA_EncryptRaw,
+ RSA_DecryptRaw,
+ RSA_EncryptOAEP,
+ RSA_DecryptOAEP,
+ RSA_EncryptBlock,
+ RSA_DecryptBlock,
+ RSA_SignPSS,
+ RSA_CheckSignPSS,
+ RSA_Sign,
+ RSA_CheckSign,
+ RSA_CheckSignRecover,
+
+ /* End of Version 3.016 */
+
+ EC_FillParams,
+ EC_DecodeParams,
+ EC_CopyParams,
+
+ /* End of Version 3.017 */
+
+ ChaCha20Poly1305_InitContext,
+ ChaCha20Poly1305_CreateContext,
+ ChaCha20Poly1305_DestroyContext,
+ ChaCha20Poly1305_Seal,
+ ChaCha20Poly1305_Open,
+
+ /* End of Version 3.018 */
+
+ EC_GetPointSize,
+
+ /* End of Version 3.019 */
+
+ BLAKE2B_Hash,
+ BLAKE2B_HashBuf,
+ BLAKE2B_MAC_HashBuf,
+ BLAKE2B_NewContext,
+ BLAKE2B_DestroyContext,
+ BLAKE2B_Begin,
+ BLAKE2B_MAC_Begin,
+ BLAKE2B_Update,
+ BLAKE2B_End,
+ BLAKE2B_FlattenSize,
+ BLAKE2B_Flatten,
+ BLAKE2B_Resurrect,
+
+ /* End of Version 3.020 */
+
+ ChaCha20_Xor,
+
+ /* End of version 3.021 */
+
+ CMAC_Init,
+ CMAC_Create,
+ CMAC_Begin,
+ CMAC_Update,
+ CMAC_Finish,
+ CMAC_Destroy,
+
+ /* End of version 3.022 */
+ ChaCha20Poly1305_Encrypt,
+ ChaCha20Poly1305_Decrypt,
+ AES_AEAD,
+ AESKeyWrap_EncryptKWP,
+ AESKeyWrap_DecryptKWP,
+
+ /* End of version 3.023 */
+ KEA_PrimeCheck,
+
+ /* End of version 3.024 */
+ ChaCha20_InitContext,
+ ChaCha20_CreateContext,
+ ChaCha20_DestroyContext,
+
+ /* End of version 3.025 */
+
+ SHA3_224_NewContext,
+ SHA3_224_DestroyContext,
+ SHA3_224_FlattenSize,
+ SHA3_224_Begin,
+ SHA3_224_Update,
+ SHA3_224_End,
+ SHA3_224_HashBuf,
+ SHA3_224_Hash,
+
+ SHA3_256_NewContext,
+ SHA3_256_DestroyContext,
+ SHA3_256_FlattenSize,
+ SHA3_256_Begin,
+ SHA3_256_Update,
+ SHA3_256_End,
+ SHA3_256_HashBuf,
+ SHA3_256_Hash,
+
+ SHA3_384_NewContext,
+ SHA3_384_DestroyContext,
+ SHA3_384_FlattenSize,
+ SHA3_384_Begin,
+ SHA3_384_Update,
+ SHA3_384_End,
+ SHA3_384_HashBuf,
+ SHA3_384_Hash,
+
+ SHA3_512_NewContext,
+ SHA3_512_DestroyContext,
+ SHA3_512_FlattenSize,
+ SHA3_512_Begin,
+ SHA3_512_Update,
+ SHA3_512_End,
+ SHA3_512_HashBuf,
+ SHA3_512_Hash,
+
+ SHAKE_128_NewContext,
+ SHAKE_128_DestroyContext,
+ SHAKE_128_Begin,
+ SHAKE_128_Absorb,
+ SHAKE_128_SqueezeEnd,
+ SHAKE_128_HashBuf,
+ SHAKE_128_Hash,
+
+ SHAKE_256_NewContext,
+ SHAKE_256_DestroyContext,
+ SHAKE_256_Begin,
+ SHAKE_256_Absorb,
+ SHAKE_256_SqueezeEnd,
+ SHAKE_256_HashBuf,
+ SHAKE_256_Hash,
+
+ /* End of version 3.026 */
+
+ Kyber_NewKey,
+ Kyber_Encapsulate,
+ Kyber_Decapsulate,
+
+ /* End of version 3.027 */
+};
+
+const FREEBLVector*
+FREEBL_GetVector(void)
+{
+#ifdef FREEBL_NO_DEPEND
+ SECStatus rv;
+#endif
+ /* NOTE(review): verref.h is textually included inside this function body
+ * after NSS_VERSION_VARIABLE is defined; presumably it emits a reference
+ * to the named version symbol -- confirm against verref.h. */
+#define NSS_VERSION_VARIABLE __nss_freebl_version
+#include "verref.h"
+
+#ifdef FREEBL_NO_DEPEND
+ /* this entry point is only valid if nspr and nss-util has been loaded.
+ * If stub initialization fails, no vector is handed out (NULL). */
+ rv = FREEBL_InitStubs();
+ if (rv != SECSuccess) {
+ return NULL;
+ }
+#endif
+
+#ifndef NSS_FIPS_DISABLED
+ /* In FIPS mode make sure the Full self tests have been run before
+ * continuing. Any result of this call is not examined here. */
+ BL_POSTRan(PR_FALSE);
+#endif
+ /* Hand out the address of the static dispatch table defined above. */
+ return &vector;
+}
+
+#ifdef FREEBL_LOWHASH
+static const struct NSSLOWVectorStr nssvector = { /* positional initializer; only compiled when FREEBL_LOWHASH is defined */
+ sizeof nssvector, /* size of this table */
+ NSSLOW_VERSION,
+ FREEBL_GetVector, /* accessor for the full freebl table */
+ NSSLOW_Init,
+ NSSLOW_Shutdown,
+ NSSLOW_Reset,
+ NSSLOWHASH_NewContext,
+ NSSLOWHASH_Begin,
+ NSSLOWHASH_Update,
+ NSSLOWHASH_End,
+ NSSLOWHASH_Destroy,
+ NSSLOWHASH_Length
+};
+
+const NSSLOWVector*
+NSSLOW_GetVector(void)
+{
+ /* Returns the address of the static nssvector table; nothing is
+ * initialized here. POST check and stub init happens in
+ * FREEBL_GetVector() and NSSLOW_Init() respectively */
+ return &nssvector;
+}
+#endif
diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c
new file mode 100644
index 0000000000..473169264c
--- /dev/null
+++ b/security/nss/lib/freebl/loader.c
@@ -0,0 +1,2856 @@
+/*
+ * loader.c - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+
+/* File name of the softoken shared library, assembled from build macros.
+ * NOTE(review): not referenced in the visible part of this file; presumably
+ * consumed by the included genload.c -- confirm. */
+static const char *NameOfThisSharedLib =
+ SHLIB_PREFIX "softokn" SOFTOKEN_SHLIB_VERSION "." SHLIB_SUFFIX;
+
+/* Handle of the loaded freebl DSO; set by freebl_LoadDSO on success and
+ * cleared by BL_Unload. */
+static PRLibrary *blLib = NULL;
+
+/* Low and high byte of a 16-bit version number, used by freebl_LoadDSO to
+ * compare the DSO's version with FREEBL_VERSION. */
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+/* Dispatch table obtained from the DSO; NULL until a load succeeds and
+ * again after BL_Unload.  Every stub below calls through it. */
+static const FREEBLVector *vector;
+/* Name of the library that was loaded; passed to p_BLAPI_VerifySelf. */
+static const char *libraryName = NULL;
+
+#include "genload.c"
+
+/*
+ * Load the freebl DSO and validate the vector it exports.
+ *
+ * This function must be run only once; callers reach it through
+ * freebl_RunLoaderOnce().  The library name comes from getLibName().
+ *
+ * The DSO is accepted when it exports FREEBL_GetVector, that function
+ * returns a non-NULL vector, the major version byte equals ours, the minor
+ * version byte is at least ours, and the vector is at least as long as the
+ * FREEBLVector this file was compiled against.
+ *
+ * Returns PR_SUCCESS after recording vector, libraryName and blLib.
+ * Returns PR_FAILURE otherwise; if the library had been opened it is
+ * unloaded again so that the handle is not leaked.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        PR_SetError(PR_LOAD_LIBRARY_ERROR, 0);
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        PRFuncPtr address = PR_FindFunctionSymbol(handle, "FREEBL_GetVector");
+        if (address) {
+            FREEBLGetVectorFn *getVector = (FREEBLGetVectorFn *)address;
+            const FREEBLVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = FREEBL_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) &&
+                    LSB(dsoVersion) >= LSB(myVersion) &&
+                    dsoVector->length >= sizeof(FREEBLVector)) {
+                    vector = dsoVector;
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        /* The DSO was opened but is unusable.  Unload the handle we just
+         * obtained; blLib is only assigned on the success path above, so
+         * testing blLib here would never release the library. */
+#ifdef DEBUG
+        {
+            PRStatus status = PR_UnloadLibrary(handle);
+            PORT_Assert(PR_SUCCESS == status);
+        }
+#else
+        PR_UnloadLibrary(handle);
+#endif
+    }
+    return PR_FAILURE;
+}
+
+/* Never written: BL_Unload copies it over loadFreeBLOnce to re-arm the
+ * once-control. */
+static const PRCallOnceType pristineCallOnce;
+/* Once-control guarding freebl_LoadDSO. */
+static PRCallOnceType loadFreeBLOnce;
+
+/* Run freebl_LoadDSO through PR_CallOnce and report its status. */
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    return PR_CallOnce(&loadFreeBLOnce, &freebl_LoadDSO);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_BL_Init.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+BL_Init(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_BL_Init();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RSA_NewKey.
+ * Returns NULL if the library cannot be loaded. */
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RSA_NewKey(keySizeInBits, publicExponent);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RSA_PublicKeyOp.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+                unsigned char *output,
+                const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PublicKeyOp(key, output, input);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RSA_PrivateKeyOp.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyOp(key, output, input);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_RSA_PrivateKeyOpDoubleChecked.  Returns SECFailure if the library
+ * cannot be loaded. */
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                              unsigned char *output,
+                              const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyOpDoubleChecked(key, output, input);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RSA_PrivateKeyCheck.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyCheck(key);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DSA_NewKey.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewKey(params, privKey);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DSA_SignDigest.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_SignDigest(key, signature, digest);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DSA_VerifyDigest.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_VerifyDigest(key, signature, digest);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DSA_NewKeyFromSeed.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewKeyFromSeed(params, seed, privKey);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_DSA_SignDigestWithSeed.  Returns SECFailure if the library cannot be
+ * loaded. */
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key, SECItem *signature,
+                       const SECItem *digest, const unsigned char *seed)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_SignDigestWithSeed(key, signature, digest, seed);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DSA_NewRandom.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewRandom(arena, q, seed);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DH_GenParam.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_GenParam(primeLen, params);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DH_NewKey.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_NewKey(params, privKey);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DH_Derive.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DH_Derive(SECItem *publicValue, SECItem *prime, SECItem *privateValue,
+          SECItem *derivedSecret, unsigned int maxOutBytes)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_Derive(publicValue, prime, privateValue,
+                               derivedSecret, maxOutBytes);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_KEA_Derive.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+KEA_Derive(SECItem *prime, SECItem *public1, SECItem *public2,
+           SECItem *private1, SECItem *private2, SECItem *derivedSecret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_KEA_Derive(prime, public1, public2,
+                                private1, private2, derivedSecret);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_KEA_Verify.
+ * Returns PR_FALSE if the library cannot be loaded. */
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return vector->p_KEA_Verify(Y, prime, subPrime);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_KEA_PrimeCheck.
+ * Returns PR_FALSE if the library cannot be loaded. */
+PRBool
+KEA_PrimeCheck(SECItem *prime)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return vector->p_KEA_PrimeCheck(prime);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC4_CreateContext.
+ * Returns NULL if the library cannot be loaded. */
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC4_CreateContext(key, len);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC4_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RC4_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC4_Encrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC4_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC4_Decrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC4_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC2_CreateContext.
+ * Returns NULL if the library cannot be loaded.  When
+ * NSS_DISABLE_DEPRECATED_RC2 is defined the call is not forwarded and NULL
+ * is returned (the load is still attempted first). */
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned effectiveKeyLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_RC2
+    return NULL;
+#else
+    return vector->p_RC2_CreateContext(key, len, iv, mode, effectiveKeyLen);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC2_DestroyContext.
+ * Does nothing if the library cannot be loaded, or when
+ * NSS_DISABLE_DEPRECATED_RC2 is defined (the load is still attempted). */
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_RC2
+    return;
+#else
+    vector->p_RC2_DestroyContext(cx, freeit);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC2_Encrypt.
+ * Returns SECFailure if the library cannot be loaded, or unconditionally
+ * when NSS_DISABLE_DEPRECATED_RC2 is defined. */
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_RC2
+    return SECFailure;
+#else
+    return vector->p_RC2_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC2_Decrypt.
+ * Returns SECFailure if the library cannot be loaded, or unconditionally
+ * when NSS_DISABLE_DEPRECATED_RC2 is defined. */
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_RC2
+    return SECFailure;
+#else
+    return vector->p_RC2_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC5_CreateContext.
+ * Returns NULL if the library cannot be loaded. */
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                  unsigned int wordSize, const unsigned char *iv, int mode)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC5_CreateContext(key, rounds, wordSize, iv, mode);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC5_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RC5_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC5_Encrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC5_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RC5_Decrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC5_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DES_CreateContext.
+ * Returns NULL if the library cannot be loaded. */
+DESContext *
+DES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, PRBool encrypt)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_DES_CreateContext(key, iv, mode, encrypt);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DES_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_DES_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DES_Encrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DES_Encrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_DES_Decrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+DES_Decrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+/* Stub: loads freebl on first use, then forwards to p_SEED_CreateContext.
+ * Returns NULL if the library cannot be loaded, or unconditionally when
+ * NSS_DISABLE_DEPRECATED_SEED is defined. */
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_SEED
+    return NULL;
+#else
+    return vector->p_SEED_CreateContext(key, iv, mode, encrypt);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SEED_DestroyContext.
+ * Does nothing if the library cannot be loaded, or when
+ * NSS_DISABLE_DEPRECATED_SEED is defined (the load is still attempted). */
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_SEED
+    return;
+#else
+    vector->p_SEED_DestroyContext(cx, freeit);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SEED_Encrypt.
+ * Returns SECFailure if the library cannot be loaded, or unconditionally
+ * when NSS_DISABLE_DEPRECATED_SEED is defined. */
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_SEED
+    return SECFailure;
+#else
+    return vector->p_SEED_Encrypt(cx, output, outputLen, maxOutputLen,
+                                  input, inputLen);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SEED_Decrypt.
+ * Returns SECFailure if the library cannot be loaded, or unconditionally
+ * when NSS_DISABLE_DEPRECATED_SEED is defined. */
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+#ifdef NSS_DISABLE_DEPRECATED_SEED
+    return SECFailure;
+#else
+    return vector->p_SEED_Decrypt(cx, output, outputLen, maxOutputLen,
+                                  input, inputLen);
+#endif
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AES_CreateContext.
+ * Returns NULL if the library cannot be loaded. */
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keylen, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AES_CreateContext(key, iv, mode, encrypt,
+                                       keylen, blocklen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AES_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_AES_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AES_Encrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AES_Decrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AES_AEAD.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+AES_AEAD(AESContext *cx, unsigned char *output,
+         unsigned int *outputLen, unsigned int maxOutputLen,
+         const unsigned char *input, unsigned int inputLen,
+         void *params, unsigned int paramsLen,
+         const unsigned char *aad, unsigned int aadLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_AEAD(cx, output, outputLen, maxOutputLen,
+                              input, inputLen, params, paramsLen,
+                              aad, aadLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_HashBuf.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_HashBuf(dest, src, src_length);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+MD5Context *
+MD5_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD5_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+MD5_Begin(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_End.
+ * Does nothing if the library cannot be loaded. */
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_MD5_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD5_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD5_TraceState.
+ * Does nothing if the library cannot be loaded. */
+void
+MD5_TraceState(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_TraceState(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD2_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+MD2Context *
+MD2_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD2_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+MD2_Begin(MD2Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_End.
+ * Does nothing if the library cannot be loaded. */
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_MD2_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD2_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_MD2_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD2_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_HashBuf.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_HashBuf(dest, src, src_length);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+SHA1Context *
+SHA1_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA1_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_DestroyContext.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA1_Begin(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA1_Update(SHA1Context *cx, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_End.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA1_End(SHA1Context *cx, unsigned char *digest,
+         unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_TraceState.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA1_TraceState(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_TraceState(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA1_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA1_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA1_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RNG_RNGInit.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RNG_RNGInit(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_RNGInit();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RNG_RandomUpdate.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_RandomUpdate(data, bytes);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_RNG_GenerateGlobalRandomBytes.  Returns SECFailure if the library
+ * cannot be loaded. */
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_GenerateGlobalRandomBytes(dest, len);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_RNG_RNGShutdown.
+ * Does nothing if the library cannot be loaded. */
+void
+RNG_RNGShutdown(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RNG_RNGShutdown();
+}
+
+/* Stub: loads freebl on first use, then forwards to p_PQG_ParamGen.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_ParamGen(j, pParams, pVfy);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_PQG_ParamGenSeedLen.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_ParamGenSeedLen(j, seedBytes, pParams, pVfy);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_PQG_VerifyParams.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+PQG_VerifyParams(const PQGParams *params, const PQGVerify *vfy,
+                 SECStatus *result)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_VerifyParams(params, vfy, result);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_PQG_DestroyParams.
+ * Does nothing if the library cannot be loaded. */
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_PQG_DestroyParams(params);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_PQG_DestroyVerify.
+ * Does nothing if the library cannot be loaded. */
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_PQG_DestroyVerify(vfy);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_BL_Cleanup.
+ * Does nothing if the library cannot be loaded. */
+void
+BL_Cleanup(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_BL_Cleanup();
+}
+
+/*
+ * Drop the dispatch vector, unload the freebl DSO (unless the
+ * NSS_DISABLE_UNLOAD environment variable is set) and re-arm the
+ * once-control so that a later call can load the library again.
+ */
+void
+BL_Unload(void)
+{
+    /* This function is not thread-safe, but doesn't need to be, because it is
+     * only called from functions that are also defined as not thread-safe,
+     * namely C_Finalize in softoken, and the SSL bypass shutdown callback called
+     * from NSS_Shutdown. */
+    const char *keepLoaded;
+
+    vector = NULL;
+    keepLoaded = PR_GetEnvSecure("NSS_DISABLE_UNLOAD");
+    if (blLib != NULL && keepLoaded == NULL) {
+#ifdef DEBUG
+        PRStatus status = PR_UnloadLibrary(blLib);
+        PORT_Assert(PR_SUCCESS == status);
+#else
+        PR_UnloadLibrary(blLib);
+#endif
+    }
+    /* The handle is forgotten even when the unload was skipped. */
+    blLib = NULL;
+    loadFreeBLOnce = pristineCallOnce;
+}
+
+/* ============== New for 3.003 =============================== */
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_HashBuf.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_HashBuf(dest, src, src_length);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+SHA256Context *
+SHA256_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA256_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_SHA256_DestroyContext.  Does nothing if the library cannot be loaded. */
+void
+SHA256_DestroyContext(SHA256Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA256_Begin(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA256_Update(SHA256Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_End.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA256_End(SHA256Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_TraceState.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA256_TraceState(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_TraceState(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+SHA256_FlattenSize(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA256_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA256_Flatten(SHA256Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA256_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA256_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_HashBuf.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_HashBuf(dest, src, src_length);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+SHA512Context *
+SHA512_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA512_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_SHA512_DestroyContext.  Does nothing if the library cannot be loaded. */
+void
+SHA512_DestroyContext(SHA512Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA512_Begin(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA512_Update(SHA512Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_End.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA512_End(SHA512Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_TraceState.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA512_TraceState(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_TraceState(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+SHA512_FlattenSize(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA512_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA512_Flatten(SHA512Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA512_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA512_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_Hash.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_Hash(dest, src);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_HashBuf.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_HashBuf(dest, src, src_length);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_NewContext.
+ * Returns NULL if the library cannot be loaded. */
+SHA384Context *
+SHA384_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA384_NewContext();
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_SHA384_DestroyContext.  Does nothing if the library cannot be loaded. */
+void
+SHA384_DestroyContext(SHA384Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_DestroyContext(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_Begin.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA384_Begin(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_Begin(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_Update.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA384_Update(SHA384Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_Update(cx, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_End.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA384_End(SHA384Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_TraceState.
+ * Does nothing if the library cannot be loaded. */
+void
+SHA384_TraceState(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_TraceState(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_FlattenSize.
+ * Returns 0 if the library cannot be loaded. */
+unsigned int
+SHA384_FlattenSize(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA384_FlattenSize(cx);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_Flatten.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+SHA384_Flatten(SHA384Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_Flatten(cx, space);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_SHA384_Resurrect.
+ * Returns NULL if the library cannot be loaded. */
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA384_Resurrect(space, arg);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_AESKeyWrap_CreateContext.  Returns NULL if the library cannot be
+ * loaded. */
+AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (*vector->p_AESKeyWrap_CreateContext)(key, iv, encrypt, keylen);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_AESKeyWrap_DestroyContext.  Does nothing if the library cannot be
+ * loaded. */
+void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    (*vector->p_AESKeyWrap_DestroyContext)(cx, freeit);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AESKeyWrap_Encrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (*vector->p_AESKeyWrap_Encrypt)(cx, output, outputLen,
+                                           maxOutputLen, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_AESKeyWrap_Decrypt.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (*vector->p_AESKeyWrap_Decrypt)(cx, output, outputLen,
+                                           maxOutputLen, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_AESKeyWrap_EncryptKWP.  Returns SECFailure if the library cannot be
+ * loaded. */
+SECStatus
+AESKeyWrap_EncryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (*vector->p_AESKeyWrap_EncryptKWP)(cx, output, outputLen,
+                                              maxOutputLen, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to
+ * p_AESKeyWrap_DecryptKWP.  Returns SECFailure if the library cannot be
+ * loaded. */
+SECStatus
+AESKeyWrap_DecryptKWP(AESKeyWrapContext *cx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (*vector->p_AESKeyWrap_DecryptKWP)(cx, output, outputLen,
+                                              maxOutputLen, input, inputLen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_BLAPI_SHVerify.
+ * Returns PR_FALSE if the library cannot be loaded. */
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return (*vector->p_BLAPI_SHVerify)(name, addr);
+}
+
+/*
+ * Callers are expected to pass NULL for name (asserted below); the argument
+ * is not forwarded.  The name handed to p_BLAPI_VerifySelf is libraryName,
+ * the static recorded by freebl_LoadDSO for the shared library that was
+ * actually loaded.  Returns PR_FALSE if the library cannot be loaded.
+ */
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    PORT_Assert(!name);
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return (*vector->p_BLAPI_VerifySelf)(libraryName);
+}
+
+/* ============== New for 3.006 =============================== */
+
+/* Stub: loads freebl on first use, then forwards to p_EC_NewKey.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+EC_NewKey(ECParams *params, ECPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_NewKey(params, privKey);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_EC_NewKeyFromSeed.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+EC_NewKeyFromSeed(ECParams *params, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_NewKeyFromSeed(params, privKey, seed, seedlen);
+}
+
+/* Stub: loads freebl on first use, then forwards to p_EC_ValidatePublicKey.
+ * Returns SECFailure if the library cannot be loaded. */
+SECStatus
+EC_ValidatePublicKey(ECParams *params, SECItem *publicValue)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_ValidatePublicKey(params, publicValue);
+}
+
+/*
+ * Loader stub for ECDH_Derive.  Fails with SECFailure when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise forwards the arguments, in order, to p_ECDH_Derive.
+ */
+SECStatus
+ECDH_Derive(SECItem *publicValue, ECParams *params, SECItem *privateValue,
+            PRBool withCofactor, SECItem *derivedSecret)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDH_Derive(publicValue, params, privateValue,
+                                 withCofactor, derivedSecret);
+}
+
+/*
+ * Loader stub for ECDSA_SignDigest.  Fails with SECFailure when the
+ * `vector` table is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS; otherwise returns the status of p_ECDSA_SignDigest.
+ */
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature,
+                 const SECItem *digest)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_SignDigest(key, signature, digest);
+}
+
+/*
+ * Loader stub for ECDSA_VerifyDigest.  Fails with SECFailure when the
+ * `vector` table is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS; otherwise returns the status of p_ECDSA_VerifyDigest.
+ */
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_VerifyDigest(key, signature, digest);
+}
+
+/*
+ * Loader stub for ECDSA_SignDigestWithSeed.  Fails with SECFailure when the
+ * `vector` table is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS; otherwise forwards all arguments to
+ * p_ECDSA_SignDigestWithSeed.
+ */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *seed,
+                         const int seedlen)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_SignDigestWithSeed(key, signature, digest,
+                                              seed, seedlen);
+}
+
+/* ============== New for 3.008 =============================== */
+
+/*
+ * Loader stub for AES_AllocateContext.  Returns NULL when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise returns whatever p_AES_AllocateContext returns.
+ */
+AESContext *
+AES_AllocateContext(void)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AES_AllocateContext();
+}
+
+/*
+ * Loader stub for AESKeyWrap_AllocateContext.  Returns NULL when the
+ * `vector` table is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS; otherwise returns the result of
+ * p_AESKeyWrap_AllocateContext.
+ */
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AESKeyWrap_AllocateContext();
+}
+
+/*
+ * Loader stub for DES_AllocateContext.  Returns NULL when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise returns the result of p_DES_AllocateContext.
+ */
+DESContext *
+DES_AllocateContext(void)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_DES_AllocateContext();
+}
+
+/*
+ * Loader stub for RC2_AllocateContext.
+ *
+ * Returns NULL if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.  That check runs regardless of build options; only the
+ * forwarding call below is conditional on NSS_DISABLE_DEPRECATED_RC2.
+ */
+RC2Context *
+RC2_AllocateContext(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+#ifndef NSS_DISABLE_DEPRECATED_RC2
+ return (vector->p_RC2_AllocateContext)();
+#else
+ /* NSS_DISABLE_DEPRECATED_RC2 is defined: never forward, always NULL. */
+ return NULL;
+#endif
+}
+
+/*
+ * Loader stub for RC4_AllocateContext.  Returns NULL when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise returns the result of p_RC4_AllocateContext.
+ */
+RC4Context *
+RC4_AllocateContext(void)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC4_AllocateContext();
+}
+
+/*
+ * Loader stub for AES_InitContext.  Fails with SECFailure when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise forwards all seven arguments, in order, to p_AES_InitContext.
+ */
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int blocklen)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_InitContext(cx, key, keylen, iv, mode, encrypt,
+                                     blocklen);
+}
+
+/*
+ * Loader stub for AESKeyWrap_InitContext.  Fails with SECFailure when the
+ * `vector` table is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS; otherwise forwards all seven arguments, in order, to
+ * p_AESKeyWrap_InitContext.
+ */
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx, const unsigned char *key,
+                       unsigned int keylen, const unsigned char *iv, int mode,
+                       unsigned int encrypt, unsigned int blocklen)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AESKeyWrap_InitContext(cx, key, keylen, iv, mode,
+                                            encrypt, blocklen);
+}
+
+/*
+ * Loader stub for DES_InitContext.  Fails with SECFailure when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise forwards all seven arguments, in order, to p_DES_InitContext.
+ */
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int xtra)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DES_InitContext(cx, key, keylen, iv, mode, encrypt,
+                                     xtra);
+}
+
+/*
+ * Loader stub for SEED_InitContext.
+ *
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce() does
+ * not return PR_SUCCESS.  Otherwise, unless NSS_DISABLE_DEPRECATED_SEED is
+ * defined, forwards all seven arguments to p_SEED_InitContext and returns
+ * its status.
+ */
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+ unsigned int keylen, const unsigned char *iv, int mode,
+ unsigned int encrypt, unsigned int xtra)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+#ifndef NSS_DISABLE_DEPRECATED_SEED
+ return (vector->p_SEED_InitContext)(cx, key, keylen, iv, mode, encrypt, xtra);
+#else
+ /* NSS_DISABLE_DEPRECATED_SEED is defined: never forward, always fail. */
+ return SECFailure;
+#endif
+}
+
+/*
+ * Loader stub for RC2_InitContext.
+ *
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce() does
+ * not return PR_SUCCESS.  Otherwise, unless NSS_DISABLE_DEPRECATED_RC2 is
+ * defined, forwards all seven arguments to p_RC2_InitContext and returns
+ * its status.
+ */
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key,
+ unsigned int keylen, const unsigned char *iv, int mode,
+ unsigned int effectiveKeyLen, unsigned int xtra)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+#ifndef NSS_DISABLE_DEPRECATED_RC2
+ return (vector->p_RC2_InitContext)(cx, key, keylen, iv, mode,
+ effectiveKeyLen, xtra);
+#else
+ /* NSS_DISABLE_DEPRECATED_RC2 is defined: never forward, always fail. */
+ return SECFailure;
+#endif
+}
+
+/*
+ * Loader stub for RC4_InitContext.  Fails with SECFailure when the `vector`
+ * table is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS;
+ * otherwise forwards cx, key, keylen and x1..x4 to p_RC4_InitContext.
+ */
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *x1, int x2,
+                unsigned int x3, unsigned int x4)
+{
+    if (!vector && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC4_InitContext(cx, key, keylen, x1, x2, x3, x4);
+}
+
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_MD2_Clone)(dest, src);
+}
+
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_MD5_Clone)(dest, src);
+}
+
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA1_Clone)(dest, src);
+}
+
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA256_Clone)(dest, src);
+}
+
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA384_Clone)(dest, src);
+}
+
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA512_Clone)(dest, src);
+}
+
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_TLS_PRF)(secret, label, seed, result, isFIPS);
+}
+
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_HASH_GetRawHashObject)(hashType);
+}
+
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_HMAC_Destroy)(cx, freeit);
+}
+
+HMACContext *
+HMAC_Create(const SECHashObject *hashObj, const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_HMAC_Create)(hashObj, secret, secret_len, isFIPS);
+}
+
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hashObj,
+ const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_HMAC_Init)(cx, hashObj, secret, secret_len, isFIPS);
+}
+
+void
+HMAC_Begin(HMACContext *cx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_HMAC_Begin)(cx);
+}
+
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_HMAC_Update)(cx, data, data_len);
+}
+
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+ unsigned int max_result_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_HMAC_Finish)(cx, result, result_len, max_result_len);
+}
+
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_HMAC_Clone)(cx);
+}
+
+void
+RNG_SystemInfoForRNG(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_RNG_SystemInfoForRNG)();
+}
+
+SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY, const unsigned char *XSEEDj,
+ unsigned char *x_j)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_FIPS186Change_GenerateX)(XKEY, XSEEDj, x_j);
+}
+
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+ const unsigned char *q,
+ unsigned char *xj)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_FIPS186Change_ReduceModQForDSA)(w, q, xj);
+}
+
+/* === new for Camellia === */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+ unsigned int keylen, const unsigned char *iv, int mode,
+ unsigned int encrypt, unsigned int unused)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_Camellia_InitContext)(cx, key, keylen, iv, mode, encrypt,
+ unused);
+}
+
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_Camellia_AllocateContext)();
+}
+
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, int encrypt,
+ unsigned int keylen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_Camellia_CreateContext)(key, iv, mode, encrypt, keylen);
+}
+
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_Camellia_DestroyContext)(cx, freeit);
+}
+
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_Camellia_Encrypt)(cx, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_Camellia_Decrypt)(cx, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+void
+BL_SetForkState(PRBool forked)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_BL_SetForkState)(forked);
+}
+
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_PRNGTEST_Instantiate)(entropy, entropy_len,
+ nonce, nonce_len,
+ personal_string, ps_len);
+}
+
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional, unsigned int additional_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_PRNGTEST_Reseed)(entropy, entropy_len,
+ additional, additional_len);
+}
+
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+ const PRUint8 *additional, unsigned int additional_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_PRNGTEST_Generate)(bytes, bytes_len,
+ additional, additional_len);
+}
+
+/*
+ * Loader stub for PRNGTEST_Uninstantiate.
+ *
+ * Takes no arguments.  The parameter list is spelled (void) rather than ()
+ * so that the definition is a real prototype, matching the other
+ * zero-argument stubs in this file.
+ *
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce() does
+ * not return PR_SUCCESS; otherwise returns the status of
+ * p_PRNGTEST_Uninstantiate.
+ */
+SECStatus
+PRNGTEST_Uninstantiate(void)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return SECFailure;
+    return (vector->p_PRNGTEST_Uninstantiate)();
+}
+
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_PopulatePrivateKey)(key);
+}
+
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *x,
+ const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_JPAKE_Sign)(arena, pqg, hashType, signerID, x,
+ testRandom, gxIn, gxOut, gv, r);
+}
+
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *peerID, const SECItem *gx,
+ const SECItem *gv, const SECItem *r)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_JPAKE_Verify)(arena, pqg, hashType, signerID, peerID,
+ gx, gv, r);
+}
+
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+ SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_JPAKE_Round2)(arena, p, q, gx1, gx3, gx4, base, x2, s, x2s);
+}
+
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+ const SECItem *B, SECItem *K)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_JPAKE_Final)(arena, p, q, x2, gx4, x2s, B, K);
+}
+
+SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_TLS_P_hash)(hashAlg, secret, label, seed, result, isFIPS);
+}
+
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_SHA224_Hash)(dest, src);
+}
+
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_SHA224_HashBuf)(dest, src, src_length);
+}
+
+SHA224Context *
+SHA224_NewContext(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_SHA224_NewContext)();
+}
+
+void
+SHA224_DestroyContext(SHA224Context *cx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_DestroyContext)(cx, freeit);
+}
+
+/*
+ * Loader stub for SHA224_Begin: forwards `cx` to p_SHA224_Begin, or returns
+ * without doing anything if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS (the caller gets no failure indication).
+ *
+ * NOTE(review): the parameter is declared SHA256Context * while the other
+ * SHA224_* stubs in this file take SHA224Context *.  Presumably the two
+ * names alias the same type -- confirm against the header and consider
+ * spelling it SHA224Context * for consistency.
+ */
+void
+SHA224_Begin(SHA256Context *cx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_Begin)(cx);
+}
+
+void
+SHA224_Update(SHA224Context *cx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_Update)(cx, input, inputLen);
+}
+
+void
+SHA224_End(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_End)(cx, digest, digestLen, maxDigestLen);
+}
+
+void
+SHA224_TraceState(SHA224Context *cx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_TraceState)(cx);
+}
+
+unsigned int
+SHA224_FlattenSize(SHA224Context *cx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return 0;
+ return (vector->p_SHA224_FlattenSize)(cx);
+}
+
+SECStatus
+SHA224_Flatten(SHA224Context *cx, unsigned char *space)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_SHA224_Flatten)(cx, space);
+}
+
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_SHA224_Resurrect)(space, arg);
+}
+
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_SHA224_Clone)(dest, src);
+}
+
+PRBool
+BLAPI_SHVerifyFile(const char *name)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return PR_FALSE;
+ return vector->p_BLAPI_SHVerifyFile(name);
+}
+
+/* === new for DSA-2 === */
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+ PQGParams **pParams, PQGVerify **pVfy)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_PQG_ParamGenV2)(L, N, seedBytes, pParams, pVfy);
+}
+
+SECStatus
+PRNGTEST_RunHealthTests(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return vector->p_PRNGTEST_RunHealthTests();
+}
+
+SECStatus
+SSLv3_MAC_ConstantTime(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_SSLv3_MAC_ConstantTime)(
+ result, resultLen, maxResultLen,
+ hashObj,
+ secret, secretLen,
+ header, headerLen,
+ body, bodyLen, bodyTotalLen);
+}
+
+SECStatus
+HMAC_ConstantTime(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_HMAC_ConstantTime)(
+ result, resultLen, maxResultLen,
+ hashObj,
+ secret, secretLen,
+ header, headerLen,
+ body, bodyLen, bodyTotalLen);
+}
+
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_SignRaw)(key, output, outputLen, maxOutputLen, input,
+ inputLen);
+}
+
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_CheckSignRaw)(key, sig, sigLen, hash, hashLen);
+}
+
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+ unsigned char *data,
+ unsigned int *dataLen,
+ unsigned int maxDataLen,
+ const unsigned char *sig,
+ unsigned int sigLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_CheckSignRecoverRaw)(key, data, dataLen, maxDataLen,
+ sig, sigLen);
+}
+
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_EncryptRaw)(key, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_DecryptRaw)(key, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ const unsigned char *seed,
+ unsigned int seedLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_EncryptOAEP)(key, hashAlg, maskHashAlg, label,
+ labelLen, seed, seedLen, output,
+ outputLen, maxOutputLen, input, inputLen);
+}
+
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_DecryptOAEP)(key, hashAlg, maskHashAlg, label,
+ labelLen, output, outputLen,
+ maxOutputLen, input, inputLen);
+}
+
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_EncryptBlock)(key, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_DecryptBlock)(key, output, outputLen, maxOutputLen,
+ input, inputLen);
+}
+
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_SignPSS)(key, hashAlg, maskHashAlg, salt, saltLen,
+ output, outputLen, maxOutputLen, input,
+ inputLen);
+}
+
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ unsigned int saltLen,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_CheckSignPSS)(key, hashAlg, maskHashAlg, saltLen,
+ sig, sigLen, hash, hashLen);
+}
+
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_Sign)(key, output, outputLen, maxOutputLen, input,
+ inputLen);
+}
+
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *data,
+ unsigned int dataLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_CheckSign)(key, sig, sigLen, data, dataLen);
+}
+
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *sig,
+ unsigned int sigLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_RSA_CheckSignRecover)(key, output, outputLen, maxOutputLen,
+ sig, sigLen);
+}
+
+SECStatus
+EC_FillParams(PLArenaPool *arena,
+ const SECItem *encodedParams,
+ ECParams *params)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_EC_FillParams)(arena, encodedParams, params);
+}
+
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams,
+ ECParams **ecparams)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_EC_DecodeParams)(encodedParams, ecparams);
+}
+
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+ const ECParams *srcParams)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_EC_CopyParams)(arena, dstParams, srcParams);
+}
+
+SECStatus
+ChaCha20_Xor(unsigned char *output, const unsigned char *block, unsigned int len,
+ const unsigned char *k, const unsigned char *nonce, PRUint32 ctr)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_ChaCha20_Xor)(output, block, len, k, nonce, ctr);
+}
+
+SECStatus
+ChaCha20_InitContext(ChaCha20Context *ctx, const unsigned char *key,
+ unsigned int keyLen,
+ const unsigned char *nonce,
+ unsigned int nonceLen,
+ PRUint32 ctr)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20_InitContext)(ctx, key, keyLen, nonce, nonceLen, ctr);
+}
+
+ChaCha20Context *
+ChaCha20_CreateContext(const unsigned char *key, unsigned int keyLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ PRUint32 ctr)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_ChaCha20_CreateContext)(key, keyLen, nonce, nonceLen, ctr);
+}
+
+void
+ChaCha20_DestroyContext(ChaCha20Context *ctx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_ChaCha20_DestroyContext)(ctx, freeit);
+}
+
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+ const unsigned char *key, unsigned int keyLen,
+ unsigned int tagLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20Poly1305_InitContext)(ctx, key, keyLen, tagLen);
+}
+
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen,
+ unsigned int tagLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_ChaCha20Poly1305_CreateContext)(key, keyLen, tagLen);
+}
+
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_ChaCha20Poly1305_DestroyContext)(ctx, freeit);
+}
+
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx,
+ unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20Poly1305_Seal)(
+ ctx, output, outputLen, maxOutputLen, input, inputLen,
+ nonce, nonceLen, ad, adLen);
+}
+
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx,
+ unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20Poly1305_Open)(
+ ctx, output, outputLen, maxOutputLen, input, inputLen,
+ nonce, nonceLen, ad, adLen);
+}
+
+SECStatus
+ChaCha20Poly1305_Encrypt(const ChaCha20Poly1305Context *ctx,
+ unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen,
+ unsigned char *tagOut)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20Poly1305_Encrypt)(
+ ctx, output, outputLen, maxOutputLen, input, inputLen,
+ nonce, nonceLen, ad, adLen, tagOut);
+}
+
+SECStatus
+ChaCha20Poly1305_Decrypt(const ChaCha20Poly1305Context *ctx,
+ unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen,
+ unsigned char *tagIn)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_ChaCha20Poly1305_Decrypt)(
+ ctx, output, outputLen, maxOutputLen, input, inputLen,
+ nonce, nonceLen, ad, adLen, tagIn);
+}
+
+/*
+ * Loader stub for EC_GetPointSize: returns the result of p_EC_GetPointSize
+ * for `params`.
+ *
+ * NOTE(review): the return type is int, yet when `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS this returns the status
+ * constant SECFailure.  Callers presumably have to treat that value as
+ * "no size available" -- confirm how it is handled.
+ */
+int
+EC_GetPointSize(const ECParams *params)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_EC_GetPointSize)(params);
+}
+
+SECStatus
+BLAKE2B_Hash(unsigned char *dest, const char *src)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_Hash)(dest, src);
+}
+
+SECStatus
+BLAKE2B_HashBuf(unsigned char *output, const unsigned char *input, PRUint32 inlen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_HashBuf)(output, input, inlen);
+}
+
+SECStatus
+BLAKE2B_MAC_HashBuf(unsigned char *output, const unsigned char *input,
+ unsigned int inlen, const unsigned char *key,
+ unsigned int keylen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_MAC_HashBuf)(output, input, inlen, key, keylen);
+}
+
+BLAKE2BContext *
+BLAKE2B_NewContext(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return NULL;
+ }
+ return (vector->p_BLAKE2B_NewContext)();
+}
+
+void
+BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return;
+ }
+ (vector->p_BLAKE2B_DestroyContext)(ctx, freeit);
+}
+
+SECStatus
+BLAKE2B_Begin(BLAKE2BContext *ctx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_Begin)(ctx);
+}
+
+SECStatus
+BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key, const size_t keylen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_MAC_Begin)(ctx, key, keylen);
+}
+
+SECStatus
+BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in, unsigned int inlen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_Update)(ctx, in, inlen);
+}
+
+SECStatus
+BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out,
+ unsigned int *digestLen, size_t maxDigestLen)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_End)(ctx, out, digestLen, maxDigestLen);
+}
+
+unsigned int
+BLAKE2B_FlattenSize(BLAKE2BContext *ctx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return 0;
+ }
+ return (vector->p_BLAKE2B_FlattenSize)(ctx);
+}
+
+SECStatus
+BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return SECFailure;
+ }
+ return (vector->p_BLAKE2B_Flatten)(ctx, space);
+}
+
+BLAKE2BContext *
+BLAKE2B_Resurrect(unsigned char *space, void *arg)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+ return NULL;
+ }
+ return (vector->p_BLAKE2B_Resurrect)(space, arg);
+}
+
+/* == New for CMAC == */
+SECStatus
+CMAC_Init(CMACContext *ctx, CMACCipher type, const unsigned char *key,
+ unsigned int key_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_CMAC_Init)(ctx, type, key, key_len);
+}
+
+CMACContext *
+CMAC_Create(CMACCipher type, const unsigned char *key, unsigned int key_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_CMAC_Create)(type, key, key_len);
+}
+
+SECStatus
+CMAC_Begin(CMACContext *ctx)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_CMAC_Begin)(ctx);
+}
+
+SECStatus
+CMAC_Update(CMACContext *ctx, const unsigned char *data, unsigned int data_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_CMAC_Update)(ctx, data, data_len);
+}
+
+SECStatus
+CMAC_Finish(CMACContext *ctx, unsigned char *result, unsigned int *result_len,
+ unsigned int max_result_len)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return SECFailure;
+ return (vector->p_CMAC_Finish)(ctx, result, result_len, max_result_len);
+}
+
+void
+CMAC_Destroy(CMACContext *ctx, PRBool free_it)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return;
+ (vector->p_CMAC_Destroy)(ctx, free_it);
+}
+
+/* ============== New for 3.0026 =============================== */
+
+SHA3_224Context *
+SHA3_224_NewContext(void)
+{
+ if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+ return NULL;
+ return (vector->p_SHA3_224_NewContext)();
+}
+
+/* Dispatches to vector->p_SHA3_224_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_224_DestroyContext(SHA3_224Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_224_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHA3_224_FlattenSize; returns 0 if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+unsigned int
+SHA3_224_FlattenSize(SHA3_224Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA3_224_FlattenSize(cx);
+}
+
+/* Dispatches to vector->p_SHA3_224_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_224_Begin(SHA3_224Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_224_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHA3_224_Update; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_224_Update(SHA3_224Context *cx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_224_Update(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHA3_224_End; returns without dispatching (outputs
+ * untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_224_End(SHA3_224Context *cx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_224_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatches to vector->p_SHA3_224_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_224_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_224_Hash(dest, src);
+}
+
+/* Dispatches to vector->p_SHA3_224_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_224_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_224_HashBuf(dest, src, src_length);
+}
+
+/* Dispatches to vector->p_SHA3_256_NewContext; returns NULL if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SHA3_256Context *
+SHA3_256_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA3_256_NewContext();
+}
+
+/* Dispatches to vector->p_SHA3_256_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_256_DestroyContext(SHA3_256Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_256_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHA3_256_FlattenSize; returns 0 if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+unsigned int
+SHA3_256_FlattenSize(SHA3_256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA3_256_FlattenSize(cx);
+}
+
+/* Dispatches to vector->p_SHA3_256_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_256_Begin(SHA3_256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_256_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHA3_256_Update; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_256_Update(SHA3_256Context *cx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_256_Update(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHA3_256_End; returns without dispatching (outputs
+ * untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_256_End(SHA3_256Context *cx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_256_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatches to vector->p_SHA3_256_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_256_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_256_Hash(dest, src);
+}
+
+/* Dispatches to vector->p_SHA3_256_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_256_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_256_HashBuf(dest, src, src_length);
+}
+
+/* Dispatches to vector->p_SHA3_384_NewContext; returns NULL if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SHA3_384Context *
+SHA3_384_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA3_384_NewContext();
+}
+
+/* Dispatches to vector->p_SHA3_384_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_384_DestroyContext(SHA3_384Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_384_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHA3_384_FlattenSize; returns 0 if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+unsigned int
+SHA3_384_FlattenSize(SHA3_384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA3_384_FlattenSize(cx);
+}
+
+/* Dispatches to vector->p_SHA3_384_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_384_Begin(SHA3_384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_384_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHA3_384_Update; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_384_Update(SHA3_384Context *cx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_384_Update(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHA3_384_End; returns without dispatching (outputs
+ * untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_384_End(SHA3_384Context *cx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_384_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatches to vector->p_SHA3_384_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_384_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_384_Hash(dest, src);
+}
+
+/* Dispatches to vector->p_SHA3_384_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_384_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_384_HashBuf(dest, src, src_length);
+}
+
+/* Dispatches to vector->p_SHA3_512_NewContext; returns NULL if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SHA3_512Context *
+SHA3_512_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA3_512_NewContext();
+}
+
+/* Dispatches to vector->p_SHA3_512_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_512_DestroyContext(SHA3_512Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_512_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHA3_512_FlattenSize; returns 0 if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+unsigned int
+SHA3_512_FlattenSize(SHA3_512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA3_512_FlattenSize(cx);
+}
+
+/* Dispatches to vector->p_SHA3_512_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_512_Begin(SHA3_512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_512_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHA3_512_Update; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_512_Update(SHA3_512Context *cx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_512_Update(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHA3_512_End; returns without dispatching (outputs
+ * untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHA3_512_End(SHA3_512Context *cx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA3_512_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatches to vector->p_SHA3_512_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_512_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_512_Hash(dest, src);
+}
+
+/* Dispatches to vector->p_SHA3_512_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHA3_512_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA3_512_HashBuf(dest, src, src_length);
+}
+
+/* Dispatches to vector->p_SHAKE_128_NewContext; returns NULL if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SHAKE_128Context *
+SHAKE_128_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHAKE_128_NewContext();
+}
+
+/* Dispatches to vector->p_SHAKE_128_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_128_DestroyContext(SHAKE_128Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_128_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHAKE_128_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_128_Begin(SHAKE_128Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_128_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHAKE_128_Absorb; returns without dispatching if
+ * the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_128_Absorb(SHAKE_128Context *cx, const unsigned char *input,
+                 unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_128_Absorb(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHAKE_128_SqueezeEnd; returns without dispatching
+ * (digest untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_128_SqueezeEnd(SHAKE_128Context *cx, unsigned char *digest,
+                     unsigned int digestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_128_SqueezeEnd(cx, digest, digestLen);
+}
+
+/* Dispatches to vector->p_SHAKE_128_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHAKE_128_HashBuf(unsigned char *dest, PRUint32 dest_length,
+                  const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHAKE_128_HashBuf(dest, dest_length, src, src_length);
+}
+
+/* Dispatches to vector->p_SHAKE_128_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHAKE_128_Hash(unsigned char *dest, PRUint32 dest_length, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHAKE_128_Hash(dest, dest_length, src);
+}
+
+/* Dispatches to vector->p_SHAKE_256_NewContext; returns NULL if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SHAKE_256Context *
+SHAKE_256_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHAKE_256_NewContext();
+}
+
+/* Dispatches to vector->p_SHAKE_256_DestroyContext; returns without
+ * dispatching if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_256_DestroyContext(SHAKE_256Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_256_DestroyContext(cx, freeit);
+}
+
+/* Dispatches to vector->p_SHAKE_256_Begin; returns without dispatching if the
+ * vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_256_Begin(SHAKE_256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_256_Begin(cx);
+}
+
+/* Dispatches to vector->p_SHAKE_256_Absorb; returns without dispatching if
+ * the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_256_Absorb(SHAKE_256Context *cx, const unsigned char *input,
+                 unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_256_Absorb(cx, input, inputLen);
+}
+
+/* Dispatches to vector->p_SHAKE_256_SqueezeEnd; returns without dispatching
+ * (digest untouched) if the vector is unset and freebl_RunLoaderOnce() fails. */
+void
+SHAKE_256_SqueezeEnd(SHAKE_256Context *cx, unsigned char *digest,
+                     unsigned int digestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHAKE_256_SqueezeEnd(cx, digest, digestLen);
+}
+
+/* Dispatches to vector->p_SHAKE_256_HashBuf; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHAKE_256_HashBuf(unsigned char *dest, PRUint32 dest_length,
+                  const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHAKE_256_HashBuf(dest, dest_length, src, src_length);
+}
+
+/* Dispatches to vector->p_SHAKE_256_Hash; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+SHAKE_256_Hash(unsigned char *dest, PRUint32 dest_length, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHAKE_256_Hash(dest, dest_length, src);
+}
+
+/* ============== New for 3.0027 =============================== */
+
+/* Dispatches to vector->p_Kyber_NewKey; returns SECFailure if the vector is
+ * unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+Kyber_NewKey(KyberParams params, const SECItem *seed, SECItem *privKey,
+             SECItem *pubKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_Kyber_NewKey(params, seed, privKey, pubKey);
+}
+
+/* Dispatches to vector->p_Kyber_Encapsulate; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+Kyber_Encapsulate(KyberParams params, const SECItem *seed,
+                  const SECItem *pubKey, SECItem *ciphertext, SECItem *secret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_Kyber_Encapsulate(params, seed, pubKey, ciphertext, secret);
+}
+
+/* Dispatches to vector->p_Kyber_Decapsulate; returns SECFailure if the vector
+ * is unset and freebl_RunLoaderOnce() fails. */
+SECStatus
+Kyber_Decapsulate(KyberParams params, const SECItem *privKey,
+                  const SECItem *ciphertext, SECItem *secret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_Kyber_Decapsulate(params, privKey, ciphertext, secret);
+}
diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h
new file mode 100644
index 0000000000..62159d88c7
--- /dev/null
+++ b/security/nss/lib/freebl/loader.h
@@ -0,0 +1,1028 @@
+/*
+ * loader.h - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _LOADER_H_
+#define _LOADER_H_ 1
+
+#include "blapi.h"
+
+#define FREEBL_VERSION 0x0327
+
+struct FREEBLVectorStr {
+
+ unsigned short length; /* of this struct in bytes */
+ unsigned short version; /* of this struct. */
+
+ RSAPrivateKey *(*p_RSA_NewKey)(int keySizeInBits,
+ SECItem *publicExponent);
+
+ SECStatus (*p_RSA_PublicKeyOp)(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_RSA_PrivateKeyOp)(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_DSA_NewKey)(const PQGParams *params,
+ DSAPrivateKey **privKey);
+
+ SECStatus (*p_DSA_SignDigest)(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_DSA_VerifyDigest)(DSAPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_DSA_NewKeyFromSeed)(const PQGParams *params,
+ const unsigned char *seed,
+ DSAPrivateKey **privKey);
+
+ SECStatus (*p_DSA_SignDigestWithSeed)(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed);
+
+ SECStatus (*p_DH_GenParam)(int primeLen, DHParams **params);
+
+ SECStatus (*p_DH_NewKey)(DHParams *params,
+ DHPrivateKey **privKey);
+
+ SECStatus (*p_DH_Derive)(SECItem *publicValue,
+ SECItem *prime,
+ SECItem *privateValue,
+ SECItem *derivedSecret,
+ unsigned int maxOutBytes);
+
+ SECStatus (*p_KEA_Derive)(SECItem *prime,
+ SECItem *public1,
+ SECItem *public2,
+ SECItem *private1,
+ SECItem *private2,
+ SECItem *derivedSecret);
+
+ PRBool (*p_KEA_Verify)(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+ RC4Context *(*p_RC4_CreateContext)(const unsigned char *key, int len);
+
+ void (*p_RC4_DestroyContext)(RC4Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC4_Encrypt)(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC4_Decrypt)(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ RC2Context *(*p_RC2_CreateContext)(const unsigned char *key,
+ unsigned int len, const unsigned char *iv,
+ int mode, unsigned effectiveKeyLen);
+
+ void (*p_RC2_DestroyContext)(RC2Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC2_Encrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC2_Decrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ RC5Context *(*p_RC5_CreateContext)(const SECItem *key, unsigned int rounds,
+ unsigned int wordSize, const unsigned char *iv, int mode);
+
+ void (*p_RC5_DestroyContext)(RC5Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC5_Encrypt)(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC5_Decrypt)(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ DESContext *(*p_DES_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+
+ void (*p_DES_DestroyContext)(DESContext *cx, PRBool freeit);
+
+ SECStatus (*p_DES_Encrypt)(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_DES_Decrypt)(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ AESContext *(*p_AES_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, int encrypt, unsigned int keylen,
+ unsigned int blocklen);
+
+ void (*p_AES_DestroyContext)(AESContext *cx, PRBool freeit);
+
+ SECStatus (*p_AES_Encrypt)(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_AES_Decrypt)(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_MD5_Hash)(unsigned char *dest, const char *src);
+
+ SECStatus (*p_MD5_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+ MD5Context *(*p_MD5_NewContext)(void);
+
+ void (*p_MD5_DestroyContext)(MD5Context *cx, PRBool freeit);
+
+ void (*p_MD5_Begin)(MD5Context *cx);
+
+ void (*p_MD5_Update)(MD5Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+ void (*p_MD5_End)(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ unsigned int (*p_MD5_FlattenSize)(MD5Context *cx);
+
+ SECStatus (*p_MD5_Flatten)(MD5Context *cx, unsigned char *space);
+
+ MD5Context *(*p_MD5_Resurrect)(unsigned char *space, void *arg);
+
+ void (*p_MD5_TraceState)(MD5Context *cx);
+
+ SECStatus (*p_MD2_Hash)(unsigned char *dest, const char *src);
+
+ MD2Context *(*p_MD2_NewContext)(void);
+
+ void (*p_MD2_DestroyContext)(MD2Context *cx, PRBool freeit);
+
+ void (*p_MD2_Begin)(MD2Context *cx);
+
+ void (*p_MD2_Update)(MD2Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+ void (*p_MD2_End)(MD2Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ unsigned int (*p_MD2_FlattenSize)(MD2Context *cx);
+
+ SECStatus (*p_MD2_Flatten)(MD2Context *cx, unsigned char *space);
+
+ MD2Context *(*p_MD2_Resurrect)(unsigned char *space, void *arg);
+
+ SECStatus (*p_SHA1_Hash)(unsigned char *dest, const char *src);
+
+ SECStatus (*p_SHA1_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+ SHA1Context *(*p_SHA1_NewContext)(void);
+
+ void (*p_SHA1_DestroyContext)(SHA1Context *cx, PRBool freeit);
+
+ void (*p_SHA1_Begin)(SHA1Context *cx);
+
+ void (*p_SHA1_Update)(SHA1Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+
+ void (*p_SHA1_End)(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ void (*p_SHA1_TraceState)(SHA1Context *cx);
+
+ unsigned int (*p_SHA1_FlattenSize)(SHA1Context *cx);
+
+ SECStatus (*p_SHA1_Flatten)(SHA1Context *cx, unsigned char *space);
+
+ SHA1Context *(*p_SHA1_Resurrect)(unsigned char *space, void *arg);
+
+ SECStatus (*p_RNG_RNGInit)(void);
+
+ SECStatus (*p_RNG_RandomUpdate)(const void *data, size_t bytes);
+
+ SECStatus (*p_RNG_GenerateGlobalRandomBytes)(void *dest, size_t len);
+
+ void (*p_RNG_RNGShutdown)(void);
+
+ SECStatus (*p_PQG_ParamGen)(unsigned int j, PQGParams **pParams,
+ PQGVerify **pVfy);
+
+ SECStatus (*p_PQG_ParamGenSeedLen)(unsigned int j, unsigned int seedBytes,
+ PQGParams **pParams, PQGVerify **pVfy);
+
+ SECStatus (*p_PQG_VerifyParams)(const PQGParams *params,
+ const PQGVerify *vfy, SECStatus *result);
+
+ /* Version 3.001 came to here */
+
+ SECStatus (*p_RSA_PrivateKeyOpDoubleChecked)(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_RSA_PrivateKeyCheck)(const RSAPrivateKey *key);
+
+ void (*p_BL_Cleanup)(void);
+
+ /* Version 3.002 came to here */
+
+ SHA256Context *(*p_SHA256_NewContext)(void);
+ void (*p_SHA256_DestroyContext)(SHA256Context *cx, PRBool freeit);
+ void (*p_SHA256_Begin)(SHA256Context *cx);
+ void (*p_SHA256_Update)(SHA256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA256_End)(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA256_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA256_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA256_TraceState)(SHA256Context *cx);
+ unsigned int (*p_SHA256_FlattenSize)(SHA256Context *cx);
+ SECStatus (*p_SHA256_Flatten)(SHA256Context *cx, unsigned char *space);
+ SHA256Context *(*p_SHA256_Resurrect)(unsigned char *space, void *arg);
+
+ SHA512Context *(*p_SHA512_NewContext)(void);
+ void (*p_SHA512_DestroyContext)(SHA512Context *cx, PRBool freeit);
+ void (*p_SHA512_Begin)(SHA512Context *cx);
+ void (*p_SHA512_Update)(SHA512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA512_End)(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA512_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA512_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA512_TraceState)(SHA512Context *cx);
+ unsigned int (*p_SHA512_FlattenSize)(SHA512Context *cx);
+ SECStatus (*p_SHA512_Flatten)(SHA512Context *cx, unsigned char *space);
+ SHA512Context *(*p_SHA512_Resurrect)(unsigned char *space, void *arg);
+
+ SHA384Context *(*p_SHA384_NewContext)(void);
+ void (*p_SHA384_DestroyContext)(SHA384Context *cx, PRBool freeit);
+ void (*p_SHA384_Begin)(SHA384Context *cx);
+ void (*p_SHA384_Update)(SHA384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA384_End)(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA384_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA384_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA384_TraceState)(SHA384Context *cx);
+ unsigned int (*p_SHA384_FlattenSize)(SHA384Context *cx);
+ SECStatus (*p_SHA384_Flatten)(SHA384Context *cx, unsigned char *space);
+ SHA384Context *(*p_SHA384_Resurrect)(unsigned char *space, void *arg);
+
+ /* Version 3.003 came to here */
+
+ AESKeyWrapContext *(*p_AESKeyWrap_CreateContext)(const unsigned char *key,
+ const unsigned char *iv, int encrypt, unsigned int keylen);
+
+ void (*p_AESKeyWrap_DestroyContext)(AESKeyWrapContext *cx, PRBool freeit);
+
+ SECStatus (*p_AESKeyWrap_Encrypt)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_AESKeyWrap_Decrypt)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ /* Version 3.004 came to here */
+
+ PRBool (*p_BLAPI_SHVerify)(const char *name, PRFuncPtr addr);
+ PRBool (*p_BLAPI_VerifySelf)(const char *name);
+
+ /* Version 3.005 came to here */
+
+ SECStatus (*p_EC_NewKey)(ECParams *params,
+ ECPrivateKey **privKey);
+
+ SECStatus (*p_EC_NewKeyFromSeed)(ECParams *params,
+ ECPrivateKey **privKey,
+ const unsigned char *seed,
+ int seedlen);
+
+ SECStatus (*p_EC_ValidatePublicKey)(ECParams *params,
+ SECItem *publicValue);
+
+ SECStatus (*p_ECDH_Derive)(SECItem *publicValue,
+ ECParams *params,
+ SECItem *privateValue,
+ PRBool withCofactor,
+ SECItem *derivedSecret);
+
+ SECStatus (*p_ECDSA_SignDigest)(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_ECDSA_VerifyDigest)(ECPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_ECDSA_SignDigestWithSeed)(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed,
+ const int seedlen);
+
+ /* Version 3.006 came to here */
+
+ /* no modification to FREEBLVectorStr itself
+ * but ECParamStr was modified
+ */
+
+ /* Version 3.007 came to here */
+
+ SECStatus (*p_AES_InitContext)(AESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+ SECStatus (*p_AESKeyWrap_InitContext)(AESKeyWrapContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+ SECStatus (*p_DES_InitContext)(DESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+ SECStatus (*p_RC2_InitContext)(RC2Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int effectiveKeyLen,
+ unsigned int);
+ SECStatus (*p_RC4_InitContext)(RC4Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+
+ AESContext *(*p_AES_AllocateContext)(void);
+ AESKeyWrapContext *(*p_AESKeyWrap_AllocateContext)(void);
+ DESContext *(*p_DES_AllocateContext)(void);
+ RC2Context *(*p_RC2_AllocateContext)(void);
+ RC4Context *(*p_RC4_AllocateContext)(void);
+
+ void (*p_MD2_Clone)(MD2Context *dest, MD2Context *src);
+ void (*p_MD5_Clone)(MD5Context *dest, MD5Context *src);
+ void (*p_SHA1_Clone)(SHA1Context *dest, SHA1Context *src);
+ void (*p_SHA256_Clone)(SHA256Context *dest, SHA256Context *src);
+ void (*p_SHA384_Clone)(SHA384Context *dest, SHA384Context *src);
+ void (*p_SHA512_Clone)(SHA512Context *dest, SHA512Context *src);
+
+ SECStatus (*p_TLS_PRF)(const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS);
+
+ const SECHashObject *(*p_HASH_GetRawHashObject)(HASH_HashType hashType);
+
+ HMACContext *(*p_HMAC_Create)(const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+ SECStatus (*p_HMAC_Init)(HMACContext *cx, const SECHashObject *hash_obj,
+ const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+ void (*p_HMAC_Begin)(HMACContext *cx);
+ void (*p_HMAC_Update)(HMACContext *cx, const unsigned char *data,
+ unsigned int data_len);
+ HMACContext *(*p_HMAC_Clone)(HMACContext *cx);
+ SECStatus (*p_HMAC_Finish)(HMACContext *cx, unsigned char *result,
+ unsigned int *result_len,
+ unsigned int max_result_len);
+ void (*p_HMAC_Destroy)(HMACContext *cx, PRBool freeit);
+
+ void (*p_RNG_SystemInfoForRNG)(void);
+
+ /* Version 3.008 came to here */
+
+ SECStatus (*p_FIPS186Change_GenerateX)(unsigned char *XKEY,
+ const unsigned char *XSEEDj,
+ unsigned char *x_j);
+ SECStatus (*p_FIPS186Change_ReduceModQForDSA)(const unsigned char *w,
+ const unsigned char *q,
+ unsigned char *xj);
+
+ /* Version 3.009 came to here */
+
+ SECStatus (*p_Camellia_InitContext)(CamelliaContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int unused);
+
+ CamelliaContext *(*p_Camellia_AllocateContext)(void);
+ CamelliaContext *(*p_Camellia_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, int encrypt,
+ unsigned int keylen);
+ void (*p_Camellia_DestroyContext)(CamelliaContext *cx, PRBool freeit);
+
+ SECStatus (*p_Camellia_Encrypt)(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ SECStatus (*p_Camellia_Decrypt)(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ void (*p_PQG_DestroyParams)(PQGParams *params);
+
+ void (*p_PQG_DestroyVerify)(PQGVerify *vfy);
+
+ /* Version 3.010 came to here */
+
+ SECStatus (*p_SEED_InitContext)(SEEDContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+
+ SEEDContext *(*p_SEED_AllocateContext)(void);
+
+ SEEDContext *(*p_SEED_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+
+ void (*p_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit);
+
+ SECStatus (*p_SEED_Encrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_SEED_Decrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_BL_Init)(void);
+ void (*p_BL_SetForkState)(PRBool);
+
+ SECStatus (*p_PRNGTEST_Instantiate)(const PRUint8 *entropy,
+ unsigned int entropy_len,
+ const PRUint8 *nonce,
+ unsigned int nonce_len,
+ const PRUint8 *personal_string,
+ unsigned int ps_len);
+
+ SECStatus (*p_PRNGTEST_Reseed)(const PRUint8 *entropy,
+ unsigned int entropy_len,
+ const PRUint8 *additional,
+ unsigned int additional_len);
+
+ SECStatus (*p_PRNGTEST_Generate)(PRUint8 *bytes,
+ unsigned int bytes_len,
+ const PRUint8 *additional,
+ unsigned int additional_len);
+
+ SECStatus (*p_PRNGTEST_Uninstantiate)(void);
+ /* Version 3.011 came to here */
+
+ SECStatus (*p_RSA_PopulatePrivateKey)(RSAPrivateKey *key);
+
+ SECStatus (*p_DSA_NewRandom)(PLArenaPool *arena, const SECItem *q,
+ SECItem *seed);
+
+ SECStatus (*p_JPAKE_Sign)(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *x, const SECItem *testRandom,
+ const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r);
+
+ SECStatus (*p_JPAKE_Verify)(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *peerID, const SECItem *gx,
+ const SECItem *gv, const SECItem *r);
+
+ SECStatus (*p_JPAKE_Round2)(PLArenaPool *arena, const SECItem *p,
+ const SECItem *q, const SECItem *gx1,
+ const SECItem *gx3, const SECItem *gx4,
+ SECItem *base, const SECItem *x2,
+ const SECItem *s, SECItem *x2s);
+
+ SECStatus (*p_JPAKE_Final)(PLArenaPool *arena, const SECItem *p,
+ const SECItem *q, const SECItem *x2,
+ const SECItem *gx4, const SECItem *x2s,
+ const SECItem *B, SECItem *K);
+
+ /* Version 3.012 came to here */
+
+ SECStatus (*p_TLS_P_hash)(HASH_HashType hashAlg,
+ const SECItem *secret,
+ const char *label,
+ SECItem *seed,
+ SECItem *result,
+ PRBool isFIPS);
+
+ SHA224Context *(*p_SHA224_NewContext)(void);
+ void (*p_SHA224_DestroyContext)(SHA224Context *cx, PRBool freeit);
+ void (*p_SHA224_Begin)(SHA224Context *cx);
+ void (*p_SHA224_Update)(SHA224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA224_End)(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA224_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA224_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA224_TraceState)(SHA224Context *cx);
+ unsigned int (*p_SHA224_FlattenSize)(SHA224Context *cx);
+ SECStatus (*p_SHA224_Flatten)(SHA224Context *cx, unsigned char *space);
+ SHA224Context *(*p_SHA224_Resurrect)(unsigned char *space, void *arg);
+ void (*p_SHA224_Clone)(SHA224Context *dest, SHA224Context *src);
+ PRBool (*p_BLAPI_SHVerifyFile)(const char *name);
+
+ /* Version 3.013 came to here */
+
+ SECStatus (*p_PQG_ParamGenV2)(unsigned int L, unsigned int N,
+ unsigned int seedBytes,
+ PQGParams **pParams, PQGVerify **pVfy);
+ SECStatus (*p_PRNGTEST_RunHealthTests)(void);
+
+ /* Version 3.014 came to here */
+
+ SECStatus (*p_HMAC_ConstantTime)(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen);
+
+ SECStatus (*p_SSLv3_MAC_ConstantTime)(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen);
+
+ /* Version 3.015 came to here */
+
+ SECStatus (*p_RSA_SignRaw)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSignRaw)(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+ SECStatus (*p_RSA_CheckSignRecoverRaw)(RSAPublicKey *key,
+ unsigned char *data,
+ unsigned int *dataLen,
+ unsigned int maxDataLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+ SECStatus (*p_RSA_EncryptRaw)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptRaw)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_EncryptOAEP)(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ const unsigned char *seed,
+ unsigned int seedLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptOAEP)(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_EncryptBlock)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptBlock)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_SignPSS)(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSignPSS)(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ unsigned int saltLen,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+ SECStatus (*p_RSA_Sign)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSign)(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+ SECStatus (*p_RSA_CheckSignRecover)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+ /* Version 3.016 came to here */
+
+ SECStatus (*p_EC_FillParams)(PLArenaPool *arena,
+ const SECItem *encodedParams, ECParams *params);
+ SECStatus (*p_EC_DecodeParams)(const SECItem *encodedParams,
+ ECParams **ecparams);
+ SECStatus (*p_EC_CopyParams)(PLArenaPool *arena, ECParams *dstParams,
+ const ECParams *srcParams);
+
+ /* Version 3.017 came to here */
+
+ SECStatus (*p_ChaCha20Poly1305_InitContext)(ChaCha20Poly1305Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ unsigned int tagLen);
+
+ ChaCha20Poly1305Context *(*p_ChaCha20Poly1305_CreateContext)(
+ const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+ void (*p_ChaCha20Poly1305_DestroyContext)(ChaCha20Poly1305Context *ctx,
+ PRBool freeit);
+
+ SECStatus (*p_ChaCha20Poly1305_Seal)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+ SECStatus (*p_ChaCha20Poly1305_Open)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+ /* Version 3.018 came to here */
+
+ int (*p_EC_GetPointSize)(const ECParams *);
+
+ /* Version 3.019 came to here */
+
+ SECStatus (*p_BLAKE2B_Hash)(unsigned char *dest, const char *src);
+ SECStatus (*p_BLAKE2B_HashBuf)(unsigned char *output,
+ const unsigned char *input, PRUint32 inlen);
+ SECStatus (*p_BLAKE2B_MAC_HashBuf)(unsigned char *output,
+ const unsigned char *input,
+ unsigned int inlen,
+ const unsigned char *key,
+ unsigned int keylen);
+ BLAKE2BContext *(*p_BLAKE2B_NewContext)();
+ void (*p_BLAKE2B_DestroyContext)(BLAKE2BContext *ctx, PRBool freeit);
+ SECStatus (*p_BLAKE2B_Begin)(BLAKE2BContext *ctx);
+ SECStatus (*p_BLAKE2B_MAC_Begin)(BLAKE2BContext *ctx, const PRUint8 *key,
+ const size_t keylen);
+ SECStatus (*p_BLAKE2B_Update)(BLAKE2BContext *ctx, const unsigned char *in,
+ unsigned int inlen);
+ SECStatus (*p_BLAKE2B_End)(BLAKE2BContext *ctx, unsigned char *out,
+ unsigned int *digestLen, size_t maxDigestLen);
+ unsigned int (*p_BLAKE2B_FlattenSize)(BLAKE2BContext *ctx);
+ SECStatus (*p_BLAKE2B_Flatten)(BLAKE2BContext *ctx, unsigned char *space);
+ BLAKE2BContext *(*p_BLAKE2B_Resurrect)(unsigned char *space, void *arg);
+
+ /* Version 3.020 came to here */
+
+ SECStatus (*p_ChaCha20_Xor)(unsigned char *output, const unsigned char *block,
+ unsigned int len, const unsigned char *k,
+ const unsigned char *nonce, PRUint32 ctr);
+
+ /* Version 3.021 came to here */
+
+ SECStatus (*p_CMAC_Init)(CMACContext *ctx, CMACCipher type,
+ const unsigned char *key, unsigned int key_len);
+ CMACContext *(*p_CMAC_Create)(CMACCipher type, const unsigned char *key,
+ unsigned int key_len);
+ SECStatus (*p_CMAC_Begin)(CMACContext *ctx);
+ SECStatus (*p_CMAC_Update)(CMACContext *ctx, const unsigned char *data,
+ unsigned int data_len);
+ SECStatus (*p_CMAC_Finish)(CMACContext *ctx, unsigned char *result,
+ unsigned int *result_len,
+ unsigned int max_result_len);
+ void (*p_CMAC_Destroy)(CMACContext *ctx, PRBool free_it);
+
+ /* Version 3.022 came to here */
+ SECStatus (*p_ChaCha20Poly1305_Encrypt)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen, unsigned char *tagOut);
+
+ SECStatus (*p_ChaCha20Poly1305_Decrypt)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen, unsigned char *tagIn);
+ SECStatus (*p_AES_AEAD)(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ void *params, unsigned int paramsLen,
+ const unsigned char *aad, unsigned int aadLen);
+ SECStatus (*p_AESKeyWrap_EncryptKWP)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ SECStatus (*p_AESKeyWrap_DecryptKWP)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ /* Version 3.023 came to here */
+
+ PRBool (*p_KEA_PrimeCheck)(SECItem *prime);
+ /* Version 3.024 came to here */
+
+ SECStatus (*p_ChaCha20_InitContext)(ChaCha20Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ const unsigned char *nonce,
+ unsigned int nonceLen,
+ PRUint32 ctr);
+
+ ChaCha20Context *(*p_ChaCha20_CreateContext)(const unsigned char *key,
+ unsigned int keyLen,
+ const unsigned char *nonce,
+ unsigned int nonceLen,
+ PRUint32 ctr);
+
+ void (*p_ChaCha20_DestroyContext)(ChaCha20Context *ctx, PRBool freeit);
+
+ /* Version 3.025 came to here */
+
+ SHA3_224Context *(*p_SHA3_224_NewContext)(void);
+ void (*p_SHA3_224_DestroyContext)(SHA3_224Context *cx, PRBool freeit);
+ unsigned int (*p_SHA3_224_FlattenSize)(SHA3_224Context *cx);
+ void (*p_SHA3_224_Begin)(SHA3_224Context *cx);
+ void (*p_SHA3_224_Update)(SHA3_224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA3_224_End)(SHA3_224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ SECStatus (*p_SHA3_224_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA3_224_Hash)(unsigned char *dest, const char *src);
+
+ SHA3_256Context *(*p_SHA3_256_NewContext)(void);
+ void (*p_SHA3_256_DestroyContext)(SHA3_256Context *cx, PRBool freeit);
+ unsigned int (*p_SHA3_256_FlattenSize)(SHA3_256Context *cx);
+ void (*p_SHA3_256_Begin)(SHA3_256Context *cx);
+ void (*p_SHA3_256_Update)(SHA3_256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA3_256_End)(SHA3_256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ SECStatus (*p_SHA3_256_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA3_256_Hash)(unsigned char *dest, const char *src);
+
+ SHA3_384Context *(*p_SHA3_384_NewContext)(void);
+ void (*p_SHA3_384_DestroyContext)(SHA3_384Context *cx, PRBool freeit);
+ unsigned int (*p_SHA3_384_FlattenSize)(SHA3_384Context *cx);
+ void (*p_SHA3_384_Begin)(SHA3_384Context *cx);
+ void (*p_SHA3_384_Update)(SHA3_384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA3_384_End)(SHA3_384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ SECStatus (*p_SHA3_384_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA3_384_Hash)(unsigned char *dest, const char *src);
+
+ SHA3_512Context *(*p_SHA3_512_NewContext)(void);
+ void (*p_SHA3_512_DestroyContext)(SHA3_512Context *cx, PRBool freeit);
+ unsigned int (*p_SHA3_512_FlattenSize)(SHA3_512Context *cx);
+ void (*p_SHA3_512_Begin)(SHA3_512Context *cx);
+ void (*p_SHA3_512_Update)(SHA3_512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA3_512_End)(SHA3_512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ SECStatus (*p_SHA3_512_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA3_512_Hash)(unsigned char *dest, const char *src);
+
+ SHAKE_128Context *(*p_SHAKE_128_NewContext)(void);
+ void (*p_SHAKE_128_DestroyContext)(SHAKE_128Context *cx, PRBool freeit);
+ void (*p_SHAKE_128_Begin)(SHAKE_128Context *cx);
+ void (*p_SHAKE_128_Absorb)(SHAKE_128Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHAKE_128_SqueezeEnd)(SHAKE_128Context *cx, unsigned char *digest,
+ unsigned int digestLen);
+
+ SECStatus (*p_SHAKE_128_HashBuf)(unsigned char *dest, PRUint32 dest_length,
+ const unsigned char *src, PRUint32 src_length);
+ SECStatus (*p_SHAKE_128_Hash)(unsigned char *dest, PRUint32 dest_length, const char *src);
+
+ SHAKE_256Context *(*p_SHAKE_256_NewContext)(void);
+ void (*p_SHAKE_256_DestroyContext)(SHAKE_256Context *cx, PRBool freeit);
+ void (*p_SHAKE_256_Begin)(SHAKE_256Context *cx);
+ void (*p_SHAKE_256_Absorb)(SHAKE_256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHAKE_256_SqueezeEnd)(SHAKE_256Context *cx, unsigned char *digest,
+ unsigned int digestLen);
+
+ SECStatus (*p_SHAKE_256_HashBuf)(unsigned char *dest, PRUint32 dest_length,
+ const unsigned char *src, PRUint32 src_length);
+ SECStatus (*p_SHAKE_256_Hash)(unsigned char *dest, PRUint32 dest_length, const char *src);
+
+ /* Version 3.026 came to here */
+
+ SECStatus (*p_Kyber_NewKey)(KyberParams params, const SECItem *seed, SECItem *privKey, SECItem *pubKey);
+
+ SECStatus (*p_Kyber_Encapsulate)(KyberParams params, const SECItem *seed, const SECItem *pubKey, SECItem *ciphertext, SECItem *secret);
+
+ SECStatus (*p_Kyber_Decapsulate)(KyberParams params, const SECItem *privKey, const SECItem *ciphertext, SECItem *secret);
+
+ /* Version 3.027 came to here */
+
+ /* Add new function pointers at the end of this struct and bump
+ * FREEBL_VERSION at the beginning of this file. */
+};
+
+typedef struct FREEBLVectorStr FREEBLVector;
+
+#ifdef FREEBL_LOWHASH
+#include "nsslowhash.h"
+
+#define NSSLOW_VERSION 0x0300
+
+struct NSSLOWVectorStr { /* table of function pointers, one slot per NSSLOW_ / NSSLOWHASH_ entry point */
+ unsigned short length; /* of this struct in bytes; the loader requires >= sizeof(NSSLOWVector) */
+ unsigned short version; /* of this struct; the loader compares it against NSSLOW_VERSION */
+ const FREEBLVector *(*p_FREEBL_GetVector)(void);
+ NSSLOWInitContext *(*p_NSSLOW_Init)(void);
+ void (*p_NSSLOW_Shutdown)(NSSLOWInitContext *context);
+ void (*p_NSSLOW_Reset)(NSSLOWInitContext *context);
+ NSSLOWHASHContext *(*p_NSSLOWHASH_NewContext)(
+ NSSLOWInitContext *initContext,
+ HASH_HashType hashType);
+ void (*p_NSSLOWHASH_Begin)(NSSLOWHASHContext *context);
+ void (*p_NSSLOWHASH_Update)(NSSLOWHASHContext *context,
+ const unsigned char *buf,
+ unsigned int len);
+ void (*p_NSSLOWHASH_End)(NSSLOWHASHContext *context,
+ unsigned char *buf,
+ unsigned int *ret, unsigned int len);
+ void (*p_NSSLOWHASH_Destroy)(NSSLOWHASHContext *context);
+ unsigned int (*p_NSSLOWHASH_Length)(NSSLOWHASHContext *context);
+}; /* NOTE(review): slot order is presumably part of the DSO ABI - append only; confirm */
+
+typedef struct NSSLOWVectorStr NSSLOWVector;
+#endif
+
+SEC_BEGIN_PROTOS
+
+#ifdef FREEBL_LOWHASH
+typedef const NSSLOWVector *NSSLOWGetVectorFn(void); /* function type of NSSLOW_GetVector */
+
+extern NSSLOWGetVectorFn NSSLOW_GetVector;
+#endif /* FREEBL_LOWHASH */
+
+typedef const FREEBLVector *FREEBLGetVectorFn(void); /* function type of FREEBL_GetVector */
+
+extern FREEBLGetVectorFn FREEBL_GetVector;
+
+SEC_END_PROTOS
+
+#endif
+
+#ifdef NSS_DISABLE_DEPRECATED_SEED /* NOTE(review): these typedefs exist only when SEED is disabled - presumably placeholders for the vector slots; confirm */
+typedef SECStatus (*F_SEED_InitContext)(SEEDContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+
+typedef SEEDContext *(*F_SEED_AllocateContext)(void);
+
+typedef SEEDContext *(*F_SEED_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+
+typedef void (*F_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit);
+
+typedef SECStatus (*F_SEED_Encrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_SEED_Decrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+#endif /* NSS_DISABLE_DEPRECATED_SEED */
+
+#ifdef NSS_DISABLE_DEPRECATED_RC2 /* NOTE(review): these typedefs exist only when RC2 is disabled - presumably placeholders for the vector slots; confirm */
+typedef RC2Context *(*F_RC2_CreateContext)(const unsigned char *key,
+ unsigned int len, const unsigned char *iv,
+ int mode, unsigned effectiveKeyLen);
+
+typedef void (*F_RC2_DestroyContext)(RC2Context *cx, PRBool freeit);
+
+typedef SECStatus (*F_RC2_Encrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_RC2_Decrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+typedef SECStatus (*F_RC2_InitContext)(RC2Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int effectiveKeyLen,
+ unsigned int);
+
+typedef RC2Context *(*F_RC2_AllocateContext)(void);
+#endif /* NSS_DISABLE_DEPRECATED_RC2 */
+
+typedef SECStatus (*F_Kyber_NewKey)(KyberParams params, const SECItem *seed, SECItem *privKey, SECItem *pubKey); /* same signature as the p_Kyber_NewKey vector slot */
+
+typedef SECStatus (*F_Kyber_Encapsulate)(KyberParams params, const SECItem *seed, const SECItem *pubKey, SECItem *ciphertext, SECItem *secret); /* same signature as the p_Kyber_Encapsulate vector slot */
+
+typedef SECStatus (*F_Kyber_Decapsulate)(KyberParams params, const SECItem *privKey, const SECItem *ciphertext, SECItem *secret); /* same signature as the p_Kyber_Decapsulate vector slot */
diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c
new file mode 100644
index 0000000000..be53bbdc62
--- /dev/null
+++ b/security/nss/lib/freebl/lowhash_vector.c
@@ -0,0 +1,224 @@
+/*
+ * lowhash_vector.c - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define _GNU_SOURCE 1
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+#include "nsslowhash.h"
+#include <dlfcn.h>
+#include "pratom.h"
+
+static PRLibrary *blLib;
+
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+static const NSSLOWVector *vector;
+static const char *libraryName = NULL;
+
+/* pretty much only glibc uses this, make sure we don't have any dependencies
+ * on nspr.. */
+#undef PORT_Alloc
+#undef PORT_Free
+#define PORT_Alloc malloc
+#define PR_Malloc malloc
+#define PORT_Free free
+#define PR_Free free
+#define PR_GetDirectorySeparator() '/'
+#define PR_LoadLibraryWithFlags(libspec, flags) \
+ (PRLibrary *)dlopen(libSpec.value.pathname, RTLD_NOW | RTLD_LOCAL)
+#define PR_GetLibraryFilePathname(name, addr) \
+ freebl_lowhash_getLibraryFilePath(addr)
+
+/*
+ * Return a heap-allocated copy (strdup) of the file name that dladdr()
+ * reports for the object containing |addr|, or NULL when dladdr() cannot
+ * resolve the address. The caller owns the returned string.
+ */
+static char *
+freebl_lowhash_getLibraryFilePath(void *addr)
+{
+    Dl_info info;
+
+    return (dladdr(addr, &info) != 0) ? strdup(info.dli_fname) : NULL;
+}
+
+/*
+ * The PR_LoadLibraryWithFlags call above defines this variable away, so we
+ * don't need it..
+ */
+#ifdef nodef
+static const char *NameOfThisSharedLib =
+ SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+#include "genload.c"
+
+/*
+ * Load the freebl DSO and capture its NSSLOW vector.
+ *
+ * This function must be run only once. It asks getLibName() which library
+ * to use, loads it, looks up "NSSLOW_GetVector" and accepts the returned
+ * table only when the major version matches NSSLOW_VERSION, the minor
+ * version is at least ours, and the table is at least as large as our
+ * NSSLOWVector. On success the file-level vector, libraryName and blLib
+ * are set; on any failure after loading, the handle is closed again.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    const char *name = getLibName();
+    const unsigned short myVersion = NSSLOW_VERSION;
+    const NSSLOWVector *dsoVector = NULL;
+    PRLibrary *handle;
+    void *address;
+
+    if (!name) {
+        /* no NSPR error is recorded here */
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (!handle) {
+        return PR_FAILURE;
+    }
+
+    address = dlsym(handle, "NSSLOW_GetVector");
+    if (address) {
+        NSSLOWGetVectorFn *getVector = (NSSLOWGetVectorFn *)address;
+        dsoVector = getVector();
+    }
+
+    if (dsoVector) {
+        const unsigned short dsoVersion = dsoVector->version;
+
+        if (MSB(dsoVersion) == MSB(myVersion) &&
+            LSB(dsoVersion) >= LSB(myVersion) &&
+            dsoVector->length >= sizeof(NSSLOWVector)) {
+            vector = dsoVector;
+            libraryName = name;
+            blLib = handle;
+            return PR_SUCCESS;
+        }
+    }
+
+    /* missing symbol, NULL table or incompatible table: unload again */
+    (void)dlclose(handle);
+    return PR_FAILURE;
+}
+
+static PRCallOnceType loadFreeBLOnce;
+
+/*
+ * Run freebl_LoadDSO() at most once.
+ *
+ * We don't have NSPR here, so we can't use the real PR_CallOnce; this is a
+ * stripped down version built on the fields of loadFreeBLOnce.
+ */
+static void
+freebl_RunLoaderOnce(void)
+{
+    if (loadFreeBLOnce.initialized) {
+        return;
+    }
+
+    if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) != 0) {
+        /* Someone else claimed the load before us. There shouldn't be many
+         * callers ending up here, which is good since we don't have
+         * condition variables yet. 'initialized' only ever gets set (not
+         * cleared), so the traditional locks aren't needed. */
+        while (!loadFreeBLOnce.initialized) {
+            sleep(1); /* don't have condition variables, just give up the CPU */
+        }
+        return;
+    }
+
+    /* We claimed the load: do it and then publish completion. */
+    loadFreeBLOnce.status = freebl_LoadDSO();
+    loadFreeBLOnce.initialized = 1;
+}
+
+/*
+ * Return the NSSLOW vector, running the one-time loader first when the
+ * vector has not been set yet. The result is NULL if loading failed.
+ */
+static const NSSLOWVector *
+freebl_InitVector(void)
+{
+    if (vector) {
+        return vector;
+    }
+    freebl_RunLoaderOnce();
+    return vector;
+}
+
+/* Forward to the loaded DSO's p_FREEBL_GetVector; NULL if no DSO is loaded. */
+const FREEBLVector *
+FREEBL_GetVector(void)
+{
+    if (!freebl_InitVector()) {
+        return NULL;
+    }
+    return (vector->p_FREEBL_GetVector)();
+}
+
+/* Forward to the loaded DSO's p_NSSLOW_Init; NULL if no DSO is loaded. */
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    if (!freebl_InitVector()) {
+        return NULL;
+    }
+    return (vector->p_NSSLOW_Init)();
+}
+
+/* Forward to the loaded DSO's p_NSSLOW_Shutdown; no-op if no DSO is loaded. */
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOW_Shutdown)(context);
+}
+
+/* Forward to the loaded DSO's p_NSSLOW_Reset; no-op if no DSO is loaded. */
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOW_Reset)(context);
+}
+
+/* Forward to the loaded DSO's p_NSSLOWHASH_NewContext; NULL if no DSO is loaded. */
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(
+    NSSLOWInitContext *initContext,
+    HASH_HashType hashType)
+{
+    if (!freebl_InitVector()) {
+        return NULL;
+    }
+    return (vector->p_NSSLOWHASH_NewContext)(initContext, hashType);
+}
+
+/* Forward to the loaded DSO's p_NSSLOWHASH_Begin; no-op if no DSO is loaded. */
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOWHASH_Begin)(context);
+}
+
+/* Forward to the loaded DSO's p_NSSLOWHASH_Update; no-op if no DSO is loaded. */
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                  const unsigned char *buf,
+                  unsigned int len)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOWHASH_Update)(context, buf, len);
+}
+
+/* Forward to the loaded DSO's p_NSSLOWHASH_End; no-op if no DSO is loaded. */
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context,
+               unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOWHASH_End)(context, buf, ret, len);
+}
+
+/* Forward to the loaded DSO's p_NSSLOWHASH_Destroy; no-op if no DSO is loaded. */
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    if (!freebl_InitVector()) {
+        return;
+    }
+    (vector->p_NSSLOWHASH_Destroy)(context);
+}
+
+/*
+ * Forward to the loaded DSO's p_NSSLOWHASH_Length. When no DSO is loaded
+ * the result is -1 converted to unsigned int.
+ */
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    if (!freebl_InitVector()) {
+        return (unsigned int)-1;
+    }
+    return (vector->p_NSSLOWHASH_Length)(context);
+}
diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn
new file mode 100644
index 0000000000..a0e43ae52a
--- /dev/null
+++ b/security/nss/lib/freebl/manifest.mn
@@ -0,0 +1,212 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# NOTE: any ifdefs in this file must be defined on the gmake command line
+# (if anywhere). They cannot come from Makefile or config.mk
+
+CORE_DEPTH = ../..
+
+MODULE = nss
+
+# Copied from Linux.mk. We have a chicken-and-egg issue here: we need to set
+# the library name before we call the platform code in coreconf, but we need
+# to pick up the automatic setting of FREEBL_LOWHASH before we can set the
+# library name. So for now we mimic the code in Linux.mk to get the
+# automatic setting early.
+#
+# On Linux 2.6 or later, build libfreebl3.so with no NSPR and libnssutil3.so
+# dependencies by default. Set FREEBL_NO_DEPEND to 0 in the environment to
+# override this.
+#
+#
+include $(CORE_DEPTH)/coreconf/arch.mk
+ifeq ($(OS_ARCH),Linux)
+ifneq ($(OS_TARGET),Android)
+ifeq (2.6,$(firstword $(sort 2.6 $(OS_RELEASE))))
+ifndef FREEBL_NO_DEPEND
+FREEBL_NO_DEPEND = 1
+FREEBL_LOWHASH = 1
+endif
+endif
+endif
+endif
+
+
+LIBRARY_NAME = freebl
+LIBRARY_VERSION = 3
+
+ifdef FREEBL_CHILD_BUILD
+ ifdef USE_ABI32_INT32
+ LIBRARY_NAME = freebl_32int
+ endif
+ ifdef USE_ABI32_INT64
+ LIBRARY_NAME = freebl_32int64
+ endif
+ ifdef USE_ABI32_FPU
+ LIBRARY_NAME = freebl_32fpu
+ endif
+ ifdef USE_ABI64_INT
+ LIBRARY_NAME = freebl_64int
+ endif
+ ifdef USE_ABI64_FPU
+ LIBRARY_NAME = freebl_64fpu
+ endif
+ ifdef FREEBL_LOWHASH
+ LIBRARY_NAME = freeblpriv
+ endif
+ ifdef USE_STUB_BUILD
+ # for the stub build, reset name to the default (from freeblpriv)
+ LIBRARY_NAME = freebl
+ endif
+endif
+
+# if the library name contains _, we prefix the version with _
+ifneq (,$(findstring _,$(LIBRARY_NAME)))
+ LIBRARY_VERSION := _$(LIBRARY_VERSION)
+endif
+
+MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def
+
+SOFTOKEN_LIBRARY_VERSION = 3
+
+DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \
+ -DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \
+ -DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\"
+
+# We only support one parameter set, Kyber768, which has K=3. If we decide
+# to support more parameters, we'll need to build separate objects from
+# kyber-pqcrystals-ref.c using different values of KYBER_K.
+DEFINES += -DKYBER_K=3
+
+REQUIRES =
+
+EXPORTS = \
+ blapit.h \
+ shsign.h \
+ ecl-exp.h \
+ $(LOWHASH_EXPORTS) \
+ $(NULL)
+
+PRIVATE_EXPORTS = \
+ cmac.h \
+ alghmac.h \
+ blake2b.h \
+ blapi.h \
+ chacha20poly1305.h \
+ hmacct.h \
+ secmpi.h \
+ secrng.h \
+ ec.h \
+ ecl.h \
+ ecl-curve.h \
+ eclt.h \
+ $(NULL)
+
+MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h
+MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c
+
+
+ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h
+ECL_SRCS = ecl.c ecl_mult.c ecl_gf.c \
+ ecp_aff.c ecp_jac.c ecp_mont.c \
+ ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \
+ ecp_256_32.c ecp_25519.c ecp_secp256r1.c ecp_secp384r1.c ecp_secp521r1.c \
+ ecp_secp384r1_wrap.c ecp_secp521r1_wrap.c
+SHA_SRCS = sha_fast.c
+MPCPU_SRCS = mpcpucache.c
+VERIFIED_SRCS = $(NULL)
+
+CSRCS = \
+ freeblver.c \
+ ldvector.c \
+ sysrand.c \
+ $(SHA_SRCS) \
+ md2.c \
+ md5.c \
+ sha512.c \
+ sha3.c \
+ shake.c \
+ cmac.c \
+ alghmac.c \
+ rawhash.c \
+ arcfour.c \
+ arcfive.c \
+ crypto_primitives.c \
+ blake2b.c \
+ desblapi.c \
+ des.c \
+ drbg.c \
+ chacha20poly1305.c \
+ cts.c \
+ ctr.c \
+ blinit.c \
+ fipsfreebl.c \
+ gcm.c \
+ hmacct.c \
+ rijndael.c \
+ aeskeywrap.c \
+ camellia.c \
+ dh.c \
+ ec.c \
+ ecdecode.c \
+ pqg.c \
+ dsa.c \
+ rsa.c \
+ rsa_blind.c \
+ rsapkcs.c \
+ shvfy.c \
+ tlsprfalg.c \
+ jpake.c \
+ secmpi.c \
+ kyber.c \
+ kyber-pqcrystals-ref.c \
+ $(MPI_SRCS) \
+ $(MPCPU_SRCS) \
+ $(ECL_SRCS) \
+ $(VERIFIED_SRCS) \
+ $(STUBS_SRCS) \
+ $(LOWHASH_SRCS) \
+ $(EXTRA_SRCS) \
+ $(NULL)
+
+ifndef NSS_DISABLE_DEPRECATED_SEED
+ CSRCS += deprecated/seed.c
+endif
+
+ifndef NSS_DISABLE_DEPRECATED_RC2
+ CSRCS += deprecated/alg2268.c
+endif
+
+ALL_CSRCS := $(CSRCS)
+
+ALL_HDRS = \
+ cmac.h \
+ alghmac.h \
+ blake2b.h \
+ blapi.h \
+ blapit.h \
+ des.h \
+ ec.h \
+ loader.h \
+ rijndael.h \
+ camellia.h \
+ secmpi.h \
+ sha_fast.h \
+ sha256.h \
+ shsign.h \
+ vis_proto.h \
+ seed.h \
+ $(NULL)
+
+
+ifdef AES_GEN_VAL
+DEFINES += -DRIJNDAEL_GENERATE_VALUES
+else
+ifdef AES_GEN_VAL_M
+DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
+else
+DEFINES += -DRIJNDAEL_INCLUDE_TABLES
+endif
+endif
diff --git a/security/nss/lib/freebl/md2.c b/security/nss/lib/freebl/md2.c
new file mode 100644
index 0000000000..cb3d3d82bc
--- /dev/null
+++ b/security/nss/lib/freebl/md2.c
@@ -0,0 +1,269 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+
+#include "blapi.h"
+
+#define MD2_DIGEST_LEN 16
+#define MD2_BUFSIZE 16
+#define MD2_X_SIZE 48 /* The X array, [CV | INPUT | TMP VARS] */
+#define MD2_CV 0 /* index into X for chaining variables */
+#define MD2_INPUT 16 /* index into X for input */
+#define MD2_TMPVARS 32 /* index into X for temporary variables */
+#define MD2_CHECKSUM_SIZE 16
+
+struct MD2ContextStr { /* MD2 hashing state */
+ unsigned char checksum[MD2_BUFSIZE]; /* running checksum, updated by md2_compress */
+ unsigned char X[MD2_X_SIZE]; /* [CV | INPUT | TMP VARS], at offsets MD2_CV, MD2_INPUT, MD2_TMPVARS */
+ PRUint8 unusedBuffer; /* free bytes left in the INPUT section; MD2_BUFSIZE means it is empty */
+};
+
+static const PRUint8 MD2S[256] = { /* byte substitution table used by md2_compress; entries are octal literals.
+ * NOTE(review): appears to be the PI_SUBST table of RFC 1319 - confirm. */
+ 0051, 0056, 0103, 0311, 0242, 0330, 0174, 0001,
+ 0075, 0066, 0124, 0241, 0354, 0360, 0006, 0023,
+ 0142, 0247, 0005, 0363, 0300, 0307, 0163, 0214,
+ 0230, 0223, 0053, 0331, 0274, 0114, 0202, 0312,
+ 0036, 0233, 0127, 0074, 0375, 0324, 0340, 0026,
+ 0147, 0102, 0157, 0030, 0212, 0027, 0345, 0022,
+ 0276, 0116, 0304, 0326, 0332, 0236, 0336, 0111,
+ 0240, 0373, 0365, 0216, 0273, 0057, 0356, 0172,
+ 0251, 0150, 0171, 0221, 0025, 0262, 0007, 0077,
+ 0224, 0302, 0020, 0211, 0013, 0042, 0137, 0041,
+ 0200, 0177, 0135, 0232, 0132, 0220, 0062, 0047,
+ 0065, 0076, 0314, 0347, 0277, 0367, 0227, 0003,
+ 0377, 0031, 0060, 0263, 0110, 0245, 0265, 0321,
+ 0327, 0136, 0222, 0052, 0254, 0126, 0252, 0306,
+ 0117, 0270, 0070, 0322, 0226, 0244, 0175, 0266,
+ 0166, 0374, 0153, 0342, 0234, 0164, 0004, 0361,
+ 0105, 0235, 0160, 0131, 0144, 0161, 0207, 0040,
+ 0206, 0133, 0317, 0145, 0346, 0055, 0250, 0002,
+ 0033, 0140, 0045, 0255, 0256, 0260, 0271, 0366,
+ 0034, 0106, 0141, 0151, 0064, 0100, 0176, 0017,
+ 0125, 0107, 0243, 0043, 0335, 0121, 0257, 0072,
+ 0303, 0134, 0371, 0316, 0272, 0305, 0352, 0046,
+ 0054, 0123, 0015, 0156, 0205, 0050, 0204, 0011,
+ 0323, 0337, 0315, 0364, 0101, 0201, 0115, 0122,
+ 0152, 0334, 0067, 0310, 0154, 0301, 0253, 0372,
+ 0044, 0341, 0173, 0010, 0014, 0275, 0261, 0112,
+ 0170, 0210, 0225, 0213, 0343, 0143, 0350, 0155,
+ 0351, 0313, 0325, 0376, 0073, 0000, 0035, 0071,
+ 0362, 0357, 0267, 0016, 0146, 0130, 0320, 0344,
+ 0246, 0167, 0162, 0370, 0353, 0165, 0113, 0012,
+ 0061, 0104, 0120, 0264, 0217, 0355, 0037, 0032,
+ 0333, 0231, 0215, 0063, 0237, 0021, 0203, 0024
+};
+
+/*
+ * One-shot MD2 of the NUL-terminated string |src|; the MD2_DIGEST_LEN-byte
+ * digest is written to |dest|. Returns SECFailure (with
+ * PR_OUT_OF_MEMORY_ERROR set) only when no context can be allocated.
+ */
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    unsigned int digestLen;
+    MD2Context *ctx;
+
+    ctx = MD2_NewContext();
+    if (ctx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return SECFailure;
+    }
+
+    MD2_Begin(ctx);
+    MD2_Update(ctx, (const unsigned char *)src, PORT_Strlen(src));
+    MD2_End(ctx, dest, &digestLen, MD2_DIGEST_LEN);
+    MD2_DestroyContext(ctx, PR_TRUE);
+
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zero-filled MD2 context. Returns NULL and sets
+ * PR_OUT_OF_MEMORY_ERROR when the allocation fails.
+ */
+MD2Context *
+MD2_NewContext(void)
+{
+    MD2Context *ctx = (MD2Context *)PORT_ZAlloc(sizeof(MD2Context));
+
+    if (!ctx) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    }
+    return ctx;
+}
+
+/* Zeroize and free |cx| when |freeit| is set; otherwise do nothing. */
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (!freeit) {
+        return;
+    }
+    PORT_ZFree(cx, sizeof(*cx));
+}
+
+/* Reset |cx| to the initial state: everything zero, input buffer empty. */
+void
+MD2_Begin(MD2Context *cx)
+{
+    memset(cx, 0, sizeof(MD2Context));
+    cx->unusedBuffer = MD2_BUFSIZE; /* all of the input section is free */
+}
+
+/*
+ * Process the 16-byte block currently held in the INPUT section of cx->X:
+ * update the running checksum, fill the TMP VARS section with CV XOR INPUT,
+ * run 18 substitution rounds over all of X, and mark the input buffer as
+ * empty again.
+ */
+static void
+md2_compress(MD2Context *cx)
+{
+    unsigned char t;
+    int i;
+    int round;
+
+    /* Compute the running checksum, and set the tmp variables to be
+     * CV[i] XOR input[i].
+     */
+    t = cx->checksum[MD2_CHECKSUM_SIZE - 1];
+    for (i = 0; i < MD2_BUFSIZE; i++) {
+        t = cx->checksum[i] ^ MD2S[cx->X[MD2_INPUT + i] ^ t];
+        cx->checksum[i] = t;
+        cx->X[MD2_TMPVARS + i] = cx->X[MD2_CV + i] ^ cx->X[MD2_INPUT + i];
+    }
+
+    /* The compression function. */
+    t = 0x00;
+    for (round = 0; round < 18; round++) {
+        for (i = 0; i < MD2_X_SIZE; i++) {
+            t = cx->X[i] ^ MD2S[t];
+            cx->X[i] = t;
+        }
+        t = (t + round) % 256;
+    }
+
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+/*
+ * Absorb |inputLen| bytes from |input| into the hash state.
+ *
+ * Input is processed in MD2_BUFSIZE-byte blocks. Bytes that do not fill a
+ * whole block stay in the INPUT section of cx->X, and cx->unusedBuffer
+ * records how many bytes of that section are still free, so the data may
+ * be supplied across any number of calls of any size.
+ *
+ * Fix: the previous code compressed whenever
+ * unusedBuffer + bytesToConsume >= MD2_BUFSIZE, which is not the "block is
+ * now full" condition, and then always overwrote unusedBuffer at the end.
+ * It could therefore compress a partially filled block, skip compressing a
+ * completely filled one, and forget buffered bytes (including on a
+ * zero-length call).
+ */
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+
+    /* Top up a partially filled block left over from an earlier call. */
+    if (cx->unusedBuffer != MD2_BUFSIZE) {
+        bytesToConsume = PR_MIN(inputLen, cx->unusedBuffer);
+        memcpy(&cx->X[MD2_INPUT + (MD2_BUFSIZE - cx->unusedBuffer)],
+               input, bytesToConsume);
+        cx->unusedBuffer = (PRUint8)(cx->unusedBuffer - bytesToConsume);
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+        if (cx->unusedBuffer != 0) {
+            /* Block still incomplete, so all input was consumed; keep
+             * the buffered bytes for the next call. */
+            return;
+        }
+        /* Block is full; md2_compress resets unusedBuffer to MD2_BUFSIZE. */
+        md2_compress(cx);
+    }
+
+    /* Iterate over 16-byte chunks of the input. */
+    while (inputLen >= MD2_BUFSIZE) {
+        memcpy(&cx->X[MD2_INPUT], input, MD2_BUFSIZE);
+        md2_compress(cx);
+        inputLen -= MD2_BUFSIZE;
+        input += MD2_BUFSIZE;
+    }
+
+    /* Copy any input that remains into the (now empty) buffer. */
+    if (inputLen)
+        memcpy(&cx->X[MD2_INPUT], input, inputLen);
+    cx->unusedBuffer = MD2_BUFSIZE - inputLen;
+}
+
+/*
+ * Finish the hash: pad the last block, compress it, compress the checksum
+ * as one more block, then copy the MD2_DIGEST_LEN-byte result from the CV
+ * section into |digest| and store its length in |digestLen|.
+ *
+ * If |maxDigestLen| is smaller than MD2_BUFSIZE, SEC_ERROR_INVALID_ARGS is
+ * set and nothing is written.
+ */
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint8 padLen;
+    PRUint8 padOffset;
+
+    if (maxDigestLen < MD2_BUFSIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* Each pad byte holds the number of pad bytes. */
+    padLen = cx->unusedBuffer;
+    padOffset = MD2_BUFSIZE - padLen;
+    memset(&cx->X[MD2_INPUT + padOffset], padLen, padLen);
+    md2_compress(cx);
+
+    /* The checksum is fed through as the final block. */
+    memcpy(&cx->X[MD2_INPUT], cx->checksum, MD2_BUFSIZE);
+    md2_compress(cx);
+
+    *digestLen = MD2_DIGEST_LEN;
+    memcpy(digest, &cx->X[MD2_CV], MD2_DIGEST_LEN);
+}
+
+/* Number of bytes MD2_Flatten writes: the size of the context struct. */
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    return sizeof(MD2Context);
+}
+
+/* Copy the raw bytes of |cx| into |space|; always returns SECSuccess. */
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(MD2Context));
+    return SECSuccess;
+}
+
+/*
+ * Allocate a new context and fill it from the flattened bytes in |space|.
+ * |arg| is not used. Returns NULL when the allocation fails.
+ */
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    MD2Context *restored = MD2_NewContext();
+
+    if (restored == NULL) {
+        return NULL;
+    }
+    memcpy(restored, space, sizeof(*restored));
+    return restored;
+}
+
+/* Copy the complete hash state of |src| into |dest|. */
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    memcpy(dest, src, sizeof(MD2Context));
+}
diff --git a/security/nss/lib/freebl/md5.c b/security/nss/lib/freebl/md5.c
new file mode 100644
index 0000000000..bdd36a61bd
--- /dev/null
+++ b/security/nss/lib/freebl/md5.c
@@ -0,0 +1,598 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prlong.h"
+
+#include "blapi.h"
+#include "blapii.h"
+
+#define MD5_HASH_LEN 16
+#define MD5_BUFFER_SIZE 64
+#define MD5_END_BUFFER (MD5_BUFFER_SIZE - 8)
+
+#define CV0_1 0x67452301
+#define CV0_2 0xefcdab89
+#define CV0_3 0x98badcfe
+#define CV0_4 0x10325476
+
+#define T1_0 0xd76aa478
+#define T1_1 0xe8c7b756
+#define T1_2 0x242070db
+#define T1_3 0xc1bdceee
+#define T1_4 0xf57c0faf
+#define T1_5 0x4787c62a
+#define T1_6 0xa8304613
+#define T1_7 0xfd469501
+#define T1_8 0x698098d8
+#define T1_9 0x8b44f7af
+#define T1_10 0xffff5bb1
+#define T1_11 0x895cd7be
+#define T1_12 0x6b901122
+#define T1_13 0xfd987193
+#define T1_14 0xa679438e
+#define T1_15 0x49b40821
+
+#define T2_0 0xf61e2562
+#define T2_1 0xc040b340
+#define T2_2 0x265e5a51
+#define T2_3 0xe9b6c7aa
+#define T2_4 0xd62f105d
+#define T2_5 0x02441453
+#define T2_6 0xd8a1e681
+#define T2_7 0xe7d3fbc8
+#define T2_8 0x21e1cde6
+#define T2_9 0xc33707d6
+#define T2_10 0xf4d50d87
+#define T2_11 0x455a14ed
+#define T2_12 0xa9e3e905
+#define T2_13 0xfcefa3f8
+#define T2_14 0x676f02d9
+#define T2_15 0x8d2a4c8a
+
+#define T3_0 0xfffa3942
+#define T3_1 0x8771f681
+#define T3_2 0x6d9d6122
+#define T3_3 0xfde5380c
+#define T3_4 0xa4beea44
+#define T3_5 0x4bdecfa9
+#define T3_6 0xf6bb4b60
+#define T3_7 0xbebfbc70
+#define T3_8 0x289b7ec6
+#define T3_9 0xeaa127fa
+#define T3_10 0xd4ef3085
+#define T3_11 0x04881d05
+#define T3_12 0xd9d4d039
+#define T3_13 0xe6db99e5
+#define T3_14 0x1fa27cf8
+#define T3_15 0xc4ac5665
+
+#define T4_0 0xf4292244
+#define T4_1 0x432aff97
+#define T4_2 0xab9423a7
+#define T4_3 0xfc93a039
+#define T4_4 0x655b59c3
+#define T4_5 0x8f0ccc92
+#define T4_6 0xffeff47d
+#define T4_7 0x85845dd1
+#define T4_8 0x6fa87e4f
+#define T4_9 0xfe2ce6e0
+#define T4_10 0xa3014314
+#define T4_11 0x4e0811a1
+#define T4_12 0xf7537e82
+#define T4_13 0xbd3af235
+#define T4_14 0x2ad7d2bb
+#define T4_15 0xeb86d391
+
+#define R1B0 0
+#define R1B1 1
+#define R1B2 2
+#define R1B3 3
+#define R1B4 4
+#define R1B5 5
+#define R1B6 6
+#define R1B7 7
+#define R1B8 8
+#define R1B9 9
+#define R1B10 10
+#define R1B11 11
+#define R1B12 12
+#define R1B13 13
+#define R1B14 14
+#define R1B15 15
+
+#define R2B0 1
+#define R2B1 6
+#define R2B2 11
+#define R2B3 0
+#define R2B4 5
+#define R2B5 10
+#define R2B6 15
+#define R2B7 4
+#define R2B8 9
+#define R2B9 14
+#define R2B10 3
+#define R2B11 8
+#define R2B12 13
+#define R2B13 2
+#define R2B14 7
+#define R2B15 12
+
+#define R3B0 5
+#define R3B1 8
+#define R3B2 11
+#define R3B3 14
+#define R3B4 1
+#define R3B5 4
+#define R3B6 7
+#define R3B7 10
+#define R3B8 13
+#define R3B9 0
+#define R3B10 3
+#define R3B11 6
+#define R3B12 9
+#define R3B13 12
+#define R3B14 15
+#define R3B15 2
+
+#define R4B0 0
+#define R4B1 7
+#define R4B2 14
+#define R4B3 5
+#define R4B4 12
+#define R4B5 3
+#define R4B6 10
+#define R4B7 1
+#define R4B8 8
+#define R4B9 15
+#define R4B10 6
+#define R4B11 13
+#define R4B12 4
+#define R4B13 11
+#define R4B14 2
+#define R4B15 9
+
+#define S1_0 7
+#define S1_1 12
+#define S1_2 17
+#define S1_3 22
+
+#define S2_0 5
+#define S2_1 9
+#define S2_2 14
+#define S2_3 20
+
+#define S3_0 4
+#define S3_1 11
+#define S3_2 16
+#define S3_3 23
+
+#define S4_0 6
+#define S4_1 10
+#define S4_2 15
+#define S4_3 21
+
+/* State of an MD5 computation in progress. */
+struct MD5ContextStr {
+ PRUint32 lsbInput; /* low 32 bits of the count of input bytes */
+ PRUint32 msbInput; /* high 32 bits of the count of input bytes */
+ PRUint32 cv[4]; /* chaining variables; set to CV0_1..CV0_4 by MD5_Begin */
+ union {
+ PRUint8 b[64]; /* pending input block viewed as bytes (alias: inBuf) */
+ PRUint32 w[16]; /* the same block viewed as 32-bit words */
+ } u;
+};
+
+#define inBuf u.b
+
+/*
+ * Hash the NUL-terminated string src (terminator excluded) and write the
+ * digest to dest. Returns the result of MD5_HashBuf.
+ */
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+    PRUint32 nbytes = PORT_Strlen(src);
+
+    return MD5_HashBuf(dest, bytes, nbytes);
+}
+
+/*
+ * One-shot MD5: hash src_length bytes at src and write MD5_HASH_LEN bytes
+ * to dest. Always returns SECSuccess.
+ */
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    unsigned int len;
+    unsigned int i;
+    MD5Context cx;
+    volatile unsigned char *wipe = (volatile unsigned char *)&cx;
+
+    MD5_Begin(&cx);
+    MD5_Update(&cx, src, src_length);
+    MD5_End(&cx, dest, &len, MD5_HASH_LEN);
+
+    /* Scrub the stack copy of the hash state. The stores go through a
+     * volatile pointer because a plain memset of an object that is about to
+     * go out of scope is a dead store the compiler is allowed to remove. */
+    for (i = 0; i < sizeof cx; ++i) {
+        wipe[i] = 0;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Allocate an MD5 context. The memory is not zeroed; MD5_Begin sets the
+ * counter and chaining variables. Returns NULL and sets
+ * PR_OUT_OF_MEMORY_ERROR when the allocation fails.
+ */
+MD5Context *
+MD5_NewContext(void)
+{
+    MD5Context *cx = (MD5Context *)PORT_Alloc(sizeof *cx);
+
+    if (cx != NULL) {
+        return cx;
+    }
+    PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    return NULL;
+}
+
+/*
+ * Erase an MD5 context and, when freeit is true, release its memory.
+ * A NULL cx is ignored.
+ */
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    volatile unsigned char *wipe = (volatile unsigned char *)cx;
+    unsigned int i;
+
+    if (cx == NULL) {
+        return;
+    }
+
+    /* Clear through a volatile pointer: a plain memset immediately before
+     * the free below is a dead store the compiler is allowed to remove. */
+    for (i = 0; i < sizeof *cx; ++i) {
+        wipe[i] = 0;
+    }
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Prepare cx for a new message: zero the 64-bit byte counter and load the
+ * initial chaining values. The input buffer is left untouched.
+ */
+void
+MD5_Begin(MD5Context *cx)
+{
+    static const PRUint32 initialCv[4] = { CV0_1, CV0_2, CV0_3, CV0_4 };
+    unsigned int i;
+
+    for (i = 0; i < 4; ++i) {
+        cx->cv[i] = initialCv[i];
+    }
+    cx->msbInput = 0;
+    cx->lsbInput = 0;
+}
+
+/* cls(i32, s): rotate the 32-bit value i32 left by s bits. The expansion
+ * assigns through a variable named tmp, which must be declared in the
+ * calling scope (md5_compress declares it as PRUint32). */
+#define cls(i32, s) (tmp = i32, tmp << s | tmp >> (32 - s))
+
+/*
+ * addto64(sumhigh, sumlow, addend): add the 32-bit addend to the 64-bit
+ * counter kept in the pair (sumhigh, sumlow). sumlow wraps modulo 2^32; a
+ * wrap is detected by the new sumlow being smaller than addend and is
+ * carried into sumhigh. addend is evaluated twice.
+ *
+ * Both variants are wrapped in do { } while (0) so the macro behaves as a
+ * single statement, including inside an unbraced if/else.
+ */
+#if defined(SOLARIS) || defined(HPUX)
+#define addto64(sumhigh, sumlow, addend)    \
+    do {                                    \
+        (sumlow) += (addend);               \
+        (sumhigh) += ((sumlow) < (addend)); \
+    } while (0)
+#else
+#define addto64(sumhigh, sumlow, addend) \
+    do {                                 \
+        (sumlow) += (addend);            \
+        if ((sumlow) < (addend))         \
+            ++(sumhigh);                 \
+    } while (0)
+#endif
+
+/* lendian(i32): convert a 32-bit word between host order and little-endian
+ * order. With IS_LITTLE_ENDIAN defined it is the identity. Otherwise it
+ * reverses the four bytes: the 16-bit halves are swapped first, then MASK
+ * is used to swap the two bytes inside each half. The byte-reversing form
+ * assigns through a variable named tmp that must be declared in the calling
+ * scope, and it evaluates its argument twice. */
+#define MASK 0x00ff00ff
+#ifdef IS_LITTLE_ENDIAN
+#define lendian(i32) \
+ (i32)
+#else
+#define lendian(i32) \
+ (tmp = (i32 >> 16) | (i32 << 16), ((tmp & MASK) << 8) | ((tmp >> 8) & MASK))
+#endif
+
+#ifndef IS_LITTLE_ENDIAN
+
+/* lebytes(b4): build a 32-bit value from the four bytes at b4, taking b4[0]
+ * as the least significant byte. Each byte is widened to PRUint32 before it
+ * is shifted; without the cast the byte is promoted to int, and shifting a
+ * value of 0x80 or more left by 24 overflows a signed int. */
+#define lebytes(b4)                                      \
+    ((PRUint32)(b4)[3] << 24 | (PRUint32)(b4)[2] << 16 | \
+     (PRUint32)(b4)[1] << 8 | (PRUint32)(b4)[0])
+
+/* Convert all 16 words of the context's input block, in place, with
+ * lendian(). Only compiled when IS_LITTLE_ENDIAN is not defined. */
+static void
+md5_prep_state_le(MD5Context *cx)
+{
+    PRUint32 tmp; /* scratch variable required by lendian() */
+    unsigned int word;
+
+    for (word = 0; word < 16; ++word) {
+        cx->u.w[word] = lendian(cx->u.w[word]);
+    }
+}
+
+/* Load 64 bytes from beBuf into the context's word buffer, assembling each
+ * word from four consecutive bytes with lebytes(). Only compiled when
+ * IS_LITTLE_ENDIAN is not defined. */
+static void
+md5_prep_buffer_le(MD5Context *cx, const PRUint8 *beBuf)
+{
+    unsigned int word;
+
+    for (word = 0; word < 16; ++word) {
+        cx->u.w[word] = lebytes(&beBuf[4 * word]);
+    }
+}
+#endif
+
+/* Bitwise mixing functions, one per round of md5_compress. */
+
+/* F: for each bit, take Y where X is 1 and Z where X is 0. */
+#define F(X, Y, Z) \
+ ((X & Y) | ((~X) & Z))
+
+/* G: for each bit, take X where Z is 1 and Y where Z is 0. */
+#define G(X, Y, Z) \
+ ((X & Z) | (Y & (~Z)))
+
+/* H: bitwise XOR of the three inputs. */
+#define H(X, Y, Z) \
+ (X ^ Y ^ Z)
+
+/* I: Y XOR (X OR NOT Z). */
+#define I(X, Y, Z) \
+ (Y ^ (X | (~Z)))
+
+/* Step macros. Each replaces a with b plus the left rotation, by s bits, of
+ * (a + mix(b, c, d) + bufint + ti), where mix is F, G, H or I respectively.
+ * They expand cls(), so a variable named tmp must be in scope. */
+#define FF(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + F(b, c, d) + bufint + ti, s)
+
+#define GG(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + G(b, c, d) + bufint + ti, s)
+
+#define HH(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + H(b, c, d) + bufint + ti, s)
+
+#define II(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + I(b, c, d) + bufint + ti, s)
+
+/*
+ * Process one 64-byte block and fold the result into cx->cv.
+ *
+ * cx   - context whose chaining variables are read and then updated
+ * wBuf - the block as 16 32-bit words. MD5_Update may pass a pointer cast
+ *        directly from the caller's input, or cx->u.w after a copy or
+ *        conversion.
+ *
+ * The body is 64 unrolled steps in four groups of 16 (FF, GG, HH, II). The
+ * RnBk constants choose which word each step reads, Sn_k the rotation
+ * amount and Tn_k the additive constant.
+ */
+static void NO_SANITIZE_ALIGNMENT
+md5_compress(MD5Context *cx, const PRUint32 *wBuf)
+{
+ PRUint32 a, b, c, d;
+ PRUint32 tmp; /* scratch variable used by the cls() rotate macro */
+ /* Work on local copies of the chaining variables. */
+ a = cx->cv[0];
+ b = cx->cv[1];
+ c = cx->cv[2];
+ d = cx->cv[3];
+ /* Round 1: mixing function F. */
+ FF(a, b, c, d, wBuf[R1B0], S1_0, T1_0);
+ FF(d, a, b, c, wBuf[R1B1], S1_1, T1_1);
+ FF(c, d, a, b, wBuf[R1B2], S1_2, T1_2);
+ FF(b, c, d, a, wBuf[R1B3], S1_3, T1_3);
+ FF(a, b, c, d, wBuf[R1B4], S1_0, T1_4);
+ FF(d, a, b, c, wBuf[R1B5], S1_1, T1_5);
+ FF(c, d, a, b, wBuf[R1B6], S1_2, T1_6);
+ FF(b, c, d, a, wBuf[R1B7], S1_3, T1_7);
+ FF(a, b, c, d, wBuf[R1B8], S1_0, T1_8);
+ FF(d, a, b, c, wBuf[R1B9], S1_1, T1_9);
+ FF(c, d, a, b, wBuf[R1B10], S1_2, T1_10);
+ FF(b, c, d, a, wBuf[R1B11], S1_3, T1_11);
+ FF(a, b, c, d, wBuf[R1B12], S1_0, T1_12);
+ FF(d, a, b, c, wBuf[R1B13], S1_1, T1_13);
+ FF(c, d, a, b, wBuf[R1B14], S1_2, T1_14);
+ FF(b, c, d, a, wBuf[R1B15], S1_3, T1_15);
+ /* Round 2: mixing function G. */
+ GG(a, b, c, d, wBuf[R2B0], S2_0, T2_0);
+ GG(d, a, b, c, wBuf[R2B1], S2_1, T2_1);
+ GG(c, d, a, b, wBuf[R2B2], S2_2, T2_2);
+ GG(b, c, d, a, wBuf[R2B3], S2_3, T2_3);
+ GG(a, b, c, d, wBuf[R2B4], S2_0, T2_4);
+ GG(d, a, b, c, wBuf[R2B5], S2_1, T2_5);
+ GG(c, d, a, b, wBuf[R2B6], S2_2, T2_6);
+ GG(b, c, d, a, wBuf[R2B7], S2_3, T2_7);
+ GG(a, b, c, d, wBuf[R2B8], S2_0, T2_8);
+ GG(d, a, b, c, wBuf[R2B9], S2_1, T2_9);
+ GG(c, d, a, b, wBuf[R2B10], S2_2, T2_10);
+ GG(b, c, d, a, wBuf[R2B11], S2_3, T2_11);
+ GG(a, b, c, d, wBuf[R2B12], S2_0, T2_12);
+ GG(d, a, b, c, wBuf[R2B13], S2_1, T2_13);
+ GG(c, d, a, b, wBuf[R2B14], S2_2, T2_14);
+ GG(b, c, d, a, wBuf[R2B15], S2_3, T2_15);
+ /* Round 3: mixing function H. */
+ HH(a, b, c, d, wBuf[R3B0], S3_0, T3_0);
+ HH(d, a, b, c, wBuf[R3B1], S3_1, T3_1);
+ HH(c, d, a, b, wBuf[R3B2], S3_2, T3_2);
+ HH(b, c, d, a, wBuf[R3B3], S3_3, T3_3);
+ HH(a, b, c, d, wBuf[R3B4], S3_0, T3_4);
+ HH(d, a, b, c, wBuf[R3B5], S3_1, T3_5);
+ HH(c, d, a, b, wBuf[R3B6], S3_2, T3_6);
+ HH(b, c, d, a, wBuf[R3B7], S3_3, T3_7);
+ HH(a, b, c, d, wBuf[R3B8], S3_0, T3_8);
+ HH(d, a, b, c, wBuf[R3B9], S3_1, T3_9);
+ HH(c, d, a, b, wBuf[R3B10], S3_2, T3_10);
+ HH(b, c, d, a, wBuf[R3B11], S3_3, T3_11);
+ HH(a, b, c, d, wBuf[R3B12], S3_0, T3_12);
+ HH(d, a, b, c, wBuf[R3B13], S3_1, T3_13);
+ HH(c, d, a, b, wBuf[R3B14], S3_2, T3_14);
+ HH(b, c, d, a, wBuf[R3B15], S3_3, T3_15);
+ /* Round 4: mixing function I. */
+ II(a, b, c, d, wBuf[R4B0], S4_0, T4_0);
+ II(d, a, b, c, wBuf[R4B1], S4_1, T4_1);
+ II(c, d, a, b, wBuf[R4B2], S4_2, T4_2);
+ II(b, c, d, a, wBuf[R4B3], S4_3, T4_3);
+ II(a, b, c, d, wBuf[R4B4], S4_0, T4_4);
+ II(d, a, b, c, wBuf[R4B5], S4_1, T4_5);
+ II(c, d, a, b, wBuf[R4B6], S4_2, T4_6);
+ II(b, c, d, a, wBuf[R4B7], S4_3, T4_7);
+ II(a, b, c, d, wBuf[R4B8], S4_0, T4_8);
+ II(d, a, b, c, wBuf[R4B9], S4_1, T4_9);
+ II(c, d, a, b, wBuf[R4B10], S4_2, T4_10);
+ II(b, c, d, a, wBuf[R4B11], S4_3, T4_11);
+ II(a, b, c, d, wBuf[R4B12], S4_0, T4_12);
+ II(d, a, b, c, wBuf[R4B13], S4_1, T4_13);
+ II(c, d, a, b, wBuf[R4B14], S4_2, T4_14);
+ II(b, c, d, a, wBuf[R4B15], S4_3, T4_15);
+ /* Add this block's result into the running chaining variables. */
+ cx->cv[0] += a;
+ cx->cv[1] += b;
+ cx->cv[2] += c;
+ cx->cv[3] += d;
+}
+
+/*
+ * Absorb inputLen bytes of message data into the hash.
+ *
+ * cx       - context prepared by MD5_Begin
+ * input    - message bytes; not read when inputLen is 0
+ * inputLen - number of bytes at input
+ *
+ * Bytes that do not complete a 64-byte block are kept in cx->inBuf until a
+ * later call (or MD5_End) completes the block.
+ */
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+    PRUint32 inBufIndex = cx->lsbInput & 63;
+    const PRUint32 *wBuf;
+
+    /* A zero-length update changes nothing. Returning here also keeps a
+     * NULL input pointer away from memcpy, where it is undefined even for
+     * a length of zero. */
+    if (inputLen == 0) {
+        return;
+    }
+
+    /* Add the number of input bytes to the 64-bit input counter. */
+    addto64(cx->msbInput, cx->lsbInput, inputLen);
+    if (inBufIndex) {
+        /* There is already data in the buffer.  Fill with input. */
+        bytesToConsume = PR_MIN(inputLen, MD5_BUFFER_SIZE - inBufIndex);
+        memcpy(&cx->inBuf[inBufIndex], input, bytesToConsume);
+        if (inBufIndex + bytesToConsume >= MD5_BUFFER_SIZE) {
+/* The buffer is filled.  Run the compression function. */
+#ifndef IS_LITTLE_ENDIAN
+            md5_prep_state_le(cx);
+#endif
+            md5_compress(cx, cx->u.w);
+        }
+        /* Remaining input. */
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+
+    /* Iterate over 64-byte chunks of the message. */
+    while (inputLen >= MD5_BUFFER_SIZE) {
+#ifdef IS_LITTLE_ENDIAN
+#ifdef HAVE_UNALIGNED_ACCESS
+        /* x86 can handle arithmetic on non-word-aligned buffers */
+        wBuf = (const PRUint32 *)input;
+#else
+        if ((ptrdiff_t)input & 0x3) {
+            /* buffer not aligned, copy it to force alignment */
+            memcpy(cx->inBuf, input, MD5_BUFFER_SIZE);
+            wBuf = cx->u.w;
+        } else {
+            /* buffer is aligned */
+            wBuf = (const PRUint32 *)input;
+        }
+#endif
+#else
+        md5_prep_buffer_le(cx, input);
+        wBuf = cx->u.w;
+#endif
+        md5_compress(cx, wBuf);
+        inputLen -= MD5_BUFFER_SIZE;
+        input += MD5_BUFFER_SIZE;
+    }
+
+    /* Tail of message (message bytes mod 64). */
+    if (inputLen)
+        memcpy(cx->inBuf, input, inputLen);
+}
+
+/* Padding fed to MD5_Update by MD5_End: one 0x80 byte followed by zero
+ * bytes, 120 bytes in total. The elements not listed in the initializer are
+ * zero-initialized. */
+static const unsigned char padbytes[120] = { 0x80 };
+
+/*
+ * Finish the hash: pad the message, append its length in bits, run the last
+ * compression and copy the digest out.
+ *
+ * cx           - context holding the message state; modified by this call
+ * digest       - receives MD5_HASH_LEN bytes
+ * digestLen    - if non-NULL, receives MD5_HASH_LEN
+ * maxDigestLen - capacity of digest; if it is smaller than MD5_HASH_LEN the
+ *                function sets SEC_ERROR_INVALID_ARGS and does nothing else
+ */
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp; /* scratch variable required by lendian() */
+#endif
+    PRUint32 bitsLow, bitsHigh;
+    PRUint32 padLen;
+    const PRUint32 used = cx->lsbInput & 63;
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* Message length in bits (byte count times 8), captured now because the
+     * padding call below advances the byte counter. */
+    bitsHigh = (cx->msbInput << 3) | (cx->lsbInput >> 29);
+    bitsLow = cx->lsbInput << 3;
+
+    /* Pad until exactly MD5_END_BUFFER bytes of the block are in use; when
+     * the block is already at or past that point, pad through into the
+     * next block. */
+    padLen = (used < MD5_END_BUFFER)
+                 ? (MD5_END_BUFFER - used)
+                 : (MD5_END_BUFFER + MD5_BUFFER_SIZE - used);
+    MD5_Update(cx, padbytes, padLen);
+
+    /* The last two words of the block carry the 64-bit bit count. */
+    cx->u.w[14] = lendian(bitsLow);
+    cx->u.w[15] = lendian(bitsHigh);
+
+/* Final call to compress. */
+#ifndef IS_LITTLE_ENDIAN
+    md5_prep_state_le(cx);
+#endif
+    md5_compress(cx, cx->u.w);
+
+    /* Hand back the chaining variables as the digest. */
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN;
+#ifndef IS_LITTLE_ENDIAN
+    {
+        unsigned int i;
+
+        for (i = 0; i < 4; ++i) {
+            cx->cv[i] = lendian(cx->cv[i]);
+        }
+    }
+#endif
+    memcpy(digest, cx->cv, MD5_HASH_LEN);
+}
+
+/*
+ * Copy out the current chaining variables without padding or any further
+ * compression. The context itself is not modified.
+ *
+ * digest       - receives MD5_HASH_LEN bytes
+ * digestLen    - if non-NULL, receives MD5_HASH_LEN
+ * maxDigestLen - capacity of digest; if it is smaller than MD5_HASH_LEN the
+ *                function sets SEC_ERROR_INVALID_ARGS and does nothing else
+ */
+void
+MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp; /* scratch variable required by lendian() */
+    unsigned int i;
+#endif
+    PRUint32 snapshot[4];
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* Work on a private copy so the context stays usable. */
+    memcpy(snapshot, cx->cv, sizeof(snapshot));
+#ifndef IS_LITTLE_ENDIAN
+    for (i = 0; i < 4; ++i) {
+        snapshot[i] = lendian(snapshot[i]);
+    }
+#endif
+    memcpy(digest, snapshot, MD5_HASH_LEN);
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN;
+}
+
+/* Number of bytes MD5_Flatten writes: the size of the context structure. */
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    unsigned int flatLen = sizeof(*cx);
+
+    return flatLen;
+}
+
+/*
+ * Copy the raw bytes of *cx into space, which must hold at least
+ * MD5_FlattenSize(cx) bytes. Always returns SECSuccess.
+ */
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    const MD5Context *from = cx;
+
+    memcpy(space, from, sizeof(MD5Context));
+    return SECSuccess;
+}
+
+/*
+ * Allocate a new context and fill it from a buffer written by MD5_Flatten.
+ * Returns NULL if MD5_NewContext fails. arg is not used.
+ */
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    MD5Context *revived = MD5_NewContext();
+
+    if (revived == NULL) {
+        return NULL;
+    }
+    memcpy(revived, space, sizeof(MD5Context));
+    return revived;
+}
+
+/* Overwrite *dest with a byte-for-byte copy of *src. */
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    const void *from = src;
+
+    memcpy(dest, from, sizeof(MD5Context));
+}
+
+/* Not implemented for MD5: the context is ignored and the call only sets
+ * PR_NOT_IMPLEMENTED_ERROR. */
+void
+MD5_TraceState(MD5Context *cx)
+{
+    (void)cx;
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
diff --git a/security/nss/lib/freebl/mknewpc2.c b/security/nss/lib/freebl/mknewpc2.c
new file mode 100644
index 0000000000..6b29688163
--- /dev/null
+++ b/security/nss/lib/freebl/mknewpc2.c
@@ -0,0 +1,208 @@
+/*
+ * mknewpc2.c
+ *
+ * Generate PC-2 tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h> /* printf, called by make_pc2a() and mktable() */
+
+typedef unsigned char BYTE; /* element type of the bit-number tables */
+typedef unsigned int HALF;  /* holds the bit masks built by mktable() */
+
+#define DES_ENCRYPT 0
+#define DES_DECRYPT 1
+
+/* two 28-bit registers defined in key schedule production process */
+static HALF C0, D0;
+
+static HALF L0, R0;
+
+/* key schedule, 16 internal keys, each with 8 6-bit parts */
+static BYTE KS[8][16];
+
+/*
+ * This table takes the 56 bits in C0 and D0 and shows how they are
+ * permuted into the 8 6-bit parts of the key in the key schedule.
+ * The bits of C0 are numbered left to right, 1-28.
+ * The bits of D0 are numbered left to right, 29-56.
+ * Zeros in this table represent bits that are always zero.
+ * Note that all the bits in the first 4 rows come from C0,
+ * and all the bits in the second 4 rows come from D0.
+ */
+static const BYTE PC2[64] = {
+ 14, 17, 11, 24, 1, 5, 0, 0, /* S1 */
+ 3, 28, 15, 6, 21, 10, 0, 0, /* S2 */
+ 23, 19, 12, 4, 26, 8, 0, 0, /* S3 */
+ 16, 7, 27, 20, 13, 2, 0, 0, /* S4 */
+
+ 41, 52, 31, 37, 47, 55, 0, 0, /* S5 */
+ 30, 40, 51, 45, 33, 48, 0, 0, /* S6 */
+ 44, 49, 39, 56, 34, 53, 0, 0, /* S7 */
+ 46, 42, 50, 36, 29, 32, 0, 0 /* S8 */
+};
+
+/* This table represents the same info as PC2, except that
+ * The bits of C0 and D0 are each numbered right to left, 0-27.
+ * -1 values indicate bits that are always zero.
+ * As before all the bits in the first 4 rows come from C0,
+ * and all the bits in the second 4 rows come from D0.
+ */
+static signed char PC2a[64] = {
+ /* bits of C0 */
+ 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+ 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */
+ 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */
+ 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */
+ /* bits of D0 */
+ 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */
+ 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */
+ 12, 7, 17, 0, 22, 3, -1, -1, /* S7 */
+ 10, 14, 6, 20, 27, 24, -1, -1 /* S8 */
+};
+
+/* This table represents the same info as PC2a, except that
+ * The order of the rows has been changed to increase the efficiency
+ * with which the key schedule is created.
+ * Fewer shifts and ANDs are required to make the KS from these.
+ */
+static const signed char PC2b[64] = {
+ /* bits of C0 */
+ 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+ 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */
+ 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */
+ 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */
+ /* bits of D0 */
+ 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */
+ 10, 14, 6, 20, 27, 24, -1, -1, /* S8 */
+ 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */
+ 12, 7, 17, 0, 22, 3, -1, -1 /* S7 */
+};
+
+/* Only 24 of the 28 bits in C0 and D0 are used in PC2.
+ * The used bits of C0 and D0 are grouped into 4 groups of 6,
+ * so that the PC2 permutation can be accomplished with 4 lookups
+ * in tables of 64 entries.
+ * The following table shows how the bits of C0 and D0 are grouped
+ * into indexes for the respective table lookups.
+ * Bits are numbered right-to-left, 0-27, as in PC2b.
+ */
+static BYTE NDX[48] = {
+ /* Bits of C0 */
+ 27, 26, 25, 24, 23, 22, /* C0 table 0 */
+ 18, 17, 16, 15, 14, 13, /* C0 table 1 */
+ 9, 8, 7, 2, 1, 0, /* C0 table 2 */
+ 5, 4, 21, 20, 12, 11, /* C0 table 3 */
+ /* bits of D0 */
+ 27, 26, 25, 24, 23, 22, /* D0 table 0 */
+ 20, 19, 17, 16, 15, 14, /* D0 table 1 */
+ 12, 11, 10, 9, 8, 7, /* D0 table 2 */
+ 6, 5, 4, 3, 1, 0 /* D0 table 3 */
+};
+
+/* Here's the code that does that grouping.
+ left = PC2LOOKUP(0, 0, ((c0 >> 22) & 0x3F) );
+ left |= PC2LOOKUP(0, 1, ((c0 >> 13) & 0x3F) );
+ left |= PC2LOOKUP(0, 2, ((c0 >> 4) & 0x38) | (c0 & 0x7) );
+ left |= PC2LOOKUP(0, 3, ((c0>>18)&0xC) | ((c0>>11)&0x3) | (c0&0x30));
+
+ right = PC2LOOKUP(1, 0, ((d0 >> 22) & 0x3F) );
+ right |= PC2LOOKUP(1, 1, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf) );
+ right |= PC2LOOKUP(1, 2, ((d0 >> 7) & 0x3F) );
+ right |= PC2LOOKUP(1, 3, ((d0 >> 1) & 0x3C) | (d0 & 0x3));
+*/
+
+/*
+ * Rebuild PC2a from PC2 and print it, eight entries per line.
+ * A PC2 entry of 0 becomes -1; entries 1..28 become 28 - entry and entries
+ * above 28 become 56 - entry, which renumbers each register's bits
+ * right-to-left starting at 0.
+ */
+void
+make_pc2a(void)
+{
+    int pos;
+    int row;
+
+    for (pos = 0; pos < 64; ++pos) {
+        const int src = PC2[pos];
+        int renumbered;
+
+        if (src == 0) {
+            renumbered = -1;
+        } else if (src < 29) {
+            renumbered = 28 - src;
+        } else {
+            renumbered = 56 - src;
+        }
+        PC2a[pos] = renumbered;
+    }
+
+    for (row = 0; row < 64; row += 8) {
+        const signed char *r = &PC2a[row];
+
+        printf("%3d,%3d,%3d,%3d,%3d,%3d,%3d,%3d,\n",
+               r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7]);
+    }
+}
+
+HALF PC2cd0[64];
+
+HALF PC_2H[8][64];
+
+/*
+ * Build the eight 64-entry PC-2 lookup tables in PC_2H and print them as C
+ * initializer text.
+ *
+ * Step 1 fills PC2cd0: for each register bit named in PC2b it records the
+ * output mask that bit maps to. C0 bits use slots 32 and up, D0 bits use
+ * the slots below 32.
+ * Step 2 takes, for each table, the six bit numbers listed in NDX and sets
+ * PC_2H[table][i] to the OR of the masks of the bits that are set in i.
+ *
+ * NDX is read but never modified; the offset of 32 for the four C0 tables
+ * is applied when indexing, so the function can be called more than once.
+ */
+void
+mktable(void)
+{
+    int i;
+    int table;
+    HALF mask;
+
+    /* First half of PC2b: bits of C0. */
+    mask = 0x80000000;
+    for (i = 0; i < 32; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue;
+        PC2cd0[bit + 32] = mask;
+    }
+
+    /* Second half of PC2b: bits of D0. */
+    mask = 0x80000000;
+    for (i = 32; i < 64; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue;
+        PC2cd0[bit] = mask;
+    }
+
+#if DEBUG
+    for (i = 0; i < 64; ++i) {
+        printf("0x%08x,\n", PC2cd0[i]);
+    }
+#endif
+
+    for (table = 0; table < 8; ++table) {
+        const BYTE *ndx = &NDX[table * 6];
+        /* Tables 0-3 index C0 bits, which live in the upper PC2cd0 slots. */
+        const int base = (table < 4) ? 32 : 0;
+        HALF bitvals[6];
+
+        /* NDX lists each group most significant bit first; bitvals[k] is
+         * the mask for bit k of the table index. */
+        for (i = 0; i < 6; ++i) {
+            bitvals[5 - i] = PC2cd0[ndx[i] + base];
+        }
+        for (i = 0; i < 64; ++i) {
+            int j;
+            int k = 0;
+            HALF value = 0;
+
+            for (j = i; j; j >>= 1, ++k) {
+                if (j & 1) {
+                    value |= bitvals[k];
+                }
+            }
+            PC_2H[table][i] = value;
+        }
+        printf("/* table %d */ {\n", table);
+        for (i = 0; i < 64; i += 4) {
+            printf(" 0x%08x, 0x%08x, 0x%08x, 0x%08x, \n",
+                   PC_2H[table][i], PC_2H[table][i + 1],
+                   PC_2H[table][i + 2], PC_2H[table][i + 3]);
+        }
+        printf(" },\n");
+    }
+}
+
+/* Print the PC-2 lookup tables on standard output. Only mktable() is run;
+ * make_pc2a() is not called. */
+int
+main(void)
+{
+    mktable();
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mksp.c b/security/nss/lib/freebl/mksp.c
new file mode 100644
index 0000000000..ca83ac8e7c
--- /dev/null
+++ b/security/nss/lib/freebl/mksp.c
@@ -0,0 +1,119 @@
+/*
+ * mksp.c
+ *
+ * Generate SP tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+
+/*
+ * sboxes - the tables for the s-box functions
+ * from FIPS 46, pages 15-16.
+ */
+unsigned char S[8][64] = {
+ /* Func S1 = */
+ { 14, 0, 4, 15, 13, 7, 1, 4, 2, 14, 15, 2, 11, 13, 8, 1,
+ 3, 10, 10, 6, 6, 12, 12, 11, 5, 9, 9, 5, 0, 3, 7, 8,
+ 4, 15, 1, 12, 14, 8, 8, 2, 13, 4, 6, 9, 2, 1, 11, 7,
+ 15, 5, 12, 11, 9, 3, 7, 14, 3, 10, 10, 0, 5, 6, 0, 13 },
+ /* Func S2 = */
+ { 15, 3, 1, 13, 8, 4, 14, 7, 6, 15, 11, 2, 3, 8, 4, 14,
+ 9, 12, 7, 0, 2, 1, 13, 10, 12, 6, 0, 9, 5, 11, 10, 5,
+ 0, 13, 14, 8, 7, 10, 11, 1, 10, 3, 4, 15, 13, 4, 1, 2,
+ 5, 11, 8, 6, 12, 7, 6, 12, 9, 0, 3, 5, 2, 14, 15, 9 },
+ /* Func S3 = */
+ { 10, 13, 0, 7, 9, 0, 14, 9, 6, 3, 3, 4, 15, 6, 5, 10,
+ 1, 2, 13, 8, 12, 5, 7, 14, 11, 12, 4, 11, 2, 15, 8, 1,
+ 13, 1, 6, 10, 4, 13, 9, 0, 8, 6, 15, 9, 3, 8, 0, 7,
+ 11, 4, 1, 15, 2, 14, 12, 3, 5, 11, 10, 5, 14, 2, 7, 12 },
+ /* Func S4 = */
+ { 7, 13, 13, 8, 14, 11, 3, 5, 0, 6, 6, 15, 9, 0, 10, 3,
+ 1, 4, 2, 7, 8, 2, 5, 12, 11, 1, 12, 10, 4, 14, 15, 9,
+ 10, 3, 6, 15, 9, 0, 0, 6, 12, 10, 11, 1, 7, 13, 13, 8,
+ 15, 9, 1, 4, 3, 5, 14, 11, 5, 12, 2, 7, 8, 2, 4, 14 },
+ /* Func S5 = */
+ { 2, 14, 12, 11, 4, 2, 1, 12, 7, 4, 10, 7, 11, 13, 6, 1,
+ 8, 5, 5, 0, 3, 15, 15, 10, 13, 3, 0, 9, 14, 8, 9, 6,
+ 4, 11, 2, 8, 1, 12, 11, 7, 10, 1, 13, 14, 7, 2, 8, 13,
+ 15, 6, 9, 15, 12, 0, 5, 9, 6, 10, 3, 4, 0, 5, 14, 3 },
+ /* Func S6 = */
+ { 12, 10, 1, 15, 10, 4, 15, 2, 9, 7, 2, 12, 6, 9, 8, 5,
+ 0, 6, 13, 1, 3, 13, 4, 14, 14, 0, 7, 11, 5, 3, 11, 8,
+ 9, 4, 14, 3, 15, 2, 5, 12, 2, 9, 8, 5, 12, 15, 3, 10,
+ 7, 11, 0, 14, 4, 1, 10, 7, 1, 6, 13, 0, 11, 8, 6, 13 },
+ /* Func S7 = */
+ { 4, 13, 11, 0, 2, 11, 14, 7, 15, 4, 0, 9, 8, 1, 13, 10,
+ 3, 14, 12, 3, 9, 5, 7, 12, 5, 2, 10, 15, 6, 8, 1, 6,
+ 1, 6, 4, 11, 11, 13, 13, 8, 12, 1, 3, 4, 7, 10, 14, 7,
+ 10, 9, 15, 5, 6, 0, 8, 15, 0, 14, 5, 2, 9, 3, 2, 12 },
+ /* Func S8 = */
+ { 13, 1, 2, 15, 8, 13, 4, 8, 6, 10, 15, 3, 11, 7, 1, 4,
+ 10, 12, 9, 5, 3, 6, 14, 11, 5, 0, 0, 14, 12, 9, 7, 2,
+ 7, 2, 11, 1, 4, 14, 1, 7, 9, 4, 12, 10, 14, 8, 2, 13,
+ 0, 15, 6, 12, 10, 9, 13, 0, 15, 3, 3, 5, 5, 6, 8, 11 }
+};
+
+/*
+ * Permutation function for results from s-boxes
+ * from FIPS 46 pages 12 and 16.
+ * P =
+ */
+unsigned char P[32] = {
+ 16, 7, 20, 21, 29, 12, 28, 17,
+ 1, 15, 23, 26, 5, 18, 31, 10,
+ 2, 8, 24, 14, 32, 27, 3, 9,
+ 19, 13, 30, 6, 22, 11, 4, 25
+};
+
+unsigned int Pinv[32];
+unsigned int SP[8][64];
+
+/*
+ * Fill Pinv from P. The i-th entry of P (counting from 0) selects slot
+ * 32 - P[i] of Pinv, which receives a mask with only bit 31 - i set.
+ */
+void
+makePinv(void)
+{
+    int i;
+
+    for (i = 0; i < 32; ++i) {
+        Pinv[32 - P[i]] = 0x80000000u >> i;
+    }
+}
+
+/*
+ * Build the combined S-box/permutation tables in SP and print them as C
+ * initializer text. For every S-box output value, each set bit is replaced
+ * by its Pinv mask (box k's bits start at bit number (8 - k) * 4 for
+ * k = 1..8), and the OR of those masks is rotated left by 3 bits.
+ * makePinv() must have filled Pinv beforehand.
+ */
+void
+makeSP(void)
+{
+    int box;
+
+    for (box = 0; box < 8; ++box) {
+        const unsigned int *row = SP[box];
+        int item;
+
+        printf("/* box S%d */ {\n", box + 1);
+
+        for (item = 0; item < 64; ++item) {
+            unsigned int bits = S[box][item];
+            unsigned int bitnum = (7 - box) * 4;
+            unsigned int permuted = 0;
+
+            /* Walk the S-box output from its least significant bit up. */
+            while (bits != 0) {
+                if (bits & 1) {
+                    permuted |= Pinv[bitnum];
+                }
+                bits >>= 1;
+                ++bitnum;
+            }
+            SP[box][item] = (permuted << 3) | (permuted >> 29);
+        }
+
+        for (item = 0; item < 64; item += 4) {
+            printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                   row[item], row[item + 1], row[item + 2], row[item + 3]);
+        }
+        printf(" },\n");
+    }
+}
+
+/* Build the inverse permutation, then print the SP tables on standard
+ * output. */
+int
+main(void)
+{
+    makePinv();
+    makeSP();
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README
new file mode 100644
index 0000000000..a49aa9d8d7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/README
@@ -0,0 +1,646 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+About the MPI Library
+---------------------
+
+The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
+signed integer arithmetic package. The implementation is not the most
+efficient possible, but the code is small and should be fairly easily
+portable to just about any machine that supports an ANSI C compiler,
+as long as it is capable of at least 16-bit arithmetic (but also see
+below for more on this).
+
+This library was written with an eye to cryptographic applications;
+thus, some care is taken to make sure that temporary values are not
+left lying around in memory when they are no longer in use. This adds
+some overhead for zeroing buffers before they are released back into
+the free pool; however, it gives you the assurance that there is only
+one copy of your important values residing in your process's address
+space at a time. Obviously, it is difficult to guarantee anything, in
+a pre-emptive multitasking environment, but this at least helps you
+keep a lid on the more obvious ways your data can get spread around in
+memory.
+
+
+Using the Library
+-----------------
+
+To use the MPI library in your program, you must include the header:
+
+#include "mpi.h"
+
+This header provides all the type and function declarations you'll
+need to use the library. Almost all the names defined by the library
+begin with the prefix 'mp_', so it should be easy to keep them from
+clashing with your program's namespace (he says, glibly, knowing full
+well there are always pathological cases).
+
+There are a few things you may want to configure about the library.
+By default, the MPI library uses an unsigned short for its digit type,
+and an unsigned int for its word type. The word type must be big
+enough to contain at least two digits, for the primitive arithmetic to
+work out. On my machine, a short is 2 bytes and an int is 4 bytes --
+but if you have 64-bit ints, you might want to use a 4-byte digit and
+an 8-byte word. I have tested the library using 1-byte digits and
+2-byte words, as well. Whatever you choose to do, the things you need
+to change are:
+
+(1) The type definitions for mp_digit and mp_word.
+
+(2) The macro DIGIT_FMT which tells mp_print() how to display a
+ single digit. This is just a printf() format string, so you
+ can adjust it appropriately.
+
+(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
+ largest value expressible in an mp_digit and an mp_word,
+ respectively.
+
+Both the mp_digit and mp_word should be UNSIGNED integer types. The
+code relies on having the full positive precision of the type used for
+digits and words.
+
+The remaining type definitions should be left alone, for the most
+part. The code in the library does not make any significant
+assumptions about the sizes of things, but there is little if any
+reason to change the other parameters, so I would recommend you leave
+them as you found them.
+
+
+Conventions
+-----------
+
+Most functions in the library return a value of type mp_err. This
+permits the library to communicate success or various kinds of failure
+to the calling program. The return values currently defined are:
+
+ MP_OKAY - okay, operation succeeded, all's well
+ MP_YES - okay, the answer is yes (same as MP_OKAY)
+ MP_NO - okay, but answer is no (not MP_OKAY)
+ MP_MEM - operation ran out of memory
+ MP_RANGE - input parameter was out of range
+ MP_BADARG - an invalid input parameter was provided
+ MP_UNDEF - no output value is defined for this input
+
+The only function which currently uses MP_UNDEF is mp_invmod().
+Division by zero is undefined, but the division functions will return
+MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
+bogus mp_int structure to the function. MP_YES and MP_NO are not used
+by the library itself; they're defined so you can use them in your own
+extensions.
+
+If you need a readable interpretation of these error codes in your
+program, you may also use the mp_strerror() function. This function
+takes an mp_err as input, and returns a pointer to a human-readable
+string describing the meaning of the error. These strings are stored
+as constants within the library, so the caller should not attempt to
+modify or free the memory associated with these strings.
+
+The library represents values in signed-magnitude format. Values
+strictly less than zero are negative, all others are considered
+positive (zero is positive by fiat). You can access the 'sign' member
+of the mp_int structure directly, but better is to use the mp_cmp_z()
+function, to find out which side of zero the value lies on.
+
+Most arithmetic functions have a single-digit variant, as well as the
+full arbitrary-precision. An mp_digit is an unsigned value between 0
+and DIGIT_MAX inclusive. The radix is available as RADIX. The number
+of bits in a given digit is given as DIGIT_BIT.
+
+Generally, input parameters are given before output parameters.
+Unless otherwise specified, any input parameter can be re-used as an
+output parameter, without confusing anything.
+
+The basic numeric type defined by the library is an mp_int. Virtually
+all the functions in the library take a pointer to an mp_int as one of
+their parameters. An explanation of how to create and use these
+structures follows. And so, without further ado...
+
+
+Initialization and Cleanup
+--------------------------
+
+The basic numeric type defined by the library is an 'mp_int'.
+However, it is not sufficient to simply declare a variable of type
+mp_int in your program. These variables also need to be initialized
+before they can be used, to allocate the internal storage they require
+for computation.
+
+This is done using one of the following functions:
+
+ mp_init(mp_int *mp);
+ mp_init_copy(mp_int *mp, mp_int *from);
+ mp_init_size(mp_int *mp, mp_size p);
+
+Each of these requires a pointer to a structure of type mp_int. The
+basic mp_init() simply initializes the mp_int to a default size, and
+sets its value to zero. If you would like to initialize a copy of an
+existing mp_int, use mp_init_copy(), where the 'from' parameter is the
+mp_int you'd like to make a copy of. The third function,
+mp_init_size(), permits you to specify how many digits of precision
+should be preallocated for your mp_int. This can help the library
+avoid unnecessary re-allocations later on.
+
+The default precision used by mp_init() can be retrieved using:
+
+ precision = mp_get_prec();
+
+This returns the number of digits that will be allocated. You can
+change this value by using:
+
+ mp_set_prec(unsigned int prec);
+
+Any positive value is acceptable -- if you pass zero, the default
+precision will be re-set to the compiled-in library default (this is
+specified in the header file 'mpi-config.h', and typically defaults to
+8 or 16).
+
+Just as you must allocate an mp_int before you can use it, you must
+clean up the structure when you are done with it. This is performed
+using the mp_clear() function. Remember that any mp_int that you
+create as a local variable in a function must be mp_clear()'d before
+that function exits, or else the memory allocated to that mp_int will
+be orphaned and unrecoverable.
+
+To set an mp_int to a given value, the following functions are given:
+
+ mp_set(mp_int *mp, mp_digit d);
+ mp_set_int(mp_int *mp, long z);
+ mp_set_ulong(mp_int *mp, unsigned long z);
+
+The mp_set() function sets the mp_int to a single digit value,
+mp_set_int() sets it to a signed long integer value, and
+mp_set_ulong() sets it to an unsigned long integer value.
+
+To set an mp_int to zero, use:
+
+ mp_zero(mp_int *mp);
+
+
+Copying and Moving
+------------------
+
+If you have two initialized mp_int's, and you want to copy the value
+of one into the other, use:
+
+ mp_copy(from, to)
+
+This takes care of clearing the old value of 'to', and copies the new
+value into it. If 'to' is not yet initialized, use mp_init_copy()
+instead (see above).
+
+Note: The library tries, whenever possible, to avoid allocating
+---- new memory. Thus, mp_copy() tries first to satisfy the needs
+ of the copy by re-using the memory already allocated to 'to'.
+ Only if this proves insufficient will mp_copy() actually
+ allocate new memory.
+
+ For this reason, if you know a priori that 'to' has enough
+ available space to hold 'from', you don't need to check the
+ return value of mp_copy() for memory failure. The USED()
+ macro tells you how many digits are used by an mp_int, and
+ the ALLOC() macro tells you how many are allocated.
+
+If you have two initialized mp_int's, and you want to exchange their
+values, use:
+
+ mp_exch(a, b)
+
+This is better than using mp_copy() with a temporary, since it will
+not (ever) touch the memory allocator -- it just swaps the exact
+contents of the two structures. The mp_exch() function cannot fail;
+if you pass it an invalid structure, it just ignores it, and does
+nothing.
+
+
+Basic Arithmetic
+----------------
+
+Once you have initialized your integers, you can operate on them. The
+basic arithmetic functions on full mp_int values are:
+
+mp_add(a, b, c) - computes c = a + b
+mp_sub(a, b, c) - computes c = a - b
+mp_mul(a, b, c) - computes c = a * b
+mp_sqr(a, b) - computes b = a * a
+mp_div(a, b, q, r) - computes q, r such that a = bq + r
+mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
+mp_expt(a, b, c) - computes c = a ** b
+mp_2expt(a, k) - computes a = 2^k
+
+The mp_div_2d() function efficiently computes division by powers of
+two. Either the q or r parameter may be NULL, in which case that
+portion of the computation will be discarded.
+
+The algorithms used for some of the computations here are described in
+the following files which are included with this distribution:
+
+mul.txt Describes the multiplication algorithm
+div.txt Describes the division algorithm
+expt.txt Describes the exponentiation algorithm
+sqrt.txt Describes the square-root algorithm
+square.txt Describes the squaring algorithm
+
+There are single-digit versions of most of these routines, as well.
+In the following prototypes, 'd' is a single mp_digit:
+
+mp_add_d(a, d, c) - computes c = a + d
+mp_sub_d(a, d, c) - computes c = a - d
+mp_mul_d(a, d, c) - computes c = a * d
+mp_mul_2(a, c) - computes c = a * 2
+mp_div_d(a, d, q, r) - computes q, r such that a = dq + r
+mp_div_2(a, c) - computes c = a / 2
+mp_expt_d(a, d, c) - computes c = a ** d
+
+The mp_mul_2() and mp_div_2() functions take advantage of the internal
+representation of an mp_int to do multiplication by two more quickly
+than mp_mul_d() would. Other basic functions of an arithmetic variety
+include:
+
+mp_zero(a) - assign 0 to a
+mp_neg(a, c) - negate a: c = -a
+mp_abs(a, c) - absolute value: c = |a|
+
+
+Comparisons
+-----------
+
+Several comparison functions are provided. Each of these, unless
+otherwise specified, returns zero if the comparands are equal, < 0 if
+the first is less than the second, and > 0 if the first is greater
+than the second:
+
+mp_cmp_z(a) - compare a <=> 0
+mp_cmp_d(a, d) - compare a <=> d, d is a single digit
+mp_cmp(a, b) - compare a <=> b
+mp_cmp_mag(a, b) - compare |a| <=> |b|
+mp_isodd(a) - return nonzero if odd, zero otherwise
+mp_iseven(a) - return nonzero if even, zero otherwise
+
+
+Modular Arithmetic
+------------------
+
+Modular variations of the basic arithmetic functions are also
+supported. These are available if the MP_MODARITH parameter in
+mpi-config.h is turned on (it is by default). The modular arithmetic
+functions are:
+
+mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m
+mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below)
+mp_addmod(a, b, m, c) - compute c = (a + b) mod m
+mp_submod(a, b, m, c) - compute c = (a - b) mod m
+mp_mulmod(a, b, m, c) - compute c = (a * b) mod m
+mp_sqrmod(a, m, c) - compute c = (a * a) mod m
+mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m
+mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
+
+The mp_sqr() function squares its input argument. A call to mp_sqr(a,
+c) is identical in meaning to mp_mul(a, a, c); however, if the
+MP_SQUARE variable is set true in mpi-config.h (see below), then it
+will be implemented with a different algorithm, that is supposed to
+take advantage of the redundant computation that takes place during
+squaring. Unfortunately, some compilers result in worse performance
+on this code, so you can change the behaviour at will. There is a
+utility program "mulsqr.c" that lets you test which does better on
+your system.
+
+The mp_sqrmod() function is analogous to the mp_sqr() function; it
+uses the mp_sqr() function rather than mp_mul(), and then performs the
+modular reduction. This probably won't help much unless you are doing
+a lot of them.
+
+See the file 'square.txt' for a synopsis of the algorithm used.
+
+Note: The mp_mod_d() function computes a modular reduction around
+---- a single digit d. The result is a single digit c.
+
+Because an inverse is defined for a (mod m) if and only if (a, m) = 1
+(that is, if a and m are relatively prime), mp_invmod() may not be
+able to compute an inverse for the arguments. In this case, it
+returns the value MP_UNDEF, and does not modify c. If an inverse is
+defined, however, it returns MP_OKAY, and sets c to the value of the
+inverse (mod m).
+
+See the file 'redux.txt' for a description of the modular reduction
+algorithm used by mp_exptmod().
+
+
+Greatest Common Divisor
+-----------------------
+
+The greatest common divisor of two values can be found using one of the
+following functions:
+
+mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm
+mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b)
+mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
+
+Also provided is a function to compute modular inverses, if they
+exist:
+
+mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists
+
+The function mp_xgcd() computes the greatest common divisor, and also
+returns values of x and y satisfying Bezout's identity. This is used
+by mp_invmod() to find modular inverses. However, if you do not need
+these values, you will find that mp_gcd() is MUCH more efficient,
+since it doesn't need all the intermediate values that mp_xgcd()
+requires in order to compute x and y.
+
+The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
+algorithm due to Josef Stein.
+
+
+Input & Output Functions
+------------------------
+
+The following basic I/O routines are provided. These are present at
+all times:
+
+mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
+mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int
+mp_radix_size(mp, r) - return length of buffer needed by mp_toradix()
+mp_raw_size(mp) - return length of buffer needed by mp_toraw()
+mp_toradix(mp, str, r) - convert an mp_int to a string of radix r
+ digits
+mp_toraw(mp, str) - convert an mp_int to a string of bytes
+mp_tovalue(ch, r) - convert ch to its value when taken as
+ a radix r digit, or -1 if invalid
+mp_strerror(err) - get a string describing mp_err value 'err'
+
+If you compile the MPI library with MP_IOFUNC defined, you will also
+have access to the following additional I/O function:
+
+mp_print(mp, ofp) - print an mp_int as text to output stream ofp
+
+Note that mp_radix_size() returns a size in bytes guaranteed to be AT
+LEAST big enough for the digits output by mp_toradix(). Because it
+uses an approximation technique to figure out how many digits will be
+needed, it may return a figure which is larger than necessary. Thus,
+the caller should not rely on the value to determine how many bytes
+will actually be written by mp_toradix(). The string mp_toradix()
+creates will be NUL terminated, so the standard C library function
+strlen() should be able to ascertain this for you, if you need it.
+
+The mp_read_radix() and mp_toradix() functions support bases from 2 to
+64 inclusive. If you require more general radix conversion facilities
+than this, you will need to write them yourself (that's why mp_div_d()
+is provided, after all).
+
+Note: mp_read_radix() will accept as digits either capital or
+---- lower-case letters. However, the current implementation of
+ mp_toradix() only outputs upper-case letters, when writing
+ bases between 10 and 36. The underlying code supports using
+ lower-case letters, but the interface stub does not have a
+ selector for it. You can add one yourself if you think it
+ is worthwhile -- I do not. Bases from 37 to 64 use lower-
+ case letters as distinct from upper-case. Bases 63 and
+ 64 use the characters '+' and '/' as digits.
+
+ Note also that compiling with MP_IOFUNC defined will cause
+ inclusion of <stdio.h>, so if you are trying to write code
+ which does not depend on the standard C library, you will
+ probably want to avoid this option. This is needed because
+ the mp_print() function takes a standard library FILE * as
+ one of its parameters, and uses the fprintf() function.
+
+The mp_toraw() function converts the integer to a sequence of bytes,
+in big-endian ordering (most-significant byte first). Assuming your
+bytes are 8 bits wide, this corresponds to base 256. The sign is
+encoded as a single leading byte, whose value is 0 for zero or
+positive values, or 1 for negative values. The mp_read_raw() function
+reverses this process -- it takes a buffer of bytes, interprets the
+first as a sign indicator (0 = zero/positive, nonzero = negative), and
+the rest as a sequence of 1-byte digits in big-endian ordering.
+
+The mp_raw_size() function returns the exact number of bytes required
+to store the given integer in "raw" format (as described in the
+previous paragraph). Zero is returned in case of error; a valid
+integer will require at least three bytes of storage.
+
+In previous versions of the MPI library, an "external representation
+format" was supported. This was removed, however, because I found I
+was never using it, it was not as portable as I would have liked, and
+I decided it was a waste of space.
+
+
+Other Functions
+---------------
+
+The files 'mpprime.h' and 'mpprime.c' define some routines which are
+useful for divisibility testing and probabilistic primality testing.
+The routines defined are:
+
+mpp_divis(a, b) - is a divisible by b?
+mpp_divis_d(a, d) - is a divisible by digit d?
+mpp_random(a) - set a to random value at current precision
+mpp_random_size(a, prec) - set a to random value at given precision
+
+Note: The mpp_random() and mpp_random_size() functions use the C
+---- library's rand() function to generate random values. It is
+ up to the caller to seed this generator before it is called.
+ These functions are not suitable for generating quantities
+ requiring cryptographic-quality randomness; they are intended
+ primarily for use in primality testing.
+
+ Note too that the MPI library does not call srand(), so your
+ application should do this, if you ever want the sequence
+ to change.
+
+mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
+ in v? If so, let w be the index of
+ that digit
+
+mpp_divis_primes(a, np) - is a divisible by any of the first np
+ primes? If so, set np to the prime
+ which divided a.
+
+mpp_fermat(a, w) - test if w^a = w (mod a). If so,
+ returns MP_YES, otherwise MP_NO.
+
+mpp_pprime(a, nt) - perform nt iterations of the Rabin-
+ Miller probabilistic primality test
+ on a. Returns MP_YES if all tests
+ passed, or MP_NO if any test fails.
+
+The mpp_fermat() function works based on Fermat's little theorem, a
+consequence of which is that if p is a prime, and (w, p) = 1, then:
+
+ w^p = w (mod p)
+
+Put another way, if w^p != w (mod p), then p is not prime. The test
+is expensive to compute, but it helps to quickly eliminate an enormous
+class of composite numbers prior to Rabin-Miller testing.
+
+Building the Library
+--------------------
+
+The MPI library is designed to be as self-contained as possible. You
+should be able to compile it with your favourite ANSI C compiler, and
+link it into your program directly. If you are on a Unix system using
+the GNU C compiler (gcc), the following should work:
+
+% gcc -ansi -pedantic -Wall -O2 -c mpi.c
+
+The file 'mpi-config.h' defines several configurable parameters for
+the library, which you can adjust to suit your application. At the
+time of this writing, the available options are:
+
+MP_IOFUNC - Define true to include the mp_print() function,
+ which is moderately useful for debugging. This
+ implicitly includes <stdio.h>.
+
+MP_MODARITH - Define true to include the modular arithmetic
+ functions. If you don't need modular arithmetic
+ in your application, you can set this to zero to
+ leave out all the modular routines.
+
+MP_LOGTAB - If true, the file "logtab.h" is included, which
+ is basically a static table of base 2 logarithms.
+ These are used to compute how big the buffers for
+ radix conversion need to be. If you set this false,
+ the library includes <math.h> and uses log(). This
+ typically forces you to link against math libraries.
+
+
+MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument
+ checking macro, ARGCHK(), gets expanded. If this
+ is set to zero, ARGCHK() expands to nothing; no
+ argument checks are performed. If this is 1, the
+ ARGCHK() macro expands to code that returns MP_BADARG
+ or similar at runtime. If it is 2, ARGCHK() expands
+ to an assert() call that aborts the program on a
+ bad input.
+
+MP_DEBUG - Turns on debugging output. This is probably not at
+ all useful unless you are debugging the library. It
+ tends to spit out a LOT of output.
+
+MP_DEFPREC - The default precision of a newly-created mp_int, in
+ digits. The precision can be changed at runtime by
+ the mp_set_prec() function, but this is its initial
+ value.
+
+MP_SQUARE - If this is set to a nonzero value, the mp_sqr()
+ function will use an alternate algorithm that takes
+ advantage of the redundant inner product computation
+ when both multiplicands are identical. Unfortunately,
+ with some compilers this is actually SLOWER than just
+ calling mp_mul() with the same argument twice. So
+ if you set MP_SQUARE to zero, mp_sqr() will be expan-
+ ded into a call to mp_mul(). This applies to all
+ the uses of mp_sqr(), including mp_sqrmod() and the
+ internal calls to s_mp_sqr() inside mpi.c
+
+ The program 'mulsqr' (mulsqr.c) can be used to test
+ which works best for your configuration. Set up the
+ CC and CFLAGS variables in the Makefile, then type:
+
+ make mulsqr
+
+ Invoke it with arguments similar to the following:
+
+ mulsqr 25000 1024
+
+ That is, 25000 products computed on 1024-bit values.
+ The output will compare the two timings, and recommend
+ a setting for MP_SQUARE. It is off by default.
+
+If you would like to use the mp_print() function (see above), be sure
+to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the
+'tests' subdirectory expect this to be defined (although the test
+driver 'mpi-test' doesn't need it).
+
+The Makefile which comes with the library should take care of building
+the library for you, if you have set the CC and CFLAGS variables at
+the top of the file appropriately. By default, they are set up to
+use the GNU C compiler:
+
+CC=gcc
+CFLAGS=-ansi -pedantic -Wall -O2
+
+If all goes well, the library should compile without warnings using
+this combination. You should, of course, make whatever adjustments
+you find necessary.
+
+The MPI library distribution comes with several additional programs
+which are intended to demonstrate the use of the library, and provide
+a framework for testing it. There are a handful of test driver
+programs, in the files named 'mptest-X.c', where X is a digit. Also,
+there are some simple command-line utilities (in the 'utils'
+directory) for manipulating large numbers. These include:
+
+basecvt.c A radix-conversion program, supporting bases from
+ 2 to 64 inclusive.
+
+bbsrand.c A BBS (quadratic residue) pseudo-random number
+ generator. The file 'bbsrand.c' is just the driver
+ for the program; the real code lives in the files
+ 'bbs_rand.h' and 'bbs_rand.c'
+
+dec2hex.c Converts decimal to hexadecimal
+
+gcd.c Computes the greatest common divisor of two values.
+ If invoked as 'xgcd', also computes constants x and
+ y such that (a, b) = ax + by, in accordance with
+ Bezout's identity.
+
+hex2dec.c Converts hexadecimal to decimal
+
+invmod.c Computes modular inverses
+
+isprime.c Performs the Rabin-Miller probabilistic primality
+ test on a number. Values which fail this test are
+ definitely composite, and those which pass are very
+ likely to be prime (although there are no guarantees)
+
+lap.c Computes the order (least annihilating power) of
+ a value v modulo m. Very dumb algorithm.
+
+primegen.c Generates large (probable) primes.
+
+prng.c A pseudo-random number generator based on the
+ BBS generator code in 'bbs_rand.c'
+
+sieve.c Implements the Sieve of Eratosthenes, using a big
+ bitmap, to generate a list of prime numbers.
+
+fact.c Computes the factorial of an arbitrary precision
+ integer (iterative).
+
+exptmod.c Computes arbitrary precision modular exponentiation
+ from the command line (exptmod a b m -> a^b (mod m))
+
+Most of these can be built from the Makefile that comes with the
+library. Try 'make tools', if your environment supports it.
+
+
+Acknowledgements:
+----------------
+
+The algorithms used in this library were drawn primarily from Volume
+2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
+"Seminumerical Algorithms". Barrett's algorithm for modular reduction
+came from Menezes, van Oorschot, and Vanstone's _Handbook of Applied
+Cryptography_, Chapter 14.
+
+Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
+bug in mp_read_raw() that made things break on platforms which use
+signed chars.
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger. You can contact
+the author as follows:
+
+E-mail: <sting@linguist.dartmouth.edu>
+
+Postal: 8000 Cummings Hall, Thayer School of Engineering
+ Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
+ 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
+
+Last updated: 16-Jan-2000
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE
new file mode 100644
index 0000000000..35cca68ce9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE
@@ -0,0 +1,11 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+basecvt.pod
+gcd.pod
+invmod.pod
+isprime.pod
+lap.pod
+mpi-test.pod
+prime.txt
+prng.pod
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
new file mode 100644
index 0000000000..41dc2327f1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod
new file mode 100644
index 0000000000..c3d87fbc7e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ basecvt - radix conversion for arbitrary precision integers
+
+=head1 SYNOPSIS
+
+ basecvt <ibase> <obase> [values]
+
+=head1 DESCRIPTION
+
+The B<basecvt> program is a command-line tool for converting integers
+of arbitrary precision from one radix to another. The current version
+supports radix values from 2 (binary) to 64, inclusive. The first two
+command line arguments specify the input and output radix, in base 10.
+Any further arguments are taken to be integers notated in the input
+radix, and these are converted to the output radix. The output is
+written, one integer per line, to standard output.
+
+When reading integers, only digits that are valid for the input
+radix are accepted. Processing of an integer terminates when an
+invalid input digit is encountered. So, for example, if you set the
+input radix to 10 and enter '10ACF', B<basecvt> would assume that you
+had entered '10' and ignore the rest of the string.
+
+If no values are provided, no output is written, but the program
+simply terminates with a zero exit status. Error diagnostics are
+written to standard error in the event of out-of-range radix
+specifications. Regardless of the actual values of the input and
+output radix, the radix arguments are taken to be in base 10 (decimal)
+notation.
+
+=head1 DIGITS
+
+For radices from 2-10, standard ASCII decimal digits 0-9 are used for
+both input and output. For radices from 11-36, the ASCII letters A-Z
+are also included, following the convention used in hexadecimal. In
+this range, input is accepted in either upper or lower case, although
+on output only lower-case letters are used.
+
+For radices from 37-62, the output includes both upper- and lower-case
+ASCII letters, and case matters. In this range, case is distinguished
+both for input and for output values.
+
+For radices 63 and 64, the characters '+' (plus) and '/' (forward
+solidus) are also used. These are derived from the MIME base64
+encoding scheme. The overall encoding is not the same as base64,
+because the ASCII digits are used for the bottom of the range, and the
+letters are shifted upward; however, the output will consist of the
+same character set.
+
+This input and output behaviour is inherited from the MPI library used
+by B<basecvt>, and so is not configurable at runtime.
+
+=head1 SEE ALSO
+
+ dec2hex(1), hex2dec(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build
new file mode 100755
index 0000000000..4d75b1e5a2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/build
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+# Usage: build [file.pod ...] -- runs pod2man on each named file, or on every *.pod in the current directory when no arguments are given.
+VERS="1.7p6" # passed to pod2man --release
+SECT="1" # passed to pod2man --section and used as the output file suffix
+NAME="MPI Tools" # passed to pod2man --center
+
+echo "Building manual pages ..."
+case $# in
+ 0)
+ files=`ls *.pod` # no arguments: take every .pod file in the current directory
+ ;;
+ *)
+ files=$* # NOTE(review): unquoted, so a name containing whitespace is split into several words
+ ;;
+esac
+
+for name in $files
+do
+ echo -n "$name ... " # NOTE(review): "echo -n" is not portable across /bin/sh implementations
+# sname=`noext $name`
+ sname=`basename $name .pod` # drop any directory part and the .pod suffix
+ pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT # output goes to <sname>.<SECT> in the current directory
+ echo "(done)"
+done
+
+echo "Finished building."
+
diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt
new file mode 100644
index 0000000000..c13fb6ef18
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/div.txt
@@ -0,0 +1,64 @@
+Division
+
+This describes the division algorithm used by the MPI library.
+
+Input: a, b; a > b
+Compute: Q, R; a = Qb + R
+
+The input numbers are normalized so that the high-order digit of b is
+at least half the radix. This guarantees that we have a reasonable
+way to guess at the digits of the quotient (this method was taken from
+Knuth, vol. 2, with adaptations).
+
+To normalize, test the high-order digit of b. If it is less than half
+the radix, multiply both a and b by d, where:
+
+       radix - 1
+ d = -----------
+        bmax + 1
+
+...where bmax is the high-order digit of b. Otherwise, set d = 1.
+
+Given normalized values for a and b, let the notation a[n] denote the
+nth digit of a. Let #a be the number of significant figures of a (not
+including any leading zeroes).
+
+ Let R = 0
+ Let p = #a - 1
+
+ while(p >= 0)
+ do
+ R = (R * radix) + a[p]
+ p = p - 1
+ while(R < b and p >= 0)
+
+ if(R < b)
+ break
+
+ q = (R[#R - 1] * radix) + R[#R - 2]
+ q = q / b[#b - 1]
+
+ T = b * q
+
+ while(T > R)
+ q = q - 1
+ T = T - b
+ endwhile
+
+ R = R - T
+
+ Q = (Q * radix) + q
+
+ endwhile
+
+At this point, Q is the quotient, and R is the normalized remainder.
+To denormalize R, compute:
+
+ R = (R / d)
+
+At this point, you are finished.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt
new file mode 100644
index 0000000000..bd9d6f1960
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/expt.txt
@@ -0,0 +1,94 @@
+Exponentiation
+
+For exponentiation, the MPI library uses a simple and fairly standard
+square-and-multiply method. The algorithm is this:
+
+Input: a, b
+Output: a ** b
+
+ s = 1
+
+ while(b != 0)
+ if(b is odd)
+ s = s * a
+ endif
+
+ b = b / 2
+
+ a = a * a
+ endwhile
+
+ return s
+
+The modular exponentiation is done the same way, except replacing:
+
+ s = s * a
+
+with
+ s = (s * a) mod m
+
+and replacing
+
+ a = a * a
+
+with
+
+ a = (a * a) mod m
+
+Here is a sample exponentiation using the MPI library, as compared to
+the same problem solved by the Unix 'bc' program on my system:
+
+Computation of 2,381,283 ** 235
+
+'bc' says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+MPI says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+Diff says:
+% diff bc.txt mp.txt
+%
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod
new file mode 100644
index 0000000000..b5b8fa34fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/gcd.pod
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ gcd - compute greatest common divisor of two integers
+
+=head1 SYNOPSIS
+
+ gcd <a> <b>
+
+=head1 DESCRIPTION
+
+The B<gcd> program computes the greatest common divisor of two
+arbitrary-precision integers I<a> and I<b>. The result is written in
+standard decimal notation to the standard output.
+
+If I<b> is zero, B<gcd> will print an error message and exit.
+
+=head1 SEE ALSO
+
+invmod(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod
new file mode 100644
index 0000000000..0194f44884
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/invmod.pod
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ invmod - compute modular inverse of an integer
+
+=head1 SYNOPSIS
+
+ invmod <a> <m>
+
+=head1 DESCRIPTION
+
+The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
+that inverse exists. Both I<a> and I<m> are arbitrary-precision
+integers in decimal notation. The result is written in standard
+decimal notation to the standard output.
+
+If there is no inverse, the message:
+
+ No inverse
+
+...will be printed to the standard output (an inverse exists if and
+only if the greatest common divisor of I<a> and I<m> is 1).
+
+=head1 SEE ALSO
+
+gcd(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod
new file mode 100644
index 0000000000..a8ec1f7ee3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/isprime.pod
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ isprime - probabilistic primality testing
+
+=head1 SYNOPSIS
+
+ isprime <a>
+
+=head1 DESCRIPTION
+
+The B<isprime> program attempts to determine whether the arbitrary
+precision integer I<a> is prime. It first tests I<a> for divisibility
+by the first 170 or so small primes, and assuming I<a> is not
+divisible by any of these, applies 15 iterations of the Rabin-Miller
+probabilistic primality test.
+
+If the program discovers that the number is composite, it will print:
+
+ Not prime (reason)
+
+Where I<reason> is either:
+
+ divisible by small prime x
+
+Or:
+
+ failed nth pseudoprime test
+
+In the first case, I<x> indicates the first small prime factor that
+was found. In the second case, I<n> indicates which of the
+pseudoprime tests failed (numbered from 1).
+
+If this happens, the number is definitely not prime. However, if the
+number succeeds, this message results:
+
+ Probably prime, 1 in 4^15 chance of false positive
+
+If this happens, the number is prime with very high probability, but
+its primality has not been absolutely proven, only demonstrated to a
+very convincing degree.
+
+The value I<a> can be input in standard decimal notation, or, if it is
+prefixed with I<0x>, it will be read as hexadecimal.
+
+=head1 ENVIRONMENT
+
+You can control how many iterations of Rabin-Miller are performed on
+the candidate number by setting the I<RM_TESTS> environment variable
+to an integer value before starting up B<isprime>. This will change
+the output slightly if the number passes all the tests.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod
new file mode 100644
index 0000000000..47539fbbf9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/lap.pod
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ lap - compute least annihilating power of a number
+
+=head1 SYNOPSIS
+
+ lap <a> <m>
+
+=head1 DESCRIPTION
+
+The B<lap> program computes the order of I<a> modulo I<m>, for
+arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
+modulo I<m> is defined as the smallest positive value I<n> for which
+I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
+order may not exist, if I<m> is composite.
+
+=head1 RESTRICTIONS
+
+This program is very slow, especially for large moduli. It is
+intended as a way to help find primitive elements in a modular field,
+but it does not do so in a particularly efficient manner. It was
+written simply to help verify that a particular candidate does not
+have an obviously short cycle mod I<m>.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), isprime(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
new file mode 100644
index 0000000000..b05f866e5e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ mpi-test - automated test program for MPI library
+
+=head1 SYNOPSIS
+
+ mpi-test <suite-name> [quiet]
+ mpi-test list
+ mpi-test help
+
+=head1 DESCRIPTION
+
+The B<mpi-test> program is a general unit test driver for the MPI
+library. It is used to verify that the library works as it is
+supposed to on your architecture. As with most such things, passing
+all the tests in B<mpi-test> does not guarantee the code is correct,
+but if any of them fail, there are certainly problems.
+
+Each major function of the library can be tested individually. For a
+list of the test suites understood by B<mpi-test>, run it with the
+I<list> command line option:
+
+ mpi-test list
+
+This will display a list of the available test suites and a brief
+synopsis of what each one does. For a brief overview of this
+document, run B<mpi-test> I<help>.
+
+B<mpi-test> exits with a zero status if the selected test succeeds, or
+a nonzero status if it fails. If a I<suite-name> which is not
+understood by B<mpi-test> is given, a diagnostic is printed to the
+standard error, and the program exits with a result code of 2. If a
+test fails, the result code will be 1, and a diagnostic is ordinarily
+printed to the standard error. However, if the I<quiet> option is
+provided, these diagnostics will be suppressed.
+
+=head1 RESTRICTIONS
+
+Only a few canned test cases are provided. The solutions have been
+verified using the GNU bc(1) program, so bugs there may cause problems
+here; however, this is very unlikely, so if a test fails, it is almost
+certainly my fault, not bc(1)'s.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt
new file mode 100644
index 0000000000..975f56ddbe
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mul.txt
@@ -0,0 +1,77 @@
+Multiplication
+
+This describes the multiplication algorithm used by the MPI library.
+
+This is basically a standard "schoolbook" algorithm. It is slow --
+O(mn) for m = #a, n = #b -- but easy to implement and verify.
+Basically, we run two nested loops, as illustrated here (R is the
+radix):
+
+k = 0
+for j <- 0 to (#b - 1)
+ for i <- 0 to (#a - 1)
+ w = (a[i] * b[j]) + k + c[i+j]
+ c[i+j] = w mod R
+ k = w div R
+ endfor
+ c[i+j] = k;
+ k = 0;
+endfor
+
+It is necessary that 'w' have room for at least two radix R digits.
+The product of any two digits in radix R is at most:
+
+ (R - 1)(R - 1) = R^2 - 2R + 1
+
+Since the largest two-digit radix-R number is R^2 - 1,
+this ensures that the product will fit into the two-digit register.
+
+To ensure that two digits is enough for w, we must also show that
+there is room for the carry-in from the previous multiplication, and
+the current value of the product digit that is being recomputed.
+Assuming each of these may be as big as R - 1 (and no larger,
+certainly), two digits will be enough if and only if:
+
+ (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
+
+Simplifying this inequality shows that, indeed, this is the case:
+
+ R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
+
+ R^2 - 1 <= R^2 - 1
+
+This suggests that a good radix would be one more than the largest
+value that can be held in half a machine word -- so, for example, as
+in this implementation, where we used a radix of 65536 on a machine
+with 4-byte words. Another advantage of a radix of this sort is that
+binary-level operations are easy on numbers in this representation.
+
+Here's an example multiplication worked out longhand in radix-10,
+using the above algorithm:
+
+ a = 999
+ b = x 999
+ -------------
+ p = 998001
+
+w = (a[jx] * b[ix]) + kin + c[ix + jx]
+c[ix+jx] = w % RADIX
+k = w / RADIX
+ product
+ix jx a[jx] b[ix] kin w c[i+j] kout 000000
+0 0 9 9 0 81+0+0 1 8 000001
+0 1 9 9 8 81+8+0 9 8 000091
+0 2 9 9 8 81+8+0 9 8 000991
+ 8 0 008991
+1 0 9 9 0 81+0+9 0 9 008901
+1 1 9 9 9 81+9+9 9 9 008901
+1 2 9 9 9 81+9+8 8 9 008901
+ 9 0 098901
+2 0 9 9 0 81+0+9 0 9 098001
+2 1 9 9 9 81+9+8 8 9 098001
+2 2 9 9 9 81+9+9 9 9 098001
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt
new file mode 100644
index 0000000000..a6ef91137f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/pi.txt
@@ -0,0 +1,53 @@
+This file describes how pi is computed by the program in 'pi.c' (see
+the utils subdirectory).
+
+Basically, we use Machin's formula, which is what everyone in the
+world uses as a simple method for computing approximations to pi.
+This works for up to a few thousand digits without too much effort.
+Beyond that, though, it gets too slow.
+
+Machin's formula states:
+
+ pi := 16 * arctan(1/5) - 4 * arctan(1/239)
+
+We compute this in integer arithmetic by first multiplying everything
+through by 10^d, where 'd' is the number of digits of pi we wanted to
+compute. It turns out, the last few digits will be wrong, but the
+number that are wrong is usually very small (ordinarily only 2-3).
+Having done this, we compute the arctan() function using the formula:
+
+                 1     1       1       1       1
+ arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
+                 x   3 x^3   5 x^5   7 x^7   9 x^9
+
+This is done iteratively by computing the first term manually, and
+then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
+current figure. This is then added to (or subtracted from) a running
+sum, as appropriate. The iteration continues until we overflow our
+available precision and the current figure goes to zero under integer
+division. At that point, we're finished.
+
+Actually, we get a couple extra bits of precision out of the fact that
+we know we're computing y * arctan(1/x), by setting up the multiplier
+as:
+
+ y * 10^d
+
+... instead of just 10^d. There is also a bit of cleverness in how
+the loop is constructed, to avoid special-casing the first term.
+Check out the code for arctan() in 'pi.c', if you are interested in
+seeing how it is set up.
+
+Thanks to Jason P. for this algorithm, which I assembled from notes
+and programs found on his cool "Pile of Pi Programs" page, at:
+
+ http://www.isr.umd.edu/~jasonp/pipage.html
+
+Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
+whose pi program I borrowed the clever idea of pre-multiplying by x in
+order to avoid a special case on the loop iteration.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt
new file mode 100644
index 0000000000..694797d5f3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prime.txt
@@ -0,0 +1,6542 @@
+2
+3
+5
+7
+11
+13
+17
+19
+23
+29
+31
+37
+41
+43
+47
+53
+59
+61
+67
+71
+73
+79
+83
+89
+97
+101
+103
+107
+109
+113
+127
+131
+137
+139
+149
+151
+157
+163
+167
+173
+179
+181
+191
+193
+197
+199
+211
+223
+227
+229
+233
+239
+241
+251
+257
+263
+269
+271
+277
+281
+283
+293
+307
+311
+313
+317
+331
+337
+347
+349
+353
+359
+367
+373
+379
+383
+389
+397
+401
+409
+419
+421
+431
+433
+439
+443
+449
+457
+461
+463
+467
+479
+487
+491
+499
+503
+509
+521
+523
+541
+547
+557
+563
+569
+571
+577
+587
+593
+599
+601
+607
+613
+617
+619
+631
+641
+643
+647
+653
+659
+661
+673
+677
+683
+691
+701
+709
+719
+727
+733
+739
+743
+751
+757
+761
+769
+773
+787
+797
+809
+811
+821
+823
+827
+829
+839
+853
+857
+859
+863
+877
+881
+883
+887
+907
+911
+919
+929
+937
+941
+947
+953
+967
+971
+977
+983
+991
+997
+1009
+1013
+1019
+1021
+1031
+1033
+1039
+1049
+1051
+1061
+1063
+1069
+1087
+1091
+1093
+1097
+1103
+1109
+1117
+1123
+1129
+1151
+1153
+1163
+1171
+1181
+1187
+1193
+1201
+1213
+1217
+1223
+1229
+1231
+1237
+1249
+1259
+1277
+1279
+1283
+1289
+1291
+1297
+1301
+1303
+1307
+1319
+1321
+1327
+1361
+1367
+1373
+1381
+1399
+1409
+1423
+1427
+1429
+1433
+1439
+1447
+1451
+1453
+1459
+1471
+1481
+1483
+1487
+1489
+1493
+1499
+1511
+1523
+1531
+1543
+1549
+1553
+1559
+1567
+1571
+1579
+1583
+1597
+1601
+1607
+1609
+1613
+1619
+1621
+1627
+1637
+1657
+1663
+1667
+1669
+1693
+1697
+1699
+1709
+1721
+1723
+1733
+1741
+1747
+1753
+1759
+1777
+1783
+1787
+1789
+1801
+1811
+1823
+1831
+1847
+1861
+1867
+1871
+1873
+1877
+1879
+1889
+1901
+1907
+1913
+1931
+1933
+1949
+1951
+1973
+1979
+1987
+1993
+1997
+1999
+2003
+2011
+2017
+2027
+2029
+2039
+2053
+2063
+2069
+2081
+2083
+2087
+2089
+2099
+2111
+2113
+2129
+2131
+2137
+2141
+2143
+2153
+2161
+2179
+2203
+2207
+2213
+2221
+2237
+2239
+2243
+2251
+2267
+2269
+2273
+2281
+2287
+2293
+2297
+2309
+2311
+2333
+2339
+2341
+2347
+2351
+2357
+2371
+2377
+2381
+2383
+2389
+2393
+2399
+2411
+2417
+2423
+2437
+2441
+2447
+2459
+2467
+2473
+2477
+2503
+2521
+2531
+2539
+2543
+2549
+2551
+2557
+2579
+2591
+2593
+2609
+2617
+2621
+2633
+2647
+2657
+2659
+2663
+2671
+2677
+2683
+2687
+2689
+2693
+2699
+2707
+2711
+2713
+2719
+2729
+2731
+2741
+2749
+2753
+2767
+2777
+2789
+2791
+2797
+2801
+2803
+2819
+2833
+2837
+2843
+2851
+2857
+2861
+2879
+2887
+2897
+2903
+2909
+2917
+2927
+2939
+2953
+2957
+2963
+2969
+2971
+2999
+3001
+3011
+3019
+3023
+3037
+3041
+3049
+3061
+3067
+3079
+3083
+3089
+3109
+3119
+3121
+3137
+3163
+3167
+3169
+3181
+3187
+3191
+3203
+3209
+3217
+3221
+3229
+3251
+3253
+3257
+3259
+3271
+3299
+3301
+3307
+3313
+3319
+3323
+3329
+3331
+3343
+3347
+3359
+3361
+3371
+3373
+3389
+3391
+3407
+3413
+3433
+3449
+3457
+3461
+3463
+3467
+3469
+3491
+3499
+3511
+3517
+3527
+3529
+3533
+3539
+3541
+3547
+3557
+3559
+3571
+3581
+3583
+3593
+3607
+3613
+3617
+3623
+3631
+3637
+3643
+3659
+3671
+3673
+3677
+3691
+3697
+3701
+3709
+3719
+3727
+3733
+3739
+3761
+3767
+3769
+3779
+3793
+3797
+3803
+3821
+3823
+3833
+3847
+3851
+3853
+3863
+3877
+3881
+3889
+3907
+3911
+3917
+3919
+3923
+3929
+3931
+3943
+3947
+3967
+3989
+4001
+4003
+4007
+4013
+4019
+4021
+4027
+4049
+4051
+4057
+4073
+4079
+4091
+4093
+4099
+4111
+4127
+4129
+4133
+4139
+4153
+4157
+4159
+4177
+4201
+4211
+4217
+4219
+4229
+4231
+4241
+4243
+4253
+4259
+4261
+4271
+4273
+4283
+4289
+4297
+4327
+4337
+4339
+4349
+4357
+4363
+4373
+4391
+4397
+4409
+4421
+4423
+4441
+4447
+4451
+4457
+4463
+4481
+4483
+4493
+4507
+4513
+4517
+4519
+4523
+4547
+4549
+4561
+4567
+4583
+4591
+4597
+4603
+4621
+4637
+4639
+4643
+4649
+4651
+4657
+4663
+4673
+4679
+4691
+4703
+4721
+4723
+4729
+4733
+4751
+4759
+4783
+4787
+4789
+4793
+4799
+4801
+4813
+4817
+4831
+4861
+4871
+4877
+4889
+4903
+4909
+4919
+4931
+4933
+4937
+4943
+4951
+4957
+4967
+4969
+4973
+4987
+4993
+4999
+5003
+5009
+5011
+5021
+5023
+5039
+5051
+5059
+5077
+5081
+5087
+5099
+5101
+5107
+5113
+5119
+5147
+5153
+5167
+5171
+5179
+5189
+5197
+5209
+5227
+5231
+5233
+5237
+5261
+5273
+5279
+5281
+5297
+5303
+5309
+5323
+5333
+5347
+5351
+5381
+5387
+5393
+5399
+5407
+5413
+5417
+5419
+5431
+5437
+5441
+5443
+5449
+5471
+5477
+5479
+5483
+5501
+5503
+5507
+5519
+5521
+5527
+5531
+5557
+5563
+5569
+5573
+5581
+5591
+5623
+5639
+5641
+5647
+5651
+5653
+5657
+5659
+5669
+5683
+5689
+5693
+5701
+5711
+5717
+5737
+5741
+5743
+5749
+5779
+5783
+5791
+5801
+5807
+5813
+5821
+5827
+5839
+5843
+5849
+5851
+5857
+5861
+5867
+5869
+5879
+5881
+5897
+5903
+5923
+5927
+5939
+5953
+5981
+5987
+6007
+6011
+6029
+6037
+6043
+6047
+6053
+6067
+6073
+6079
+6089
+6091
+6101
+6113
+6121
+6131
+6133
+6143
+6151
+6163
+6173
+6197
+6199
+6203
+6211
+6217
+6221
+6229
+6247
+6257
+6263
+6269
+6271
+6277
+6287
+6299
+6301
+6311
+6317
+6323
+6329
+6337
+6343
+6353
+6359
+6361
+6367
+6373
+6379
+6389
+6397
+6421
+6427
+6449
+6451
+6469
+6473
+6481
+6491
+6521
+6529
+6547
+6551
+6553
+6563
+6569
+6571
+6577
+6581
+6599
+6607
+6619
+6637
+6653
+6659
+6661
+6673
+6679
+6689
+6691
+6701
+6703
+6709
+6719
+6733
+6737
+6761
+6763
+6779
+6781
+6791
+6793
+6803
+6823
+6827
+6829
+6833
+6841
+6857
+6863
+6869
+6871
+6883
+6899
+6907
+6911
+6917
+6947
+6949
+6959
+6961
+6967
+6971
+6977
+6983
+6991
+6997
+7001
+7013
+7019
+7027
+7039
+7043
+7057
+7069
+7079
+7103
+7109
+7121
+7127
+7129
+7151
+7159
+7177
+7187
+7193
+7207
+7211
+7213
+7219
+7229
+7237
+7243
+7247
+7253
+7283
+7297
+7307
+7309
+7321
+7331
+7333
+7349
+7351
+7369
+7393
+7411
+7417
+7433
+7451
+7457
+7459
+7477
+7481
+7487
+7489
+7499
+7507
+7517
+7523
+7529
+7537
+7541
+7547
+7549
+7559
+7561
+7573
+7577
+7583
+7589
+7591
+7603
+7607
+7621
+7639
+7643
+7649
+7669
+7673
+7681
+7687
+7691
+7699
+7703
+7717
+7723
+7727
+7741
+7753
+7757
+7759
+7789
+7793
+7817
+7823
+7829
+7841
+7853
+7867
+7873
+7877
+7879
+7883
+7901
+7907
+7919
+7927
+7933
+7937
+7949
+7951
+7963
+7993
+8009
+8011
+8017
+8039
+8053
+8059
+8069
+8081
+8087
+8089
+8093
+8101
+8111
+8117
+8123
+8147
+8161
+8167
+8171
+8179
+8191
+8209
+8219
+8221
+8231
+8233
+8237
+8243
+8263
+8269
+8273
+8287
+8291
+8293
+8297
+8311
+8317
+8329
+8353
+8363
+8369
+8377
+8387
+8389
+8419
+8423
+8429
+8431
+8443
+8447
+8461
+8467
+8501
+8513
+8521
+8527
+8537
+8539
+8543
+8563
+8573
+8581
+8597
+8599
+8609
+8623
+8627
+8629
+8641
+8647
+8663
+8669
+8677
+8681
+8689
+8693
+8699
+8707
+8713
+8719
+8731
+8737
+8741
+8747
+8753
+8761
+8779
+8783
+8803
+8807
+8819
+8821
+8831
+8837
+8839
+8849
+8861
+8863
+8867
+8887
+8893
+8923
+8929
+8933
+8941
+8951
+8963
+8969
+8971
+8999
+9001
+9007
+9011
+9013
+9029
+9041
+9043
+9049
+9059
+9067
+9091
+9103
+9109
+9127
+9133
+9137
+9151
+9157
+9161
+9173
+9181
+9187
+9199
+9203
+9209
+9221
+9227
+9239
+9241
+9257
+9277
+9281
+9283
+9293
+9311
+9319
+9323
+9337
+9341
+9343
+9349
+9371
+9377
+9391
+9397
+9403
+9413
+9419
+9421
+9431
+9433
+9437
+9439
+9461
+9463
+9467
+9473
+9479
+9491
+9497
+9511
+9521
+9533
+9539
+9547
+9551
+9587
+9601
+9613
+9619
+9623
+9629
+9631
+9643
+9649
+9661
+9677
+9679
+9689
+9697
+9719
+9721
+9733
+9739
+9743
+9749
+9767
+9769
+9781
+9787
+9791
+9803
+9811
+9817
+9829
+9833
+9839
+9851
+9857
+9859
+9871
+9883
+9887
+9901
+9907
+9923
+9929
+9931
+9941
+9949
+9967
+9973
+10007
+10009
+10037
+10039
+10061
+10067
+10069
+10079
+10091
+10093
+10099
+10103
+10111
+10133
+10139
+10141
+10151
+10159
+10163
+10169
+10177
+10181
+10193
+10211
+10223
+10243
+10247
+10253
+10259
+10267
+10271
+10273
+10289
+10301
+10303
+10313
+10321
+10331
+10333
+10337
+10343
+10357
+10369
+10391
+10399
+10427
+10429
+10433
+10453
+10457
+10459
+10463
+10477
+10487
+10499
+10501
+10513
+10529
+10531
+10559
+10567
+10589
+10597
+10601
+10607
+10613
+10627
+10631
+10639
+10651
+10657
+10663
+10667
+10687
+10691
+10709
+10711
+10723
+10729
+10733
+10739
+10753
+10771
+10781
+10789
+10799
+10831
+10837
+10847
+10853
+10859
+10861
+10867
+10883
+10889
+10891
+10903
+10909
+10937
+10939
+10949
+10957
+10973
+10979
+10987
+10993
+11003
+11027
+11047
+11057
+11059
+11069
+11071
+11083
+11087
+11093
+11113
+11117
+11119
+11131
+11149
+11159
+11161
+11171
+11173
+11177
+11197
+11213
+11239
+11243
+11251
+11257
+11261
+11273
+11279
+11287
+11299
+11311
+11317
+11321
+11329
+11351
+11353
+11369
+11383
+11393
+11399
+11411
+11423
+11437
+11443
+11447
+11467
+11471
+11483
+11489
+11491
+11497
+11503
+11519
+11527
+11549
+11551
+11579
+11587
+11593
+11597
+11617
+11621
+11633
+11657
+11677
+11681
+11689
+11699
+11701
+11717
+11719
+11731
+11743
+11777
+11779
+11783
+11789
+11801
+11807
+11813
+11821
+11827
+11831
+11833
+11839
+11863
+11867
+11887
+11897
+11903
+11909
+11923
+11927
+11933
+11939
+11941
+11953
+11959
+11969
+11971
+11981
+11987
+12007
+12011
+12037
+12041
+12043
+12049
+12071
+12073
+12097
+12101
+12107
+12109
+12113
+12119
+12143
+12149
+12157
+12161
+12163
+12197
+12203
+12211
+12227
+12239
+12241
+12251
+12253
+12263
+12269
+12277
+12281
+12289
+12301
+12323
+12329
+12343
+12347
+12373
+12377
+12379
+12391
+12401
+12409
+12413
+12421
+12433
+12437
+12451
+12457
+12473
+12479
+12487
+12491
+12497
+12503
+12511
+12517
+12527
+12539
+12541
+12547
+12553
+12569
+12577
+12583
+12589
+12601
+12611
+12613
+12619
+12637
+12641
+12647
+12653
+12659
+12671
+12689
+12697
+12703
+12713
+12721
+12739
+12743
+12757
+12763
+12781
+12791
+12799
+12809
+12821
+12823
+12829
+12841
+12853
+12889
+12893
+12899
+12907
+12911
+12917
+12919
+12923
+12941
+12953
+12959
+12967
+12973
+12979
+12983
+13001
+13003
+13007
+13009
+13033
+13037
+13043
+13049
+13063
+13093
+13099
+13103
+13109
+13121
+13127
+13147
+13151
+13159
+13163
+13171
+13177
+13183
+13187
+13217
+13219
+13229
+13241
+13249
+13259
+13267
+13291
+13297
+13309
+13313
+13327
+13331
+13337
+13339
+13367
+13381
+13397
+13399
+13411
+13417
+13421
+13441
+13451
+13457
+13463
+13469
+13477
+13487
+13499
+13513
+13523
+13537
+13553
+13567
+13577
+13591
+13597
+13613
+13619
+13627
+13633
+13649
+13669
+13679
+13681
+13687
+13691
+13693
+13697
+13709
+13711
+13721
+13723
+13729
+13751
+13757
+13759
+13763
+13781
+13789
+13799
+13807
+13829
+13831
+13841
+13859
+13873
+13877
+13879
+13883
+13901
+13903
+13907
+13913
+13921
+13931
+13933
+13963
+13967
+13997
+13999
+14009
+14011
+14029
+14033
+14051
+14057
+14071
+14081
+14083
+14087
+14107
+14143
+14149
+14153
+14159
+14173
+14177
+14197
+14207
+14221
+14243
+14249
+14251
+14281
+14293
+14303
+14321
+14323
+14327
+14341
+14347
+14369
+14387
+14389
+14401
+14407
+14411
+14419
+14423
+14431
+14437
+14447
+14449
+14461
+14479
+14489
+14503
+14519
+14533
+14537
+14543
+14549
+14551
+14557
+14561
+14563
+14591
+14593
+14621
+14627
+14629
+14633
+14639
+14653
+14657
+14669
+14683
+14699
+14713
+14717
+14723
+14731
+14737
+14741
+14747
+14753
+14759
+14767
+14771
+14779
+14783
+14797
+14813
+14821
+14827
+14831
+14843
+14851
+14867
+14869
+14879
+14887
+14891
+14897
+14923
+14929
+14939
+14947
+14951
+14957
+14969
+14983
+15013
+15017
+15031
+15053
+15061
+15073
+15077
+15083
+15091
+15101
+15107
+15121
+15131
+15137
+15139
+15149
+15161
+15173
+15187
+15193
+15199
+15217
+15227
+15233
+15241
+15259
+15263
+15269
+15271
+15277
+15287
+15289
+15299
+15307
+15313
+15319
+15329
+15331
+15349
+15359
+15361
+15373
+15377
+15383
+15391
+15401
+15413
+15427
+15439
+15443
+15451
+15461
+15467
+15473
+15493
+15497
+15511
+15527
+15541
+15551
+15559
+15569
+15581
+15583
+15601
+15607
+15619
+15629
+15641
+15643
+15647
+15649
+15661
+15667
+15671
+15679
+15683
+15727
+15731
+15733
+15737
+15739
+15749
+15761
+15767
+15773
+15787
+15791
+15797
+15803
+15809
+15817
+15823
+15859
+15877
+15881
+15887
+15889
+15901
+15907
+15913
+15919
+15923
+15937
+15959
+15971
+15973
+15991
+16001
+16007
+16033
+16057
+16061
+16063
+16067
+16069
+16073
+16087
+16091
+16097
+16103
+16111
+16127
+16139
+16141
+16183
+16187
+16189
+16193
+16217
+16223
+16229
+16231
+16249
+16253
+16267
+16273
+16301
+16319
+16333
+16339
+16349
+16361
+16363
+16369
+16381
+16411
+16417
+16421
+16427
+16433
+16447
+16451
+16453
+16477
+16481
+16487
+16493
+16519
+16529
+16547
+16553
+16561
+16567
+16573
+16603
+16607
+16619
+16631
+16633
+16649
+16651
+16657
+16661
+16673
+16691
+16693
+16699
+16703
+16729
+16741
+16747
+16759
+16763
+16787
+16811
+16823
+16829
+16831
+16843
+16871
+16879
+16883
+16889
+16901
+16903
+16921
+16927
+16931
+16937
+16943
+16963
+16979
+16981
+16987
+16993
+17011
+17021
+17027
+17029
+17033
+17041
+17047
+17053
+17077
+17093
+17099
+17107
+17117
+17123
+17137
+17159
+17167
+17183
+17189
+17191
+17203
+17207
+17209
+17231
+17239
+17257
+17291
+17293
+17299
+17317
+17321
+17327
+17333
+17341
+17351
+17359
+17377
+17383
+17387
+17389
+17393
+17401
+17417
+17419
+17431
+17443
+17449
+17467
+17471
+17477
+17483
+17489
+17491
+17497
+17509
+17519
+17539
+17551
+17569
+17573
+17579
+17581
+17597
+17599
+17609
+17623
+17627
+17657
+17659
+17669
+17681
+17683
+17707
+17713
+17729
+17737
+17747
+17749
+17761
+17783
+17789
+17791
+17807
+17827
+17837
+17839
+17851
+17863
+17881
+17891
+17903
+17909
+17911
+17921
+17923
+17929
+17939
+17957
+17959
+17971
+17977
+17981
+17987
+17989
+18013
+18041
+18043
+18047
+18049
+18059
+18061
+18077
+18089
+18097
+18119
+18121
+18127
+18131
+18133
+18143
+18149
+18169
+18181
+18191
+18199
+18211
+18217
+18223
+18229
+18233
+18251
+18253
+18257
+18269
+18287
+18289
+18301
+18307
+18311
+18313
+18329
+18341
+18353
+18367
+18371
+18379
+18397
+18401
+18413
+18427
+18433
+18439
+18443
+18451
+18457
+18461
+18481
+18493
+18503
+18517
+18521
+18523
+18539
+18541
+18553
+18583
+18587
+18593
+18617
+18637
+18661
+18671
+18679
+18691
+18701
+18713
+18719
+18731
+18743
+18749
+18757
+18773
+18787
+18793
+18797
+18803
+18839
+18859
+18869
+18899
+18911
+18913
+18917
+18919
+18947
+18959
+18973
+18979
+19001
+19009
+19013
+19031
+19037
+19051
+19069
+19073
+19079
+19081
+19087
+19121
+19139
+19141
+19157
+19163
+19181
+19183
+19207
+19211
+19213
+19219
+19231
+19237
+19249
+19259
+19267
+19273
+19289
+19301
+19309
+19319
+19333
+19373
+19379
+19381
+19387
+19391
+19403
+19417
+19421
+19423
+19427
+19429
+19433
+19441
+19447
+19457
+19463
+19469
+19471
+19477
+19483
+19489
+19501
+19507
+19531
+19541
+19543
+19553
+19559
+19571
+19577
+19583
+19597
+19603
+19609
+19661
+19681
+19687
+19697
+19699
+19709
+19717
+19727
+19739
+19751
+19753
+19759
+19763
+19777
+19793
+19801
+19813
+19819
+19841
+19843
+19853
+19861
+19867
+19889
+19891
+19913
+19919
+19927
+19937
+19949
+19961
+19963
+19973
+19979
+19991
+19993
+19997
+20011
+20021
+20023
+20029
+20047
+20051
+20063
+20071
+20089
+20101
+20107
+20113
+20117
+20123
+20129
+20143
+20147
+20149
+20161
+20173
+20177
+20183
+20201
+20219
+20231
+20233
+20249
+20261
+20269
+20287
+20297
+20323
+20327
+20333
+20341
+20347
+20353
+20357
+20359
+20369
+20389
+20393
+20399
+20407
+20411
+20431
+20441
+20443
+20477
+20479
+20483
+20507
+20509
+20521
+20533
+20543
+20549
+20551
+20563
+20593
+20599
+20611
+20627
+20639
+20641
+20663
+20681
+20693
+20707
+20717
+20719
+20731
+20743
+20747
+20749
+20753
+20759
+20771
+20773
+20789
+20807
+20809
+20849
+20857
+20873
+20879
+20887
+20897
+20899
+20903
+20921
+20929
+20939
+20947
+20959
+20963
+20981
+20983
+21001
+21011
+21013
+21017
+21019
+21023
+21031
+21059
+21061
+21067
+21089
+21101
+21107
+21121
+21139
+21143
+21149
+21157
+21163
+21169
+21179
+21187
+21191
+21193
+21211
+21221
+21227
+21247
+21269
+21277
+21283
+21313
+21317
+21319
+21323
+21341
+21347
+21377
+21379
+21383
+21391
+21397
+21401
+21407
+21419
+21433
+21467
+21481
+21487
+21491
+21493
+21499
+21503
+21517
+21521
+21523
+21529
+21557
+21559
+21563
+21569
+21577
+21587
+21589
+21599
+21601
+21611
+21613
+21617
+21647
+21649
+21661
+21673
+21683
+21701
+21713
+21727
+21737
+21739
+21751
+21757
+21767
+21773
+21787
+21799
+21803
+21817
+21821
+21839
+21841
+21851
+21859
+21863
+21871
+21881
+21893
+21911
+21929
+21937
+21943
+21961
+21977
+21991
+21997
+22003
+22013
+22027
+22031
+22037
+22039
+22051
+22063
+22067
+22073
+22079
+22091
+22093
+22109
+22111
+22123
+22129
+22133
+22147
+22153
+22157
+22159
+22171
+22189
+22193
+22229
+22247
+22259
+22271
+22273
+22277
+22279
+22283
+22291
+22303
+22307
+22343
+22349
+22367
+22369
+22381
+22391
+22397
+22409
+22433
+22441
+22447
+22453
+22469
+22481
+22483
+22501
+22511
+22531
+22541
+22543
+22549
+22567
+22571
+22573
+22613
+22619
+22621
+22637
+22639
+22643
+22651
+22669
+22679
+22691
+22697
+22699
+22709
+22717
+22721
+22727
+22739
+22741
+22751
+22769
+22777
+22783
+22787
+22807
+22811
+22817
+22853
+22859
+22861
+22871
+22877
+22901
+22907
+22921
+22937
+22943
+22961
+22963
+22973
+22993
+23003
+23011
+23017
+23021
+23027
+23029
+23039
+23041
+23053
+23057
+23059
+23063
+23071
+23081
+23087
+23099
+23117
+23131
+23143
+23159
+23167
+23173
+23189
+23197
+23201
+23203
+23209
+23227
+23251
+23269
+23279
+23291
+23293
+23297
+23311
+23321
+23327
+23333
+23339
+23357
+23369
+23371
+23399
+23417
+23431
+23447
+23459
+23473
+23497
+23509
+23531
+23537
+23539
+23549
+23557
+23561
+23563
+23567
+23581
+23593
+23599
+23603
+23609
+23623
+23627
+23629
+23633
+23663
+23669
+23671
+23677
+23687
+23689
+23719
+23741
+23743
+23747
+23753
+23761
+23767
+23773
+23789
+23801
+23813
+23819
+23827
+23831
+23833
+23857
+23869
+23873
+23879
+23887
+23893
+23899
+23909
+23911
+23917
+23929
+23957
+23971
+23977
+23981
+23993
+24001
+24007
+24019
+24023
+24029
+24043
+24049
+24061
+24071
+24077
+24083
+24091
+24097
+24103
+24107
+24109
+24113
+24121
+24133
+24137
+24151
+24169
+24179
+24181
+24197
+24203
+24223
+24229
+24239
+24247
+24251
+24281
+24317
+24329
+24337
+24359
+24371
+24373
+24379
+24391
+24407
+24413
+24419
+24421
+24439
+24443
+24469
+24473
+24481
+24499
+24509
+24517
+24527
+24533
+24547
+24551
+24571
+24593
+24611
+24623
+24631
+24659
+24671
+24677
+24683
+24691
+24697
+24709
+24733
+24749
+24763
+24767
+24781
+24793
+24799
+24809
+24821
+24841
+24847
+24851
+24859
+24877
+24889
+24907
+24917
+24919
+24923
+24943
+24953
+24967
+24971
+24977
+24979
+24989
+25013
+25031
+25033
+25037
+25057
+25073
+25087
+25097
+25111
+25117
+25121
+25127
+25147
+25153
+25163
+25169
+25171
+25183
+25189
+25219
+25229
+25237
+25243
+25247
+25253
+25261
+25301
+25303
+25307
+25309
+25321
+25339
+25343
+25349
+25357
+25367
+25373
+25391
+25409
+25411
+25423
+25439
+25447
+25453
+25457
+25463
+25469
+25471
+25523
+25537
+25541
+25561
+25577
+25579
+25583
+25589
+25601
+25603
+25609
+25621
+25633
+25639
+25643
+25657
+25667
+25673
+25679
+25693
+25703
+25717
+25733
+25741
+25747
+25759
+25763
+25771
+25793
+25799
+25801
+25819
+25841
+25847
+25849
+25867
+25873
+25889
+25903
+25913
+25919
+25931
+25933
+25939
+25943
+25951
+25969
+25981
+25997
+25999
+26003
+26017
+26021
+26029
+26041
+26053
+26083
+26099
+26107
+26111
+26113
+26119
+26141
+26153
+26161
+26171
+26177
+26183
+26189
+26203
+26209
+26227
+26237
+26249
+26251
+26261
+26263
+26267
+26293
+26297
+26309
+26317
+26321
+26339
+26347
+26357
+26371
+26387
+26393
+26399
+26407
+26417
+26423
+26431
+26437
+26449
+26459
+26479
+26489
+26497
+26501
+26513
+26539
+26557
+26561
+26573
+26591
+26597
+26627
+26633
+26641
+26647
+26669
+26681
+26683
+26687
+26693
+26699
+26701
+26711
+26713
+26717
+26723
+26729
+26731
+26737
+26759
+26777
+26783
+26801
+26813
+26821
+26833
+26839
+26849
+26861
+26863
+26879
+26881
+26891
+26893
+26903
+26921
+26927
+26947
+26951
+26953
+26959
+26981
+26987
+26993
+27011
+27017
+27031
+27043
+27059
+27061
+27067
+27073
+27077
+27091
+27103
+27107
+27109
+27127
+27143
+27179
+27191
+27197
+27211
+27239
+27241
+27253
+27259
+27271
+27277
+27281
+27283
+27299
+27329
+27337
+27361
+27367
+27397
+27407
+27409
+27427
+27431
+27437
+27449
+27457
+27479
+27481
+27487
+27509
+27527
+27529
+27539
+27541
+27551
+27581
+27583
+27611
+27617
+27631
+27647
+27653
+27673
+27689
+27691
+27697
+27701
+27733
+27737
+27739
+27743
+27749
+27751
+27763
+27767
+27773
+27779
+27791
+27793
+27799
+27803
+27809
+27817
+27823
+27827
+27847
+27851
+27883
+27893
+27901
+27917
+27919
+27941
+27943
+27947
+27953
+27961
+27967
+27983
+27997
+28001
+28019
+28027
+28031
+28051
+28057
+28069
+28081
+28087
+28097
+28099
+28109
+28111
+28123
+28151
+28163
+28181
+28183
+28201
+28211
+28219
+28229
+28277
+28279
+28283
+28289
+28297
+28307
+28309
+28319
+28349
+28351
+28387
+28393
+28403
+28409
+28411
+28429
+28433
+28439
+28447
+28463
+28477
+28493
+28499
+28513
+28517
+28537
+28541
+28547
+28549
+28559
+28571
+28573
+28579
+28591
+28597
+28603
+28607
+28619
+28621
+28627
+28631
+28643
+28649
+28657
+28661
+28663
+28669
+28687
+28697
+28703
+28711
+28723
+28729
+28751
+28753
+28759
+28771
+28789
+28793
+28807
+28813
+28817
+28837
+28843
+28859
+28867
+28871
+28879
+28901
+28909
+28921
+28927
+28933
+28949
+28961
+28979
+29009
+29017
+29021
+29023
+29027
+29033
+29059
+29063
+29077
+29101
+29123
+29129
+29131
+29137
+29147
+29153
+29167
+29173
+29179
+29191
+29201
+29207
+29209
+29221
+29231
+29243
+29251
+29269
+29287
+29297
+29303
+29311
+29327
+29333
+29339
+29347
+29363
+29383
+29387
+29389
+29399
+29401
+29411
+29423
+29429
+29437
+29443
+29453
+29473
+29483
+29501
+29527
+29531
+29537
+29567
+29569
+29573
+29581
+29587
+29599
+29611
+29629
+29633
+29641
+29663
+29669
+29671
+29683
+29717
+29723
+29741
+29753
+29759
+29761
+29789
+29803
+29819
+29833
+29837
+29851
+29863
+29867
+29873
+29879
+29881
+29917
+29921
+29927
+29947
+29959
+29983
+29989
+30011
+30013
+30029
+30047
+30059
+30071
+30089
+30091
+30097
+30103
+30109
+30113
+30119
+30133
+30137
+30139
+30161
+30169
+30181
+30187
+30197
+30203
+30211
+30223
+30241
+30253
+30259
+30269
+30271
+30293
+30307
+30313
+30319
+30323
+30341
+30347
+30367
+30389
+30391
+30403
+30427
+30431
+30449
+30467
+30469
+30491
+30493
+30497
+30509
+30517
+30529
+30539
+30553
+30557
+30559
+30577
+30593
+30631
+30637
+30643
+30649
+30661
+30671
+30677
+30689
+30697
+30703
+30707
+30713
+30727
+30757
+30763
+30773
+30781
+30803
+30809
+30817
+30829
+30839
+30841
+30851
+30853
+30859
+30869
+30871
+30881
+30893
+30911
+30931
+30937
+30941
+30949
+30971
+30977
+30983
+31013
+31019
+31033
+31039
+31051
+31063
+31069
+31079
+31081
+31091
+31121
+31123
+31139
+31147
+31151
+31153
+31159
+31177
+31181
+31183
+31189
+31193
+31219
+31223
+31231
+31237
+31247
+31249
+31253
+31259
+31267
+31271
+31277
+31307
+31319
+31321
+31327
+31333
+31337
+31357
+31379
+31387
+31391
+31393
+31397
+31469
+31477
+31481
+31489
+31511
+31513
+31517
+31531
+31541
+31543
+31547
+31567
+31573
+31583
+31601
+31607
+31627
+31643
+31649
+31657
+31663
+31667
+31687
+31699
+31721
+31723
+31727
+31729
+31741
+31751
+31769
+31771
+31793
+31799
+31817
+31847
+31849
+31859
+31873
+31883
+31891
+31907
+31957
+31963
+31973
+31981
+31991
+32003
+32009
+32027
+32029
+32051
+32057
+32059
+32063
+32069
+32077
+32083
+32089
+32099
+32117
+32119
+32141
+32143
+32159
+32173
+32183
+32189
+32191
+32203
+32213
+32233
+32237
+32251
+32257
+32261
+32297
+32299
+32303
+32309
+32321
+32323
+32327
+32341
+32353
+32359
+32363
+32369
+32371
+32377
+32381
+32401
+32411
+32413
+32423
+32429
+32441
+32443
+32467
+32479
+32491
+32497
+32503
+32507
+32531
+32533
+32537
+32561
+32563
+32569
+32573
+32579
+32587
+32603
+32609
+32611
+32621
+32633
+32647
+32653
+32687
+32693
+32707
+32713
+32717
+32719
+32749
+32771
+32779
+32783
+32789
+32797
+32801
+32803
+32831
+32833
+32839
+32843
+32869
+32887
+32909
+32911
+32917
+32933
+32939
+32941
+32957
+32969
+32971
+32983
+32987
+32993
+32999
+33013
+33023
+33029
+33037
+33049
+33053
+33071
+33073
+33083
+33091
+33107
+33113
+33119
+33149
+33151
+33161
+33179
+33181
+33191
+33199
+33203
+33211
+33223
+33247
+33287
+33289
+33301
+33311
+33317
+33329
+33331
+33343
+33347
+33349
+33353
+33359
+33377
+33391
+33403
+33409
+33413
+33427
+33457
+33461
+33469
+33479
+33487
+33493
+33503
+33521
+33529
+33533
+33547
+33563
+33569
+33577
+33581
+33587
+33589
+33599
+33601
+33613
+33617
+33619
+33623
+33629
+33637
+33641
+33647
+33679
+33703
+33713
+33721
+33739
+33749
+33751
+33757
+33767
+33769
+33773
+33791
+33797
+33809
+33811
+33827
+33829
+33851
+33857
+33863
+33871
+33889
+33893
+33911
+33923
+33931
+33937
+33941
+33961
+33967
+33997
+34019
+34031
+34033
+34039
+34057
+34061
+34123
+34127
+34129
+34141
+34147
+34157
+34159
+34171
+34183
+34211
+34213
+34217
+34231
+34253
+34259
+34261
+34267
+34273
+34283
+34297
+34301
+34303
+34313
+34319
+34327
+34337
+34351
+34361
+34367
+34369
+34381
+34403
+34421
+34429
+34439
+34457
+34469
+34471
+34483
+34487
+34499
+34501
+34511
+34513
+34519
+34537
+34543
+34549
+34583
+34589
+34591
+34603
+34607
+34613
+34631
+34649
+34651
+34667
+34673
+34679
+34687
+34693
+34703
+34721
+34729
+34739
+34747
+34757
+34759
+34763
+34781
+34807
+34819
+34841
+34843
+34847
+34849
+34871
+34877
+34883
+34897
+34913
+34919
+34939
+34949
+34961
+34963
+34981
+35023
+35027
+35051
+35053
+35059
+35069
+35081
+35083
+35089
+35099
+35107
+35111
+35117
+35129
+35141
+35149
+35153
+35159
+35171
+35201
+35221
+35227
+35251
+35257
+35267
+35279
+35281
+35291
+35311
+35317
+35323
+35327
+35339
+35353
+35363
+35381
+35393
+35401
+35407
+35419
+35423
+35437
+35447
+35449
+35461
+35491
+35507
+35509
+35521
+35527
+35531
+35533
+35537
+35543
+35569
+35573
+35591
+35593
+35597
+35603
+35617
+35671
+35677
+35729
+35731
+35747
+35753
+35759
+35771
+35797
+35801
+35803
+35809
+35831
+35837
+35839
+35851
+35863
+35869
+35879
+35897
+35899
+35911
+35923
+35933
+35951
+35963
+35969
+35977
+35983
+35993
+35999
+36007
+36011
+36013
+36017
+36037
+36061
+36067
+36073
+36083
+36097
+36107
+36109
+36131
+36137
+36151
+36161
+36187
+36191
+36209
+36217
+36229
+36241
+36251
+36263
+36269
+36277
+36293
+36299
+36307
+36313
+36319
+36341
+36343
+36353
+36373
+36383
+36389
+36433
+36451
+36457
+36467
+36469
+36473
+36479
+36493
+36497
+36523
+36527
+36529
+36541
+36551
+36559
+36563
+36571
+36583
+36587
+36599
+36607
+36629
+36637
+36643
+36653
+36671
+36677
+36683
+36691
+36697
+36709
+36713
+36721
+36739
+36749
+36761
+36767
+36779
+36781
+36787
+36791
+36793
+36809
+36821
+36833
+36847
+36857
+36871
+36877
+36887
+36899
+36901
+36913
+36919
+36923
+36929
+36931
+36943
+36947
+36973
+36979
+36997
+37003
+37013
+37019
+37021
+37039
+37049
+37057
+37061
+37087
+37097
+37117
+37123
+37139
+37159
+37171
+37181
+37189
+37199
+37201
+37217
+37223
+37243
+37253
+37273
+37277
+37307
+37309
+37313
+37321
+37337
+37339
+37357
+37361
+37363
+37369
+37379
+37397
+37409
+37423
+37441
+37447
+37463
+37483
+37489
+37493
+37501
+37507
+37511
+37517
+37529
+37537
+37547
+37549
+37561
+37567
+37571
+37573
+37579
+37589
+37591
+37607
+37619
+37633
+37643
+37649
+37657
+37663
+37691
+37693
+37699
+37717
+37747
+37781
+37783
+37799
+37811
+37813
+37831
+37847
+37853
+37861
+37871
+37879
+37889
+37897
+37907
+37951
+37957
+37963
+37967
+37987
+37991
+37993
+37997
+38011
+38039
+38047
+38053
+38069
+38083
+38113
+38119
+38149
+38153
+38167
+38177
+38183
+38189
+38197
+38201
+38219
+38231
+38237
+38239
+38261
+38273
+38281
+38287
+38299
+38303
+38317
+38321
+38327
+38329
+38333
+38351
+38371
+38377
+38393
+38431
+38447
+38449
+38453
+38459
+38461
+38501
+38543
+38557
+38561
+38567
+38569
+38593
+38603
+38609
+38611
+38629
+38639
+38651
+38653
+38669
+38671
+38677
+38693
+38699
+38707
+38711
+38713
+38723
+38729
+38737
+38747
+38749
+38767
+38783
+38791
+38803
+38821
+38833
+38839
+38851
+38861
+38867
+38873
+38891
+38903
+38917
+38921
+38923
+38933
+38953
+38959
+38971
+38977
+38993
+39019
+39023
+39041
+39043
+39047
+39079
+39089
+39097
+39103
+39107
+39113
+39119
+39133
+39139
+39157
+39161
+39163
+39181
+39191
+39199
+39209
+39217
+39227
+39229
+39233
+39239
+39241
+39251
+39293
+39301
+39313
+39317
+39323
+39341
+39343
+39359
+39367
+39371
+39373
+39383
+39397
+39409
+39419
+39439
+39443
+39451
+39461
+39499
+39503
+39509
+39511
+39521
+39541
+39551
+39563
+39569
+39581
+39607
+39619
+39623
+39631
+39659
+39667
+39671
+39679
+39703
+39709
+39719
+39727
+39733
+39749
+39761
+39769
+39779
+39791
+39799
+39821
+39827
+39829
+39839
+39841
+39847
+39857
+39863
+39869
+39877
+39883
+39887
+39901
+39929
+39937
+39953
+39971
+39979
+39983
+39989
+40009
+40013
+40031
+40037
+40039
+40063
+40087
+40093
+40099
+40111
+40123
+40127
+40129
+40151
+40153
+40163
+40169
+40177
+40189
+40193
+40213
+40231
+40237
+40241
+40253
+40277
+40283
+40289
+40343
+40351
+40357
+40361
+40387
+40423
+40427
+40429
+40433
+40459
+40471
+40483
+40487
+40493
+40499
+40507
+40519
+40529
+40531
+40543
+40559
+40577
+40583
+40591
+40597
+40609
+40627
+40637
+40639
+40693
+40697
+40699
+40709
+40739
+40751
+40759
+40763
+40771
+40787
+40801
+40813
+40819
+40823
+40829
+40841
+40847
+40849
+40853
+40867
+40879
+40883
+40897
+40903
+40927
+40933
+40939
+40949
+40961
+40973
+40993
+41011
+41017
+41023
+41039
+41047
+41051
+41057
+41077
+41081
+41113
+41117
+41131
+41141
+41143
+41149
+41161
+41177
+41179
+41183
+41189
+41201
+41203
+41213
+41221
+41227
+41231
+41233
+41243
+41257
+41263
+41269
+41281
+41299
+41333
+41341
+41351
+41357
+41381
+41387
+41389
+41399
+41411
+41413
+41443
+41453
+41467
+41479
+41491
+41507
+41513
+41519
+41521
+41539
+41543
+41549
+41579
+41593
+41597
+41603
+41609
+41611
+41617
+41621
+41627
+41641
+41647
+41651
+41659
+41669
+41681
+41687
+41719
+41729
+41737
+41759
+41761
+41771
+41777
+41801
+41809
+41813
+41843
+41849
+41851
+41863
+41879
+41887
+41893
+41897
+41903
+41911
+41927
+41941
+41947
+41953
+41957
+41959
+41969
+41981
+41983
+41999
+42013
+42017
+42019
+42023
+42043
+42061
+42071
+42073
+42083
+42089
+42101
+42131
+42139
+42157
+42169
+42179
+42181
+42187
+42193
+42197
+42209
+42221
+42223
+42227
+42239
+42257
+42281
+42283
+42293
+42299
+42307
+42323
+42331
+42337
+42349
+42359
+42373
+42379
+42391
+42397
+42403
+42407
+42409
+42433
+42437
+42443
+42451
+42457
+42461
+42463
+42467
+42473
+42487
+42491
+42499
+42509
+42533
+42557
+42569
+42571
+42577
+42589
+42611
+42641
+42643
+42649
+42667
+42677
+42683
+42689
+42697
+42701
+42703
+42709
+42719
+42727
+42737
+42743
+42751
+42767
+42773
+42787
+42793
+42797
+42821
+42829
+42839
+42841
+42853
+42859
+42863
+42899
+42901
+42923
+42929
+42937
+42943
+42953
+42961
+42967
+42979
+42989
+43003
+43013
+43019
+43037
+43049
+43051
+43063
+43067
+43093
+43103
+43117
+43133
+43151
+43159
+43177
+43189
+43201
+43207
+43223
+43237
+43261
+43271
+43283
+43291
+43313
+43319
+43321
+43331
+43391
+43397
+43399
+43403
+43411
+43427
+43441
+43451
+43457
+43481
+43487
+43499
+43517
+43541
+43543
+43573
+43577
+43579
+43591
+43597
+43607
+43609
+43613
+43627
+43633
+43649
+43651
+43661
+43669
+43691
+43711
+43717
+43721
+43753
+43759
+43777
+43781
+43783
+43787
+43789
+43793
+43801
+43853
+43867
+43889
+43891
+43913
+43933
+43943
+43951
+43961
+43963
+43969
+43973
+43987
+43991
+43997
+44017
+44021
+44027
+44029
+44041
+44053
+44059
+44071
+44087
+44089
+44101
+44111
+44119
+44123
+44129
+44131
+44159
+44171
+44179
+44189
+44201
+44203
+44207
+44221
+44249
+44257
+44263
+44267
+44269
+44273
+44279
+44281
+44293
+44351
+44357
+44371
+44381
+44383
+44389
+44417
+44449
+44453
+44483
+44491
+44497
+44501
+44507
+44519
+44531
+44533
+44537
+44543
+44549
+44563
+44579
+44587
+44617
+44621
+44623
+44633
+44641
+44647
+44651
+44657
+44683
+44687
+44699
+44701
+44711
+44729
+44741
+44753
+44771
+44773
+44777
+44789
+44797
+44809
+44819
+44839
+44843
+44851
+44867
+44879
+44887
+44893
+44909
+44917
+44927
+44939
+44953
+44959
+44963
+44971
+44983
+44987
+45007
+45013
+45053
+45061
+45077
+45083
+45119
+45121
+45127
+45131
+45137
+45139
+45161
+45179
+45181
+45191
+45197
+45233
+45247
+45259
+45263
+45281
+45289
+45293
+45307
+45317
+45319
+45329
+45337
+45341
+45343
+45361
+45377
+45389
+45403
+45413
+45427
+45433
+45439
+45481
+45491
+45497
+45503
+45523
+45533
+45541
+45553
+45557
+45569
+45587
+45589
+45599
+45613
+45631
+45641
+45659
+45667
+45673
+45677
+45691
+45697
+45707
+45737
+45751
+45757
+45763
+45767
+45779
+45817
+45821
+45823
+45827
+45833
+45841
+45853
+45863
+45869
+45887
+45893
+45943
+45949
+45953
+45959
+45971
+45979
+45989
+46021
+46027
+46049
+46051
+46061
+46073
+46091
+46093
+46099
+46103
+46133
+46141
+46147
+46153
+46171
+46181
+46183
+46187
+46199
+46219
+46229
+46237
+46261
+46271
+46273
+46279
+46301
+46307
+46309
+46327
+46337
+46349
+46351
+46381
+46399
+46411
+46439
+46441
+46447
+46451
+46457
+46471
+46477
+46489
+46499
+46507
+46511
+46523
+46549
+46559
+46567
+46573
+46589
+46591
+46601
+46619
+46633
+46639
+46643
+46649
+46663
+46679
+46681
+46687
+46691
+46703
+46723
+46727
+46747
+46751
+46757
+46769
+46771
+46807
+46811
+46817
+46819
+46829
+46831
+46853
+46861
+46867
+46877
+46889
+46901
+46919
+46933
+46957
+46993
+46997
+47017
+47041
+47051
+47057
+47059
+47087
+47093
+47111
+47119
+47123
+47129
+47137
+47143
+47147
+47149
+47161
+47189
+47207
+47221
+47237
+47251
+47269
+47279
+47287
+47293
+47297
+47303
+47309
+47317
+47339
+47351
+47353
+47363
+47381
+47387
+47389
+47407
+47417
+47419
+47431
+47441
+47459
+47491
+47497
+47501
+47507
+47513
+47521
+47527
+47533
+47543
+47563
+47569
+47581
+47591
+47599
+47609
+47623
+47629
+47639
+47653
+47657
+47659
+47681
+47699
+47701
+47711
+47713
+47717
+47737
+47741
+47743
+47777
+47779
+47791
+47797
+47807
+47809
+47819
+47837
+47843
+47857
+47869
+47881
+47903
+47911
+47917
+47933
+47939
+47947
+47951
+47963
+47969
+47977
+47981
+48017
+48023
+48029
+48049
+48073
+48079
+48091
+48109
+48119
+48121
+48131
+48157
+48163
+48179
+48187
+48193
+48197
+48221
+48239
+48247
+48259
+48271
+48281
+48299
+48311
+48313
+48337
+48341
+48353
+48371
+48383
+48397
+48407
+48409
+48413
+48437
+48449
+48463
+48473
+48479
+48481
+48487
+48491
+48497
+48523
+48527
+48533
+48539
+48541
+48563
+48571
+48589
+48593
+48611
+48619
+48623
+48647
+48649
+48661
+48673
+48677
+48679
+48731
+48733
+48751
+48757
+48761
+48767
+48779
+48781
+48787
+48799
+48809
+48817
+48821
+48823
+48847
+48857
+48859
+48869
+48871
+48883
+48889
+48907
+48947
+48953
+48973
+48989
+48991
+49003
+49009
+49019
+49031
+49033
+49037
+49043
+49057
+49069
+49081
+49103
+49109
+49117
+49121
+49123
+49139
+49157
+49169
+49171
+49177
+49193
+49199
+49201
+49207
+49211
+49223
+49253
+49261
+49277
+49279
+49297
+49307
+49331
+49333
+49339
+49363
+49367
+49369
+49391
+49393
+49409
+49411
+49417
+49429
+49433
+49451
+49459
+49463
+49477
+49481
+49499
+49523
+49529
+49531
+49537
+49547
+49549
+49559
+49597
+49603
+49613
+49627
+49633
+49639
+49663
+49667
+49669
+49681
+49697
+49711
+49727
+49739
+49741
+49747
+49757
+49783
+49787
+49789
+49801
+49807
+49811
+49823
+49831
+49843
+49853
+49871
+49877
+49891
+49919
+49921
+49927
+49937
+49939
+49943
+49957
+49991
+49993
+49999
+50021
+50023
+50033
+50047
+50051
+50053
+50069
+50077
+50087
+50093
+50101
+50111
+50119
+50123
+50129
+50131
+50147
+50153
+50159
+50177
+50207
+50221
+50227
+50231
+50261
+50263
+50273
+50287
+50291
+50311
+50321
+50329
+50333
+50341
+50359
+50363
+50377
+50383
+50387
+50411
+50417
+50423
+50441
+50459
+50461
+50497
+50503
+50513
+50527
+50539
+50543
+50549
+50551
+50581
+50587
+50591
+50593
+50599
+50627
+50647
+50651
+50671
+50683
+50707
+50723
+50741
+50753
+50767
+50773
+50777
+50789
+50821
+50833
+50839
+50849
+50857
+50867
+50873
+50891
+50893
+50909
+50923
+50929
+50951
+50957
+50969
+50971
+50989
+50993
+51001
+51031
+51043
+51047
+51059
+51061
+51071
+51109
+51131
+51133
+51137
+51151
+51157
+51169
+51193
+51197
+51199
+51203
+51217
+51229
+51239
+51241
+51257
+51263
+51283
+51287
+51307
+51329
+51341
+51343
+51347
+51349
+51361
+51383
+51407
+51413
+51419
+51421
+51427
+51431
+51437
+51439
+51449
+51461
+51473
+51479
+51481
+51487
+51503
+51511
+51517
+51521
+51539
+51551
+51563
+51577
+51581
+51593
+51599
+51607
+51613
+51631
+51637
+51647
+51659
+51673
+51679
+51683
+51691
+51713
+51719
+51721
+51749
+51767
+51769
+51787
+51797
+51803
+51817
+51827
+51829
+51839
+51853
+51859
+51869
+51871
+51893
+51899
+51907
+51913
+51929
+51941
+51949
+51971
+51973
+51977
+51991
+52009
+52021
+52027
+52051
+52057
+52067
+52069
+52081
+52103
+52121
+52127
+52147
+52153
+52163
+52177
+52181
+52183
+52189
+52201
+52223
+52237
+52249
+52253
+52259
+52267
+52289
+52291
+52301
+52313
+52321
+52361
+52363
+52369
+52379
+52387
+52391
+52433
+52453
+52457
+52489
+52501
+52511
+52517
+52529
+52541
+52543
+52553
+52561
+52567
+52571
+52579
+52583
+52609
+52627
+52631
+52639
+52667
+52673
+52691
+52697
+52709
+52711
+52721
+52727
+52733
+52747
+52757
+52769
+52783
+52807
+52813
+52817
+52837
+52859
+52861
+52879
+52883
+52889
+52901
+52903
+52919
+52937
+52951
+52957
+52963
+52967
+52973
+52981
+52999
+53003
+53017
+53047
+53051
+53069
+53077
+53087
+53089
+53093
+53101
+53113
+53117
+53129
+53147
+53149
+53161
+53171
+53173
+53189
+53197
+53201
+53231
+53233
+53239
+53267
+53269
+53279
+53281
+53299
+53309
+53323
+53327
+53353
+53359
+53377
+53381
+53401
+53407
+53411
+53419
+53437
+53441
+53453
+53479
+53503
+53507
+53527
+53549
+53551
+53569
+53591
+53593
+53597
+53609
+53611
+53617
+53623
+53629
+53633
+53639
+53653
+53657
+53681
+53693
+53699
+53717
+53719
+53731
+53759
+53773
+53777
+53783
+53791
+53813
+53819
+53831
+53849
+53857
+53861
+53881
+53887
+53891
+53897
+53899
+53917
+53923
+53927
+53939
+53951
+53959
+53987
+53993
+54001
+54011
+54013
+54037
+54049
+54059
+54083
+54091
+54101
+54121
+54133
+54139
+54151
+54163
+54167
+54181
+54193
+54217
+54251
+54269
+54277
+54287
+54293
+54311
+54319
+54323
+54331
+54347
+54361
+54367
+54371
+54377
+54401
+54403
+54409
+54413
+54419
+54421
+54437
+54443
+54449
+54469
+54493
+54497
+54499
+54503
+54517
+54521
+54539
+54541
+54547
+54559
+54563
+54577
+54581
+54583
+54601
+54617
+54623
+54629
+54631
+54647
+54667
+54673
+54679
+54709
+54713
+54721
+54727
+54751
+54767
+54773
+54779
+54787
+54799
+54829
+54833
+54851
+54869
+54877
+54881
+54907
+54917
+54919
+54941
+54949
+54959
+54973
+54979
+54983
+55001
+55009
+55021
+55049
+55051
+55057
+55061
+55073
+55079
+55103
+55109
+55117
+55127
+55147
+55163
+55171
+55201
+55207
+55213
+55217
+55219
+55229
+55243
+55249
+55259
+55291
+55313
+55331
+55333
+55337
+55339
+55343
+55351
+55373
+55381
+55399
+55411
+55439
+55441
+55457
+55469
+55487
+55501
+55511
+55529
+55541
+55547
+55579
+55589
+55603
+55609
+55619
+55621
+55631
+55633
+55639
+55661
+55663
+55667
+55673
+55681
+55691
+55697
+55711
+55717
+55721
+55733
+55763
+55787
+55793
+55799
+55807
+55813
+55817
+55819
+55823
+55829
+55837
+55843
+55849
+55871
+55889
+55897
+55901
+55903
+55921
+55927
+55931
+55933
+55949
+55967
+55987
+55997
+56003
+56009
+56039
+56041
+56053
+56081
+56087
+56093
+56099
+56101
+56113
+56123
+56131
+56149
+56167
+56171
+56179
+56197
+56207
+56209
+56237
+56239
+56249
+56263
+56267
+56269
+56299
+56311
+56333
+56359
+56369
+56377
+56383
+56393
+56401
+56417
+56431
+56437
+56443
+56453
+56467
+56473
+56477
+56479
+56489
+56501
+56503
+56509
+56519
+56527
+56531
+56533
+56543
+56569
+56591
+56597
+56599
+56611
+56629
+56633
+56659
+56663
+56671
+56681
+56687
+56701
+56711
+56713
+56731
+56737
+56747
+56767
+56773
+56779
+56783
+56807
+56809
+56813
+56821
+56827
+56843
+56857
+56873
+56891
+56893
+56897
+56909
+56911
+56921
+56923
+56929
+56941
+56951
+56957
+56963
+56983
+56989
+56993
+56999
+57037
+57041
+57047
+57059
+57073
+57077
+57089
+57097
+57107
+57119
+57131
+57139
+57143
+57149
+57163
+57173
+57179
+57191
+57193
+57203
+57221
+57223
+57241
+57251
+57259
+57269
+57271
+57283
+57287
+57301
+57329
+57331
+57347
+57349
+57367
+57373
+57383
+57389
+57397
+57413
+57427
+57457
+57467
+57487
+57493
+57503
+57527
+57529
+57557
+57559
+57571
+57587
+57593
+57601
+57637
+57641
+57649
+57653
+57667
+57679
+57689
+57697
+57709
+57713
+57719
+57727
+57731
+57737
+57751
+57773
+57781
+57787
+57791
+57793
+57803
+57809
+57829
+57839
+57847
+57853
+57859
+57881
+57899
+57901
+57917
+57923
+57943
+57947
+57973
+57977
+57991
+58013
+58027
+58031
+58043
+58049
+58057
+58061
+58067
+58073
+58099
+58109
+58111
+58129
+58147
+58151
+58153
+58169
+58171
+58189
+58193
+58199
+58207
+58211
+58217
+58229
+58231
+58237
+58243
+58271
+58309
+58313
+58321
+58337
+58363
+58367
+58369
+58379
+58391
+58393
+58403
+58411
+58417
+58427
+58439
+58441
+58451
+58453
+58477
+58481
+58511
+58537
+58543
+58549
+58567
+58573
+58579
+58601
+58603
+58613
+58631
+58657
+58661
+58679
+58687
+58693
+58699
+58711
+58727
+58733
+58741
+58757
+58763
+58771
+58787
+58789
+58831
+58889
+58897
+58901
+58907
+58909
+58913
+58921
+58937
+58943
+58963
+58967
+58979
+58991
+58997
+59009
+59011
+59021
+59023
+59029
+59051
+59053
+59063
+59069
+59077
+59083
+59093
+59107
+59113
+59119
+59123
+59141
+59149
+59159
+59167
+59183
+59197
+59207
+59209
+59219
+59221
+59233
+59239
+59243
+59263
+59273
+59281
+59333
+59341
+59351
+59357
+59359
+59369
+59377
+59387
+59393
+59399
+59407
+59417
+59419
+59441
+59443
+59447
+59453
+59467
+59471
+59473
+59497
+59509
+59513
+59539
+59557
+59561
+59567
+59581
+59611
+59617
+59621
+59627
+59629
+59651
+59659
+59663
+59669
+59671
+59693
+59699
+59707
+59723
+59729
+59743
+59747
+59753
+59771
+59779
+59791
+59797
+59809
+59833
+59863
+59879
+59887
+59921
+59929
+59951
+59957
+59971
+59981
+59999
+60013
+60017
+60029
+60037
+60041
+60077
+60083
+60089
+60091
+60101
+60103
+60107
+60127
+60133
+60139
+60149
+60161
+60167
+60169
+60209
+60217
+60223
+60251
+60257
+60259
+60271
+60289
+60293
+60317
+60331
+60337
+60343
+60353
+60373
+60383
+60397
+60413
+60427
+60443
+60449
+60457
+60493
+60497
+60509
+60521
+60527
+60539
+60589
+60601
+60607
+60611
+60617
+60623
+60631
+60637
+60647
+60649
+60659
+60661
+60679
+60689
+60703
+60719
+60727
+60733
+60737
+60757
+60761
+60763
+60773
+60779
+60793
+60811
+60821
+60859
+60869
+60887
+60889
+60899
+60901
+60913
+60917
+60919
+60923
+60937
+60943
+60953
+60961
+61001
+61007
+61027
+61031
+61043
+61051
+61057
+61091
+61099
+61121
+61129
+61141
+61151
+61153
+61169
+61211
+61223
+61231
+61253
+61261
+61283
+61291
+61297
+61331
+61333
+61339
+61343
+61357
+61363
+61379
+61381
+61403
+61409
+61417
+61441
+61463
+61469
+61471
+61483
+61487
+61493
+61507
+61511
+61519
+61543
+61547
+61553
+61559
+61561
+61583
+61603
+61609
+61613
+61627
+61631
+61637
+61643
+61651
+61657
+61667
+61673
+61681
+61687
+61703
+61717
+61723
+61729
+61751
+61757
+61781
+61813
+61819
+61837
+61843
+61861
+61871
+61879
+61909
+61927
+61933
+61949
+61961
+61967
+61979
+61981
+61987
+61991
+62003
+62011
+62017
+62039
+62047
+62053
+62057
+62071
+62081
+62099
+62119
+62129
+62131
+62137
+62141
+62143
+62171
+62189
+62191
+62201
+62207
+62213
+62219
+62233
+62273
+62297
+62299
+62303
+62311
+62323
+62327
+62347
+62351
+62383
+62401
+62417
+62423
+62459
+62467
+62473
+62477
+62483
+62497
+62501
+62507
+62533
+62539
+62549
+62563
+62581
+62591
+62597
+62603
+62617
+62627
+62633
+62639
+62653
+62659
+62683
+62687
+62701
+62723
+62731
+62743
+62753
+62761
+62773
+62791
+62801
+62819
+62827
+62851
+62861
+62869
+62873
+62897
+62903
+62921
+62927
+62929
+62939
+62969
+62971
+62981
+62983
+62987
+62989
+63029
+63031
+63059
+63067
+63073
+63079
+63097
+63103
+63113
+63127
+63131
+63149
+63179
+63197
+63199
+63211
+63241
+63247
+63277
+63281
+63299
+63311
+63313
+63317
+63331
+63337
+63347
+63353
+63361
+63367
+63377
+63389
+63391
+63397
+63409
+63419
+63421
+63439
+63443
+63463
+63467
+63473
+63487
+63493
+63499
+63521
+63527
+63533
+63541
+63559
+63577
+63587
+63589
+63599
+63601
+63607
+63611
+63617
+63629
+63647
+63649
+63659
+63667
+63671
+63689
+63691
+63697
+63703
+63709
+63719
+63727
+63737
+63743
+63761
+63773
+63781
+63793
+63799
+63803
+63809
+63823
+63839
+63841
+63853
+63857
+63863
+63901
+63907
+63913
+63929
+63949
+63977
+63997
+64007
+64013
+64019
+64033
+64037
+64063
+64067
+64081
+64091
+64109
+64123
+64151
+64153
+64157
+64171
+64187
+64189
+64217
+64223
+64231
+64237
+64271
+64279
+64283
+64301
+64303
+64319
+64327
+64333
+64373
+64381
+64399
+64403
+64433
+64439
+64451
+64453
+64483
+64489
+64499
+64513
+64553
+64567
+64577
+64579
+64591
+64601
+64609
+64613
+64621
+64627
+64633
+64661
+64663
+64667
+64679
+64693
+64709
+64717
+64747
+64763
+64781
+64783
+64793
+64811
+64817
+64849
+64853
+64871
+64877
+64879
+64891
+64901
+64919
+64921
+64927
+64937
+64951
+64969
+64997
+65003
+65011
+65027
+65029
+65033
+65053
+65063
+65071
+65089
+65099
+65101
+65111
+65119
+65123
+65129
+65141
+65147
+65167
+65171
+65173
+65179
+65183
+65203
+65213
+65239
+65257
+65267
+65269
+65287
+65293
+65309
+65323
+65327
+65353
+65357
+65371
+65381
+65393
+65407
+65413
+65419
+65423
+65437
+65447
+65449
+65479
+65497
+65519
+65521
diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod
new file mode 100644
index 0000000000..6da4d4a9c4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prng.pod
@@ -0,0 +1,38 @@
+=head1 NAME
+
+ prng - pseudo-random number generator
+
+=head1 SYNOPSIS
+
+ prng [count]
+
+=head1 DESCRIPTION
+
+B<Prng> generates 32-bit pseudo-random integers using the
+Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
+the standard C library's rand() function, which is itself seeded from the
+system clock and the process ID number. Thus, the values generated
+are not particularly useful for cryptographic applications, but they
+are in general much better than the typical output of the usual
+multiplicative congruency generator used by most runtime libraries.
+
+You may optionally specify how many random values should be generated
+by giving a I<count> argument on the command line. If you do not
+specify a count, only one random value will be generated. The results
+are output to the standard output in decimal notation, one value per
+line.
+
+=head1 RESTRICTIONS
+
+As stated above, B<prng> uses the C library's rand() function to seed
+the generator, so it is not terribly suitable for cryptographic
+applications. Also note that each time you run the program, a new
+seed is generated, so it is better to run it once with a I<count>
+parameter than it is to run it multiple times to generate several
+values.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
+ Thayer School of Engineering, Dartmouth College, Hanover, NH USA
diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt
new file mode 100644
index 0000000000..0df0f0390a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/redux.txt
@@ -0,0 +1,86 @@
+Modular Reduction
+
+Usually, modular reduction is accomplished by long division, using the
+mp_div() or mp_mod() functions. However, when performing modular
+exponentiation, you spend a lot of time reducing by the same modulus
+again and again. For this purpose, doing a full division for each
+multiplication is quite inefficient.
+
+For this reason, the mp_exptmod() function does not perform modular
+reductions in the usual way, but instead takes advantage of an
+algorithm due to Barrett, as described by Menezes, van Oorschot and
+Vanstone in their book _Handbook of Applied Cryptography_, published
+by the CRC Press (see Chapter 14 for details). This method reduces
+most of the computation of reduction to efficient shifting and masking
+operations, and avoids the multiple-precision division entirely.
+
+Here is a brief synopsis of Barrett reduction, as it is implemented in
+this library.
+
+Let b denote the radix of the computation (one more than the maximum
+value that can be denoted by an mp_digit). Let m be the modulus, and
+let k be the number of significant digits of m. Let x be the value to
+be reduced modulo m. By the Division Theorem, there exist unique
+integers Q and R such that:
+
+ x = Qm + R, 0 <= R < m
+
+Barrett reduction takes advantage of the fact that you can easily
+approximate Q to within two, given a value M such that:
+
+                  2k
+                 b
+     M = floor( ----- )
+                  m
+
+Computation of M requires a full-precision division step, so if you
+are only doing a single reduction by m, you gain no advantage.
+However, when multiple reductions by the same m are required, this
+division need only be done once, beforehand. Using this, we can use
+the following equation to compute Q', an approximation of Q:
+
+                     x
+            floor( ------ ) M
+                     k-1
+                    b
+Q' = floor( ----------------- )
+                    k+1
+                   b
+
+The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
+efficiently implemented with shifts and masks, leaving only a single
+multiplication to be performed to get this approximation. It can be
+shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
+two additional subtractions to bring the value into line with the
+actual value of Q.
+
+Once we've got Q', we basically multiply that by m and subtract from
+x, yielding:
+
+ x - Q'm = Qm + R - Q'm
+
+Since we know the constraint on Q', this is one of:
+
+ R
+ m + R
+ 2m + R
+
+Since R < m by the Division Theorem, we can simply subtract off m
+until we get a value in the correct range, which will happen with no
+more than 2 subtractions:
+
+ v = x - Q'm
+
+ while(v >= m)
+ v = v - m
+ endwhile
+
+
+In random performance trials, modular exponentiation using this method
+of reduction gave around a 40% speedup over using the division for
+reduction.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt
new file mode 100644
index 0000000000..4529cbfc46
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt
@@ -0,0 +1,50 @@
+Square Root
+
+A simple iterative algorithm is used to compute the greatest integer
+less than or equal to the square root. Essentially, this is Newton's
+linear approximation, computed by finding successive values of the
+equation:
+
+                 x[k]^2 - V
+x[k+1] = x[k] - ------------
+                   2 x[k]
+
+...where V is the value for which the square root is being sought. In
+essence, what is happening here is that we guess a value for the
+square root, then figure out how far off we were by squaring our guess
+and subtracting the target. Using this value, we compute a linear
+approximation for the error, and adjust the "guess". We keep doing
+this until the precision gets low enough that the above equation
+yields a quotient of zero. At this point, our last guess is one
+greater than the square root we're seeking.
+
+The initial guess is computed by dividing V by 4, which is a heuristic
+I have found to be fairly good on average. This also has the
+advantage of being very easy to compute efficiently, even for large
+values.
+
+So, the resulting algorithm works as follows:
+
+ x = V / 4 /* compute initial guess */
+
+ loop
+ t = (x * x) - V /* Compute absolute error */
+ u = 2 * x /* Adjust by tangent slope */
+ t = t / u
+
+ /* Loop is done if error is zero */
+ if(t == 0)
+ break
+
+ /* Adjust guess by error term */
+ x = x - t
+ end
+
+ x = x - 1
+
+The result of the computation is the value of x.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt
new file mode 100644
index 0000000000..edbb97882c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/square.txt
@@ -0,0 +1,72 @@
+Squaring Algorithm
+
+When you are squaring a value, you can take advantage of the fact that
+half the multiplications performed by the more general multiplication
+algorithm (see 'mul.txt' for a description) are redundant when the
+multiplicand equals the multiplier.
+
+In particular, the modified algorithm is:
+
+k = 0
+for j <- 0 to (#a - 1)
+  w = c[2*j] + (a[j] ^ 2);
+  k = w div R
+
+  for i <- j+1 to (#a - 1)
+    w = (2 * a[j] * a[i]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k;
+  k = 0;
+endfor
+
+On the surface, this looks identical to the multiplication algorithm;
+however, note the following differences:
+
+ - precomputation of the leading term in the outer loop
+
+ - i runs from j+1 instead of from zero
+
+ - doubling of a[i] * a[j] in the inner product
+
+Unfortunately, the construction of the inner product is such that we
+need more than two digits to represent the inner product, in some
+cases. In a C implementation, this means that some gymnastics must be
+performed in order to handle overflow, for which C has no direct
+abstraction. We do this by observing the following:
+
+If we have multiplied a[i] and a[j], and the product is more than half
+the maximum value expressible in two digits, then doubling this result
+will overflow into a third digit. If this occurs, we take note of the
+overflow, and double it anyway -- C integer arithmetic ignores
+overflow, so the two digits we get back should still be valid, modulo
+the overflow.
+
+Having doubled this value, we now have to add in the remainders and
+the digits already computed by earlier steps. If we did not overflow
+in the previous step, we might still cause an overflow here. That
+will happen whenever the maximum value expressible in two digits, less
+the amount we have to add, is greater than the result of the previous
+step. Thus, the overflow computation is:
+
+
+ u = 0
+ w = a[i] * a[j]
+
+ if(w > (R - 1)/ 2)
+ u = 1;
+
+ w = w * 2
+ v = c[i + j] + k
+
+ if(u == 0 && (R - 1 - v) < w)
+ u = 1
+
+If there is an overflow, u will be 1, otherwise u will be 0. The rest
+of the parameters are the same as they are in the above description.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt
new file mode 100644
index 0000000000..58f37c9dff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/timing.txt
@@ -0,0 +1,213 @@
+MPI Library Timing Tests
+
+Hardware/OS
+(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
+(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
+(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
+(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
+(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
+(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
+
+Compiler
+(1) MIPSpro C 7.2.1 -O3 optimizations
+(2) GCC 2.95.1 -O3 optimizations
+(3) IBM AIX xlc -O3 optimizations (version unknown)
+(4) EGCS 2.91.66 -O3 optimizations
+(5) Metrowerks CodeWarrior 5.0 C, all optimizations
+(6) MIPSpro C 7.30 -O3 optimizations
+(7) same as (6), with optimized libmalloc.so
+
+Timings are given in seconds, computed using the C library's clock()
+function. The first column gives the hardware and compiler
+configuration used for the test. The second column indicates the
+number of tests that were aggregated to get the statistics for that
+size. These were compiled using 16 bit digits.
+
+Source data were generated randomly using a fixed seed, so they should
+be internally consistent, but may vary on different systems depending
+on the C library. Also, since the resolution of the timer accessed by
+clock() varies, there may be some variance in the precision of these
+measurements.
+
+Prime Generation (primegen)
+
+128 bits:
+A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
+A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
+B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
+C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
+D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
+A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
+A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
+
+192 bits:
+A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
+A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
+B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
+C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
+D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
+A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
+A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
+
+256 bits:
+A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
+A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
+B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
+C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
+D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
+A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
+A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
+
+320 bits:
+A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
+A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
+B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
+C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
+D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
+A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
+A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
+
+384 bits:
+A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
+A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
+B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
+C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
+D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
+A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
+A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
+
+448 bits:
+A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
+A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
+B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
+C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
+D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
+A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
+A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
+
+512 bits:
+A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
+A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
+B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
+C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
+D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
+A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
+A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
+
+Modular Exponentiation (metime)
+
+The following results are aggregated from 200 pseudo-randomly
+generated tests, based on a fixed seed.
+
+ base, exponent, and modulus size (bits)
+P/C 128 192 256 320 384 448 512 640 768 896 1024
+------- -----------------------------------------------------------------
+A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
+A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
+B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
+C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
+D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
+E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
+A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
+A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
+
+Multiplication and Squaring tests, (mulsqr)
+
+The following results are aggregated from 500000 pseudo-randomly
+generated tests, based on a per-run wall-clock seed. Times are given
+in seconds, except where indicated in microseconds (us).
+
+(A1)
+
+bits multiply square ad percent time/mult time/square
+64 9.33 9.15 > 1.9 18.7us 18.3us
+128 10.88 10.44 > 4.0 21.8us 20.9us
+192 13.30 11.89 > 10.6 26.7us 23.8us
+256 14.88 12.64 > 15.1 29.8us 25.3us
+320 18.64 15.01 > 19.5 37.3us 30.0us
+384 23.11 17.70 > 23.4 46.2us 35.4us
+448 28.28 20.88 > 26.2 56.6us 41.8us
+512 34.09 24.51 > 28.1 68.2us 49.0us
+640 47.86 33.25 > 30.5 95.7us 66.5us
+768 64.91 43.54 > 32.9 129.8us 87.1us
+896 84.49 55.48 > 34.3 169.0us 111.0us
+1024 107.25 69.21 > 35.5 214.5us 138.4us
+1536 227.97 141.91 > 37.8 456.0us 283.8us
+2048 394.05 242.15 > 38.5 788.1us 484.3us
+
+(A2)
+
+bits multiply square ad percent time/mult time/square
+64 7.87 7.95 < 1.0 15.7us 15.9us
+128 9.40 9.19 > 2.2 18.8us 18.4us
+192 11.15 10.59 > 5.0 22.3us 21.2us
+256 12.02 11.16 > 7.2 24.0us 22.3us
+320 14.62 13.43 > 8.1 29.2us 26.9us
+384 17.72 15.80 > 10.8 35.4us 31.6us
+448 21.24 18.51 > 12.9 42.5us 37.0us
+512 25.36 21.78 > 14.1 50.7us 43.6us
+640 34.57 29.00 > 16.1 69.1us 58.0us
+768 46.10 37.60 > 18.4 92.2us 75.2us
+896 58.94 47.72 > 19.0 117.9us 95.4us
+1024 73.76 59.12 > 19.8 147.5us 118.2us
+1536 152.00 118.80 > 21.8 304.0us 237.6us
+2048 259.41 199.57 > 23.1 518.8us 399.1us
+
+(B3)
+
+bits multiply square ad percent time/mult time/square
+64 2.60 2.47 > 5.0 5.20us 4.94us
+128 4.43 4.06 > 8.4 8.86us 8.12us
+192 7.03 6.10 > 13.2 14.1us 12.2us
+256 10.44 8.59 > 17.7 20.9us 17.2us
+320 14.44 11.64 > 19.4 28.9us 23.3us
+384 19.12 15.08 > 21.1 38.2us 30.2us
+448 24.55 19.09 > 22.2 49.1us 38.2us
+512 31.03 23.53 > 24.2 62.1us 47.1us
+640 45.05 33.80 > 25.0 90.1us 67.6us
+768 63.02 46.05 > 26.9 126.0us 92.1us
+896 83.74 60.29 > 28.0 167.5us 120.6us
+1024 106.73 76.65 > 28.2 213.5us 153.3us
+1536 228.94 160.98 > 29.7 457.9us 322.0us
+2048 398.08 275.93 > 30.7 796.2us 551.9us
+
+(C4)
+
+bits multiply square ad percent time/mult time/square
+64 1.34 1.28 > 4.5 2.68us 2.56us
+128 2.76 2.59 > 6.2 5.52us 5.18us
+192 4.52 4.16 > 8.0 9.04us 8.32us
+256 6.64 5.99 > 9.8 13.3us 12.0us
+320 9.20 8.13 > 11.6 18.4us 16.3us
+384 12.01 10.58 > 11.9 24.0us 21.2us
+448 15.24 13.33 > 12.5 30.5us 26.7us
+512 19.02 16.46 > 13.5 38.0us 32.9us
+640 27.56 23.54 > 14.6 55.1us 47.1us
+768 37.89 31.78 > 16.1 75.8us 63.6us
+896 49.24 41.42 > 15.9 98.5us 82.8us
+1024 62.59 52.18 > 16.6 125.2us 104.3us
+1536 131.66 107.72 > 18.2 263.3us 215.4us
+2048 226.45 182.95 > 19.2 453.0us 365.9us
+
+(A7)
+
+bits multiply square ad percent time/mult time/square
+64 1.74 1.71 > 1.7 3.48us 3.42us
+128 3.48 2.96 > 14.9 6.96us 5.92us
+192 5.74 4.60 > 19.9 11.5us 9.20us
+256 8.75 6.61 > 24.5 17.5us 13.2us
+320 12.5 8.99 > 28.1 25.0us 18.0us
+384 16.9 11.9 > 29.6 33.8us 23.8us
+448 22.2 15.2 > 31.7 44.4us 30.4us
+512 28.3 19.0 > 32.7 56.6us 38.0us
+640 42.4 28.0 > 34.0 84.8us 56.0us
+768 59.4 38.5 > 35.2 118.8us 77.0us
+896 79.5 51.2 > 35.6 159.0us 102.4us
+1024 102.6 65.5 > 36.2 205.2us 131.0us
+1536 224.3 140.6 > 37.3 448.6us 281.2us
+2048 393.4 244.3 > 37.9 786.8us 488.6us
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s
new file mode 100644
index 0000000000..ae9da630d1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hpma512.s
@@ -0,0 +1,615 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ *
+ * This PA-RISC 2.0 function computes the product of two unsigned integers,
+ * and adds the result to a previously computed integer. The multiplicand
+ * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
+ * memory in little-double-wordian order. The multiplier is an unsigned
+ * 64-bit integer. The previously computed integer to which the product is
+ * added is located in the result ("res") area, and is assumed to be a
+ * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
+ * in little-double-wordian order. This value normally will be the result
+ * of a previously computed nine doubleword result. It is not necessary
+ * to pad the multiplicand with an additional 64-bit zero doubleword.
+ *
+ * Multiplicand, multiplier, and addend ideally should be aligned at
+ * 16-byte boundaries for best performance. The code will function
+ * correctly for alignment at eight-byte boundaries which are not 16-byte
+ * boundaries, but the execution may be slightly slower due to even/odd
+ * bank conflicts on PA-RISC 8000 processors.
+ *
+ * This function is designed to accept the same calling sequence as Bill
+ * Ackerman's "maxpy_little" function. The carry from the ninth doubleword
+ * of the result is written to the tenth word of the result, as is done by
+ * Bill Ackerman's function. The final carry also is returned as an
+ * integer, which may be ignored. The function prototype may be either
+ * of the following:
+ *
+ * void multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ * or
+ * int multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *
+ * where: "l" originally denoted vector lengths. This parameter is
+ * ignored. This function always assumes a multiplicand length of
+ * 512 bits (eight doublewords), and addend and result lengths of
+ * 576 bits (nine doublewords).
+ *
+ * "m" is a pointer to the doubleword multiplier, ideally aligned
+ * on a 16-byte boundary.
+ *
+ * "a" is a pointer to the eight-doubleword multiplicand, stored
+ * in little-double-wordian order, and ideally aligned on a 16-byte
+ * boundary.
+ *
+ * "res" is a pointer to the nine doubleword addend, and to the
+ * nine-doubleword product computed by this function. The result
+ * also is stored in little-double-wordian order, and ideally is
+ * aligned on a 16-byte boundary. It is expected that the alignment
+ * of the "res" area may alternate between even/odd doubleword
+ * boundaries for successive calls for 512-bit x 512-bit
+ * multiplications.
+ *
+ * The code for this function has been scheduled to use the parallelism
+ * of the PA-RISC 8000 series microprocessors as well as the author was
+ * able. Comments and/or suggestions for improvement are welcomed.
+ *
+ * The code is "64-bit safe". This means it may be called in either
+ * the 32ILP context or the 64LP context. All 64-bits of registers are
+ * saved and restored.
+ *
+ * This code is self-contained. It requires no other header files in order
+ * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
+ * definitions for registers and stack offsets are included within this
+ * one source file.
+ *
+ * This is a leaf routine. As such, minimal use is made of the stack area.
+ * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
+ * general registers, and 128 bytes are used to move intermediate products
+ * from the floating-point registers to the general registers. Stack
+ * protocols assure proper alignment of these areas.
+ *
+ */
+
+
+/* ====================================================================*/
+/* symbolic definitions for PA-RISC general registers */
+/* in the MIPS style (lower-case names), avoids lots of case shifts */
+/* assignments (except t4) preserve register number parity: an even-numbered name maps to an even %r number, odd to odd */
+/* ====================================================================*/
+
+#define zero %r0 /* permanent zero */
+#define t5 %r1 /* temp register, altered by addil */
+
+#define rp %r2 /* return pointer */
+
+#define s1 %r3 /* callee saves register */
+#define s0 %r4 /* callee saves register */
+#define s3 %r5 /* callee saves register */
+#define s2 %r6 /* callee saves register */
+#define s5 %r7 /* callee saves register */
+#define s4 %r8 /* callee saves register */
+#define s7 %r9 /* callee saves register */
+#define s6 %r10 /* callee saves register */
+
+#define t1 %r19 /* caller saves register */
+#define t0 %r20 /* caller saves register */
+#define t3 %r21 /* caller saves register */
+#define t2 %r22 /* caller saves register */
+
+#define a3 %r23 /* fourth argument register, high word */
+#define a2 %r24 /* third argument register, low word */
+#define a1 %r25 /* second argument register, high word */
+#define a0 %r26 /* first argument register, low word */
+
+#define v0 %r28 /* high order return value */
+#define v1 %r29 /* low order return value */
+
+#define sp %r30 /* stack pointer */
+#define t4 %r31 /* temporary register; the one name whose parity differs from its %r number */
+
+#define fa0 %fr4 /* first floating-point argument register */
+#define fa1 %fr5 /* second floating-point argument register */
+#define fa2 %fr6 /* third floating-point argument register */
+#define fa3 %fr7 /* fourth floating-point argument register */
+
+#define fa0r %fr4R /* right (low order) half of first argument register */
+#define fa1r %fr5R /* right half of second argument register */
+#define fa2r %fr6R /* right (low order) half of third argument register */
+#define fa3r %fr7R /* right (low order) half of fourth argument register */
+
+#define ft0 %fr8 /* caller saves register */
+#define ft1 %fr9 /* caller saves register */
+#define ft2 %fr10 /* caller saves register */
+#define ft3 %fr11 /* caller saves register */
+
+#define ft0r %fr8R /* right (low order) half of ft0 */
+#define ft1r %fr9R /* right (low order) half of ft1 */
+#define ft2r %fr10R /* right (low order) half of ft2 */
+#define ft3r %fr11R /* right (low order) half of ft3 */
+
+#define ft4 %fr22 /* caller saves register */
+#define ft5 %fr23 /* caller saves register */
+#define ft6 %fr24 /* caller saves register */
+#define ft7 %fr25 /* caller saves register */
+#define ft8 %fr26 /* caller saves register */
+#define ft9 %fr27 /* caller saves register */
+#define ft10 %fr28 /* caller saves register */
+#define ft11 %fr29 /* caller saves register */
+#define ft12 %fr30 /* caller saves register */
+#define ft13 %fr31 /* caller saves register */
+
+#define ft4r %fr22R /* right (low order) half of ft4 */
+#define ft5r %fr23R /* right (low order) half of ft5 */
+#define ft6r %fr24R /* right half of ft6 */
+#define ft7r %fr25R /* right half of ft7 */
+#define ft8r %fr26R /* right half of ft8 */
+#define ft9r %fr27R /* right half of ft9 */
+#define ft10r %fr28R /* right half of ft10 */
+#define ft11r %fr29R /* right half of ft11 */
+#define ft12r %fr30R /* right half of ft12 */
+#define ft13r %fr31R /* right half of ft13 */
+
+
+
+/* ================================================================== */
+/* functional definitions for PA-RISC registers */
+/* ================================================================== */
+
+/* general registers: argument registers are reused as temps once their pointer has been consumed */
+
+#define T1 a0 /* temp (%r26); the length parameter passed here is ignored */
+
+#define pM a1 /* -> 64-bit multiplier */
+#define T2 a1 /* temp, (after fetching multiplier) */
+
+#define pA a2 /* -> multiplicand vector (8 64-bit words) */
+#define T3 a2 /* temp, (after fetching multiplicand) */
+
+#define pR a3 /* -> addend vector (8 64-bit doublewords),
+ result vector (9 64-bit words) */
+
+#define S0 s0 /* callee saves summand registers (S0-S7 = %r4,%r3,%r6,%r5,%r8,%r7,%r10,%r9) */
+#define S1 s1 /* %r3 */
+#define S2 s2 /* %r6 */
+#define S3 s3 /* %r5 */
+#define S4 s4 /* %r8 */
+#define S5 s5 /* %r7 */
+#define S6 s6 /* %r10 */
+#define S7 s7 /* %r9 */
+
+#define S8 v0 /* caller saves summand registers; S8 is %r28 */
+#define S9 v1 /* %r29 */
+#define S10 t0 /* %r20 */
+#define S11 t1 /* %r19 */
+#define S12 t2 /* %r22 */
+#define S13 t3 /* %r21 */
+#define S14 t4 /* %r31 */
+#define S15 t5 /* %r1 */
+
+
+
+/* floating-point registers: each xL name aliases the full register name, each xR name the right half */
+
+#define M fa0 /* multiplier double word */
+#define MR fa0r /* low order half of multiplier double word */
+#define ML fa0 /* high order half of multiplier double word */
+
+#define A0 fa2 /* multiplicand double word 0 */
+#define A0R fa2r /* low order half of multiplicand double word 0 */
+#define A0L fa2 /* high order half of multiplicand double word 0 */
+
+#define A1 fa3 /* multiplicand double word 1 */
+#define A1R fa3r /* low order half of multiplicand double word 1 */
+#define A1L fa3 /* high order half of multiplicand double word 1 */
+
+#define A2 ft0 /* multiplicand double word 2 */
+#define A2R ft0r /* low order half of multiplicand double word 2 */
+#define A2L ft0 /* high order half of multiplicand double word 2 */
+
+#define A3 ft1 /* multiplicand double word 3 */
+#define A3R ft1r /* low order half of multiplicand double word 3 */
+#define A3L ft1 /* high order half of multiplicand double word 3 */
+
+#define A4 ft2 /* multiplicand double word 4 */
+#define A4R ft2r /* low order half of multiplicand double word 4 */
+#define A4L ft2 /* high order half of multiplicand double word 4 */
+
+#define A5 ft3 /* multiplicand double word 5 */
+#define A5R ft3r /* low order half of multiplicand double word 5 */
+#define A5L ft3 /* high order half of multiplicand double word 5 */
+
+#define A6 ft4 /* multiplicand double word 6 */
+#define A6R ft4r /* low order half of multiplicand double word 6 */
+#define A6L ft4 /* high order half of multiplicand double word 6 */
+
+#define A7 ft5 /* multiplicand double word 7 */
+#define A7R ft5r /* low order half of multiplicand double word 7 */
+#define A7L ft5 /* high order half of multiplicand double word 7 */
+
+#define P0 ft6 /* partial product register 0 (xmpyu destination) */
+#define P1 ft7 /* partial product register 1 (xmpyu destination) */
+#define P2 ft8 /* partial product register 2 (xmpyu destination) */
+#define P3 ft9 /* partial product register 3 (xmpyu destination) */
+#define P4 ft10 /* partial product register 4 (xmpyu destination) */
+#define P5 ft11 /* partial product register 5 (xmpyu destination) */
+#define P6 ft12 /* partial product register 6 (xmpyu destination) */
+#define P7 ft13 /* partial product register 7 (xmpyu destination) */
+
+
+
+
+/* ====================================================================== */
+/* symbolic definitions for HP-UX stack offsets (relative to sp after the frame is pushed) */
+/* symbolic definitions for memory NOPs */
+/* ====================================================================== */
+
+#define ST_SZ 192 /* stack area total size in bytes: 64 for register saves + 128 for transfers */
+
+#define SV0 -192(sp) /* general register save area; slot for s0 */
+#define SV1 -184(sp) /* slot for s1 */
+#define SV2 -176(sp) /* slot for s2 */
+#define SV3 -168(sp) /* slot for s3 */
+#define SV4 -160(sp) /* slot for s4 */
+#define SV5 -152(sp) /* slot for s5 */
+#define SV6 -144(sp) /* slot for s6 */
+#define SV7 -136(sp) /* slot for s7 */
+
+#define XF0 -128(sp) /* data transfer area */
+#define XF1 -120(sp) /* for floating-pt to integer regs */
+#define XF2 -112(sp) /* (products are stored with fstd, reloaded with ldd) */
+#define XF3 -104(sp)
+#define XF4 -96(sp)
+#define XF5 -88(sp)
+#define XF6 -80(sp)
+#define XF7 -72(sp)
+#define XF8 -64(sp)
+#define XF9 -56(sp)
+#define XF10 -48(sp)
+#define XF11 -40(sp)
+#define XF12 -32(sp)
+#define XF13 -24(sp)
+#define XF14 -16(sp)
+#define XF15 -8(sp)
+
+#define mnop proberi (sp),3,zero /* memory NOP: probe of (sp) whose result is discarded into zero */
+
+
+
+
+/* ====================================================================== */
+/* assembler formalities: .level selection, code space/subspace, alignment */
+/* ====================================================================== */
+
+#ifdef __LP64__
+ .level 2.0W
+#else /* !__LP64__ */
+ .level 2.0
+#endif /* __LP64__ */
+ .space $TEXT$
+ .subspa $CODE$
+ .align 16
+
+/* ====================================================================== */
+/* here to compute 64-bit x 512-bit product + 512-bit addend */
+/* ====================================================================== */
+
+multacc512
+ .PROC
+ .CALLINFO
+ .ENTRY
+ fldd 0(pM),M ; multiplier double word
+ ldo ST_SZ(sp),sp ; push stack
+
+ fldd 0(pA),A0 ; multiplicand double word 0
+ std S1,SV1 ; save s1
+
+ fldd 16(pA),A2 ; multiplicand double word 2
+ std S3,SV3 ; save s3
+
+ fldd 32(pA),A4 ; multiplicand double word 4
+ std S5,SV5 ; save s5
+
+ fldd 48(pA),A6 ; multiplicand double word 6
+ std S7,SV7 ; save s7
+
+
+ std S0,SV0 ; save s0
+ fldd 8(pA),A1 ; multiplicand double word 1
+ xmpyu MR,A0L,P0 ; A0 cross 32-bit word products
+ xmpyu ML,A0R,P2
+
+ std S2,SV2 ; save s2
+ fldd 24(pA),A3 ; multiplicand double word 3
+ xmpyu MR,A2L,P4 ; A2 cross 32-bit word products
+ xmpyu ML,A2R,P6
+
+ std S4,SV4 ; save s4
+ fldd 40(pA),A5 ; multiplicand double word 5
+
+ std S6,SV6 ; save s6
+ fldd 56(pA),A7 ; multiplicand double word 7
+
+
+ fstd P0,XF0 ; MR * A0L
+ xmpyu MR,A0R,P0 ; A0 right 32-bit word product
+ xmpyu MR,A1L,P1 ; A1 cross 32-bit word product
+
+ fstd P2,XF2 ; ML * A0R
+ xmpyu ML,A0L,P2 ; A0 left 32-bit word product
+ xmpyu ML,A1R,P3 ; A1 cross 32-bit word product
+
+ fstd P4,XF4 ; MR * A2L
+ xmpyu MR,A2R,P4 ; A2 right 32-bit word product
+ xmpyu MR,A3L,P5 ; A3 cross 32-bit word product
+
+ fstd P6,XF6 ; ML * A2R
+ xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product
+ xmpyu ML,A3R,P7 ; A3 cross 32-bit word product
+
+
+ ldd XF0,S0 ; MR * A0L
+ fstd P1,XF1 ; MR * A1L
+
+ ldd XF2,S2 ; ML * A0R
+ fstd P3,XF3 ; ML * A1R
+
+ ldd XF4,S4 ; MR * A2L
+ fstd P5,XF5 ; MR * A3L
+ xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products
+ xmpyu ML,A1L,P3
+
+ ldd XF6,S6 ; ML * A2R
+ fstd P7,XF7 ; ML * A3R
+ xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products
+ xmpyu ML,A3L,P7
+
+
+ fstd P0,XF0 ; MR * A0R
+ ldd XF1,S1 ; MR * A1L
+ nop
+ add S0,S2,T1 ; A0 cross product sum
+
+ fstd P2,XF2 ; ML * A0L
+ ldd XF3,S3 ; ML * A1R
+ add,dc zero,zero,S0 ; A0 cross product sum carry
+ depd,z T1,31,32,S2 ; A0 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A2R
+ ldd XF5,S5 ; MR * A3L
+ shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32
+ add S4,S6,T3 ; A2 cross product sum
+
+ fstd P6,XF6 ; ML * A2L
+ ldd XF7,S7 ; ML * A3R
+ add,dc zero,zero,S4 ; A2 cross product sum carry
+ depd,z T3,31,32,S6 ; A2 cross product sum << 32
+
+
+ ldd XF0,S8 ; MR * A0R
+ fstd P1,XF1 ; MR * A1R
+ xmpyu MR,A4L,P0 ; A4 cross 32-bit word product
+ xmpyu MR,A5L,P1 ; A5 cross 32-bit word product
+
+ ldd XF2,S10 ; ML * A0L
+ fstd P3,XF3 ; ML * A1L
+ xmpyu ML,A4R,P2 ; A4 cross 32-bit word product
+ xmpyu ML,A5R,P3 ; A5 cross 32-bit word product
+
+ ldd XF4,S12 ; MR * A2R
+ fstd P5,XF5 ; MR * A3L
+ xmpyu MR,A6L,P4 ; A6 cross 32-bit word product
+ xmpyu MR,A7L,P5 ; A7 cross 32-bit word product
+
+ ldd XF6,S14 ; ML * A2L
+ fstd P7,XF7 ; ML * A3L
+ xmpyu ML,A6R,P6 ; A6 cross 32-bit word product
+ xmpyu ML,A7R,P7 ; A7 cross 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4L
+ ldd XF1,S9 ; MR * A1R
+ shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32
+ add S1,S3,T1 ; A1 cross product sum
+
+ fstd P2,XF2 ; ML * A4R
+ ldd XF3,S11 ; ML * A1L
+ add,dc zero,zero,S1 ; A1 cross product sum carry
+ depd,z T1,31,32,S3 ; A1 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A6L
+ ldd XF5,S13 ; MR * A3R
+ shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32
+ add S5,S7,T3 ; A3 cross product sum
+
+ fstd P6,XF6 ; ML * A6R
+ ldd XF7,S15 ; ML * A3L
+ add,dc zero,zero,S5 ; A3 cross product sum carry
+ depd,z T3,31,32,S7 ; A3 cross product sum << 32
+
+
+ shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword
+
+ add,dc S0,S10,S10 ; M * A0 left doubleword
+ add S3,S9,S9 ; M * A1 right doubleword
+
+ add,dc S1,S11,S11 ; M * A1 left doubleword
+ add S6,S12,S12 ; M * A2 right doubleword
+
+
+ ldd 24(pR),S3 ; Addend word 3
+ fstd P1,XF1 ; MR * A5L
+ add,dc S4,S14,S14 ; M * A2 left doubleword
+ xmpyu MR,A5R,P1 ; A5 right 32-bit word product
+
+ ldd 8(pR),S1 ; Addend word 1
+ fstd P3,XF3 ; ML * A5R
+ add S7,S13,S13 ; M * A3 right doubleword
+ xmpyu ML,A5L,P3 ; A5 left 32-bit word product
+
+ ldd 0(pR),S7 ; Addend word 0
+ fstd P5,XF5 ; MR * A7L
+ add,dc S5,S15,S15 ; M * A3 left doubleword
+ xmpyu MR,A7R,P5 ; A7 right 32-bit word product
+
+ ldd 16(pR),S5 ; Addend word 2
+ fstd P7,XF7 ; ML * A7R
+ add S10,S9,S9 ; P1 doubleword
+ xmpyu ML,A7L,P7 ; A7 left 32-bit word products
+
+
+ ldd XF0,S0 ; MR * A4L
+ fstd P1,XF9 ; MR * A5R
+ add,dc S11,S12,S12 ; P2 doubleword
+ xmpyu MR,A4R,P0 ; A4 right 32-bit word product
+
+ ldd XF2,S2 ; ML * A4R
+ fstd P3,XF11 ; ML * A5L
+ add,dc S14,S13,S13 ; P3 doubleword
+ xmpyu ML,A4L,P2 ; A4 left 32-bit word product
+
+ ldd XF6,S6 ; ML * A6R
+ fstd P5,XF13 ; MR * A7R
+ add,dc zero,S15,T2 ; P4 partial doubleword
+ xmpyu MR,A6R,P4 ; A6 right 32-bit word product
+
+ ldd XF4,S4 ; MR * A6L
+ fstd P7,XF15 ; ML * A7L
+ add S7,S8,S8 ; R0 + P0, new R0 doubleword
+ xmpyu ML,A6L,P6 ; A6 left 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4R
+ ldd XF7,S7 ; ML * A7R
+ add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword
+
+ fstd P2,XF2 ; ML * A4L
+ ldd XF1,S1 ; MR * A5L
+ add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword
+
+ fstd P4,XF4 ; MR * A6R
+ ldd XF5,S5 ; MR * A7L
+ add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword
+
+ fstd P6,XF6 ; ML * A6L
+ ldd XF3,S3 ; ML * A5R
+ add,dc zero,T2,T2 ; c + partial P4
+ add S0,S2,T1 ; A4 cross product sum
+
+
+ std S8,0(pR) ; save R0
+ add,dc zero,zero,S0 ; A4 cross product sum carry
+ depd,z T1,31,32,S2 ; A4 cross product sum << 32
+
+ std S9,8(pR) ; save R1
+ shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32
+ add S4,S6,T3 ; A6 cross product sum
+
+ std S12,16(pR) ; save R2
+ add,dc zero,zero,S4 ; A6 cross product sum carry
+ depd,z T3,31,32,S6 ; A6 cross product sum << 32
+
+
+ std S13,24(pR) ; save R3
+ shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32
+ add S1,S3,T1 ; A5 cross product sum
+
+ ldd XF0,S8 ; MR * A4R
+ add,dc zero,zero,S1 ; A5 cross product sum carry
+ depd,z T1,31,32,S3 ; A5 cross product sum << 32
+
+ ldd XF2,S10 ; ML * A4L
+ ldd XF9,S9 ; MR * A5R
+ shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32
+ add S5,S7,T3 ; A7 cross product sum
+
+ ldd XF4,S12 ; MR * A6R
+ ldd XF11,S11 ; ML * A5L
+ add,dc zero,zero,S5 ; A7 cross product sum carry
+ depd,z T3,31,32,S7 ; A7 cross product sum << 32
+
+ ldd XF6,S14 ; ML * A6L
+ ldd XF13,S13 ; MR * A7R
+ shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A4 right doubleword
+
+
+ ldd XF15,S15 ; ML * A7L
+ add,dc S0,S10,S10 ; M * A4 left doubleword
+ add S3,S9,S9 ; M * A5 right doubleword
+
+ add,dc S1,S11,S11 ; M * A5 left doubleword
+ add S6,S12,S12 ; M * A6 right doubleword
+
+ ldd 32(pR),S0 ; Addend word 4
+ ldd 40(pR),S1 ; Addend word 5
+ add,dc S4,S14,S14 ; M * A6 left doubleword
+ add S7,S13,S13 ; M * A7 right doubleword
+
+ ldd 48(pR),S2 ; Addend word 6
+ ldd 56(pR),S3 ; Addend word 7
+ add,dc S5,S15,S15 ; M * A7 left doubleword
+ add S8,T2,S8 ; P4 doubleword
+
+ ldd 64(pR),S4 ; Addend word 8
+ ldd SV5,s5 ; restore s5
+ add,dc S10,S9,S9 ; P5 doubleword
+ add,dc S11,S12,S12 ; P6 doubleword
+
+
+ ldd SV6,s6 ; restore s6
+ ldd SV7,s7 ; restore s7
+ add,dc S14,S13,S13 ; P7 doubleword
+ add,dc zero,S15,S15 ; P8 doubleword
+
+ add S0,S8,S8 ; new R4 doubleword
+
+ ldd SV0,s0 ; restore s0
+ std S8,32(pR) ; save R4
+ add,dc S1,S9,S9 ; new R5 doubleword
+
+ ldd SV1,s1 ; restore s1
+ std S9,40(pR) ; save R5
+ add,dc S2,S12,S12 ; new R6 doubleword
+
+ ldd SV2,s2 ; restore s2
+ std S12,48(pR) ; save R6
+ add,dc S3,S13,S13 ; new R7 doubleword
+
+ ldd SV3,s3 ; restore s3
+ std S13,56(pR) ; save R7
+ add,dc S4,S15,S15 ; new R8 doubleword
+
+ ldd SV4,s4 ; restore s4
+ std S15,64(pR) ; save result[8]
+ add,dc zero,zero,v0 ; return carry from R8
+
+ CMPIB,*= 0,v0,$L0 ; if no overflow, exit
+ LDO 8(pR),pR
+
+$FINAL1 ; Final carry propagation
+ LDD 64(pR),v0
+ LDO 8(pR),pR
+ ADDI 1,v0,v0
+ CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry.
+ STD v0,56(pR)
+$L0
+ bv zero(rp) ; -> caller
+ ldo -ST_SZ(sp),sp ; pop stack
+
+/* ====================================================================== */
+/* end of module */
+/* ====================================================================== */
+
+
+ bve (rp)
+ .EXIT
+ nop
+ .PROCEND
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+ .EXPORT multacc512,ENTRY
+
+ .end
diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s
new file mode 100644
index 0000000000..c72de8a12b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppa20.s
@@ -0,0 +1,904 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef __LP64__
+ .LEVEL 2.0W
+#else
+; .LEVEL 1.1
+; .ALLOW 2.0N
+ .LEVEL 2.0
+#endif
+ .SPACE $TEXT$,SORT=8
+ .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ***************************************************************
+;
+; maxpy_[little/big]
+;
+; ***************************************************************
+
+; There is no built-in default: exactly one of LITTLE_WORDIAN or BIG_WORDIAN must be defined. This file selects LITTLE_WORDIAN just below.
+#define LITTLE_WORDIAN 1
+
+#ifdef LITTLE_WORDIAN
+#define EIGHT 8
+#define SIXTEEN 16
+#define THIRTY_TWO 32
+#define UN_EIGHT -8
+#define UN_SIXTEEN -16
+#define UN_TWENTY_FOUR -24
+#endif
+
+#ifdef BIG_WORDIAN
+#define EIGHT -8
+#define SIXTEEN -16
+#define THIRTY_TWO -32
+#define UN_EIGHT 8
+#define UN_SIXTEEN 16
+#define UN_TWENTY_FOUR 24
+#endif
+
+; This performs a multiple-precision integer version of "daxpy",
+; Using the selected addressing direction. "Little-wordian" means that
+; the least significant word of a number is stored at the lowest address.
+; "Big-wordian" means that the most significant word is at the lowest
+; address. Either way, the incoming address of the vector is that
+; of the least significant word. That means that, for little-wordian
+; addressing, we move the address upward as we propagate carries
+; from the least significant word to the most significant. For
+; big-wordian we move the address downward.
+
+; We use the following registers:
+;
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = address of scalar
+; r24 = arg3 = multiplicand vector
+; r23 = arg4 = result vector
+;
+; fr9 = scalar loaded once only from r25
+
+; The cycle counts shown in the bodies below are simply the result of a
+; scheduling by hand. The actual PCX-U hardware does it differently.
+; The intention is that the overall speed is the same.
+
+; The pipeline startup and shutdown code is constructed in the usual way,
+; by taking the loop bodies and removing unnecessary instructions.
+; We have left the comments describing cycle numbers in the code.
+; These are intended for reference when comparing with the main loop,
+; and have no particular relationship to actual cycle numbers.
+
+#ifdef LITTLE_WORDIAN
+maxpy_little
+#else
+maxpy_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp) ; save r3 at the old sp, then advance sp by 128
+ STW %r4,-124(%sp) ; save r4 (r3 now sits at -128(sp))
+
+ ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately.
+ FLDD 0(%r25),%fr9 ; fr9 = scalar
+
+; First startup (dword0 = 0(%r24) in fr24, dword1 = EIGHT(%r24) in fr28)
+
+ FLDD 0(%r24),%fr24 ; Cycle 1
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ FSTD %fr24,-96(%sp) ; -96(sp) <- scalar L * dword0 R (cross product)
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp) ; -80(sp) <- scalar R * dword0 L (cross product)
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp) ; -64(sp) <- scalar L * dword1 R (cross product)
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp) ; -48(sp) <- scalar R * dword0 R
+
+; Second startup
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ FSTD %fr30,-56(%sp) ; -56(sp) <- scalar R * dword1 L (cross product)
+ FLDD 0(%r24),%fr24 ; next multiplicand doubleword (r24 was advanced above)
+
+ FSTD %fr26,-88(%sp) ; Cycle 2: -88(sp) <- scalar L * dword0 L
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp) ; -104(sp) <- scalar L * dword1 L
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3 ; r3 = scalar L * dword0 R
+ FSTD %fr29,-72(%sp) ; -72(sp) <- scalar R * dword1 R
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19 ; r19 = scalar L * dword1 R
+ LDD -80(%sp),%r21 ; r21 = scalar R * dword0 L
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ LDD -56(%sp),%r20 ; r20 = scalar R * dword1 L
+ ADD %r21,%r3,%r3 ; r3 = dword0 cross-product sum
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4 ; r4 = scalar L * dword0 L
+ SHRPD %r3,%r0,32,%r21 ; r21 = r3 << 32 (low half of the cross sum moved up)
+ LDD -48(%sp),%r1 ; r1 = scalar R * dword0 R
+
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ LDD -104(%sp),%r31 ; r31 = scalar L * dword1 L
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ LDD 0(%r23),%r3 ; r3 = first doubleword of the result vector (addend)
+ FSTD %fr25,-80(%sp)
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,-112(%sp)
+
+; Here is the loop.
+
+$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ FSTD %fr30,-56(%sp)
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ ADD %r3,%r1,%r1
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ FSTD %fr29,-72(%sp)
+ LDD -96(%sp),%r3
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ ADD %r21,%r3,%r3
+ LDD -56(%sp),%r20
+ STD %r1,UN_SIXTEEN(%r23)
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ FLDD EIGHT(%r24),%fr28
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ STD %r28,UN_EIGHT(%r23)
+ LDD -72(%sp),%r29
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr25,-80(%sp)
+ LDD 0(%r23),%r3
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r22,%r1,%r1
+ ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+
+$ENDLOOP
+
+; Shutdown code, first stage.
+
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,UN_SIXTEEN(%r23)
+; LDD -112(%sp),%r21
+; STD %r21,UN_EIGHT(%r23)
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ CMPIB,= 0,%r26,$ONEMORE
+ FSTD %fr30,-56(%sp)
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ ADD %r3,%r1,%r1 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9
+ LDD -96(%sp),%r3
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN4
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ ADD %r21,%r3,%r3 ; Cycle 6
+ LDD -56(%sp),%r20
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ LDD -72(%sp),%r29
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, second stage.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+
+ STD %r28,UN_EIGHT(%r23) ; Cycle 9
+
+ LDD 0(%r23),%r3 ; Cycle 11
+
+; Shutdown code, third stage.
+
+ LDO SIXTEEN(%r23),%r23
+ ADD %r3,%r22,%r1
+$JOIN1 ADD,DC %r0,%r0,%r21 ; r21 = carry out of the preceding add
+ CMPIB,*= 0,%r21,$L0 ; if no overflow, exit
+ STD %r1,UN_SIXTEEN(%r23)
+
+; Final carry propagation
+
+$FINAL1 LDO EIGHT(%r23),%r23
+ LDD UN_SIXTEEN(%r23),%r21
+ ADDI 1,%r21,%r21
+ CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry.
+ STD %r21,UN_SIXTEEN(%r23)
+ B $L0 ; carry absorbed, go to the common exit
+ NOP
+
+; Here is the code that handles the difficult cases N=1, N=2, and N=3.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$N_IS_SMALL
+ CMPIB,= 0,%r26,$N_IS_ONE
+ FSTD %fr24,-96(%sp) ; Cycle 10
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp)
+ FSTD %fr31,-64(%sp) ; Cycle 12
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ CMPIB,= 2,%r26,$N_IS_THREE
+ FSTD %fr30,-56(%sp)
+
+; N = 2
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ FSTD %fr28,-104(%sp) ; Cycle 3
+ LDD -96(%sp),%r3 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ B $JOIN4
+ ADD %r0,%r0,%r22 ; delay slot: r22 = 0 and carry cleared
+
+$N_IS_THREE
+ FLDD SIXTEEN(%r24),%fr24
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+ B $JOIN3
+ ADD %r0,%r0,%r22 ; delay slot: r22 = 0 and carry cleared
+
+$N_IS_ONE
+ FSTD %fr25,-80(%sp)
+ FSTD %fr27,-48(%sp)
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ B $JOIN5
+ ADD %r0,%r0,%r22 ; delay slot: r22 = 0 and carry cleared
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is quite tricky, because of the way the
+; carry chains and SHRPD chains have been chopped up.
+
+$ONEMORE
+
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+ ADD %r3,%r1,%r1
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+$JOIN3
+ XMPYU %fr9L,%fr24R,%fr24
+ LDD -56(%sp),%r20
+ ADD %r21,%r3,%r3
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+
+ LDD -104(%sp),%r31 ; Cycle 8
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ FSTD %fr24,-96(%sp)
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+ FSTD %fr25,-80(%sp)
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+
+; Shutdown code, stage 1-1/2.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN5
+ LDD -96(%sp),%r3 ; moved from cycle 4
+ LDD -80(%sp),%r21
+ ADD %r21,%r3,%r3 ; Cycle 6
+ ADD,DC %r0,%r0,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+ SHRPD %r19,%r3,32,%r3 ; Cycle 8
+ ADD %r21,%r1,%r1 ; Cycle 9
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ LDD 0(%r23),%r3 ; Cycle 11
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, stage 2-1/2.
+
+ ADD,DC %r0,%r4,%r4 ; Cycle 1
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+ STD %r1,UN_SIXTEEN(%r23)
+ ADD,DC %r21,%r4,%r1
+ B $JOIN1
+ LDO EIGHT(%r23),%r23 ; delay slot: advance r23 so the store at JOIN1 hits the next doubleword
+
+; exit
+
+$L0
+ LDW -124(%sp),%r4 ; restore r4
+ BVE (%r2) ; return to caller
+ .EXIT
+ LDW,MB -128(%sp),%r3 ; delay slot: move sp back by 128 and restore r3
+
+ .PROCEND
+
+; ***************************************************************
+;
+; add_diag_[little/big]
+;
+; ***************************************************************
+
+; The arguments are as follows:
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = vector to square
+; r24 = arg3 = result vector
+
+#ifdef LITTLE_WORDIAN
+add_diag_little
+#else
+add_diag_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp) ; save r3 at the old sp, then advance sp by 128
+ STW %r4,-124(%sp) ; save r4 (r3 now sits at -128(sp))
+
+ ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately.
+ NOP
+
+; Startup code
+
+ FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body)
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30 ; fr30 = L*L of the first element
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ FSTD %fr29,-88(%sp) ; -88(sp) <- R*R of the first element
+ FSTD %fr27,-72(%sp) ; Cycle 7: -72(sp) <- L*R of the first element
+ CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
+ FSTD %fr30,-96(%sp) ; delay slot: -96(sp) <- L*L of the first element
+ FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2
+ LDD -88(%sp),%r22 ; Cycle 3
+ LDD -72(%sp),%r31 ; Cycle 4
+ XMPYU %fr7R,%fr7R,%fr28 ; fr28 = R*R of the next element
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31 ; fr31 = L*L of the next element
+ LDD -96(%sp),%r20 ; Cycle 6
+ FSTD %fr28,-80(%sp) ; -80(sp) <- R*R of the next element
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
+ FSTD %fr24,-64(%sp) ; delay slot: -64(sp) <- L*R of the next element
+
+; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body".
+
+$DIAGLOOP
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDO SIXTEEN(%r25),%r25
+ LDD 0(%r24),%r1
+ FSTD %fr31,-104(%sp)
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD,DC %r22,%r3,%r3
+ FLDD UN_SIXTEEN(%r25),%fr7
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r1,%r3,%r3
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ LDD -80(%sp),%r21
+ STD %r3,0(%r24)
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -64(%sp),%r29
+ LDD EIGHT(%r24),%r1
+ ADD,DC %r4,%r20,%r20 ; Cycle 6
+ LDD -104(%sp),%r19
+ FSTD %fr29,-88(%sp)
+ ADD %r20,%r1,%r1 ; Cycle 7
+ FSTD %fr27,-72(%sp)
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r28
+ FSTD %fr30,-96(%sp)
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD,DC %r21,%r4,%r4
+ FLDD UN_EIGHT(%r25),%fr7
+ STD %r1,UN_TWENTY_FOUR(%r24)
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r28,%r4,%r4
+ XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4
+ LDD -88(%sp),%r22
+ STD %r4,UN_SIXTEEN(%r24)
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31
+ LDD -72(%sp),%r31
+ LDD UN_EIGHT(%r24),%r28
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ LDD -96(%sp),%r20
+ FSTD %fr28,-80(%sp)
+ ADD %r19,%r28,%r28 ; Cycle 7
+ FSTD %fr24,-64(%sp)
+ ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+$ENDDIAGLOOP
+
+ ADD,DC %r0,%r22,%r22
+ CMPIB,= 0,%r26,$ONEMOREDIAG
+ SHRPD %r31,%r0,31,%r3
+
+; Shutdown code, first stage.
+
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ LDD -104(%sp),%r19
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r4,%r1,%r4
+ STD %r4,UN_SIXTEEN(%r24); Cycle 4
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r0,%r22 ; Cycle 8
+ CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit
+ STD %r28,UN_EIGHT(%r24)
+
+; Final carry propagation
+
+$FDIAG2
+ LDO EIGHT(%r24),%r24
+ LDD UN_EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry.
+ STD %r26,UN_EIGHT(%r24)
+
+ B $Z0 ; carry absorbed, go to the common exit
+ NOP
+
+; Here is the code that handles the difficult case N=1.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$DIAG_N_IS_ONE
+
+ LDD -88(%sp),%r22 ; r22 = R*R of the only element
+ LDD -72(%sp),%r31 ; r31 = L*R of the only element
+ B $JOINDIAG
+ LDD -96(%sp),%r20 ; delay slot: r20 = L*L of the only element
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is the "alternate body". It will, of course,
+; give us opposite registers from the other case, so we need
+; completely different shutdown code.
+
+$ONEMOREDIAG
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ FLDD 0(%r25),%fr7 ; Cycle 2
+ SHRPD %r0,%r31,31,%r4
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ XMPYU %fr7R,%fr7R,%fr29
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ XMPYU %fr7L,%fr7R,%fr27
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -104(%sp),%r19 ; Cycle 6
+ FSTD %fr29,-88(%sp)
+ ADD,DC %r4,%r20,%r20
+ FSTD %fr27,-72(%sp) ; Cycle 7
+ ADD %r20,%r1,%r1
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, first stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ FSTD %fr30,-96(%sp)
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ LDD -88(%sp),%r22
+ ADD %r4,%r1,%r4
+ LDD -72(%sp),%r31 ; Cycle 4
+ STD %r4,UN_SIXTEEN(%r24)
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ LDD -96(%sp),%r20 ; Cycle 6
+ ADD,DC %r3,%r19,%r19
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r22,%r22 ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+$JOINDIAG
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r3,%r28,%r3
+ STD %r3,0(%r24) ; Cycle 4
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r0,%r21 ; Cycle 8
+ CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit
+ STD %r1,EIGHT(%r24)
+
+; Final carry propagation
+
+$FDIAG1
+ LDO EIGHT(%r24),%r24
+ LDD EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry.
+ STD %r26,EIGHT(%r24)
+
+$Z0
+ LDW -124(%sp),%r4 ; restore r4
+ BVE (%r2) ; return to caller
+ .EXIT
+ LDW,MB -128(%sp),%r3 ; delay slot: move sp back by 128 and restore r3
+ .PROCEND
+; .ALLOW
+
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+#ifdef LITTLE_WORDIAN
+#ifdef __GNUC__
+; GNU-as (as of 2.19) does not support LONG_RETURN
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+#else
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+#else
+ .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+ .END
+
+
+; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
+;
+; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
+; performs a 64-bit x any-size multiply, and adds the
+; result to an area of memory. That is, it performs
+; something like
+;
+; A B C D
+; * Z
+; __________
+; P Q R S T
+;
+; and then adds the "PQRST" vector into an area of memory,
+; handling all carries.
+;
+; Digression on nomenclature and endian-ness:
+;
+; Each of the capital letters in the above represents a 64-bit
+; quantity. That is, you could think of the discussion as
+; being in terms of radix-16-quintillion arithmetic. The data
+; type being manipulated is "unsigned long long int". This
+; requires the 64-bit extension of the HP-UX C compiler,
+; available at release 10. You need these compiler flags to
+; enable these extensions:
+;
+; -Aa +e +DA2.0 +DS2.0
+;
+; (The first specifies ANSI C, the second enables the
+; extensions, which are beyond ANSI C, and the third and
+; fourth tell the compiler to use whatever features of the
+; PA2.0 architecture it wishes, in order to make the code more
+; efficient. Since the presence of the assembly code will
+; make the program unable to run on anything less than PA2.0,
+; you might as well gain the performance enhancements in the C
+; code as well.)
+;
+; Questions of "endian-ness" often come up, usually in the
+; context of byte ordering in a word. These routines have a
+; similar issue, that could be called "wordian-ness".
+; Independent of byte ordering (PA is always big-endian), one
+; can make two choices when representing extremely large
+; numbers as arrays of 64-bit doublewords in memory.
+;
+; "Little-wordian" layout means that the least significant
+; word of a number is stored at the lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 0
+; | |
+; | |_______address 8
+; |
+; address 32
+;
+; "Big-wordian" means that the most significant word is at the
+; lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 32
+; | |
+; | |_______address 24
+; |
+; address 0
+;
+; When you compile the file, you must specify one or the other, with
+; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
+;
+; Incidentally, you assemble this file as part of your
+; project with the same C compiler as the rest of the program.
+; My "makefile" for a superprecision arithmetic package has
+; the following stuff:
+;
+; # definitions:
+; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
+; CFLAGS = +O3
+; LDFLAGS = -L /usr/lib -Wl,-aarchive
+;
+; # general build rule for ".s" files:
+; .s.o:
+; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
+;
+; # Now any bind step that calls for pa20.o will assemble pa20.s
+;
+; End of digression, back to arithmetic:
+;
+; The way we multiply two huge numbers is, of course, to multiply
+; the "ABCD" vector by each of the "WXYZ" doublewords, adding
+; the result vectors with increasing offsets, the way we learned
+; in school, back before we all used calculators:
+;
+; A B C D
+; * W X Y Z
+; __________
+; P Q R S T
+; E F G H I
+; M N O P Q
+; + R S T U V
+; _______________
+; F I N A L S U M
+;
+; So we call maxpy_PA20_big (in my case; my package is
+; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
+; in turn as the "scalar", and giving the "ABCD" vector each
+; time. We direct it to add its result into an area of memory
+; that we have cleared at the start. We skew the exact
+; location into that area with each call.
+;
+; The prototype for the function is
+;
+; extern void maxpy_PA20_big(
+; int length, /* Number of doublewords in the multiplicand vector. */
+; const long long int *scalaraddr, /* Address to fetch the scalar. */
+; const long long int *multiplicand, /* The multiplicand vector. */
+; long long int *result); /* Where to accumulate the result. */
+;
+; (You should place a copy of this prototype in an include file
+; or in your C file.)
+;
+; Now, IN ALL CASES, the given address for the multiplicand or
+; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
+; That word is, of course, the word at which the routine
+; starts processing. "maxpy_PA20_little" then increases the
+; addresses as it computes. "maxpy_PA20_big" decreases them.
+;
+; In our example above, "length" would be 4 in each case.
+; "multiplicand" would be the "ABCD" vector. Specifically,
+; the address of the element "D". "scalaraddr" would be the
+; address of "W", "X", "Y", or "Z" on the four calls that we
+; would make. (The order doesn't matter, of course.)
+; "result" would be the appropriate address in the result
+; area. When multiplying by "Z", that would be the least
+; significant word. When multiplying by "Y", it would be the
+; next higher word (8 bytes higher if little-wordian; 8 bytes
+; lower if big-wordian), and so on. The size of the result
+; area must be the sum of the sizes of the multiplicand
+; and multiplier vectors, and must be initialized to zero
+; before we start.
+;
+; Whenever the routine adds its partial product into the result
+; vector, it follows carry chains as far as they need to go.
+;
+; Here is the super-precision multiply routine that I use for
+; my package. The package is big-wordian. I have taken out
+; handling of exponents (it's a floating point package):
+;
+; static void mul_PA20(
+; int size,
+; const long long int *arg1,
+; const long long int *arg2,
+; long long int *result)
+; {
+; int i;
+;
+; for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
+;
+; for (i=0 ; i<size ; i++) {
+; maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
+; }
+; }
diff --git a/security/nss/lib/freebl/mpi/logtab.h b/security/nss/lib/freebl/mpi/logtab.h
new file mode 100644
index 0000000000..24cb13c5b7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/logtab.h
@@ -0,0 +1,28 @@
+/*
+ * logtab.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const float s_logv_2[] = { /* s_logv_2[r] = log base r of 2 (= 1/log2(r)) for r = 2..64; entries 0 and 1 are 0 */
+ 0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /* 0 1 2 3 */
+ 0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /* 4 5 6 7 */
+ 0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /* 8 9 10 11 */
+ 0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
+ 0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
+ 0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
+ 0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
+ 0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
+ 0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
+ 0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
+ 0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
+ 0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
+ 0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
+ 0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
+ 0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
+ 0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
+ 0.166666667f /* 64 */
+};
diff --git a/security/nss/lib/freebl/mpi/montmulf.c b/security/nss/lib/freebl/mpi/montmulf.c
new file mode 100644
index 0000000000..ce8fbc31d2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.c
@@ -0,0 +1,286 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* On Solaris builds upper32/lower32/mod/i16_to_d16_and_d32x4 are supplied
+ * externally (only prototypes are declared below); every other build uses
+ * the portable C fallbacks defined in this file. */
+#ifdef SOLARIS
+#define RF_INLINE_MACROS 1
+#endif
+
+/* Powers of two (and a zero) used to split and recombine the 16-bit and
+ * 32-bit digits that this file carries around as doubles. */
+static const double TwoTo16 = 65536.0;
+static const double TwoToMinus16 = 1.0 / 65536.0;
+static const double Zero = 0.0;
+static const double TwoTo32 = 65536.0 * 65536.0;
+static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
+
+#ifdef RF_INLINE_MACROS
+
+/* Externally provided versions of the helpers; the signatures match the
+ * portable fallbacks in the #else branch. */
+double upper32(double);
+double lower32(double, double);
+double mod(double, double, double);
+
+/* Converts four consecutive 32-bit source words in one call; the comment on
+ * each parameter names the value the caller must pass. */
+void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+ const double * /* 2^16*/,
+ const double * /* 0 */,
+ double * /*result16*/,
+ double * /* result32 */,
+ float * /*source - should be unsigned int* converted to float* */);
+
+#else
+#ifdef MP_USE_FLOOR
+#include <math.h>
+#else
+/* Truncating stand-in for floor(): equals floor() only for non-negative
+ * values that fit in an unsigned long long. */
+#define floor(d) ((double)((unsigned long long)(d)))
+#endif
+
+/* Returns floor(x / 2^32), i.e. the part of x above the low 32 bits. */
+static double
+upper32(double x)
+{
+ return floor(x * TwoToMinus32);
+}
+
+/* Returns x minus 2^32 * floor(x / 2^32), i.e. the low 32 bits of x.
+ * The second parameter is unused here; it is kept so the signature matches
+ * the RF_INLINE_MACROS prototype above. */
+static double
+lower32(double x, double y)
+{
+ return x - TwoTo32 * floor(x * TwoToMinus32);
+}
+
+/* Returns x reduced modulo m, given oneoverm = 1/m supplied by the caller. */
+static double
+mod(double x, double oneoverm, double m)
+{
+ return x - m * floor(x * oneoverm);
+}
+
+#endif
+
+/*
+ * Carry propagation over the accumulator dt[2*from .. 2*tlen-1].
+ *
+ * The entries are handled in (even, odd) pairs.  Each entry is cut down to
+ * its low 32 bits and receives the part above 32 bits that was removed from
+ * the entry two positions below it.  The overflow taken from the last pair
+ * is not stored anywhere.
+ */
+static void
+cleanup(double *dt, int from, int tlen)
+{
+    double carry_even = Zero;
+    double carry_odd = Zero;
+    int k;
+
+    for (k = 2 * from; k < 2 * tlen; k += 2) {
+        const double even = dt[k];
+        const double odd = dt[k + 1];
+
+        dt[k] = lower32(even, Zero) + carry_even;
+        dt[k + 1] = lower32(odd, Zero) + carry_odd;
+        carry_even = upper32(even);
+        carry_odd = upper32(odd);
+    }
+}
+
+/*
+ * Packs an array of doubles holding 16-bit-radix digits into 32-bit words.
+ *
+ * i32  - output, ilen words
+ * d16  - input, 2*ilen doubles; d16[2*k] is added at bit 0 of word k and
+ *        the low 16 bits of d16[2*k+1] at bit 16.  Anything above those
+ *        ranges is carried into the following word.
+ * tmp  - not used
+ * ilen - number of output words; the carry out of the last word is dropped
+ */
+void
+conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+{
+    long long carry = 0;
+    long long lo = (long long)d16[0];
+    long long hi = (long long)d16[1];
+    int k;
+
+    for (k = 0; k < ilen - 1; k++) {
+        long long next_lo = (long long)d16[2 * k + 2];
+        long long next_hi = (long long)d16[2 * k + 3];
+        long long word = carry + (unsigned int)lo + ((hi & 0xffff) << 16);
+
+        i32[k] = (unsigned int)word;
+        carry = (lo >> 32) + (hi >> 16) + (word >> 32);
+        lo = next_lo;
+        hi = next_hi;
+    }
+    i32[k] = (unsigned int)(carry + (unsigned int)lo + ((hi & 0xffff) << 16));
+}
+
+/*
+ * Widens len unsigned 32-bit words into doubles, one double per word.
+ * d32 and i32 must each provide room for at least len elements.
+ */
+void
+conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+{
+    int idx;
+
+#pragma pipeloop(0)
+    for (idx = 0; idx < len; ++idx) {
+        d32[idx] = (double)i32[idx];
+    }
+}
+
+/*
+ * Splits each of len unsigned 32-bit words into two doubles:
+ * d16[2*k] receives the low 16 bits and d16[2*k+1] the high 16 bits.
+ * d16 must provide room for 2*len doubles.
+ */
+void
+conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+{
+    int idx;
+
+#pragma pipeloop(0)
+    for (idx = 0; idx < len; ++idx) {
+        const unsigned int word = i32[idx];
+        double *pair = d16 + 2 * idx;
+
+        pair[0] = (double)(word & 0xffff);
+        pair[1] = (double)(word >> 16);
+    }
+}
+
+/*
+ * Produces both representations of i32[0..len-1] in a single pass:
+ * d32[k] is the whole word as a double, d16[2*k] its low 16 bits and
+ * d16[2*k+1] its high 16 bits.  With RF_INLINE_MACROS, groups of four words
+ * are handed to i16_to_d16_and_d32x4 and the scalar loop finishes the rest.
+ */
+void
+conv_i32_to_d32_and_d16(double *d32, double *d16,
+                        unsigned int *i32, int len)
+{
+    int pos = 0;
+    unsigned int word;
+
+#pragma pipeloop(0)
+#ifdef RF_INLINE_MACROS
+    for (; pos < len - 3; pos += 4) {
+        i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+                             d16 + 2 * pos, d32 + pos, (float *)(i32 + pos));
+    }
+#endif
+    for (; pos < len; pos++) {
+        word = i32[pos];
+        d32[pos] = (double)word;
+        d16[2 * pos] = (double)(word & 0xffff);
+        d16[2 * pos + 1] = (double)(word >> 16);
+    }
+}
+
+/*
+ * Final conditional subtraction of a Montgomery product.
+ *
+ * i32  - value to adjust, len+1 words, least significant word first
+ * nint - modulus, len words
+ * len  - number of words in the modulus
+ *
+ * If the extra word i32[len] is non-zero, or the low len words are greater
+ * than or equal to nint, nint is subtracted once from i32[0..len-1].
+ * i32[len] itself is never written.
+ *
+ * The borrow is tracked in unsigned arithmetic so that the result does not
+ * depend on implementation-defined signed conversion or on right-shifting
+ * a negative value.
+ */
+void
+adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+{
+    unsigned long long borrow = 0;
+    int i;
+
+    if (i32[len] > 0) {
+        i = -1;
+    } else {
+        /* Find the most significant word in which the operands differ. */
+        for (i = len - 1; i >= 0; i--) {
+            if (i32[i] != nint[i])
+                break;
+        }
+    }
+    /* i < 0 means "overflow word set" or "exactly equal to the modulus". */
+    if ((i < 0) || (i32[i] > nint[i])) {
+        for (i = 0; i < len; i++) {
+            unsigned long long diff = (unsigned long long)i32[i] -
+                                      (unsigned long long)nint[i] - borrow;
+
+            i32[i] = (unsigned int)diff;
+            /* The subtraction wrapped iff the top bit of diff is set. */
+            borrow = diff >> 63;
+        }
+    }
+}
+
+/*
+ * Montgomery multiplication on operands stored as arrays of doubles.
+ *
+ * result - output, nlen+1 32-bit words; adjusted by one conditional
+ *          subtraction of the modulus (adjust_montf_result) before return
+ * dm1    - first operand, nlen doubles
+ * dm2    - second operand, 2*nlen+1 doubles; entry 2*nlen is overwritten
+ *          with zero here because the main loop reads dm2[j + 1]
+ * dt     - scratch accumulator, 4*nlen+2 doubles
+ * dn     - modulus as nlen doubles
+ * nint   - modulus as nlen 32-bit words
+ * nlen   - length of the modulus in 32-bit words
+ * dn0    - factor used to derive each 16-bit reduction digit
+ *
+ * The lengths above are minimums, and all arrays must be different from one
+ * another.  nlen == 16 takes a fully unrolled path that never calls
+ * cleanup(); every other length uses the generic loop, which calls
+ * cleanup() each time the counter jj reaches 30.
+ */
+void
+mont_mulf_noconv(unsigned int *result,
+                 double *dm1, double *dm2, double *dt,
+                 double *dn, unsigned int *nint,
+                 int nlen, double dn0)
+{
+    int i, j, jj;
+    double digit, m2j, a, b;
+    double *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+
+    pdm1 = &(dm1[0]);
+    pdm2 = &(dm2[0]);
+    pdn = &(dn[0]);
+    pdm2[2 * nlen] = Zero;
+
+    if (nlen != 16) {
+        for (i = 0; i < 4 * nlen + 2; i++)
+            dt[i] = Zero;
+
+        a = dt[0] = pdm1[0] * pdm2[0];
+        digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+
+        pdtj = &(dt[0]);
+        for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
+            m2j = pdm2[j];
+            a = pdtj[0] + pdn[0] * digit;
+            b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
+            pdtj[1] = b;
+
+#pragma pipeloop(0)
+            for (i = 1; i < nlen; i++) {
+                pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+            }
+            /* Periodically push accumulated high parts upwards. */
+            if (jj == 30) {
+                cleanup(dt, j / 2 + 1, 2 * nlen + 1);
+                jj = 0;
+            }
+
+            digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+        }
+    } else {
+        a = dt[0] = pdm1[0] * pdm2[0];
+
+        /* dt has 4 * 16 + 2 = 66 entries; clear everything above dt[0]. */
+        for (i = 1; i <= 65; i++)
+            dt[i] = Zero;
+
+        pdn_0 = pdn[0];
+        pdm1_0 = pdm1[0];
+
+        digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+        pdtj = &(dt[0]);
+
+        for (j = 0; j < 32; j++, pdtj++) {
+
+            m2j = pdm2[j];
+            a = pdtj[0] + pdn_0 * digit;
+            b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
+            pdtj[1] = b;
+
+            /* Hand-unrolled form of:
+             *   for (i = 1; i < 16; i++)
+             *       pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+             */
+            pdtj[2] += pdm1[1] * m2j + pdn[1] * digit;
+            pdtj[4] += pdm1[2] * m2j + pdn[2] * digit;
+            pdtj[6] += pdm1[3] * m2j + pdn[3] * digit;
+            pdtj[8] += pdm1[4] * m2j + pdn[4] * digit;
+            pdtj[10] += pdm1[5] * m2j + pdn[5] * digit;
+            pdtj[12] += pdm1[6] * m2j + pdn[6] * digit;
+            pdtj[14] += pdm1[7] * m2j + pdn[7] * digit;
+            pdtj[16] += pdm1[8] * m2j + pdn[8] * digit;
+            pdtj[18] += pdm1[9] * m2j + pdn[9] * digit;
+            pdtj[20] += pdm1[10] * m2j + pdn[10] * digit;
+            pdtj[22] += pdm1[11] * m2j + pdn[11] * digit;
+            pdtj[24] += pdm1[12] * m2j + pdn[12] * digit;
+            pdtj[26] += pdm1[13] * m2j + pdn[13] * digit;
+            pdtj[28] += pdm1[14] * m2j + pdn[14] * digit;
+            pdtj[30] += pdm1[15] * m2j + pdn[15] * digit;
+            /* no need for cleanup, cannot overflow */
+            digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+        }
+    }
+
+    conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
+
+    adjust_montf_result(result, nint, nlen);
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h
new file mode 100644
index 0000000000..69bed4acb1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* The functions that are to be called from outside of the .s file have the
+ * following interfaces and array size requirements.  Each descriptive
+ * comment follows the declaration it documents.
+ */
+
+void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
+
+/* Converts an array of ints to an array of doubles, so that each double
+ * corresponds to an int. len is the number of items converted.
+ * Does not allocate the output array.
+ * The pointers d32 and i32 should point to arrays of size at least len
+ * (doubles and unsigned ints, respectively).
+ */
+
+void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
+
+/* Converts an array of ints to an array of doubles so that each element
+ * of the int array is converted to a pair of doubles, the first one
+ * corresponding to the lower (least significant) 16 bits of the int and
+ * the second one corresponding to the upper (most significant) 16 bits of
+ * the 32-bit int. len is the number of ints converted.
+ * Does not allocate the output array.
+ * The pointer d16 should point to an array of doubles of size at least
+ * 2*len and i32 should point to an array of ints of size at least len.
+ */
+
+void conv_i32_to_d32_and_d16(double *d32, double *d16,
+ unsigned int *i32, int len);
+
+/* Does the above two conversions together; it is much faster than doing
+ * both of those in succession.  d32 needs room for at least len doubles
+ * and d16 for at least 2*len doubles.
+ */
+
+void mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0);
+
+/* Does the Montgomery multiplication of the numbers stored in the arrays
+ * pointed to by dm1 and dm2, writing the result to the array pointed to by
+ * result. It uses the array pointed to by dt as a temporary work area.
+ * nint should point to the modulus in the array-of-integers representation,
+ * dn should point to its array-of-doubles representation, obtained from the
+ * function call conv_i32_to_d32(dn, nint, nlen);
+ * nlen is the length of the array containing the modulus.
+ * The representation used for dm1 is the one that is a result of the function
+ * call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the
+ * result of the function call conv_i32_to_d16(dm2, m2, nlen).
+ * Note that m1 and m2 should both be of length nlen, so they should be
+ * padded with 0's if necessary before the conversion. The result comes in
+ * this form (int representation, padded with 0's).
+ * dn0 is the value of the 16 least significant bits of n0'.
+ * The function does not allocate memory for any of the arrays, so the
+ * pointers should point to arrays with the following minimal sizes:
+ * result - nlen+1
+ * dm1 - nlen
+ * dm2 - 2*nlen+1 ( the extra element, dm2[2*nlen], is written by the
+ *       function itself )
+ * dt - 4*nlen+2
+ * dn - nlen
+ * nint - nlen
+ * No two arrays should point to overlapping areas of memory.
+ */
diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il
new file mode 100644
index 0000000000..4952d0fb82
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+! The argument arrives in %o0/%o1 and is moved to %f10 through the stack
+! slot at [%sp+0x48].  fdtox turns it into a 64-bit integer; fitod then
+! converts only the single register %f10 (the most significant word of
+! that integer) back to a double, which is returned in %f0.
+!
+ .inline upper32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+
+ fdtox %f10,%f10
+ fitod %f10,%f0
+ .end
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+! Both arguments are moved from the %o registers to %f10 and %f12 through
+! the stack slot at [%sp+0x48].  After fdtox converts the first argument to
+! a 64-bit integer, fmovs overwrites its most significant word with the
+! most significant word of the second argument (callers pass Zero), so
+! fxtod converts just the remaining low 32 bits.  Result is returned in %f0.
+!
+ .inline lower32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f12
+
+ fdtox %f10,%f10
+ fmovs %f12,%f10
+ fxtod %f10,%f0
+ .end
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+! Loads x, 1/m and m into %f2, %f4 and %f6 (through the stack slot at
+! [%sp+0x48]) and returns x - m * trunc(x * (1/m)) in %f0; the truncation
+! is the fdtox/fxtod round trip through a 64-bit integer.
+!
+ .inline mod,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f2
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+
+ fmuld %f2,%f4,%f4
+ fdtox %f4,%f4
+ fxtod %f4,%f4
+ fmuld %f4,%f6,%f4
+ fsubd %f2,%f4,%f0
+ .end
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */
+! float * /*source - should be unsigned int*
+! converted to float* */);
+!
+! Converts four consecutive 32-bit words at [%o5] in one pass:
+!  - each word is loaded into the odd half of a double register whose even
+!    half was preset from the zero constant, so fxtod yields the word as a
+!    double; these go to result32 at [%o4+0], +8, +16, +24
+!  - multiplying by 1/(2^16) and truncating (fdtox/fxtod) gives the high
+!    16 bits, stored at result16 offsets +8, +24, +40, +56
+!  - subtracting high * 2^16 from the full value gives the low 16 bits,
+!    stored at result16 offsets +0, +16, +32, +48
+!
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22
+
+ fmovd %f22,%f6
+ ld [%o5],%f7
+ fmovd %f22,%f10
+ ld [%o5+4],%f11
+ fmovd %f22,%f14
+ ld [%o5+8],%f15
+ fmovd %f22,%f18
+ ld [%o5+12],%f19
+ fxtod %f6,%f6
+ std %f6,[%o4]
+ fxtod %f10,%f10
+ std %f10,[%o4+8]
+ fxtod %f14,%f14
+ std %f14,[%o4+16]
+ fxtod %f18,%f18
+ std %f18,[%o4+24]
+ fmuld %f2,%f6,%f8
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8]
+ fxtod %f12,%f12
+ std %f12,[%o3+24]
+ fxtod %f16,%f16
+ std %f16,[%o3+40]
+ fxtod %f20,%f20
+ std %f20,[%o3+56]
+ fmuld %f8,%f4,%f8
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8
+ std %f8,[%o3]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s
new file mode 100644
index 0000000000..69d2a3c51b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.s
@@ -0,0 +1,1938 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+! Double-precision constants, each emitted as two 32-bit words with the
+! most significant word first:
+!   TwoTo16      0x40F00000 00000000 = 65536.0
+!   TwoToMinus16 0x3EF00000 00000000 = 1.0 / 65536.0
+!   Zero         0x00000000 00000000 = 0.0
+!   TwoTo32      0x41F00000 00000000 = 65536.0 * 65536.0
+!   TwoToMinus32 0x3DF00000 00000000 = 1.0 / (65536.0 * 65536.0)
+ .section ".data",#alloc,#write
+ .align 8
+TwoTo16: /* frequency 1.0 confidence 0.0 */
+ .word 1089470464
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+TwoToMinus16: /* frequency 1.0 confidence 0.0 */
+ .word 1055916032
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+Zero: /* frequency 1.0 confidence 0.0 */
+ .word 0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+TwoTo32: /* frequency 1.0 confidence 0.0 */
+ .word 1106247680
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+TwoToMinus32: /* frequency 1.0 confidence 0.0 */
+ .word 1039138816
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE cleanup
+!
+! Compiled form of the C routine cleanup(double *dt, int from, int tlen)
+! whose source is interleaved below as "! <line> !" comments.
+! Leaf routine: no register window is opened and it returns with retl.
+! %g1 starts at dt + 16*from (the address of dt[2*from]) and advances by 16
+! per iteration, so each pass of the loop at .L900000114 handles the pair
+! dt[i], dt[i+1].  %g4 counts i in steps of 2 and the loop continues while
+! i <= 2*tlen - 1 (held in %g3).
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global cleanup
+ cleanup: /* frequency 1.0 confidence 0.0 */
+! FILE montmulf.c
+
+! 1 !#define RF_INLINE_MACROS
+! 3 !static double TwoTo16=65536.0;
+! 4 !static double TwoToMinus16=1.0/65536.0;
+! 5 !static double Zero=0.0;
+! 6 !static double TwoTo32=65536.0*65536.0;
+! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0);
+! 9 !#ifdef RF_INLINE_MACROS
+! 11 !double upper32(double);
+! 12 !double lower32(double, double);
+! 13 !double mod(double, double, double);
+! 15 !#else
+! 17 !static double upper32(double x)
+! 18 !{
+! 19 ! return floor(x*TwoToMinus32);
+! 20 !}
+! 22 !static double lower32(double x, double y)
+! 23 !{
+! 24 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 25 !}
+! 27 !static double mod(double x, double oneoverm, double m)
+! 28 !{
+! 29 ! return x-m*floor(x*oneoverm);
+! 30 !}
+! 32 !#endif
+! 35 !void cleanup(double *dt, int from, int tlen)
+! 36 !{
+! 37 ! int i;
+! 38 ! double tmp,tmp1,x,x1;
+! 40 ! tmp=tmp1=Zero;
+
+/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2
+
+! 41 ! /* original code **
+! 42 ! for(i=2*from;i<2*tlen-2;i++)
+! 43 ! {
+! 44 ! x=dt[i];
+! 45 ! dt[i]=lower32(x,Zero)+tmp1;
+! 46 ! tmp1=tmp;
+! 47 ! tmp=upper32(x);
+! 48 ! }
+! 49 ! dt[tlen-2]+=tmp1;
+! 50 ! dt[tlen-1]+=tmp;
+! 51 ! **end original code ***/
+! 52 ! /* new code ***/
+! 53 ! for(i=2*from;i<2*tlen;i+=2)
+
+/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3
+/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0
+/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2
+/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4
+/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1
+/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4
+/* 0x001c 53 ( 3 4) */ cmp %g4,%g3
+/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56
+/* 0x0024 ( 4 5) */ fmovd %f0,%f2
+/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1
+/* 0x002c ( 4 5) */ sub %g3,1,%g3
+
+! 54 ! {
+! 55 ! x=dt[i];
+
+/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8
+ .L900000114: /* frequency 6.4 confidence 0.0 */
+/* 0x0034 ( 0 3) */ fdtox %f8,%f6
+
+! 56 ! x1=dt[i+1];
+
+/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10
+
+! 57 ! dt[i]=lower32(x,Zero)+tmp;
+! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 59 ! tmp=upper32(x);
+! 60 ! tmp1=upper32(x1);
+
+/* 0x003c 60 ( 0 1) */ add %g4,2,%g4
+/* 0x0040 ( 1 4) */ fdtox %f8,%f8
+/* 0x0044 ( 1 2) */ cmp %g4,%g3
+/* 0x0048 ( 5 6) */ fmovs %f0,%f6
+/* 0x004c ( 7 10) */ fxtod %f6,%f6
+/* 0x0050 ( 8 11) */ fdtox %f10,%f0
+/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2
+/* 0x0058 (10 11) */ std %f2,[%g1]
+/* 0x005c (12 15) */ ldd [%g2],%f2
+/* 0x0060 (14 15) */ fmovs %f2,%f0
+/* 0x0064 (16 19) */ fxtod %f0,%f6
+/* 0x0068 (17 20) */ fdtox %f10,%f0
+/* 0x006c (18 21) */ fitod %f8,%f2
+/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4
+/* 0x0074 (19 20) */ std %f4,[%g1+8]
+/* 0x0078 60 (19 20) */ add %g1,16,%g1
+/* 0x007c (20 23) */ fitod %f0,%f4
+/* 0x0080 (20 23) */ ldd [%g2],%f0
+/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86
+/* 0x0088 (21 24) */ ldd [%g1],%f8
+ .L77000116: /* frequency 1.0 confidence 0.0 */
+/* 0x008c ( 0 2) */ retl ! Result =
+/* 0x0090 ( 1 2) */ nop
+/* 0x0094 0 ( 0 0) */ .type cleanup,2
+/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! Compiled form of the C routine conv_d16_to_i32(i32, d16, tmp, ilen)
+! whose source is interleaved below as "! <line> !" comments.
+! Opens a register window (save %sp,-136,%sp).  Doubles are converted with
+! fdtox and moved to integer registers through the stack slots
+! [%sp+96] .. [%sp+128] (std followed by ldx).  The 0xffff mask is built as
+! %hi(0xfc00)+1023 and the 0xffffffff mask as (-1) srl 0.  The third
+! argument (%i2, tmp) is not referenced by any instruction here.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-136,%sp
+
+! 61 ! }
+! 62 ! /** end new code **/
+! 63 !}
+! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 67 !{
+! 68 !int i;
+! 69 !long long t, t1, a, b, c, d;
+! 71 ! t1=0;
+! 72 ! a=(long long)d16[0];
+
+/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0
+
+! 73 ! b=(long long)d16[1];
+! 74 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 74 ( 1 2) */ sub %i3,1,%g2
+/* 0x000c 67 ( 1 2) */ or %g0,%i0,%g5
+/* 0x0010 74 ( 2 3) */ cmp %g2,0
+/* 0x0014 71 ( 2 3) */ or %g0,0,%o4
+/* 0x0018 72 ( 3 6) */ fdtox %f0,%f0
+/* 0x001c ( 3 4) */ std %f0,[%sp+120]
+/* 0x0020 74 ( 3 4) */ or %g0,0,%o7
+/* 0x0024 67 ( 4 5) */ or %g0,%i3,%o0
+/* 0x0028 ( 4 5) */ sub %i3,2,%o2
+/* 0x002c 73 ( 5 8) */ ldd [%i1+8],%f0
+/* 0x0030 67 ( 5 6) */ sethi %hi(0xfc00),%o0
+/* 0x0034 ( 5 6) */ add %o2,1,%g3
+/* 0x0038 ( 6 7) */ add %o0,1023,%o1
+/* 0x003c ( 6 7) */ or %g0,%g5,%o5
+/* 0x0040 73 ( 7 10) */ fdtox %f0,%f0
+/* 0x0044 ( 7 8) */ std %f0,[%sp+112]
+/* 0x0048 72 (11 13) */ ldx [%sp+120],%g4
+/* 0x004c 73 (12 14) */ ldx [%sp+112],%g1
+/* 0x0050 74 (12 13) */ ble,pt %icc,.L900000214 ! tprob=0.56
+/* 0x0054 (12 13) */ sethi %hi(0xfc00),%g2
+/* 0x0058 67 (13 14) */ or %g0,-1,%g2
+/* 0x005c 74 (13 14) */ cmp %g3,3
+/* 0x0060 67 (14 15) */ srl %g2,0,%o3
+/* 0x0064 (14 15) */ or %g0,%i1,%g2
+/* 0x0068 74 (14 15) */ bl,pn %icc,.L77000134 ! tprob=0.44
+/* 0x006c (15 18) */ ldd [%g2+16],%f0
+
+! 75 ! {
+! 76 ! c=(long long)d16[2*i+2];
+! 77 ! t1+=a&0xffffffff;
+! 78 ! t=(a>>32);
+! 79 ! d=(long long)d16[2*i+3];
+! 80 ! t1+=(b&0xffff)<<16;
+
+/* 0x0070 80 (15 16) */ and %g1,%o1,%o0
+
+! 81 ! t+=(b>>16)+(t1>>32);
+! 82 ! i32[i]=t1&0xffffffff;
+! 83 ! t1=t;
+! 84 ! a=c;
+! 85 ! b=d;
+
+/* 0x0074 85 (15 16) */ add %g2,16,%g2
+/* 0x0078 80 (16 17) */ sllx %o0,16,%g3
+/* 0x007c 77 (16 17) */ and %g4,%o3,%o0
+/* 0x0080 76 (17 20) */ fdtox %f0,%f0
+/* 0x0084 (17 18) */ std %f0,[%sp+104]
+/* 0x0088 74 (17 18) */ add %o0,%g3,%o4
+/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2
+/* 0x0090 81 (18 19) */ srax %g1,16,%o0
+/* 0x0094 82 (18 19) */ and %o4,%o3,%o7
+/* 0x0098 81 (19 20) */ stx %o0,[%sp+112]
+/* 0x009c (19 20) */ srax %o4,32,%o0
+/* 0x00a0 85 (19 20) */ add %g5,4,%o5
+/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120]
+/* 0x00a8 78 (20 21) */ srax %g4,32,%o4
+/* 0x00ac 79 (20 23) */ fdtox %f2,%f0
+/* 0x00b0 (21 22) */ std %f0,[%sp+96]
+/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0
+/* 0x00b8 (23 25) */ ldx [%sp+120],%g4
+/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3
+/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4
+/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1
+/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4
+/* 0x00cc 82 (27 28) */ st %o7,[%g5]
+/* 0x00d0 (27 28) */ or %g0,1,%o7
+/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4
+ .L900000209: /* frequency 64.0 confidence 0.0 */
+/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0
+/* 0x00dc 85 (17 18) */ add %o7,1,%o7
+/* 0x00e0 (17 18) */ add %o5,4,%o5
+/* 0x00e4 (18 18) */ cmp %o7,%o2
+/* 0x00e8 (18 19) */ add %g2,16,%g2
+/* 0x00ec 76 (19 22) */ fdtox %f0,%f0
+/* 0x00f0 (20 21) */ std %f0,[%sp+104]
+/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0
+/* 0x00f8 (23 26) */ fdtox %f0,%f0
+/* 0x00fc (24 25) */ std %f0,[%sp+96]
+/* 0x0100 80 (25 26) */ and %g1,%o1,%g3
+/* 0x0104 (26 27) */ sllx %g3,16,%g3
+/* 0x0108 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x010c 77 (26 27) */ and %g4,%o3,%g3
+/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128]
+/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x0118 (27 27) */ add %g3,%o7,%g3
+/* 0x011c ( 0 0) */ ldx [%sp+128],%o7
+/* 0x0120 81 (28 29) */ srax %g1,16,%g1
+/* 0x0124 74 (28 28) */ add %g3,%o4,%g3
+/* 0x0128 81 (29 30) */ srax %g3,32,%o4
+/* 0x012c ( 0 0) */ stx %o4,[%sp+112]
+/* 0x0130 78 (30 31) */ srax %g4,32,%o4
+/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4
+/* 0x0138 (30 31) */ add %g1,%g4,%g4
+/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1
+/* 0x0140 81 (31 32) */ add %o4,%g4,%o4
+/* 0x0144 82 (32 33) */ and %g3,%o3,%g3
+/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4
+/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50
+/* 0x0150 (33 34) */ st %g3,[%o5-4]
+ .L900000212: /* frequency 8.0 confidence 0.0 */
+/* 0x0154 85 ( 0 1) */ ba .L900000214 ! tprob=1.00
+/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000134: /* frequency 0.7 confidence 0.0 */
+ .L900000213: /* frequency 6.4 confidence 0.0 */
+/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0
+/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3
+/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0
+/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0
+/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104]
+/* 0x0170 85 ( 1 2) */ add %o7,1,%o7
+/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x017c 85 ( 2 3) */ add %g2,16,%g2
+/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128]
+/* 0x0188 ( 4 5) */ srax %g1,16,%o0
+/* 0x018c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3
+/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0
+/* 0x0198 ( 5 6) */ stx %o0,[%sp+120]
+/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0
+/* 0x01a0 ( 6 7) */ std %f0,[%sp+96]
+/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4
+/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7
+/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4
+/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1
+/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4
+/* 0x01b8 (11 13) */ ldx [%sp+128],%o7
+/* 0x01bc (11 12) */ add %o4,%g4,%o4
+/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0
+/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4
+/* 0x01c8 82 (13 14) */ st %g3,[%o5]
+/* 0x01cc 85 (13 14) */ add %o5,4,%o5
+/* 0x01d0 (13 14) */ cmp %o7,%o2
+/* 0x01d4 (14 15) */ or %g0,%o0,%g1
+/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86
+/* 0x01dc (14 17) */ ldd [%g2+16],%f0
+ .L77000127: /* frequency 1.0 confidence 0.0 */
+
+! 86 ! }
+! 87 ! t1+=a&0xffffffff;
+! 88 ! t=(a>>32);
+! 89 ! t1+=(b&0xffff)<<16;
+! 90 ! i32[i]=t1&0xffffffff;
+
+/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000214: /* frequency 1.0 confidence 0.0 */
+/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3
+/* 0x01e8 ( 0 1) */ add %g2,1023,%g2
+/* 0x01ec ( 1 2) */ srl %g3,0,%g3
+/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2
+/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4
+/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2
+/* 0x01fc ( 3 4) */ add %o4,%g4,%g4
+/* 0x0200 ( 4 5) */ add %g4,%g2,%g2
+/* 0x0204 ( 5 6) */ sll %o7,2,%g4
+/* 0x0208 ( 5 6) */ and %g2,%g3,%g2
+/* 0x020c ( 6 7) */ st %g2,[%g5+%g4]
+/* 0x0210 ( 7 9) */ ret ! Result =
+/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0
+/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2
+/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+! The constant below is the double 0x43300000 00000000 (2^52).  A 32-bit
+! word placed in the low half of a register pair whose high half is
+! 0x43300000 reads as 2^52 + word, so subtracting the constant leaves the
+! word as a double.
+!
+ .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! Compiled form of the C routine conv_i32_to_d32(d32, i32, len) whose
+! source is interleaved below.  Leaf routine (returns with retl).
+! For len >= 9 the loop at .L900000305 converts five words per iteration
+! (index +5, source +20 bytes, destination +40 bytes); the loop at
+! .L900000309 converts one word per iteration and handles short inputs and
+! the remainder.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ orcc %g0,%o2,%g1
+
+! 92 !}
+! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 95 !{
+! 96 !int i;
+! 98 !#pragma pipeloop(0)
+! 99 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 99 ( 0 1) */ ble,pt %icc,.L77000140 ! tprob=0.56
+/* 0x0008 ( 0 1) */ nop
+/* 0x000c ( 1 2) */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0010 95 ( 1 2) */ or %g0,%o1,%g4
+/* 0x0014 99 ( 2 3) */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x0018 ( 2 3) */ or %g0,0,%o5
+/* 0x001c 95 ( 3 4) */ or %g0,%o0,%g5
+/* 0x0020 99 ( 3 4) */ sub %o2,1,%g3
+/* 0x0024 ( 4 5) */ cmp %o2,9
+/* 0x0028 ( 4 5) */ bl,pn %icc,.L77000144 ! tprob=0.44
+/* 0x002c ( 4 7) */ ldd [%g2],%f8
+/* 0x0030 ( 5 8) */ ld [%o1],%f7
+/* 0x0034 ( 5 6) */ add %o1,16,%g4
+/* 0x0038 ( 5 6) */ sub %o2,5,%g1
+/* 0x003c ( 6 9) */ ld [%o1+4],%f5
+/* 0x0040 ( 6 7) */ or %g0,4,%o5
+/* 0x0044 ( 7 10) */ ld [%o1+8],%f3
+/* 0x0048 ( 7 8) */ fmovs %f8,%f6
+/* 0x004c ( 8 11) */ ld [%o1+12],%f1
+ .L900000305: /* frequency 64.0 confidence 0.0 */
+/* 0x0050 ( 8 16) */ ld [%g4],%f11
+/* 0x0054 ( 8 9) */ add %o5,5,%o5
+/* 0x0058 ( 8 9) */ add %g4,20,%g4
+/* 0x005c ( 8 11) */ fsubd %f6,%f8,%f6
+/* 0x0060 ( 9 10) */ std %f6,[%g5]
+/* 0x0064 ( 9 9) */ cmp %o5,%g1
+/* 0x0068 ( 9 10) */ add %g5,40,%g5
+/* 0x006c ( 0 0) */ fmovs %f8,%f4
+/* 0x0070 (10 18) */ ld [%g4-16],%f7
+/* 0x0074 (10 13) */ fsubd %f4,%f8,%f12
+/* 0x0078 ( 0 0) */ fmovs %f8,%f2
+/* 0x007c (11 12) */ std %f12,[%g5-32]
+/* 0x0080 (12 20) */ ld [%g4-12],%f5
+/* 0x0084 (12 15) */ fsubd %f2,%f8,%f12
+/* 0x0088 ( 0 0) */ fmovs %f8,%f0
+/* 0x008c (13 14) */ std %f12,[%g5-24]
+/* 0x0090 (14 22) */ ld [%g4-8],%f3
+/* 0x0094 (14 17) */ fsubd %f0,%f8,%f12
+/* 0x0098 ( 0 0) */ fmovs %f8,%f10
+/* 0x009c (15 16) */ std %f12,[%g5-16]
+/* 0x00a0 (16 24) */ ld [%g4-4],%f1
+/* 0x00a4 (16 19) */ fsubd %f10,%f8,%f10
+/* 0x00a8 ( 0 0) */ fmovs %f8,%f6
+/* 0x00ac (17 18) */ ble,pt %icc,.L900000305 ! tprob=0.50
+/* 0x00b0 (17 18) */ std %f10,[%g5-8]
+ .L900000308: /* frequency 8.0 confidence 0.0 */
+/* 0x00b4 ( 0 1) */ fmovs %f8,%f4
+/* 0x00b8 ( 0 1) */ add %g5,32,%g5
+/* 0x00bc ( 0 1) */ cmp %o5,%g3
+/* 0x00c0 ( 1 2) */ fmovs %f8,%f2
+/* 0x00c4 ( 2 3) */ fmovs %f8,%f0
+/* 0x00c8 ( 4 7) */ fsubd %f6,%f8,%f6
+/* 0x00cc ( 4 5) */ std %f6,[%g5-32]
+/* 0x00d0 ( 5 8) */ fsubd %f4,%f8,%f4
+/* 0x00d4 ( 5 6) */ std %f4,[%g5-24]
+/* 0x00d8 ( 6 9) */ fsubd %f2,%f8,%f2
+/* 0x00dc ( 6 7) */ std %f2,[%g5-16]
+/* 0x00e0 ( 7 10) */ fsubd %f0,%f8,%f0
+/* 0x00e4 ( 7 8) */ bg,pn %icc,.L77000140 ! tprob=0.14
+/* 0x00e8 ( 7 8) */ std %f0,[%g5-8]
+ .L77000144: /* frequency 0.7 confidence 0.0 */
+/* 0x00ec ( 0 3) */ ld [%g4],%f1
+ .L900000309: /* frequency 6.4 confidence 0.0 */
+/* 0x00f0 ( 0 3) */ ldd [%g2],%f8
+/* 0x00f4 ( 0 1) */ add %o5,1,%o5
+/* 0x00f8 ( 0 1) */ add %g4,4,%g4
+/* 0x00fc ( 1 2) */ cmp %o5,%g3
+/* 0x0100 ( 2 3) */ fmovs %f8,%f0
+/* 0x0104 ( 4 7) */ fsubd %f0,%f8,%f0
+/* 0x0108 ( 4 5) */ std %f0,[%g5]
+/* 0x010c ( 4 5) */ add %g5,8,%g5
+/* 0x0110 ( 4 5) */ ble,a,pt %icc,.L900000309 ! tprob=0.86
+/* 0x0114 ( 6 9) */ ld [%g4],%f1
+ .L77000140: /* frequency 1.0 confidence 0.0 */
+/* 0x0118 ( 0 2) */ retl ! Result =
+/* 0x011c ( 1 2) */ nop
+/* 0x0120 0 ( 0 0) */ .type conv_i32_to_d32,2
+/* 0x0120 ( 0 0) */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+! The constant below is the double 0x43300000 00000000 (2^52), used as the
+! bias for integer-to-double conversion: an integer stored in the low word
+! under a high word of 0x43300000 reads as 2^52 + integer, and subtracting
+! the constant leaves the integer as a double.
+!
+ .L_const_seg_900000401: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! Compiled form of the C routine conv_i32_to_d16(d16, i32, len) whose
+! source is interleaved below.  Opens a register window
+! (save %sp,-104,%sp).  For each word, the low half (and with 0xffff, built
+! as %hi(0xfc00)+1023) and the high half (srl 16) are written to the stack
+! slots [%sp+96] and [%sp+92], reloaded into %f3, and converted with the
+! bias constant.  The loop at .L900000406 is used when len >= 3, the loop
+! at .L900000410 otherwise.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ orcc %g0,%i2,%o0
+
+! 100 !}
+! 103 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 104 !{
+! 105 !int i;
+! 106 !unsigned int a;
+! 108 !#pragma pipeloop(0)
+! 109 ! for(i=0;i<len;i++)
+
+/* 0x0008 109 ( 1 2) */ ble,pt %icc,.L77000150 ! tprob=0.56
+/* 0x000c ( 1 2) */ nop
+/* 0x0010 ( 2 3) */ sub %o0,1,%o5
+/* 0x0014 ( 2 3) */ sethi %hi(0xfc00),%g2
+
+! 110 ! {
+! 111 ! a=i32[i];
+! 112 ! d16[2*i]=(double)(a&0xffff);
+! 113 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0
+/* 0x001c ( 3 4) */ add %o5,1,%g3
+/* 0x0020 ( 4 5) */ add %g2,1023,%o4
+/* 0x0024 109 ( 4 5) */ or %g0,0,%g1
+/* 0x0028 ( 5 6) */ cmp %g3,3
+/* 0x002c ( 5 6) */ or %g0,%i1,%o7
+/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3
+/* 0x0034 ( 6 7) */ or %g0,%i0,%g2
+/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! tprob=0.44
+/* 0x003c ( 7 8) */ add %o7,4,%o0
+/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0
+/* 0x0044 113 ( 7 8) */ or %g0,1,%g1
+/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1
+/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7
+/* 0x0050 112 (10 11) */ and %o1,%o4,%o0
+ .L900000406: /* frequency 64.0 confidence 0.0 */
+/* 0x0054 112 (22 23) */ st %o0,[%sp+96]
+/* 0x0058 113 (22 23) */ add %g1,1,%g1
+/* 0x005c (22 23) */ add %g2,16,%g2
+/* 0x0060 (23 23) */ cmp %g1,%o5
+/* 0x0064 (23 24) */ add %o7,4,%o7
+/* 0x0068 112 (29 31) */ ld [%sp+96],%f3
+/* 0x006c ( 0 0) */ fmovs %f0,%f2
+/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2
+/* 0x0074 113 (32 33) */ srl %o1,16,%o0
+/* 0x0078 112 (32 33) */ std %f2,[%g2-16]
+/* 0x007c 113 (33 34) */ st %o0,[%sp+92]
+/* 0x0080 (40 42) */ ld [%sp+92],%f3
+/* 0x0084 111 (41 43) */ ld [%o7-4],%o1
+/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2
+/* 0x008c (42 45) */ fsubd %f2,%f0,%f2
+/* 0x0090 112 (43 44) */ and %o1,%o4,%o0
+/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50
+/* 0x0098 (43 44) */ std %f2,[%g2-8]
+ .L900000409: /* frequency 8.0 confidence 0.0 */
+/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96]
+/* 0x00a0 ( 0 1) */ fmovs %f0,%f2
+/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2
+/* 0x00a8 ( 1 2) */ srl %o1,16,%o0
+/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x00b4 ( 6 7) */ std %f2,[%g2-16]
+/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x00bc (10 11) */ fmovs %f0,%f2
+/* 0x00c0 (11 14) */ ld [%sp+92],%f3
+/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x00c8 (13 14) */ std %f0,[%g2-8]
+/* 0x00cc (14 16) */ ret ! Result =
+/* 0x00d0 (16 17) */ restore %g0,%g0,%g0
+ .L77000154: /* frequency 0.7 confidence 0.0 */
+/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0
+ .L900000410: /* frequency 6.4 confidence 0.0 */
+/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1
+/* 0x00dc ( 0 1) */ st %o1,[%sp+96]
+/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1
+/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0
+/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0
+/* 0x00ec ( 1 2) */ add %o7,4,%o7
+/* 0x00f0 ( 2 3) */ cmp %g1,%o5
+/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2
+/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x0100 ( 6 7) */ std %f2,[%g2]
+/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x0108 (10 11) */ fmovs %f0,%f2
+/* 0x010c (11 14) */ ld [%sp+92],%f3
+/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x0114 (13 14) */ std %f0,[%g2+8]
+/* 0x0118 (13 14) */ add %g2,16,%g2
+/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86
+/* 0x0120 (14 17) */ ld [%o7],%o0
+ .L77000150: /* frequency 1.0 confidence 0.0 */
+/* 0x0124 ( 0 2) */ ret ! Result =
+/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2
+/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+! 1127219200 is 0x43300000; followed by a zero low word it is the bit
+! pattern of the IEEE-754 double 2^52.  The tail loops of
+! conv_i32_to_d32_and_d16 load it into %f0 and use it to turn an unsigned
+! 32-bit integer into a double (see the note before offset 0x0128).
+!
+ .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! Per the interleaved C source (lines 126-145): for each of the len words
+! of i32, store the word as a double in d32[i], its low 16 bits as a double
+! in d16[2*i] and its high 16 bits as a double in d16[2*i+1].
+! After the register-window save: %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len.
+! The first loop handles four words per iteration (the body of
+! i16_to_d16_and_d32x4 is expanded in place); the remaining 0..3 words
+! are handled by the tail loops.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ or %g0,%i3,%i4
+/* 0x0008 ( 1 2) */ or %g0,%i2,%g1
+
+! 114 ! }
+! 115 !}
+! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! 119 ! double * /* 0 */,
+! 120 ! double * /*result16*/, double * /* result32 */,
+! 121 ! float * /*source - should be unsigned int*
+! 122 ! converted to float* */);
+! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 127 ! unsigned int *i32, int len)
+! 128 !{
+! 129 !int i;
+! 130 !unsigned int a;
+! 132 !#pragma pipeloop(0)
+! 133 ! for(i=0;i<len-3;i+=4)
+
+/* 0x000c 133 ( 2 3) */ sub %i4,3,%g2
+/* 0x0010 ( 2 3) */ or %g0,0,%o7
+/* 0x0014 ( 3 4) */ cmp %g2,0
+/* 0x0018 128 ( 3 4) */ or %g0,%i0,%i3
+/* 0x001c 133 ( 3 4) */ ble,pt %icc,.L900000515 ! tprob=0.56
+/* 0x0020 ( 4 5) */ cmp %o7,%i4
+
+! 134 ! {
+! 135 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 136 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+! From here on: %i4 = len, %i3 = d32, %g1 = i32, %o7 = i.
+! %o1 = &Zero.  The loop also reads the doubles at [%o1-8] and [%o1-16];
+! from the way they are used (multiply, truncate, multiply back, subtract)
+! these are presumably TwoToMinus16 and TwoTo16 laid out directly before
+! Zero -- confirm against the data section.
+! %o2 = len-4 is the loop bound; %o5 and %o4 are byte offsets into d32 and d16.
+/* 0x0024 136 ( 4 5) */ sethi %hi(Zero),%g2
+/* 0x0028 133 ( 5 6) */ or %g0,%g1,%o3
+/* 0x002c ( 5 6) */ sub %i4,4,%o2
+/* 0x0030 136 ( 6 7) */ add %g2,%lo(Zero),%o1
+/* 0x0034 133 ( 6 7) */ or %g0,0,%o5
+/* 0x0038 ( 7 8) */ or %g0,0,%o4
+/* 0x003c 136 ( 7 8) */ or %g0,%o3,%g4
+! Four-words-per-iteration loop.  Each source word is loaded into the odd
+! (low) half of a register pair whose even half was copied from Zero, so
+! fxtod converts the zero-extended 32-bit value.  The high 16 bits are
+! obtained as trunc(x * [%o1-8]); the low 16 bits as x - high * [%o1-16].
+ .L900000514: /* frequency 6.4 confidence 0.0 */
+/* 0x0040 ( 0 3) */ ldd [%o1],%f2
+/* 0x0044 136 ( 0 1) */ add %i3,%o5,%g2
+/* 0x0048 ( 0 1) */ add %i1,%o4,%g3
+/* 0x004c ( 1 4) */ ldd [%o1-8],%f0
+/* 0x0050 ( 1 2) */ add %o7,4,%o7
+/* 0x0054 ( 1 2) */ add %o3,16,%o3
+/* 0x0058 ( 2 3) */ fmovd %f2,%f14
+/* 0x005c ( 2 5) */ ld [%g4],%f15
+/* 0x0060 ( 2 3) */ cmp %o7,%o2
+/* 0x0064 ( 3 4) */ fmovd %f2,%f10
+/* 0x0068 ( 3 6) */ ld [%g4+4],%f11
+/* 0x006c ( 4 5) */ fmovd %f2,%f6
+/* 0x0070 ( 4 7) */ ld [%g4+8],%f7
+/* 0x0074 ( 5 8) */ ld [%g4+12],%f3
+/* 0x0078 ( 5 8) */ fxtod %f14,%f14
+/* 0x007c ( 6 9) */ fxtod %f10,%f10
+/* 0x0080 ( 6 9) */ ldd [%o1-16],%f16
+/* 0x0084 ( 7 10) */ fxtod %f6,%f6
+/* 0x0088 ( 7 8) */ std %f14,[%i3+%o5]
+/* 0x008c ( 7 8) */ add %o5,32,%o5
+/* 0x0090 ( 8 11) */ fxtod %f2,%f2
+/* 0x0094 ( 8 11) */ fmuld %f0,%f14,%f12
+/* 0x0098 ( 8 9) */ std %f10,[%g2+8]
+/* 0x009c ( 9 12) */ fmuld %f0,%f10,%f8
+/* 0x00a0 ( 9 10) */ std %f6,[%g2+16]
+/* 0x00a4 (10 13) */ fmuld %f0,%f6,%f4
+/* 0x00a8 (10 11) */ std %f2,[%g2+24]
+/* 0x00ac (11 14) */ fmuld %f0,%f2,%f0
+/* 0x00b0 (11 14) */ fdtox %f12,%f12
+/* 0x00b4 (12 15) */ fdtox %f8,%f8
+/* 0x00b8 (13 16) */ fdtox %f4,%f4
+/* 0x00bc (14 17) */ fdtox %f0,%f0
+/* 0x00c0 (15 18) */ fxtod %f12,%f12
+/* 0x00c4 (15 16) */ std %f12,[%g3+8]
+/* 0x00c8 (16 19) */ fxtod %f8,%f8
+/* 0x00cc (16 17) */ std %f8,[%g3+24]
+/* 0x00d0 (17 20) */ fxtod %f4,%f4
+/* 0x00d4 (17 18) */ std %f4,[%g3+40]
+/* 0x00d8 (18 21) */ fxtod %f0,%f0
+/* 0x00dc (18 21) */ fmuld %f12,%f16,%f12
+/* 0x00e0 (18 19) */ std %f0,[%g3+56]
+/* 0x00e4 (19 22) */ fmuld %f8,%f16,%f8
+/* 0x00e8 (20 23) */ fmuld %f4,%f16,%f4
+/* 0x00ec (21 24) */ fmuld %f0,%f16,%f0
+/* 0x00f0 (21 24) */ fsubd %f14,%f12,%f12
+/* 0x00f4 (21 22) */ std %f12,[%i1+%o4]
+/* 0x00f8 (22 25) */ fsubd %f10,%f8,%f8
+/* 0x00fc (22 23) */ std %f8,[%g3+16]
+/* 0x0100 (22 23) */ add %o4,64,%o4
+/* 0x0104 (23 26) */ fsubd %f6,%f4,%f4
+/* 0x0108 (23 24) */ std %f4,[%g3+32]
+/* 0x010c (24 27) */ fsubd %f2,%f0,%f0
+/* 0x0110 (24 25) */ std %f0,[%g3+48]
+/* 0x0114 (24 25) */ ble,pt %icc,.L900000514 ! tprob=0.86
+/* 0x0118 (25 26) */ or %g0,%o3,%g4
+ .L77000159: /* frequency 1.0 confidence 0.0 */
+
+! 137 ! }
+! 138 ! for(;i<len;i++)
+
+/* 0x011c 138 ( 0 1) */ cmp %o7,%i4
+ .L900000515: /* frequency 1.0 confidence 0.0 */
+/* 0x0120 138 ( 0 1) */ bge,pt %icc,.L77000164 ! tprob=0.56
+/* 0x0124 ( 0 1) */ nop
+
+! 139 ! {
+! 140 ! a=i32[i];
+! 141 ! d32[i]=(double)(i32[i]);
+! 142 ! d16[2*i]=(double)(a&0xffff);
+! 143 ! d16[2*i+1]=(double)(a>>16);
+
+! Tail conversion idiom used below: %f0:%f1 holds 2^52 from the constant
+! pool.  "fmovs %f0,%f2" copies its high word, the integer is loaded into
+! %f3 as the low word, and "fsubd %f2,%f0" subtracts 2^52, leaving the
+! integer's value as a double.  Integer-register values reach the FPU
+! through the stack scratch slots [%sp+92] and [%sp+96].
+! %o3 = 0xfc00 + 1023 = 0xffff is the low-halfword mask; %o0, %o4 and %o5
+! point at i32[i], d32[i] and d16[2*i]; %o1 = len-1.
+! If fewer than three words remain, control goes straight to the simple
+! loop at .L77000161; otherwise one word is converted here and then
+! .L900000509 converts two words per iteration while i < len-1.
+/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1
+/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0
+/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0
+/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3
+/* 0x0138 ( 2 3) */ sll %o7,2,%g2
+/* 0x013c ( 2 3) */ add %o0,1023,%o3
+/* 0x0140 ( 3 4) */ sll %o7,3,%g4
+/* 0x0144 ( 3 4) */ cmp %g3,3
+/* 0x0148 ( 4 5) */ add %g1,%g2,%o0
+/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2
+/* 0x0150 ( 5 6) */ add %i3,%g4,%o4
+/* 0x0154 ( 5 6) */ sub %i4,1,%o1
+/* 0x0158 ( 6 7) */ sll %o7,4,%g5
+/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44
+/* 0x0160 ( 7 8) */ add %i1,%g5,%o5
+/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3
+/* 0x0168 143 ( 7 8) */ add %o4,8,%o4
+/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1
+/* 0x0170 143 ( 8 9) */ add %o5,16,%o5
+/* 0x0174 ( 8 9) */ add %o7,1,%o7
+/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2
+/* 0x017c 143 ( 9 10) */ add %o0,4,%o0
+/* 0x0180 142 (10 11) */ and %g1,%o3,%g2
+/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x0188 (11 12) */ std %f2,[%o4-8]
+/* 0x018c 143 (11 12) */ srl %g1,16,%g1
+/* 0x0190 142 (12 13) */ st %g2,[%sp+96]
+/* 0x0194 (15 16) */ fmovs %f0,%f2
+/* 0x0198 (16 19) */ ld [%sp+96],%f3
+/* 0x019c (18 21) */ fsubd %f2,%f0,%f2
+/* 0x01a0 (18 19) */ std %f2,[%o5-16]
+/* 0x01a4 143 (19 20) */ st %g1,[%sp+92]
+/* 0x01a8 (22 23) */ fmovs %f0,%f2
+/* 0x01ac (23 26) */ ld [%sp+92],%f3
+/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2
+/* 0x01b4 (25 26) */ std %f2,[%o5-8]
+ .L900000509: /* frequency 64.0 confidence 0.0 */
+/* 0x01b8 141 (26 28) */ ld [%o0],%f3
+/* 0x01bc 143 (26 27) */ add %o7,2,%o7
+/* 0x01c0 (26 27) */ add %o5,32,%o5
+/* 0x01c4 140 (27 29) */ ld [%o0],%g1
+/* 0x01c8 143 (27 27) */ cmp %o7,%o1
+/* 0x01cc (27 28) */ add %o4,16,%o4
+/* 0x01d0 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2
+/* 0x01d8 (29 30) */ std %f2,[%o4-16]
+/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2
+/* 0x01e0 (30 31) */ st %g2,[%sp+96]
+/* 0x01e4 (37 39) */ ld [%sp+96],%f3
+/* 0x01e8 ( 0 0) */ fmovs %f0,%f2
+/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2
+/* 0x01f0 143 (40 41) */ srl %g1,16,%g1
+/* 0x01f4 142 (40 41) */ std %f2,[%o5-32]
+/* 0x01f8 143 (41 42) */ st %g1,[%sp+92]
+/* 0x01fc (48 50) */ ld [%sp+92],%f3
+/* 0x0200 ( 0 0) */ fmovs %f0,%f2
+/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2
+/* 0x0208 (51 52) */ std %f2,[%o5-24]
+/* 0x020c (51 52) */ add %o0,4,%o0
+/* 0x0210 141 (52 54) */ ld [%o0],%f3
+/* 0x0214 140 (53 55) */ ld [%o0],%g1
+/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x021c (54 57) */ fsubd %f2,%f0,%f2
+/* 0x0220 (55 56) */ std %f2,[%o4-8]
+/* 0x0224 142 (55 56) */ and %g1,%o3,%g2
+/* 0x0228 (56 57) */ st %g2,[%sp+96]
+/* 0x022c (63 65) */ ld [%sp+96],%f3
+/* 0x0230 ( 0 0) */ fmovs %f0,%f2
+/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2
+/* 0x0238 143 (66 67) */ srl %g1,16,%g1
+/* 0x023c 142 (66 67) */ std %f2,[%o5-16]
+/* 0x0240 143 (67 68) */ st %g1,[%sp+92]
+/* 0x0244 (74 76) */ ld [%sp+92],%f3
+/* 0x0248 ( 0 0) */ fmovs %f0,%f2
+/* 0x024c (76 79) */ fsubd %f2,%f0,%f2
+/* 0x0250 (77 78) */ std %f2,[%o5-8]
+/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50
+/* 0x0258 (77 78) */ add %o0,4,%o0
+ .L900000512: /* frequency 8.0 confidence 0.0 */
+/* 0x025c 143 ( 0 1) */ cmp %o7,%i4
+/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! tprob=0.14
+/* 0x0264 ( 0 1) */ nop
+! Simple one-word-per-iteration loop; it reloads 2^52 through %o2 on every
+! pass and runs while i < len.
+ .L77000161: /* frequency 0.7 confidence 0.0 */
+/* 0x0268 141 ( 0 3) */ ld [%o0],%f3
+ .L900000513: /* frequency 6.4 confidence 0.0 */
+/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0
+/* 0x0270 143 ( 0 1) */ add %o7,1,%o7
+/* 0x0274 140 ( 1 4) */ ld [%o0],%o1
+/* 0x0278 143 ( 1 2) */ add %o0,4,%o0
+/* 0x027c ( 1 2) */ cmp %o7,%i4
+/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2
+/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1
+/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2
+/* 0x028c ( 4 5) */ std %f2,[%o4]
+/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1
+/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96]
+/* 0x0298 143 ( 5 6) */ add %o4,8,%o4
+/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2
+/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3
+/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x02a8 (11 12) */ std %f2,[%o5]
+/* 0x02ac 143 (12 13) */ st %o1,[%sp+92]
+/* 0x02b0 (15 16) */ fmovs %f0,%f2
+/* 0x02b4 (16 19) */ ld [%sp+92],%f3
+/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0
+/* 0x02bc (18 19) */ std %f0,[%o5+8]
+/* 0x02c0 (18 19) */ add %o5,16,%o5
+/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86
+/* 0x02c8 (19 22) */ ld [%o0],%f3
+ .L77000164: /* frequency 1.0 confidence 0.0 */
+/* 0x02cc ( 0 2) */ ret ! Result =
+/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2
+/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE adjust_montf_result
+!
+! Per the interleaved C source (lines 148-171): decide whether the
+! (len+1)-word value i32 must be reduced by the len-word value nint, and if
+! so subtract nint from i32[0..len-1] in place.  The subtraction happens
+! when i32[len] is non-zero, when all len words compare equal, or when the
+! most significant differing word of i32 is larger than that of nint.
+! Leaf routine (no register window): %o0 = i32, %o1 = nint, %o2 = len.
+! The borrow is carried in a 64-bit accumulator (%g5, shifted with srax).
+!
+! NOTE: the comparison loop was hand-corrected to walk from the most
+! significant word downwards (i--).  The generated code incremented the
+! index and both pointers, so whenever i32[len-1] == nint[len-1] it read
+! past the end of the arrays instead of examining the lower words.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global adjust_montf_result
+ adjust_montf_result: /* frequency 1.0 confidence 0.0 */
+
+! 144 ! }
+! 145 !}
+! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 149 !{
+! 150 !long long acc;
+! 151 !int i;
+! 153 ! if(i32[len]>0) i=-1;
+
+! %g3 = i, preset to -1 for the i32[len] != 0 case; %o3 keeps a copy of nint.
+/* 000000 153 ( 0 1) */ sll %o2,2,%g1
+/* 0x0004 ( 0 1) */ or %g0,-1,%g3
+/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1
+/* 0x000c ( 3 4) */ cmp %g1,0
+/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50
+/* 0x0014 ( 3 4) */ or %g0,%o1,%o3
+/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00
+/* 0x001c ( 4 5) */ cmp %g3,0
+ .L77000175: /* frequency 0.8 confidence 0.0 */
+
+! 154 ! else
+! 155 ! {
+! 156 ! for(i=len-1; i>=0; i--)
+
+! %g2 -> i32[len-1], %g1 -> nint[len-1]; nint[i] is preloaded into %g5.
+/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3
+/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60
+/* 0x0028 ( 1 2) */ cmp %g3,0
+/* 0x002c ( 1 2) */ sll %g3,2,%g1
+/* 0x0030 ( 2 3) */ add %o0,%g1,%g2
+/* 0x0034 ( 2 3) */ add %o1,%g1,%g1
+
+! 157 ! {
+! 158 ! if(i32[i]!=nint[i]) break;
+
+/* 0x0038 158 ( 3 6) */ ld [%g1],%g5
+! Both pointers and the index step downwards.  The annulled delay-slot
+! load of the next nint word only executes while i is still >= 0, so
+! nothing below nint[0] is read.
+ .L900000610: /* frequency 5.3 confidence 0.0 */
+/* 0x003c 158 ( 0 3) */ ld [%g2],%o5
+/* 0x0040 ( 0 1) */ sub %g1,4,%g1
+/* 0x0044 ( 0 1) */ sub %g2,4,%g2
+/* 0x0048 ( 2 3) */ cmp %o5,%g5
+/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16
+/* 0x0050 ( 2 3) */ nop
+/* 0x0054 ( 3 4) */ subcc %g3,1,%g3
+/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84
+/* 0x005c ( 3 6) */ ld [%g1],%g5
+ .L77000182: /* frequency 1.0 confidence 0.0 */
+
+! 159 ! }
+! 160 ! }
+! 161 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0060 161 ( 0 1) */ cmp %g3,0
+ .L900000611: /* frequency 1.0 confidence 0.0 */
+/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50
+/* 0x0068 ( 0 1) */ sll %g3,2,%g2
+/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1
+/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2
+/* 0x0074 ( 4 5) */ cmp %g2,%g1
+/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56
+/* 0x007c ( 4 5) */ nop
+ .L77000198: /* frequency 0.8 confidence 0.0 */
+
+! 162 ! {
+! 163 ! acc=0;
+! 164 ! for(i=0;i<len;i++)
+
+! %g3 = 0xffffffff mask, %g4 = len-1 (loop bound), %g5 = acc, %o5 = i,
+! %o4 walks i32, %o3 walks nint.  For len < 3 only the simple loop at
+! .L77000199 is used; otherwise the first word is handled here and the
+! pipelined loop at .L900000605 handles the rest.
+/* 0x0080 164 ( 0 1) */ cmp %o2,0
+/* 0x0084 ( 0 1) */ ble,pt %icc,.L77000191 ! tprob=0.60
+/* 0x0088 ( 0 1) */ nop
+/* 0x008c 161 ( 1 2) */ or %g0,-1,%g2
+/* 0x0090 ( 1 2) */ sub %o2,1,%g4
+/* 0x0094 ( 2 3) */ srl %g2,0,%g3
+/* 0x0098 163 ( 2 3) */ or %g0,0,%g5
+/* 0x009c 164 ( 3 4) */ or %g0,0,%o5
+/* 0x00a0 161 ( 3 4) */ or %g0,%o0,%o4
+/* 0x00a4 ( 4 5) */ cmp %o2,3
+/* 0x00a8 ( 4 5) */ add %o1,4,%g2
+/* 0x00ac 164 ( 4 5) */ bl,pn %icc,.L77000199 ! tprob=0.40
+/* 0x00b0 ( 5 6) */ add %o0,8,%g1
+
+! 165 ! {
+! 166 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b4 166 ( 5 8) */ ld [%o0],%o2
+/* 0x00b8 0 ( 5 6) */ or %g0,%g2,%o3
+/* 0x00bc 166 ( 6 9) */ ld [%o1],%o1
+/* 0x00c0 0 ( 6 7) */ or %g0,%g1,%o4
+
+! 167 ! i32[i]=acc&0xffffffff;
+! 168 ! acc=acc>>32;
+
+/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5
+/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1
+/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2
+/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5
+/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2
+/* 0x00d8 ( 9 10) */ st %o2,[%o0]
+/* 0x00dc 168 (10 11) */ srax %g5,32,%g5
+ .L900000605: /* frequency 64.0 confidence 0.0 */
+/* 0x00e0 166 (12 20) */ ld [%o3],%o2
+/* 0x00e4 168 (12 13) */ add %o5,1,%o5
+/* 0x00e8 (12 13) */ add %o3,4,%o3
+/* 0x00ec (13 13) */ cmp %o5,%g4
+/* 0x00f0 (13 14) */ add %o4,4,%o4
+/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1
+/* 0x00f8 (15 15) */ add %g1,%g5,%g5
+/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2
+/* 0x0100 166 (16 24) */ ld [%o4-4],%g1
+/* 0x0104 167 (17 18) */ st %o2,[%o4-8]
+/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50
+/* 0x010c (17 18) */ srax %g5,32,%g5
+! Final word of the pipelined path; the store sits in the retl delay slot.
+ .L900000608: /* frequency 8.0 confidence 0.0 */
+/* 0x0110 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0118 ( 3 4) */ add %g1,%g5,%g1
+/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2
+/* 0x0120 ( 5 7) */ retl ! Result =
+/* 0x0124 ( 6 7) */ st %g2,[%o4-4]
+ .L77000199: /* frequency 0.6 confidence 0.0 */
+/* 0x0128 166 ( 0 3) */ ld [%o4],%g1
+ .L900000609: /* frequency 5.3 confidence 0.0 */
+/* 0x012c 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0130 ( 0 1) */ add %g5,%g1,%g1
+/* 0x0134 168 ( 0 1) */ add %o5,1,%o5
+/* 0x0138 ( 1 2) */ add %o3,4,%o3
+/* 0x013c ( 1 2) */ cmp %o5,%g4
+/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2
+/* 0x0148 ( 3 4) */ st %g2,[%o4]
+/* 0x014c 168 ( 3 4) */ add %o4,4,%o4
+/* 0x0150 ( 4 5) */ srax %g1,32,%g5
+/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84
+/* 0x0158 ( 4 7) */ ld [%o4],%g1
+ .L77000191: /* frequency 1.0 confidence 0.0 */
+/* 0x015c ( 0 2) */ retl ! Result =
+/* 0x0160 ( 1 2) */ nop
+/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2
+/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 32
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-144,%sp
+/* 0x0004 ( 1 2) */ st %i0,[%fp+68]
+
+! 169 ! }
+! 170 ! }
+! 171 !}
+! 175 !void cleanup(double *dt, int from, int tlen);
+! 177 !/*
+! 178 !** the lengths of the input arrays should be at least the following:
+! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 180 !** all of them should be different from one another
+! 181 !**
+! 182 !*/
+! 183 !void mont_mulf_noconv(unsigned int *result,
+! 184 ! double *dm1, double *dm2, double *dt,
+! 185 ! double *dn, unsigned int *nint,
+! 186 ! int nlen, double dn0)
+! 187 !{
+! 188 ! int i, j, jj;
+! 189 ! int tmp;
+! 190 ! double digit, m2j, nextm2j, a, b;
+! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 193 ! pdm1=&(dm1[0]);
+! 194 ! pdm2=&(dm2[0]);
+! 195 ! pdn=&(dn[0]);
+! 196 ! pdm2[2*nlen]=Zero;
+
+/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2
+/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1
+/* 0x0010 ( 2 3) */ st %i5,[%fp+88]
+/* 0x0014 ( 2 3) */ or %g0,%i3,%o2
+/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4
+/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2
+/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5
+/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0
+/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2
+
+! 198 ! if (nlen!=16)
+! 199 ! {
+! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 202 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 205 ! pdtj=&(dt[0]);
+! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 207 ! {
+! 208 ! m2j=pdm2[j];
+! 209 ! a=pdtj[0]+pdn[0]*digit;
+! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 211 ! pdtj[1]=b;
+! 213 !#pragma pipeloop(0)
+! 214 ! for(i=1;i<nlen;i++)
+! 215 ! {
+! 216 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 217 ! }
+! 218 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 219 !
+! 220 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 221 ! }
+! 222 ! }
+! 223 ! else
+! 224 ! {
+! 225 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 227 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 228 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 229 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 230 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 231 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 232 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 233 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 234 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 235 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 236 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 237 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 239 ! pdn_0=pdn[0];
+! 240 ! pdm1_0=pdm1[0];
+! 242 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 243 ! pdtj=&(dt[0]);
+! 245 ! for(j=0;j<32;j++,pdtj++)
+! 246 ! {
+! 248 ! m2j=pdm2[j];
+! 249 ! a=pdtj[0]+pdn_0*digit;
+! 250 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+! 251 ! pdtj[1]=b;
+! 253 ! /**** this loop will be fully unrolled:
+! 254 ! for(i=1;i<16;i++)
+! 255 ! {
+! 256 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 257 ! }
+! 258 ! *************************************/
+! 259 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 260 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 261 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 262 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 263 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 264 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 265 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 266 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 267 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 268 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 269 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 270 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 271 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 272 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 273 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 274 ! /* no need for cleanup, cannot overflow */
+! 275 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 276 ! }
+! 277 ! }
+! 279 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+! 281 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x002c 281 ( 4 5) */ or %g0,1,%o4
+/* 0x0030 187 ( 6 9) */ ldd [%fp+96],%f0
+/* 0x0034 196 ( 7 10) */ ld [%fp+92],%o0
+/* 0x0038 187 ( 8 9) */ fmovd %f0,%f16
+/* 0x003c 196 ( 9 10) */ sll %o0,4,%g2
+/* 0x0040 ( 9 10) */ or %g0,%o0,%g1
+/* 0x0044 198 (10 11) */ cmp %o0,16
+/* 0x0048 (10 11) */ be,pn %icc,.L77000289 ! tprob=0.50
+/* 0x004c (10 11) */ std %f2,[%o1+%g2]
+/* 0x0050 200 (11 12) */ sll %o0,2,%g2
+/* 0x0054 (11 14) */ ldd [%g4],%f2
+/* 0x0058 (12 13) */ add %g2,2,%o1
+/* 0x005c (12 13) */ add %g2,1,%o3
+/* 0x0060 196 (13 14) */ sll %o0,1,%o7
+/* 0x0064 200 (13 14) */ cmp %o1,0
+/* 0x0068 (13 14) */ ble,a,pt %icc,.L900000755 ! tprob=0.55
+/* 0x006c (14 17) */ ldd [%i1],%f0
+/* 0x0070 (14 15) */ cmp %o1,3
+/* 0x0074 281 (14 15) */ or %g0,1,%o1
+/* 0x0078 (14 15) */ bl,pn %icc,.L77000279 ! tprob=0.40
+/* 0x007c (15 16) */ add %o2,8,%o0
+/* 0x0080 (15 16) */ std %f2,[%g5]
+/* 0x0084 0 (16 17) */ or %g0,%o0,%o2
+ .L900000726: /* frequency 64.0 confidence 0.0 */
+/* 0x0088 ( 3 5) */ ldd [%g4],%f0
+/* 0x008c ( 3 4) */ add %o4,1,%o4
+/* 0x0090 ( 3 4) */ add %o2,8,%o2
+/* 0x0094 ( 4 4) */ cmp %o4,%o3
+/* 0x0098 ( 5 6) */ ble,pt %icc,.L900000726 ! tprob=0.50
+/* 0x009c ( 5 6) */ std %f0,[%o2-8]
+ .L900000729: /* frequency 8.0 confidence 0.0 */
+/* 0x00a0 ( 0 1) */ ba .L900000755 ! tprob=1.00
+/* 0x00a4 ( 0 3) */ ldd [%i1],%f0
+ .L77000279: /* frequency 0.6 confidence 0.0 */
+/* 0x00a8 ( 0 1) */ std %f2,[%o2]
+ .L900000754: /* frequency 5.3 confidence 0.0 */
+/* 0x00ac ( 0 3) */ ldd [%g4],%f2
+/* 0x00b0 ( 0 1) */ cmp %o1,%o3
+/* 0x00b4 ( 0 1) */ add %o2,8,%o2
+/* 0x00b8 ( 1 2) */ add %o1,1,%o1
+/* 0x00bc ( 1 2) */ ble,a,pt %icc,.L900000754 ! tprob=0.87
+/* 0x00c0 ( 3 4) */ std %f2,[%o2]
+ .L77000284: /* frequency 0.8 confidence 0.0 */
+/* 0x00c4 202 ( 0 3) */ ldd [%i1],%f0
+ .L900000755: /* frequency 0.8 confidence 0.0 */
+/* 0x00c8 202 ( 0 3) */ ldd [%i0],%f2
+/* 0x00cc ( 0 1) */ add %o7,1,%o2
+/* 0x00d0 206 ( 0 1) */ cmp %o7,0
+/* 0x00d4 ( 1 2) */ sll %o2,1,%o0
+/* 0x00d8 ( 1 2) */ sub %o7,1,%o1
+/* 0x00dc 202 ( 2 5) */ fmuld %f0,%f2,%f0
+/* 0x00e0 ( 2 3) */ std %f0,[%g5]
+/* 0x00e4 ( 2 3) */ sub %g1,1,%o7
+/* 0x00e8 ( 3 6) */ ldd [%g4],%f6
+/* 0x00ec 0 ( 3 4) */ or %g0,%o7,%g3
+/* 0x00f0 ( 3 4) */ or %g0,0,%l0
+/* 0x00f4 ( 4 7) */ ldd [%g4-8],%f2
+/* 0x00f8 ( 4 5) */ or %g0,0,%i5
+/* 0x00fc ( 4 5) */ or %g0,%o1,%o5
+/* 0x0100 ( 5 8) */ fdtox %f0,%f0
+/* 0x0104 ( 5 8) */ ldd [%g4-16],%f4
+/* 0x0108 ( 5 6) */ or %g0,%o0,%o3
+/* 0x010c 210 ( 6 7) */ add %i0,8,%o4
+/* 0x0110 ( 6 7) */ or %g0,0,%i4
+/* 0x0114 ( 9 10) */ fmovs %f6,%f0
+/* 0x0118 (11 14) */ fxtod %f0,%f0
+/* 0x011c 203 (14 17) */ fmuld %f0,%f16,%f0
+/* 0x0120 (17 20) */ fmuld %f0,%f2,%f2
+/* 0x0124 (20 23) */ fdtox %f2,%f2
+/* 0x0128 (23 26) */ fxtod %f2,%f2
+/* 0x012c (26 29) */ fmuld %f2,%f4,%f2
+/* 0x0130 (29 32) */ fsubd %f0,%f2,%f22
+/* 0x0134 206 (29 30) */ ble,pt %icc,.L900000748 ! tprob=0.60
+/* 0x0138 (29 30) */ sll %g1,4,%g2
+/* 0x013c 210 (30 33) */ ldd [%i2],%f0
+ .L900000749: /* frequency 5.3 confidence 0.0 */
+/* 0x0140 210 ( 0 3) */ fmuld %f0,%f22,%f8
+/* 0x0144 ( 0 3) */ ldd [%i1],%f0
+/* 0x0148 214 ( 0 1) */ cmp %g1,1
+/* 0x014c 210 ( 1 4) */ ldd [%o4+%i4],%f6
+/* 0x0150 ( 1 2) */ add %i1,8,%o0
+/* 0x0154 214 ( 1 2) */ or %g0,1,%o1
+/* 0x0158 210 ( 2 5) */ ldd [%i3],%f2
+/* 0x015c ( 2 3) */ add %i3,16,%l1
+/* 0x0160 ( 3 6) */ fmuld %f0,%f6,%f6
+/* 0x0164 ( 3 6) */ ldd [%g4-8],%f4
+/* 0x0168 ( 4 7) */ faddd %f2,%f8,%f2
+/* 0x016c ( 4 7) */ ldd [%i3+8],%f0
+/* 0x0170 208 ( 5 8) */ ldd [%i0+%i4],%f20
+/* 0x0174 210 ( 6 9) */ faddd %f0,%f6,%f0
+/* 0x0178 ( 7 10) */ fmuld %f2,%f4,%f2
+/* 0x017c (10 13) */ faddd %f0,%f2,%f18
+/* 0x0180 211 (10 11) */ std %f18,[%i3+8]
+/* 0x0184 214 (10 11) */ ble,pt %icc,.L900000753 ! tprob=0.54
+/* 0x0188 (11 12) */ srl %i5,31,%g2
+/* 0x018c (11 12) */ cmp %g3,7
+/* 0x0190 210 (12 13) */ add %i2,8,%g2
+/* 0x0194 214 (12 13) */ bl,pn %icc,.L77000281 ! tprob=0.36
+/* 0x0198 (13 14) */ add %g2,24,%o2
+/* 0x019c 216 (13 16) */ ldd [%o0+16],%f14
+/* 0x01a0 (13 14) */ add %i3,48,%l1
+/* 0x01a4 (14 17) */ ldd [%o0+24],%f12
+/* 0x01a8 0 (14 15) */ or %g0,%o2,%g2
+/* 0x01ac 214 (14 15) */ sub %g1,3,%o2
+/* 0x01b0 216 (15 18) */ ldd [%o0],%f2
+/* 0x01b4 (15 16) */ or %g0,5,%o1
+/* 0x01b8 (16 19) */ ldd [%g2-24],%f0
+/* 0x01bc (17 20) */ ldd [%o0+8],%f6
+/* 0x01c0 (17 20) */ fmuld %f2,%f20,%f2
+/* 0x01c4 (17 18) */ add %o0,32,%o0
+/* 0x01c8 (18 21) */ ldd [%g2-16],%f8
+/* 0x01cc (18 21) */ fmuld %f0,%f22,%f4
+/* 0x01d0 (19 22) */ ldd [%i3+16],%f0
+/* 0x01d4 (19 22) */ fmuld %f6,%f20,%f10
+/* 0x01d8 (20 23) */ ldd [%g2-8],%f6
+/* 0x01dc (21 24) */ faddd %f2,%f4,%f4
+/* 0x01e0 (21 24) */ ldd [%i3+32],%f2
+ .L900000738: /* frequency 512.0 confidence 0.0 */
+/* 0x01e4 216 (16 24) */ ldd [%g2],%f24
+/* 0x01e8 (16 17) */ add %o1,3,%o1
+/* 0x01ec (16 17) */ add %g2,24,%g2
+/* 0x01f0 (16 19) */ fmuld %f8,%f22,%f8
+/* 0x01f4 (17 25) */ ldd [%l1],%f28
+/* 0x01f8 (17 17) */ cmp %o1,%o2
+/* 0x01fc (17 18) */ add %o0,24,%o0
+/* 0x0200 (18 26) */ ldd [%o0-24],%f26
+/* 0x0204 (18 21) */ faddd %f0,%f4,%f0
+/* 0x0208 (18 19) */ add %l1,48,%l1
+/* 0x020c (19 22) */ faddd %f10,%f8,%f10
+/* 0x0210 (19 22) */ fmuld %f14,%f20,%f4
+/* 0x0214 (19 20) */ std %f0,[%l1-80]
+/* 0x0218 (20 28) */ ldd [%g2-16],%f8
+/* 0x021c (20 23) */ fmuld %f6,%f22,%f6
+/* 0x0220 (21 29) */ ldd [%l1-32],%f0
+/* 0x0224 (22 30) */ ldd [%o0-16],%f14
+/* 0x0228 (22 25) */ faddd %f2,%f10,%f2
+/* 0x022c (23 26) */ faddd %f4,%f6,%f10
+/* 0x0230 (23 26) */ fmuld %f12,%f20,%f4
+/* 0x0234 (23 24) */ std %f2,[%l1-64]
+/* 0x0238 (24 32) */ ldd [%g2-8],%f6
+/* 0x023c (24 27) */ fmuld %f24,%f22,%f24
+/* 0x0240 (25 33) */ ldd [%l1-16],%f2
+/* 0x0244 (26 34) */ ldd [%o0-8],%f12
+/* 0x0248 (26 29) */ faddd %f28,%f10,%f10
+/* 0x024c (27 28) */ std %f10,[%l1-48]
+/* 0x0250 (27 30) */ fmuld %f26,%f20,%f10
+/* 0x0254 (27 28) */ ble,pt %icc,.L900000738 ! tprob=0.50
+/* 0x0258 (27 30) */ faddd %f4,%f24,%f4
+ .L900000741: /* frequency 64.0 confidence 0.0 */
+/* 0x025c 216 ( 0 3) */ fmuld %f8,%f22,%f28
+/* 0x0260 ( 0 3) */ ldd [%g2],%f24
+/* 0x0264 ( 0 3) */ faddd %f0,%f4,%f26
+/* 0x0268 ( 1 4) */ fmuld %f12,%f20,%f8
+/* 0x026c ( 1 2) */ add %l1,32,%l1
+/* 0x0270 ( 1 2) */ cmp %o1,%g3
+/* 0x0274 ( 2 5) */ fmuld %f14,%f20,%f14
+/* 0x0278 ( 2 5) */ ldd [%l1-32],%f4
+/* 0x027c ( 2 3) */ add %g2,8,%g2
+/* 0x0280 ( 3 6) */ faddd %f10,%f28,%f12
+/* 0x0284 ( 3 6) */ fmuld %f6,%f22,%f6
+/* 0x0288 ( 3 6) */ ldd [%l1-16],%f0
+/* 0x028c ( 4 7) */ fmuld %f24,%f22,%f10
+/* 0x0290 ( 4 5) */ std %f26,[%l1-64]
+/* 0x0294 ( 6 9) */ faddd %f2,%f12,%f2
+/* 0x0298 ( 6 7) */ std %f2,[%l1-48]
+/* 0x029c ( 7 10) */ faddd %f14,%f6,%f6
+/* 0x02a0 ( 8 11) */ faddd %f8,%f10,%f2
+/* 0x02a4 (10 13) */ faddd %f4,%f6,%f4
+/* 0x02a8 (10 11) */ std %f4,[%l1-32]
+/* 0x02ac (11 14) */ faddd %f0,%f2,%f0
+/* 0x02b0 (11 12) */ bg,pn %icc,.L77000213 ! tprob=0.13
+/* 0x02b4 (11 12) */ std %f0,[%l1-16]
+ .L77000281: /* frequency 4.0 confidence 0.0 */
+/* 0x02b8 216 ( 0 3) */ ldd [%o0],%f0
+ .L900000752: /* frequency 36.6 confidence 0.0 */
+/* 0x02bc 216 ( 0 3) */ ldd [%g2],%f4
+/* 0x02c0 ( 0 3) */ fmuld %f0,%f20,%f2
+/* 0x02c4 ( 0 1) */ add %o1,1,%o1
+/* 0x02c8 ( 1 4) */ ldd [%l1],%f0
+/* 0x02cc ( 1 2) */ add %o0,8,%o0
+/* 0x02d0 ( 1 2) */ add %g2,8,%g2
+/* 0x02d4 ( 2 5) */ fmuld %f4,%f22,%f4
+/* 0x02d8 ( 2 3) */ cmp %o1,%g3
+/* 0x02dc ( 5 8) */ faddd %f2,%f4,%f2
+/* 0x02e0 ( 8 11) */ faddd %f0,%f2,%f0
+/* 0x02e4 ( 8 9) */ std %f0,[%l1]
+/* 0x02e8 ( 8 9) */ add %l1,16,%l1
+/* 0x02ec ( 8 9) */ ble,a,pt %icc,.L900000752 ! tprob=0.87
+/* 0x02f0 (10 13) */ ldd [%o0],%f0
+ .L77000213: /* frequency 5.3 confidence 0.0 */
+/* 0x02f4 ( 0 1) */ srl %i5,31,%g2
+ .L900000753: /* frequency 5.3 confidence 0.0 */
+/* 0x02f8 218 ( 0 1) */ cmp %l0,30
+/* 0x02fc ( 0 1) */ bne,a,pt %icc,.L900000751 ! tprob=0.54
+/* 0x0300 ( 0 3) */ fdtox %f18,%f0
+/* 0x0304 ( 1 2) */ add %i5,%g2,%g2
+/* 0x0308 ( 1 2) */ sub %o3,1,%o2
+/* 0x030c ( 2 3) */ sra %g2,1,%o0
+/* 0x0310 216 ( 2 5) */ ldd [%g4],%f0
+/* 0x0314 ( 3 4) */ add %o0,1,%g2
+/* 0x0318 ( 4 5) */ sll %g2,1,%o0
+/* 0x031c ( 4 5) */ fmovd %f0,%f2
+/* 0x0320 ( 5 6) */ sll %g2,4,%o1
+/* 0x0324 ( 5 6) */ cmp %o0,%o3
+/* 0x0328 ( 5 6) */ bge,pt %icc,.L77000215 ! tprob=0.53
+/* 0x032c ( 6 7) */ or %g0,0,%l0
+/* 0x0330 218 ( 6 7) */ add %g5,%o1,%o1
+/* 0x0334 216 ( 7 10) */ ldd [%o1],%f8
+ .L900000750: /* frequency 32.0 confidence 0.0 */
+/* 0x0338 ( 0 3) */ fdtox %f8,%f6
+/* 0x033c ( 0 3) */ ldd [%g4],%f10
+/* 0x0340 ( 0 1) */ add %o0,2,%o0
+/* 0x0344 ( 1 4) */ ldd [%o1+8],%f4
+/* 0x0348 ( 1 4) */ fdtox %f8,%f8
+/* 0x034c ( 1 2) */ cmp %o0,%o2
+/* 0x0350 ( 5 6) */ fmovs %f10,%f6
+/* 0x0354 ( 7 10) */ fxtod %f6,%f10
+/* 0x0358 ( 8 11) */ fdtox %f4,%f6
+/* 0x035c ( 9 12) */ fdtox %f4,%f4
+/* 0x0360 (10 13) */ faddd %f10,%f2,%f2
+/* 0x0364 (10 11) */ std %f2,[%o1]
+/* 0x0368 (12 15) */ ldd [%g4],%f2
+/* 0x036c (14 15) */ fmovs %f2,%f6
+/* 0x0370 (16 19) */ fxtod %f6,%f6
+/* 0x0374 (17 20) */ fitod %f8,%f2
+/* 0x0378 (19 22) */ faddd %f6,%f0,%f0
+/* 0x037c (19 20) */ std %f0,[%o1+8]
+/* 0x0380 (19 20) */ add %o1,16,%o1
+/* 0x0384 (20 23) */ fitod %f4,%f0
+/* 0x0388 (20 21) */ ble,a,pt %icc,.L900000750 ! tprob=0.87
+/* 0x038c (20 23) */ ldd [%o1],%f8
+ .L77000233: /* frequency 4.6 confidence 0.0 */
+/* 0x0390 ( 0 0) */ or %g0,0,%l0
+ .L77000215: /* frequency 5.3 confidence 0.0 */
+/* 0x0394 ( 0 3) */ fdtox %f18,%f0
+ .L900000751: /* frequency 5.3 confidence 0.0 */
+/* 0x0398 ( 0 3) */ ldd [%g4],%f6
+/* 0x039c 220 ( 0 1) */ add %i5,1,%i5
+/* 0x03a0 ( 0 1) */ add %i4,8,%i4
+/* 0x03a4 ( 1 4) */ ldd [%g4-8],%f2
+/* 0x03a8 ( 1 2) */ add %l0,1,%l0
+/* 0x03ac ( 1 2) */ add %i3,8,%i3
+/* 0x03b0 ( 2 3) */ fmovs %f6,%f0
+/* 0x03b4 ( 2 5) */ ldd [%g4-16],%f4
+/* 0x03b8 ( 2 3) */ cmp %i5,%o5
+/* 0x03bc ( 4 7) */ fxtod %f0,%f0
+/* 0x03c0 ( 7 10) */ fmuld %f0,%f16,%f0
+/* 0x03c4 (10 13) */ fmuld %f0,%f2,%f2
+/* 0x03c8 (13 16) */ fdtox %f2,%f2
+/* 0x03cc (16 19) */ fxtod %f2,%f2
+/* 0x03d0 (19 22) */ fmuld %f2,%f4,%f2
+/* 0x03d4 (22 25) */ fsubd %f0,%f2,%f22
+/* 0x03d8 (22 23) */ ble,a,pt %icc,.L900000749 ! tprob=0.89
+/* 0x03dc (22 25) */ ldd [%i2],%f0
+ .L900000725: /* frequency 0.7 confidence 0.0 */
+/* 0x03e0 220 ( 0 1) */ ba .L900000748 ! tprob=1.00
+/* 0x03e4 ( 0 1) */ sll %g1,4,%g2
+
+
+ .L77000289: /* frequency 0.8 confidence 0.0 */
+/* 0x03e8 225 ( 0 3) */ ldd [%o1],%f6
+/* 0x03ec 242 ( 0 1) */ add %g4,-8,%g2
+/* 0x03f0 ( 0 1) */ add %g4,-16,%g3
+/* 0x03f4 225 ( 1 4) */ ldd [%i1],%f2
+/* 0x03f8 245 ( 1 2) */ or %g0,0,%o3
+/* 0x03fc ( 1 2) */ or %g0,0,%o0
+/* 0x0400 225 ( 3 6) */ fmuld %f2,%f6,%f2
+/* 0x0404 ( 3 4) */ std %f2,[%o2]
+/* 0x0408 ( 4 7) */ ldd [%g4],%f6
+/* 0x040c 237 ( 7 8) */ std %f6,[%o2+8]
+/* 0x0410 ( 8 9) */ std %f6,[%o2+16]
+/* 0x0414 ( 9 10) */ std %f6,[%o2+24]
+/* 0x0418 (10 11) */ std %f6,[%o2+32]
+/* 0x041c (11 12) */ std %f6,[%o2+40]
+/* 0x0420 (12 13) */ std %f6,[%o2+48]
+/* 0x0424 (13 14) */ std %f6,[%o2+56]
+/* 0x0428 (14 15) */ std %f6,[%o2+64]
+/* 0x042c (15 16) */ std %f6,[%o2+72]
+! prefetch [%i4],0
+! prefetch [%i4+32],0
+! prefetch [%i4+64],0
+! prefetch [%i4+96],0
+! prefetch [%i4+120],0
+! prefetch [%i1],0
+! prefetch [%i1+32],0
+! prefetch [%i1+64],0
+! prefetch [%i1+96],0
+! prefetch [%i1+120],0
+/* 0x0430 (16 17) */ std %f6,[%o2+80]
+/* 0x0434 (17 18) */ std %f6,[%o2+88]
+/* 0x0438 (18 19) */ std %f6,[%o2+96]
+/* 0x043c (19 20) */ std %f6,[%o2+104]
+/* 0x0440 (20 21) */ std %f6,[%o2+112]
+/* 0x0444 (21 22) */ std %f6,[%o2+120]
+/* 0x0448 (22 23) */ std %f6,[%o2+128]
+/* 0x044c (23 24) */ std %f6,[%o2+136]
+/* 0x0450 (24 25) */ std %f6,[%o2+144]
+/* 0x0454 (25 26) */ std %f6,[%o2+152]
+/* 0x0458 (26 27) */ std %f6,[%o2+160]
+/* 0x045c (27 28) */ std %f6,[%o2+168]
+/* 0x0460 (27 30) */ fdtox %f2,%f2
+/* 0x0464 (28 29) */ std %f6,[%o2+176]
+/* 0x0468 (29 30) */ std %f6,[%o2+184]
+/* 0x046c (30 31) */ std %f6,[%o2+192]
+/* 0x0470 (31 32) */ std %f6,[%o2+200]
+/* 0x0474 (32 33) */ std %f6,[%o2+208]
+/* 0x0478 (33 34) */ std %f6,[%o2+216]
+/* 0x047c (34 35) */ std %f6,[%o2+224]
+/* 0x0480 (35 36) */ std %f6,[%o2+232]
+/* 0x0484 (36 37) */ std %f6,[%o2+240]
+/* 0x0488 (37 38) */ std %f6,[%o2+248]
+/* 0x048c (38 39) */ std %f6,[%o2+256]
+/* 0x0490 (39 40) */ std %f6,[%o2+264]
+/* 0x0494 (40 41) */ std %f6,[%o2+272]
+/* 0x0498 (41 42) */ std %f6,[%o2+280]
+/* 0x049c (42 43) */ std %f6,[%o2+288]
+/* 0x04a0 (43 44) */ std %f6,[%o2+296]
+/* 0x04a4 (44 45) */ std %f6,[%o2+304]
+/* 0x04a8 (45 46) */ std %f6,[%o2+312]
+/* 0x04ac (46 47) */ std %f6,[%o2+320]
+/* 0x04b0 (47 48) */ std %f6,[%o2+328]
+/* 0x04b4 (48 49) */ std %f6,[%o2+336]
+/* 0x04b8 (49 50) */ std %f6,[%o2+344]
+/* 0x04bc (50 51) */ std %f6,[%o2+352]
+/* 0x04c0 (51 52) */ std %f6,[%o2+360]
+/* 0x04c4 (52 53) */ std %f6,[%o2+368]
+/* 0x04c8 (53 54) */ std %f6,[%o2+376]
+/* 0x04cc (54 55) */ std %f6,[%o2+384]
+/* 0x04d0 (55 56) */ std %f6,[%o2+392]
+/* 0x04d4 (56 57) */ std %f6,[%o2+400]
+/* 0x04d8 (57 58) */ std %f6,[%o2+408]
+/* 0x04dc (58 59) */ std %f6,[%o2+416]
+/* 0x04e0 (59 60) */ std %f6,[%o2+424]
+/* 0x04e4 (60 61) */ std %f6,[%o2+432]
+/* 0x04e8 (61 62) */ std %f6,[%o2+440]
+/* 0x04ec (62 63) */ std %f6,[%o2+448]
+/* 0x04f0 (63 64) */ std %f6,[%o2+456]
+/* 0x04f4 (64 65) */ std %f6,[%o2+464]
+/* 0x04f8 (65 66) */ std %f6,[%o2+472]
+/* 0x04fc (66 67) */ std %f6,[%o2+480]
+/* 0x0500 (67 68) */ std %f6,[%o2+488]
+/* 0x0504 (68 69) */ std %f6,[%o2+496]
+/* 0x0508 (69 70) */ std %f6,[%o2+504]
+/* 0x050c (70 71) */ std %f6,[%o2+512]
+/* 0x0510 (71 72) */ std %f6,[%o2+520]
+/* 0x0514 242 (72 75) */ ld [%g4],%f2 ! dalign
+/* 0x0518 (73 76) */ ld [%g2],%f6 ! dalign
+/* 0x051c (74 77) */ fxtod %f2,%f10
+/* 0x0520 (74 77) */ ld [%g2+4],%f7
+/* 0x0524 (75 78) */ ld [%g3],%f8 ! dalign
+/* 0x0528 (76 79) */ ld [%g3+4],%f9
+/* 0x052c (77 80) */ fmuld %f10,%f0,%f0
+/* 0x0530 239 (77 80) */ ldd [%i4],%f4
+/* 0x0534 240 (78 81) */ ldd [%i1],%f2
+/* 0x0538 (80 83) */ fmuld %f0,%f6,%f6
+/* 0x053c (83 86) */ fdtox %f6,%f6
+/* 0x0540 (86 89) */ fxtod %f6,%f6
+/* 0x0544 (89 92) */ fmuld %f6,%f8,%f6
+/* 0x0548 (92 95) */ fsubd %f0,%f6,%f0
+/* 0x054c 250 (95 98) */ fmuld %f4,%f0,%f10
+ .L900000747: /* frequency 6.4 confidence 0.0 */
+
+
+ fmovd %f0,%f0
+ fmovd %f16,%f18
+ ldd [%i4],%f2
+ ldd [%o2],%f8
+ ldd [%i1],%f10
+ ldd [%g4-8],%f14
+ ldd [%g4-16],%f16
+ ldd [%o1],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%i4+112],%f60
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+
+ nop
+ nop
+!
+ .L99999999:
+!1
+!!!
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4
+!2
+!!!
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20
+!3
+!!!
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22
+!4
+!!!
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32
+!5
+!!!
+ ldd [%o1+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36
+!6
+!!!
+ add %o1,8,%o1
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42
+!7
+!!!
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44
+!8
+!!!
+ ldd [%o2+16],%f22
+ fmuld %f10,%f6,%f12
+!9
+!!!
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4
+!10
+!!!
+ ldd [%o2+48],%f36
+ fmuld %f30,%f24,%f48
+!11
+!!!
+ ldd [%o2+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!12
+!!!
+ std %f20,[%o2+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52
+!13
+!!!
+ ldd [%o2+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56
+!14
+!!!
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32
+!15
+!!!
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48
+!16
+!!!
+ ldd [%o2+112],%f50
+ faddd %f52,%f56,%f52
+!17
+!!!
+ ldd [%o2+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20
+!18
+!!!
+ std %f32,[%o2+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22
+!19
+!!!
+ std %f42,[%o2+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32
+!20
+!!!
+ std %f48,[%o2+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36
+!21
+!!!
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4
+!22
+!!!
+ std %f52,[%o2+144]
+ faddd %f20,%f22,%f20
+!23
+!!!
+ ldd [%i4+120],%f44
+!24
+!!!
+ ldd [%o2+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42
+!25
+!!!
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4
+!26
+!!!
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44
+!27
+!!!
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48
+!28
+!!!
+ ldd [%o2+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!29
+!!!
+ std %f20,[%o2+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52
+!30
+!!!
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56
+!31
+!!!
+ ldd [%o2+240],%f44
+ faddd %f32,%f36,%f32
+!32
+!!!
+ std %f32,[%o2+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20
+!33
+!!!
+ ldd [%o2+32],%f50
+ fmuld %f4,%f18,%f12
+!34
+!!!
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22
+!35
+!!!
+ ldd [%o2+64],%f56
+ faddd %f42,%f44,%f42
+!36
+!!!
+ std %f42,[%o2+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32
+!37
+!!!
+ std %f48,[%o2+32]
+ fmuld %f12,%f14,%f4
+!38
+!!!
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36
+!39
+!!!
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20
+!40
+!!!
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52
+!41
+!!!
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42
+!42
+!!!
+ std %f56,[%o2+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44
+!43
+!!!
+ ldd [%o2+96],%f22
+ fmuld %f48,%f24,%f48
+!44
+!!!
+ ldd [%o2+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50
+!45
+!!!
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56
+!46
+!!!
+ add %o2,8,%o2
+ faddd %f42,%f44,%f42
+!47
+!!!
+ ldd [%o2+160-8],%f44
+ faddd %f20,%f22,%f20
+!48
+!!!
+ std %f20,[%o2+96-8]
+ faddd %f48,%f50,%f48
+!49
+!!!
+ ldd [%o2+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4
+!50
+!!!
+ ldd [%o2+224-8],%f56
+ faddd %f32,%f36,%f32
+!51
+!!!
+ std %f32,[%o2+128-8]
+ faddd %f42,%f44,%f42
+!52
+ add %o3,1,%o3
+ std %f42,[%o2+160-8]
+ faddd %f48,%f50,%f48
+!53
+!!!
+ cmp %o3,31
+ std %f48,[%o2+192-8]
+ faddd %f52,%f56,%f52
+!54
+ std %f52,[%o2+224-8]
+ ble,pt %icc,.L99999999
+ fsubd %f12,%f4,%f0
+
+
+
+!55
+ std %f8,[%o2]
+
+
+
+
+
+
+ .L77000285: /* frequency 1.0 confidence 0.0 */
+/* 0x07a8 279 ( 0 1) */ sll %g1,4,%g2
+ .L900000748: /* frequency 1.0 confidence 0.0 */
+/* 0x07ac 279 ( 0 3) */ ldd [%g5+%g2],%f0
+/* 0x07b0 ( 0 1) */ add %g5,%g2,%i1
+/* 0x07b4 ( 0 1) */ or %g0,0,%o4
+/* 0x07b8 206 ( 1 4) */ ld [%fp+68],%o0
+/* 0x07bc 279 ( 1 2) */ or %g0,0,%i0
+/* 0x07c0 ( 1 2) */ cmp %g1,0
+/* 0x07c4 ( 2 5) */ fdtox %f0,%f0
+/* 0x07c8 ( 2 3) */ std %f0,[%sp+120]
+/* 0x07cc 275 ( 2 3) */ sethi %hi(0xfc00),%o1
+/* 0x07d0 206 ( 3 4) */ or %g0,%o0,%o3
+/* 0x07d4 275 ( 3 4) */ sub %g1,1,%g4
+/* 0x07d8 279 ( 4 7) */ ldd [%i1+8],%f0
+/* 0x07dc ( 4 5) */ or %g0,%o0,%g5
+/* 0x07e0 ( 4 5) */ add %o1,1023,%o1
+/* 0x07e4 ( 6 9) */ fdtox %f0,%f0
+/* 0x07e8 ( 6 7) */ std %f0,[%sp+112]
+/* 0x07ec (10 12) */ ldx [%sp+112],%o5
+/* 0x07f0 (11 13) */ ldx [%sp+120],%o7
+/* 0x07f4 (11 12) */ ble,pt %icc,.L900000746 ! tprob=0.56
+/* 0x07f8 (11 12) */ sethi %hi(0xfc00),%g2
+/* 0x07fc 275 (12 13) */ or %g0,-1,%g2
+/* 0x0800 279 (12 13) */ cmp %g1,3
+/* 0x0804 275 (13 14) */ srl %g2,0,%o2
+/* 0x0808 279 (13 14) */ bl,pn %icc,.L77000286 ! tprob=0.44
+/* 0x080c (13 14) */ or %g0,%i1,%g2
+/* 0x0810 (14 17) */ ldd [%i1+16],%f0
+/* 0x0814 (14 15) */ and %o5,%o1,%o0
+/* 0x0818 (14 15) */ add %i1,16,%g2
+/* 0x081c (15 16) */ sllx %o0,16,%g3
+/* 0x0820 (15 16) */ and %o7,%o2,%o0
+/* 0x0824 (16 19) */ fdtox %f0,%f0
+/* 0x0828 (16 17) */ std %f0,[%sp+104]
+/* 0x082c (16 17) */ add %o0,%g3,%o4
+/* 0x0830 (17 20) */ ldd [%i1+24],%f2
+/* 0x0834 (17 18) */ srax %o5,16,%o0
+/* 0x0838 (17 18) */ add %o3,4,%g5
+/* 0x083c (18 19) */ stx %o0,[%sp+128]
+/* 0x0840 (18 19) */ and %o4,%o2,%o0
+/* 0x0844 (18 19) */ or %g0,1,%i0
+/* 0x0848 (19 20) */ stx %o0,[%sp+112]
+/* 0x084c (19 20) */ srax %o4,32,%o0
+/* 0x0850 (19 22) */ fdtox %f2,%f0
+/* 0x0854 (20 21) */ stx %o0,[%sp+136]
+/* 0x0858 (20 21) */ srax %o7,32,%o4
+/* 0x085c (21 22) */ std %f0,[%sp+96]
+/* 0x0860 (22 24) */ ldx [%sp+136],%o7
+/* 0x0864 (23 25) */ ldx [%sp+128],%o0
+/* 0x0868 (25 27) */ ldx [%sp+104],%g3
+/* 0x086c (25 26) */ add %o0,%o7,%o0
+/* 0x0870 (26 28) */ ldx [%sp+112],%o7
+/* 0x0874 (26 27) */ add %o4,%o0,%o4
+/* 0x0878 (27 29) */ ldx [%sp+96],%o5
+/* 0x087c (28 29) */ st %o7,[%o3]
+/* 0x0880 (28 29) */ or %g0,%g3,%o7
+ .L900000730: /* frequency 64.0 confidence 0.0 */
+/* 0x0884 (17 19) */ ldd [%g2+16],%f0
+/* 0x0888 (17 18) */ add %i0,1,%i0
+/* 0x088c (17 18) */ add %g5,4,%g5
+/* 0x0890 (18 18) */ cmp %i0,%g4
+/* 0x0894 (18 19) */ add %g2,16,%g2
+/* 0x0898 (19 22) */ fdtox %f0,%f0
+/* 0x089c (20 21) */ std %f0,[%sp+104]
+/* 0x08a0 (21 23) */ ldd [%g2+8],%f0
+/* 0x08a4 (23 26) */ fdtox %f0,%f0
+/* 0x08a8 (24 25) */ std %f0,[%sp+96]
+/* 0x08ac (25 26) */ and %o5,%o1,%g3
+/* 0x08b0 (26 27) */ sllx %g3,16,%g3
+/* 0x08b4 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x08b8 (26 27) */ and %o7,%o2,%g3
+/* 0x08bc ( 0 0) */ stx %o7,[%sp+128]
+/* 0x08c0 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x08c4 (27 27) */ add %g3,%o7,%g3
+/* 0x08c8 ( 0 0) */ ldx [%sp+128],%o7
+/* 0x08cc (28 29) */ srax %o5,16,%o5
+/* 0x08d0 (28 28) */ add %g3,%o4,%g3
+/* 0x08d4 (29 30) */ srax %g3,32,%o4
+/* 0x08d8 ( 0 0) */ stx %o4,[%sp+112]
+/* 0x08dc (30 31) */ srax %o7,32,%o4
+/* 0x08e0 ( 0 0) */ ldx [%sp+112],%o7
+/* 0x08e4 (30 31) */ add %o5,%o7,%o7
+/* 0x08e8 (31 33) */ ldx [%sp+96],%o5
+/* 0x08ec (31 32) */ add %o4,%o7,%o4
+/* 0x08f0 (32 33) */ and %g3,%o2,%g3
+/* 0x08f4 ( 0 0) */ ldx [%sp+104],%o7
+/* 0x08f8 (33 34) */ ble,pt %icc,.L900000730 ! tprob=0.50
+/* 0x08fc (33 34) */ st %g3,[%g5-4]
+ .L900000733: /* frequency 8.0 confidence 0.0 */
+/* 0x0900 ( 0 1) */ ba .L900000746 ! tprob=1.00
+/* 0x0904 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000286: /* frequency 0.7 confidence 0.0 */
+/* 0x0908 ( 0 3) */ ldd [%g2+16],%f0
+ .L900000745: /* frequency 6.4 confidence 0.0 */
+/* 0x090c ( 0 1) */ and %o7,%o2,%o0
+/* 0x0910 ( 0 1) */ and %o5,%o1,%g3
+/* 0x0914 ( 0 3) */ fdtox %f0,%f0
+/* 0x0918 ( 1 2) */ add %o4,%o0,%o0
+/* 0x091c ( 1 2) */ std %f0,[%sp+104]
+/* 0x0920 ( 1 2) */ add %i0,1,%i0
+/* 0x0924 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0928 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x092c ( 2 3) */ add %g2,16,%g2
+/* 0x0930 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0934 ( 3 4) */ cmp %i0,%g4
+/* 0x0938 ( 4 5) */ srax %o5,16,%o0
+/* 0x093c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0940 ( 4 5) */ and %o4,%o2,%g3
+/* 0x0944 ( 5 6) */ srax %o4,32,%o5
+/* 0x0948 ( 5 8) */ fdtox %f2,%f0
+/* 0x094c ( 5 6) */ std %f0,[%sp+96]
+/* 0x0950 ( 6 7) */ srax %o7,32,%o4
+/* 0x0954 ( 6 8) */ ldx [%sp+112],%o7
+/* 0x0958 ( 8 9) */ add %o7,%o5,%o7
+/* 0x095c ( 9 11) */ ldx [%sp+104],%o5
+/* 0x0960 ( 9 10) */ add %o4,%o7,%o4
+/* 0x0964 (10 12) */ ldx [%sp+96],%o0
+/* 0x0968 (11 12) */ st %g3,[%g5]
+/* 0x096c (11 12) */ or %g0,%o5,%o7
+/* 0x0970 (11 12) */ add %g5,4,%g5
+/* 0x0974 (12 13) */ or %g0,%o0,%o5
+/* 0x0978 (12 13) */ ble,a,pt %icc,.L900000745 ! tprob=0.86
+/* 0x097c (12 15) */ ldd [%g2+16],%f0
+ .L77000236: /* frequency 1.0 confidence 0.0 */
+/* 0x0980 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000746: /* frequency 1.0 confidence 0.0 */
+/* 0x0984 ( 0 1) */ or %g0,-1,%o0
+/* 0x0988 ( 0 1) */ add %g2,1023,%g2
+/* 0x098c ( 0 3) */ ld [%fp+88],%o1
+/* 0x0990 ( 1 2) */ srl %o0,0,%g3
+/* 0x0994 ( 1 2) */ and %o5,%g2,%g2
+/* 0x0998 ( 2 3) */ and %o7,%g3,%g4
+/* 0x099c 281 ( 2 3) */ or %g0,-1,%o5
+/* 0x09a0 275 ( 3 4) */ sllx %g2,16,%g2
+/* 0x09a4 ( 3 4) */ add %o4,%g4,%g4
+/* 0x09a8 ( 4 5) */ add %g4,%g2,%g2
+/* 0x09ac ( 5 6) */ sll %i0,2,%g4
+/* 0x09b0 ( 5 6) */ and %g2,%g3,%g2
+/* 0x09b4 ( 6 7) */ st %g2,[%o3+%g4]
+/* 0x09b8 281 ( 6 7) */ sll %g1,2,%g2
+/* 0x09bc ( 7 10) */ ld [%o3+%g2],%g2
+/* 0x09c0 ( 9 10) */ cmp %g2,0
+/* 0x09c4 ( 9 10) */ bleu,pn %icc,.L77000241 ! tprob=0.50
+/* 0x09c8 ( 9 10) */ or %g0,%o1,%o2
+/* 0x09cc (10 11) */ ba .L900000744 ! tprob=1.00
+/* 0x09d0 (10 11) */ cmp %o5,0
+ .L77000241: /* frequency 0.8 confidence 0.0 */
+/* 0x09d4 ( 0 1) */ subcc %g1,1,%o5
+/* 0x09d8 ( 0 1) */ bneg,pt %icc,.L900000744 ! tprob=0.60
+/* 0x09dc ( 1 2) */ cmp %o5,0
+/* 0x09e0 ( 1 2) */ sll %o5,2,%g2
+/* 0x09e4 ( 2 3) */ add %o1,%g2,%o0
+/* 0x09e8 ( 2 3) */ add %o3,%g2,%o4
+/* 0x09ec ( 3 6) */ ld [%o0],%g2
+ .L900000743: /* frequency 5.3 confidence 0.0 */
+/* 0x09f0 ( 0 3) */ ld [%o4],%g3
+/* 0x09f4 ( 0 1) */ add %o0,4,%o0
+/* 0x09f8 ( 0 1) */ add %o4,4,%o4
+/* 0x09fc ( 2 3) */ cmp %g3,%g2
+/* 0x0a00 ( 2 3) */ bne,pn %icc,.L77000244 ! tprob=0.16
+/* 0x0a04 ( 2 3) */ nop
+/* 0x0a08 ( 3 4) */ addcc %o5,1,%o5
+/* 0x0a0c ( 3 4) */ bpos,a,pt %icc,.L900000743 ! tprob=0.84
+/* 0x0a10 ( 3 6) */ ld [%o0],%g2
+ .L77000244: /* frequency 1.0 confidence 0.0 */
+/* 0x0a14 ( 0 1) */ cmp %o5,0
+ .L900000744: /* frequency 1.0 confidence 0.0 */
+/* 0x0a18 ( 0 1) */ bl,pn %icc,.L77000287 ! tprob=0.50
+/* 0x0a1c ( 0 1) */ sll %o5,2,%g2
+/* 0x0a20 ( 1 4) */ ld [%o2+%g2],%g3
+/* 0x0a24 ( 2 5) */ ld [%o3+%g2],%g2
+/* 0x0a28 ( 4 5) */ cmp %g2,%g3
+/* 0x0a2c ( 4 5) */ bleu,pt %icc,.L77000224 ! tprob=0.56
+/* 0x0a30 ( 4 5) */ nop
+ .L77000287: /* frequency 0.8 confidence 0.0 */
+/* 0x0a34 ( 0 1) */ cmp %g1,0
+/* 0x0a38 ( 0 1) */ ble,pt %icc,.L77000224 ! tprob=0.60
+/* 0x0a3c ( 0 1) */ nop
+/* 0x0a40 281 ( 1 2) */ sub %g1,1,%o7
+/* 0x0a44 ( 1 2) */ or %g0,-1,%g2
+/* 0x0a48 ( 2 3) */ srl %g2,0,%o4
+/* 0x0a4c ( 2 3) */ add %o7,1,%o0
+/* 0x0a50 279 ( 3 4) */ or %g0,0,%o5
+/* 0x0a54 ( 3 4) */ or %g0,0,%g1
+/* 0x0a58 ( 4 5) */ cmp %o0,3
+/* 0x0a5c ( 4 5) */ bl,pn %icc,.L77000288 ! tprob=0.40
+/* 0x0a60 ( 4 5) */ add %o3,8,%o1
+/* 0x0a64 ( 5 6) */ add %o2,4,%o0
+/* 0x0a68 ( 5 8) */ ld [%o1-8],%g2
+/* 0x0a6c 0 ( 5 6) */ or %g0,%o1,%o3
+/* 0x0a70 279 ( 6 9) */ ld [%o0-4],%g3
+/* 0x0a74 0 ( 6 7) */ or %g0,%o0,%o2
+/* 0x0a78 279 ( 6 7) */ or %g0,2,%g1
+/* 0x0a7c ( 7 10) */ ld [%o3-4],%o0
+/* 0x0a80 ( 8 9) */ sub %g2,%g3,%g2
+/* 0x0a84 ( 9 10) */ or %g0,%g2,%o5
+/* 0x0a88 ( 9 10) */ and %g2,%o4,%g2
+/* 0x0a8c ( 9 10) */ st %g2,[%o3-8]
+/* 0x0a90 (10 11) */ srax %o5,32,%o5
+ .L900000734: /* frequency 64.0 confidence 0.0 */
+/* 0x0a94 (12 20) */ ld [%o2],%g2
+/* 0x0a98 (12 13) */ add %g1,1,%g1
+/* 0x0a9c (12 13) */ add %o2,4,%o2
+/* 0x0aa0 (13 13) */ cmp %g1,%o7
+/* 0x0aa4 (13 14) */ add %o3,4,%o3
+/* 0x0aa8 (14 14) */ sub %o0,%g2,%o0
+/* 0x0aac (15 15) */ add %o0,%o5,%o5
+/* 0x0ab0 (16 17) */ and %o5,%o4,%g2
+/* 0x0ab4 (16 24) */ ld [%o3-4],%o0
+/* 0x0ab8 (17 18) */ st %g2,[%o3-8]
+/* 0x0abc (17 18) */ ble,pt %icc,.L900000734 ! tprob=0.50
+/* 0x0ac0 (17 18) */ srax %o5,32,%o5
+ .L900000737: /* frequency 8.0 confidence 0.0 */
+/* 0x0ac4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ac8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0acc ( 3 4) */ add %o0,%o5,%o0
+/* 0x0ad0 ( 4 5) */ and %o0,%o4,%o1
+/* 0x0ad4 ( 4 5) */ st %o1,[%o3-4]
+/* 0x0ad8 ( 5 7) */ ret ! Result =
+/* 0x0adc ( 7 8) */ restore %g0,%g0,%g0
+ .L77000288: /* frequency 0.6 confidence 0.0 */
+/* 0x0ae0 ( 0 3) */ ld [%o3],%o0
+ .L900000742: /* frequency 5.3 confidence 0.0 */
+/* 0x0ae4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ae8 ( 0 1) */ add %o5,%o0,%o0
+/* 0x0aec ( 0 1) */ add %g1,1,%g1
+/* 0x0af0 ( 1 2) */ add %o2,4,%o2
+/* 0x0af4 ( 1 2) */ cmp %g1,%o7
+/* 0x0af8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0afc ( 3 4) */ and %o0,%o4,%o1
+/* 0x0b00 ( 3 4) */ st %o1,[%o3]
+/* 0x0b04 ( 3 4) */ add %o3,4,%o3
+/* 0x0b08 ( 4 5) */ srax %o0,32,%o5
+/* 0x0b0c ( 4 5) */ ble,a,pt %icc,.L900000742 ! tprob=0.84
+/* 0x0b10 ( 4 7) */ ld [%o3],%o0
+ .L77000224: /* frequency 1.0 confidence 0.0 */
+/* 0x0b14 ( 0 2) */ ret ! Result =
+/* 0x0b18 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x0b1c 0 ( 0 0) */ .type mont_mulf_noconv,2
+/* 0x0b1c ( 0 0) */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.il b/security/nss/lib/freebl/mpi/montmulfv8.il
new file mode 100644
index 0000000000..4952d0fb82
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! Inline template: truncates x to a 64-bit integer and returns its upper 32-bit word as a double.
+! double upper32(double /*frs1*/);
+!
+ .inline upper32,8
+ std %o0,[%sp+0x48] ! spill the argument held in %o0/%o1 to the stack ...
+ ldd [%sp+0x48],%f10 ! ... and reload it as a double in %f10
+
+ fdtox %f10,%f10 ! double -> 64-bit integer, kept in the %f10/%f11 pair
+ fitod %f10,%f0 ! upper word (%f10) as a signed 32-bit integer -> double result in %f0
+ .end
+
+! Inline template: truncates x to a 64-bit integer and returns its lower 32 bits as a double.
+! double lower32(double /*frs1*/, double /* Zero */);
+! The second argument supplies the word that replaces the upper half; callers are expected to pass Zero.
+ .inline lower32,8
+ std %o0,[%sp+0x48] ! spill x from %o0/%o1 to the stack ...
+ ldd [%sp+0x48],%f10 ! ... and reload it as a double in %f10
+ std %o2,[%sp+0x48] ! same stack slot is reused for the second argument
+ ldd [%sp+0x48],%f12
+
+ fdtox %f10,%f10 ! x -> 64-bit integer in %f10/%f11
+ fmovs %f12,%f10 ! overwrite the upper word with the upper word of the second argument
+ fxtod %f10,%f0 ! 64-bit integer -> double result in %f0
+ .end
+
+! Inline template: returns x - m * trunc(x * (1/m)), with the truncation done via a 64-bit integer round trip.
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+ .inline mod,12
+ std %o0,[%sp+0x48] ! each argument is moved from integer registers to an FP register via the stack
+ ldd [%sp+0x48],%f2 ! %f2 = x
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4 ! %f4 = 1/m
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6 ! %f6 = m
+
+ fmuld %f2,%f4,%f4 ! x * (1/m)
+ fdtox %f4,%f4 ! drop the fractional part: double -> 64-bit integer ...
+ fxtod %f4,%f4 ! ... and back to double
+ fmuld %f4,%f6,%f4 ! multiply the integer quotient by m
+ fsubd %f2,%f4,%f0 ! result = x - quotient * m
+ .end
+
+
+! Inline template: converts four 32-bit source words to doubles, both whole and split into 16-bit halves.
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */,
+! float * /*source - should be unsigned int*
+! converted to float* */);
+! For k = 0..3: result32[k] = source[k]; result16[2k] = low 16 bits; result16[2k+1] = high 16 bits.
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22 ! value behind the third pointer (documented above as 0)
+
+ fmovd %f22,%f6 ! clear the %f6/%f7 pair ...
+ ld [%o5],%f7 ! ... then drop source[0] into its low word: a 64-bit integer
+ fmovd %f22,%f10
+ ld [%o5+4],%f11 ! source[1]
+ fmovd %f22,%f14
+ ld [%o5+8],%f15 ! source[2]
+ fmovd %f22,%f18
+ ld [%o5+12],%f19 ! source[3]
+ fxtod %f6,%f6 ! 64-bit integer -> double
+ std %f6,[%o4] ! result32[0]
+ fxtod %f10,%f10
+ std %f10,[%o4+8] ! result32[1]
+ fxtod %f14,%f14
+ std %f14,[%o4+16] ! result32[2]
+ fxtod %f18,%f18
+ std %f18,[%o4+24] ! result32[3]
+ fmuld %f2,%f6,%f8 ! scale each value by 1/(2^16)
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8 ! drop the fractional part via an integer round trip
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8] ! result16[1] = high half of source[0]
+ fxtod %f12,%f12
+ std %f12,[%o3+24] ! result16[3] = high half of source[1]
+ fxtod %f16,%f16
+ std %f16,[%o3+40] ! result16[5] = high half of source[2]
+ fxtod %f20,%f20
+ std %f20,[%o3+56] ! result16[7] = high half of source[3]
+ fmuld %f8,%f4,%f8 ! high half * 2^16
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8 ! value - high half * 2^16 = low half
+ std %f8,[%o3] ! result16[0]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16] ! result16[2]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32] ! result16[4]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48] ! result16[6]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.s b/security/nss/lib/freebl/mpi/montmulfv8.s
new file mode 100644
index 0000000000..ca738880fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.s
@@ -0,0 +1,1818 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+!
+! CONSTANT POOL
+! Each constant below is an 8-byte double emitted as two 32-bit words, high word first.
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464 ! 0x40F00000: high word of 65536.0 (2^16)
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032 ! 0x3EF00000: high word of 1.0/65536.0 (2^-16)
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0 ! all-zero bit pattern: 0.0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680 ! 0x41F00000: high word of 65536.0*65536.0 (2^32)
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816 ! 0x3DF00000: high word of 1.0/(65536.0*65536.0) (2^-32)
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-128,%sp ! new register window and a 128-byte stack frame
+! FILE montmulf.c
+
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
+/* 0x0004 115 */ ldd [%i1],%f0 ! d16[0]
+/* 0x0008 110 */ or %g0,%i1,%o0
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x000c 117 */ sub %i3,1,%g2 ! ilen-1, the loop bound
+/* 0x0010 */ cmp %g2,0
+/* 0x0014 114 */ or %g0,0,%o4 ! t1 = 0
+/* 0x0018 115 */ fdtox %f0,%f0
+/* 0x001c */ std %f0,[%sp+120] ! converted values go from FP to integer registers via the stack
+/* 0x0020 117 */ or %g0,0,%o7 ! i = 0
+/* 0x0024 110 */ or %g0,%i3,%o1
+/* 0x0028 */ sub %i3,2,%o2
+/* 0x002c 116 */ ldd [%o0+8],%f0 ! d16[1]
+/* 0x0030 110 */ sethi %hi(0xfc00),%o1
+/* 0x0034 */ add %o2,1,%g3
+/* 0x0038 */ add %o1,1023,%o1 ! 0xfc00 + 1023 = 0xffff mask
+/* 0x003c */ or %g0,%i0,%o5
+/* 0x0040 116 */ fdtox %f0,%f0
+/* 0x0044 */ std %f0,[%sp+112]
+/* 0x0048 */ ldx [%sp+112],%g1 ! b
+/* 0x004c 115 */ ldx [%sp+120],%g4 ! a
+/* 0x0050 117 */ ble,pt %icc,.L900000117 ! ilen-1 <= 0: no loop iterations, go straight to the final store
+/* 0x0054 */ sethi %hi(0xfc00),%g2
+/* 0x0058 110 */ or %g0,-1,%g2
+/* 0x005c 117 */ cmp %g3,3
+/* 0x0060 110 */ srl %g2,0,%o3 ! 0xffffffff mask
+/* 0x0064 117 */ bl,pn %icc,.L77000134 ! fewer than 3 iterations: use the simple loop
+/* 0x0068 */ or %g0,%o0,%g2
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+
+/* 0x006c 119 */ ldd [%o0+16],%f0
+
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+! 124 ! t+=(b>>16)+(t1>>32);
+! 125 ! i32[i]=t1&0xffffffff;
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x0070 128 */ add %o0,16,%g2
+/* 0x0074 123 */ and %g1,%o1,%o0
+/* 0x0078 */ sllx %o0,16,%g3
+/* 0x007c 120 */ and %g4,%o3,%o0
+/* 0x0080 117 */ add %o0,%g3,%o4
+/* 0x0084 119 */ fdtox %f0,%f0
+/* 0x0088 */ std %f0,[%sp+104]
+/* 0x008c 125 */ and %o4,%o3,%g5
+/* 0x0090 122 */ ldd [%g2+8],%f2
+/* 0x0094 128 */ add %o5,4,%o5
+/* 0x0098 124 */ srax %o4,32,%o4
+/* 0x009c */ stx %o4,[%sp+112]
+/* 0x00a0 122 */ fdtox %f2,%f0
+/* 0x00a4 */ std %f0,[%sp+96]
+/* 0x00a8 124 */ srax %g1,16,%o0
+/* 0x00ac */ ldx [%sp+112],%o7
+/* 0x00b0 121 */ srax %g4,32,%o4
+/* 0x00b4 124 */ add %o0,%o7,%g4
+/* 0x00b8 128 */ or %g0,1,%o7 ! first iteration is done above; the loop continues from i = 1
+/* 0x00bc 119 */ ldx [%sp+104],%g3
+/* 0x00c0 124 */ add %o4,%g4,%o4
+/* 0x00c4 122 */ ldx [%sp+96],%g1
+/* 0x00c8 125 */ st %g5,[%o5-4]
+/* 0x00cc 127 */ or %g0,%g3,%g4
+ .L900000112:
+/* 0x00d0 119 */ ldd [%g2+16],%f0 ! main loop body: one i32 word stored per pass
+/* 0x00d4 128 */ add %o7,1,%o7
+/* 0x00d8 */ add %o5,4,%o5
+/* 0x00dc */ cmp %o7,%o2
+/* 0x00e0 */ add %g2,16,%g2
+/* 0x00e4 119 */ fdtox %f0,%f0
+/* 0x00e8 */ std %f0,[%sp+104]
+/* 0x00ec 122 */ ldd [%g2+8],%f0
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ std %f0,[%sp+96]
+/* 0x00f8 123 */ and %g1,%o1,%g3
+/* 0x00fc */ sllx %g3,16,%g5
+/* 0x0100 120 */ and %g4,%o3,%g3
+/* 0x0104 117 */ add %g3,%g5,%g3
+/* 0x0108 124 */ srax %g1,16,%g1
+/* 0x010c 117 */ add %g3,%o4,%g3
+/* 0x0110 124 */ srax %g3,32,%o4
+/* 0x0114 */ stx %o4,[%sp+112]
+/* 0x0118 119 */ ldx [%sp+104],%g5
+/* 0x011c 121 */ srax %g4,32,%o4
+/* 0x0120 124 */ ldx [%sp+112],%g4
+/* 0x0124 */ add %g1,%g4,%g4
+/* 0x0128 122 */ ldx [%sp+96],%g1
+/* 0x012c 124 */ add %o4,%g4,%o4
+/* 0x0130 125 */ and %g3,%o3,%g3
+/* 0x0134 127 */ or %g0,%g5,%g4
+/* 0x0138 128 */ ble,pt %icc,.L900000112
+/* 0x013c */ st %g3,[%o5-4] ! delay slot: store the 32-bit result word
+ .L900000115:
+/* 0x0140 128 */ ba .L900000117
+/* 0x0144 */ sethi %hi(0xfc00),%g2
+ .L77000134:
+/* 0x0148 119 */ ldd [%g2+16],%f0 ! simple loop, taken when there are fewer than 3 iterations
+ .L900000116:
+/* 0x014c 120 */ and %g4,%o3,%o0
+/* 0x0150 123 */ and %g1,%o1,%g3
+/* 0x0154 119 */ fdtox %f0,%f0
+/* 0x0158 120 */ add %o4,%o0,%o0
+/* 0x015c 119 */ std %f0,[%sp+104]
+/* 0x0160 128 */ add %o7,1,%o7
+/* 0x0164 123 */ sllx %g3,16,%o4
+/* 0x0168 122 */ ldd [%g2+24],%f2
+/* 0x016c 128 */ add %g2,16,%g2
+/* 0x0170 123 */ add %o0,%o4,%o0
+/* 0x0174 128 */ cmp %o7,%o2
+/* 0x0178 125 */ and %o0,%o3,%g3
+/* 0x017c 122 */ fdtox %f2,%f0
+/* 0x0180 */ std %f0,[%sp+96]
+/* 0x0184 124 */ srax %o0,32,%o0
+/* 0x0188 */ stx %o0,[%sp+112]
+/* 0x018c 121 */ srax %g4,32,%o4
+/* 0x0190 122 */ ldx [%sp+96],%o0
+/* 0x0194 124 */ srax %g1,16,%g5
+/* 0x0198 */ ldx [%sp+112],%g4
+/* 0x019c 119 */ ldx [%sp+104],%g1
+/* 0x01a0 125 */ st %g3,[%o5]
+/* 0x01a4 124 */ add %g5,%g4,%g4
+/* 0x01a8 128 */ add %o5,4,%o5
+/* 0x01ac 124 */ add %o4,%g4,%o4
+/* 0x01b0 127 */ or %g0,%g1,%g4
+/* 0x01b4 128 */ or %g0,%o0,%g1
+/* 0x01b8 */ ble,a,pt %icc,.L900000116
+/* 0x01bc */ ldd [%g2+16],%f0 ! annulled delay slot: next load runs only when the branch is taken
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x01c0 133 */ sethi %hi(0xfc00),%g2
+ .L900000117:
+/* 0x01c4 133 */ or %g0,-1,%g3
+/* 0x01c8 */ add %g2,1023,%g2 ! rebuild the 0xffff mask
+/* 0x01cc */ srl %g3,0,%g3 ! rebuild the 0xffffffff mask
+/* 0x01d0 */ and %g1,%g2,%g2
+/* 0x01d4 */ and %g4,%g3,%g4
+/* 0x01d8 */ sllx %g2,16,%g2
+/* 0x01dc */ add %o4,%g4,%g4
+/* 0x01e0 */ add %g4,%g2,%g2
+/* 0x01e4 */ sll %o7,2,%g4 ! byte offset of i32[i]
+/* 0x01e8 */ and %g2,%g3,%g2
+/* 0x01ec */ st %g2,[%i0+%g4] ! final word: i32[i] = t1 & 0xffffffff
+/* 0x01f0 */ ret ! Result =
+/* 0x01f4 */ restore %g0,%g0,%g0
+/* 0x01f8 0 */ .type conv_d16_to_i32,2
+/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used as the int-to-double bias
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g2 ! keep the return address; the call below overwrites %o7
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 142 */ cmp %o2,0
+ .L900000210:
+/* 0x0008 */ call .+8 ! puts the current PC in %o7 for the GOT address computation
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o5 ! i = 0
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 */ or %g0,%o0,%g5
+/* 0x001c */ add %g4,%o7,%g1 ! %g1 = GOT base
+/* 0x0020 142 */ ble,pt %icc,.L77000140 ! len <= 0: nothing to convert
+/* 0x0024 */ or %g0,%g2,%o7 ! delay slot: put the return address back
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g2
+/* 0x002c 138 */ or %g0,%o1,%g4
+/* 0x0030 142 */ add %g2,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ sub %o2,1,%g3 ! len-1
+/* 0x0038 */ ld [%g1+%g2],%g2 ! address of the 2^52 constant, fetched through the GOT
+/* 0x003c */ cmp %o2,9
+/* 0x0040 */ bl,pn %icc,.L77000144 ! len < 9: use the one-element-per-pass loop
+/* 0x0044 */ ldd [%g2],%f8 ! %f8/%f9 = 2^52
+/* 0x0048 */ add %o1,16,%g4
+/* 0x004c */ sub %o2,5,%g1
+/* 0x0050 */ ld [%o1],%f7 ! each input word is loaded into the low half of an FP pair
+/* 0x0054 */ or %g0,4,%o5
+/* 0x0058 */ ld [%o1+4],%f5
+/* 0x005c */ ld [%o1+8],%f3
+/* 0x0060 */ fmovs %f8,%f6 ! high half = high word of 2^52, so the pair holds 2^52 + value
+/* 0x0064 */ ld [%o1+12],%f1
+ .L900000205:
+/* 0x0068 */ ld [%g4],%f11 ! unrolled loop: five elements per pass
+/* 0x006c */ add %o5,5,%o5
+/* 0x0070 */ add %g4,20,%g4
+/* 0x0074 */ fsubd %f6,%f8,%f6 ! (2^52 + value) - 2^52 = value as a double
+/* 0x0078 */ std %f6,[%g5]
+/* 0x007c */ cmp %o5,%g1
+/* 0x0080 */ add %g5,40,%g5
+/* 0x0084 */ fmovs %f8,%f4
+/* 0x0088 */ ld [%g4-16],%f7
+/* 0x008c */ fsubd %f4,%f8,%f12
+/* 0x0090 */ fmovs %f8,%f2
+/* 0x0094 */ std %f12,[%g5-32]
+/* 0x0098 */ ld [%g4-12],%f5
+/* 0x009c */ fsubd %f2,%f8,%f12
+/* 0x00a0 */ fmovs %f8,%f0
+/* 0x00a4 */ std %f12,[%g5-24]
+/* 0x00a8 */ ld [%g4-8],%f3
+/* 0x00ac */ fsubd %f0,%f8,%f12
+/* 0x00b0 */ fmovs %f8,%f10
+/* 0x00b4 */ std %f12,[%g5-16]
+/* 0x00b8 */ ld [%g4-4],%f1
+/* 0x00bc */ fsubd %f10,%f8,%f10
+/* 0x00c0 */ fmovs %f8,%f6
+/* 0x00c4 */ ble,pt %icc,.L900000205
+/* 0x00c8 */ std %f10,[%g5-8]
+ .L900000208:
+/* 0x00cc */ fmovs %f8,%f4 ! finish the four values already loaded when the unrolled loop exits
+/* 0x00d0 */ add %g5,32,%g5
+/* 0x00d4 */ cmp %o5,%g3
+/* 0x00d8 */ fmovs %f8,%f2
+/* 0x00dc */ fmovs %f8,%f0
+/* 0x00e0 */ fsubd %f6,%f8,%f6
+/* 0x00e4 */ std %f6,[%g5-32]
+/* 0x00e8 */ fsubd %f4,%f8,%f4
+/* 0x00ec */ std %f4,[%g5-24]
+/* 0x00f0 */ fsubd %f2,%f8,%f2
+/* 0x00f4 */ std %f2,[%g5-16]
+/* 0x00f8 */ fsubd %f0,%f8,%f0
+/* 0x00fc */ bg,pn %icc,.L77000140 ! i > len-1: all elements done
+/* 0x0100 */ std %f0,[%g5-8]
+ .L77000144:
+/* 0x0104 */ ld [%g4],%f1 ! simple loop: one element per pass
+ .L900000211:
+/* 0x0108 */ ldd [%g2],%f8
+/* 0x010c */ add %o5,1,%o5
+/* 0x0110 */ add %g4,4,%g4
+/* 0x0114 */ cmp %o5,%g3
+/* 0x0118 */ fmovs %f8,%f0
+/* 0x011c */ fsubd %f0,%f8,%f0
+/* 0x0120 */ std %f0,[%g5]
+/* 0x0124 */ add %g5,8,%g5
+/* 0x0128 */ ble,a,pt %icc,.L900000211
+/* 0x012c */ ld [%g4],%f1 ! annulled delay slot: next load runs only when the branch is taken
+ .L77000140:
+/* 0x0130 */ retl ! Result =
+/* 0x0134 */ nop
+/* 0x0138 0 */ .type conv_i32_to_d32,2
+/* 0x0138 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used as the int-to-double bias
+/* 0x0008 0 */ .align 4
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-104,%sp ! new register window and a 104-byte stack frame
+/* 0x0004 */ or %g0,%i2,%o0
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2
+ .L900000310:
+/* 0x000c */ call .+8 ! puts the current PC in %o7 for the GOT address computation
+/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ cmp %o0,0
+/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x001c 152 */ ble,pt %icc,.L77000150 ! len <= 0: nothing to convert
+/* 0x0020 */ add %g3,%o7,%o2 ! %o2 = GOT base
+/* 0x0024 */ sub %i2,1,%o5 ! len-1
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1
+/* 0x002c 152 */ sethi %hi(0xfc00),%o0
+/* 0x0030 */ ld [%o2+%o1],%o3 ! address of the 2^52 constant, fetched through the GOT
+/* 0x0034 */ add %o5,1,%g2
+/* 0x0038 */ or %g0,0,%g1 ! i = 0
+/* 0x003c */ cmp %g2,3
+/* 0x0040 */ or %g0,%i1,%o7
+/* 0x0044 */ add %o0,1023,%o4 ! 0xfc00 + 1023 = 0xffff mask
+/* 0x0048 */ or %g0,%i0,%g3
+/* 0x004c */ bl,pn %icc,.L77000154 ! len < 3: use the simple loop
+/* 0x0050 */ add %o7,4,%o0
+/* 0x0054 155 */ ldd [%o3],%f0 ! %f0/%f1 = 2^52
+/* 0x0058 156 */ or %g0,1,%g1
+/* 0x005c 154 */ ld [%o0-4],%o1 ! a = i32[0]
+/* 0x0060 0 */ or %g0,%o0,%o7
+/* 0x0064 155 */ and %o1,%o4,%o0 ! a & 0xffff
+ .L900000306:
+/* 0x0068 155 */ st %o0,[%sp+96] ! integer values reach the FP registers via the stack
+/* 0x006c 156 */ add %g1,1,%g1
+/* 0x0070 */ add %g3,16,%g3
+/* 0x0074 */ cmp %g1,%o5
+/* 0x0078 */ add %o7,4,%o7
+/* 0x007c 155 */ ld [%sp+96],%f3 ! low half of the pair = the integer value
+/* 0x0080 */ fmovs %f0,%f2 ! high half = high word of 2^52, so the pair holds 2^52 + value
+/* 0x0084 */ fsubd %f2,%f0,%f2 ! (2^52 + value) - 2^52 = value as a double
+/* 0x0088 156 */ srl %o1,16,%o0 ! a >> 16
+/* 0x008c 155 */ std %f2,[%g3-16]
+/* 0x0090 156 */ st %o0,[%sp+92]
+/* 0x0094 */ ld [%sp+92],%f3
+/* 0x0098 154 */ ld [%o7-4],%o1 ! next a
+/* 0x009c 156 */ fmovs %f0,%f2
+/* 0x00a0 */ fsubd %f2,%f0,%f2
+/* 0x00a4 155 */ and %o1,%o4,%o0
+/* 0x00a8 156 */ ble,pt %icc,.L900000306
+/* 0x00ac */ std %f2,[%g3-8]
+ .L900000309:
+/* 0x00b0 155 */ st %o0,[%sp+96] ! convert the last word, which was already loaded into %o1
+/* 0x00b4 */ fmovs %f0,%f2
+/* 0x00b8 156 */ add %g3,16,%g3
+/* 0x00bc */ srl %o1,16,%o0
+/* 0x00c0 155 */ ld [%sp+96],%f3
+/* 0x00c4 */ fsubd %f2,%f0,%f2
+/* 0x00c8 */ std %f2,[%g3-16]
+/* 0x00cc 156 */ st %o0,[%sp+92]
+/* 0x00d0 */ fmovs %f0,%f2
+/* 0x00d4 */ ld [%sp+92],%f3
+/* 0x00d8 */ fsubd %f2,%f0,%f0
+/* 0x00dc */ std %f0,[%g3-8]
+/* 0x00e0 */ ret ! Result =
+/* 0x00e4 */ restore %g0,%g0,%g0
+ .L77000154:
+/* 0x00e8 154 */ ld [%o7],%o0 ! simple loop: one input word per pass
+ .L900000311:
+/* 0x00ec 155 */ and %o0,%o4,%o1
+/* 0x00f0 */ st %o1,[%sp+96]
+/* 0x00f4 156 */ add %g1,1,%g1
+/* 0x00f8 155 */ ldd [%o3],%f0
+/* 0x00fc 156 */ srl %o0,16,%o0
+/* 0x0100 */ add %o7,4,%o7
+/* 0x0104 */ cmp %g1,%o5
+/* 0x0108 155 */ fmovs %f0,%f2
+/* 0x010c */ ld [%sp+96],%f3
+/* 0x0110 */ fsubd %f2,%f0,%f2
+/* 0x0114 */ std %f2,[%g3] ! d16[2*i]
+/* 0x0118 156 */ st %o0,[%sp+92]
+/* 0x011c */ fmovs %f0,%f2
+/* 0x0120 */ ld [%sp+92],%f3
+/* 0x0124 */ fsubd %f2,%f0,%f0
+/* 0x0128 */ std %f0,[%g3+8] ! d16[2*i+1]
+/* 0x012c */ add %g3,16,%g3
+/* 0x0130 */ ble,a,pt %icc,.L900000311
+/* 0x0134 */ ld [%o7],%o0 ! annulled delay slot: next load runs only when the branch is taken
+ .L77000150:
+/* 0x0138 */ ret ! Result =
+/* 0x013c */ restore %g0,%g0,%g0
+/* 0x0140 0 */ .type conv_i32_to_d16,2
+/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+! /* Constant used by the scalar tail loops of conv_i32_to_d32_and_d16. */
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0 /* 0x43300000,0: bit pattern of the double 2^52 */
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+! /* For each 32-bit word a=i32[i]: d32[i]=a, d16[2i]=a&0xffff, d16[2i+1]=a>>16, all as doubles (C lines 161-180). */
+! SUBROUTINE conv_i32_to_d32_and_d16
+! /* Incoming arguments as used below: %i0=d32, %i1=d16, %i2=i32, %i3=len. */
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-120,%sp
+ .L900000415: /* PIC setup: call .+8 leaves its own address in %o7; the sethi/add pair plus %o7 gives the GOT base in %o4 */
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+
+/* 0x000c 169 */ sub %i3,3,%g2
+/* 0x0010 */ cmp %g2,0
+/* 0x0014 163 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0018 172 */ sethi %hi(Zero),%g2
+/* 0x001c 163 */ add %g4,%o7,%o4
+/* 0x0020 172 */ add %g2,%lo(Zero),%g2
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 */ ld [%o4+%g2],%o1 /* %o1 = address of Zero, loaded from the GOT */
+/* 0x002c */ sethi %hi(TwoTo16),%g4
+/* 0x0030 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0034 */ ld [%o4+%g2],%o3 /* %o3 = address of TwoToMinus16 */
+/* 0x0038 164 */ or %g0,0,%g5
+/* 0x003c 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0040 */ ld [%o4+%g3],%o2 /* %o2 = address of TwoTo16 */
+/* 0x0044 163 */ or %g0,%i0,%i4
+/* 0x0048 169 */ or %g0,%i2,%o7
+/* 0x004c */ ble,pt %icc,.L900000418
+/* 0x0050 */ cmp %g5,%i3
+/* 0x0054 172 */ stx %o7,[%sp+104]
+/* 0x0058 169 */ sub %i3,4,%o5
+/* 0x005c */ or %g0,0,%g4
+/* 0x0060 */ or %g0,0,%g1
+ .L900000417: /* four-words-per-iteration loop: inlined body of i16_to_d16_and_d32x4; %g5 is i, advanced by 4 */
+/* 0x0064 */ ldd [%o1],%f2
+/* 0x0068 172 */ add %i4,%g4,%g2
+/* 0x006c */ add %i1,%g1,%g3
+/* 0x0070 */ ldd [%o3],%f0
+/* 0x0074 */ add %g5,4,%g5
+/* 0x0078 */ fmovd %f2,%f14
+/* 0x007c */ ld [%o7],%f15
+/* 0x0080 */ cmp %g5,%o5
+/* 0x0084 */ fmovd %f2,%f10
+/* 0x0088 */ ld [%o7+4],%f11
+/* 0x008c */ add %o7,16,%o7
+/* 0x0090 */ ldx [%sp+104],%o0
+/* 0x0094 */ fmovd %f2,%f6
+/* 0x0098 */ stx %o7,[%sp+112]
+/* 0x009c */ fxtod %f14,%f14
+/* 0x00a0 */ ld [%o0+8],%f7
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ ld [%o0+12],%f3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ ldd [%o2],%f16
+/* 0x00b4 */ fmuld %f0,%f14,%f12
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f10,%f8
+/* 0x00c0 */ std %f14,[%i4+%g4]
+/* 0x00c4 */ ldx [%sp+112],%o7
+/* 0x00c8 */ add %g4,32,%g4
+/* 0x00cc */ fmuld %f0,%f6,%f4
+/* 0x00d0 */ fdtox %f12,%f12
+/* 0x00d4 */ std %f10,[%g2+8]
+/* 0x00d8 */ fmuld %f0,%f2,%f0
+/* 0x00dc */ fdtox %f8,%f8
+/* 0x00e0 */ std %f6,[%g2+16]
+/* 0x00e4 */ std %f2,[%g2+24]
+/* 0x00e8 */ fdtox %f4,%f4
+/* 0x00ec */ fdtox %f0,%f0
+/* 0x00f0 */ fxtod %f12,%f12
+/* 0x00f4 */ std %f12,[%g3+8]
+/* 0x00f8 */ fxtod %f8,%f8
+/* 0x00fc */ std %f8,[%g3+24]
+/* 0x0100 */ fxtod %f4,%f4
+/* 0x0104 */ std %f4,[%g3+40]
+/* 0x0108 */ fxtod %f0,%f0
+/* 0x010c */ std %f0,[%g3+56]
+/* 0x0110 */ fmuld %f12,%f16,%f12
+/* 0x0114 */ fmuld %f8,%f16,%f8
+/* 0x0118 */ fmuld %f4,%f16,%f4
+/* 0x011c */ fsubd %f14,%f12,%f12
+/* 0x0120 */ std %f12,[%i1+%g1]
+/* 0x0124 */ fmuld %f0,%f16,%f0
+/* 0x0128 */ fsubd %f10,%f8,%f8
+/* 0x012c */ std %f8,[%g3+16]
+/* 0x0130 */ add %g1,64,%g1
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g3+32]
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g3+48]
+/* 0x0144 */ ble,a,pt %icc,.L900000417
+/* 0x0148 */ stx %o7,[%sp+104]
+ .L77000159: /* start of the scalar tail for the remaining words (C lines 175-180) */
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %g5,%i3
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1
+/* 0x0160 175 */ sethi %hi(0xfc00),%o0
+/* 0x0164 */ ld [%o4+%o1],%o2 /* %o2 = address of the 2^52 constant, loaded from the GOT */
+/* 0x0168 */ sll %g5,2,%o3
+/* 0x016c */ sub %i3,%g5,%g3
+/* 0x0170 */ sll %g5,3,%g2
+/* 0x0174 */ add %o0,1023,%o4 /* %o4 = 0xfc00 + 1023 = 0xffff, the mask of C line 179 */
+/* 0x0178 178 */ ldd [%o2],%f0
+/* 0x017c */ add %i2,%o3,%o0
+/* 0x0180 175 */ cmp %g3,3
+/* 0x0184 */ add %i4,%g2,%o3
+/* 0x0188 */ sub %i3,1,%o1
+/* 0x018c */ sll %g5,4,%g4
+/* 0x0190 */ bl,pn %icc,.L77000161
+/* 0x0194 */ add %i1,%g4,%o5
+/* 0x0198 178 */ ld [%o0],%f3
+/* 0x019c 180 */ add %o3,8,%o3
+/* 0x01a0 177 */ ld [%o0],%o7
+/* 0x01a4 180 */ add %o5,16,%o5
+/* 0x01a8 */ add %g5,1,%g5
+/* 0x01ac 178 */ fmovs %f0,%f2
+/* 0x01b0 180 */ add %o0,4,%o0
+/* 0x01b4 179 */ and %o7,%o4,%g1
+/* 0x01b8 178 */ fsubd %f2,%f0,%f2 /* %f2:%f3 held {high word of 2^52, integer}, i.e. 2^52+integer; subtracting 2^52 leaves the integer as a double */
+/* 0x01bc */ std %f2,[%o3-8]
+/* 0x01c0 180 */ srl %o7,16,%o7
+/* 0x01c4 179 */ st %g1,[%sp+96]
+/* 0x01c8 */ fmovs %f0,%f2
+/* 0x01cc */ ld [%sp+96],%f3
+/* 0x01d0 */ fsubd %f2,%f0,%f2
+/* 0x01d4 */ std %f2,[%o5-16]
+/* 0x01d8 180 */ st %o7,[%sp+92]
+/* 0x01dc */ fmovs %f0,%f2
+/* 0x01e0 */ ld [%sp+92],%f3
+/* 0x01e4 */ fsubd %f2,%f0,%f2
+/* 0x01e8 */ std %f2,[%o5-8]
+ .L900000411: /* tail loop handling two words per iteration (%g5 advances by 2) */
+/* 0x01ec 178 */ ld [%o0],%f3
+/* 0x01f0 180 */ add %g5,2,%g5
+/* 0x01f4 */ add %o5,32,%o5
+/* 0x01f8 177 */ ld [%o0],%o7
+/* 0x01fc 180 */ cmp %g5,%o1
+/* 0x0200 */ add %o3,16,%o3
+/* 0x0204 178 */ fmovs %f0,%f2
+/* 0x0208 */ fsubd %f2,%f0,%f2
+/* 0x020c */ std %f2,[%o3-16]
+/* 0x0210 179 */ and %o7,%o4,%g1
+/* 0x0214 */ st %g1,[%sp+96]
+/* 0x0218 */ ld [%sp+96],%f3
+/* 0x021c */ fmovs %f0,%f2
+/* 0x0220 */ fsubd %f2,%f0,%f2
+/* 0x0224 180 */ srl %o7,16,%o7
+/* 0x0228 179 */ std %f2,[%o5-32]
+/* 0x022c 180 */ st %o7,[%sp+92]
+/* 0x0230 */ ld [%sp+92],%f3
+/* 0x0234 */ fmovs %f0,%f2
+/* 0x0238 */ fsubd %f2,%f0,%f2
+/* 0x023c */ std %f2,[%o5-24]
+/* 0x0240 */ add %o0,4,%o0
+/* 0x0244 178 */ ld [%o0],%f3
+/* 0x0248 177 */ ld [%o0],%o7
+/* 0x024c 178 */ fmovs %f0,%f2
+/* 0x0250 */ fsubd %f2,%f0,%f2
+/* 0x0254 */ std %f2,[%o3-8]
+/* 0x0258 179 */ and %o7,%o4,%g1
+/* 0x025c */ st %g1,[%sp+96]
+/* 0x0260 */ ld [%sp+96],%f3
+/* 0x0264 */ fmovs %f0,%f2
+/* 0x0268 */ fsubd %f2,%f0,%f2
+/* 0x026c 180 */ srl %o7,16,%o7
+/* 0x0270 179 */ std %f2,[%o5-16]
+/* 0x0274 180 */ st %o7,[%sp+92]
+/* 0x0278 */ ld [%sp+92],%f3
+/* 0x027c */ fmovs %f0,%f2
+/* 0x0280 */ fsubd %f2,%f0,%f2
+/* 0x0284 */ std %f2,[%o5-8]
+/* 0x0288 */ bl,pt %icc,.L900000411
+/* 0x028c */ add %o0,4,%o0
+ .L900000414:
+/* 0x0290 180 */ cmp %g5,%i3
+/* 0x0294 */ bge,pn %icc,.L77000164
+/* 0x0298 */ nop
+ .L77000161:
+/* 0x029c 178 */ ld [%o0],%f3
+ .L900000416: /* tail loop handling one word per iteration until %g5 reaches len */
+/* 0x02a0 178 */ ldd [%o2],%f0
+/* 0x02a4 180 */ add %g5,1,%g5
+/* 0x02a8 177 */ ld [%o0],%o1
+/* 0x02ac 180 */ add %o0,4,%o0
+/* 0x02b0 */ cmp %g5,%i3
+/* 0x02b4 178 */ fmovs %f0,%f2
+/* 0x02b8 179 */ and %o1,%o4,%o7
+/* 0x02bc 178 */ fsubd %f2,%f0,%f2
+/* 0x02c0 */ std %f2,[%o3]
+/* 0x02c4 180 */ srl %o1,16,%o1
+/* 0x02c8 179 */ st %o7,[%sp+96]
+/* 0x02cc 180 */ add %o3,8,%o3
+/* 0x02d0 179 */ fmovs %f0,%f2
+/* 0x02d4 */ ld [%sp+96],%f3
+/* 0x02d8 */ fsubd %f2,%f0,%f2
+/* 0x02dc */ std %f2,[%o5]
+/* 0x02e0 180 */ st %o1,[%sp+92]
+/* 0x02e4 */ fmovs %f0,%f2
+/* 0x02e8 */ ld [%sp+92],%f3
+/* 0x02ec */ fsubd %f2,%f0,%f0
+/* 0x02f0 */ std %f0,[%o5+8]
+/* 0x02f4 */ add %o5,16,%o5
+/* 0x02f8 */ bl,a,pt %icc,.L900000416
+/* 0x02fc */ ld [%o0],%f3
+ .L77000164:
+/* 0x0300 */ ret ! Result =
+/* 0x0304 */ restore %g0,%g0,%g0
+/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+! /* Subtracts nint[] from i32[] in place over len words when i32[len] is nonzero or i32 >= nint as a len-word number (C lines 185-208). */
+! SUBROUTINE adjust_montf_result
+! /* Leaf routine: no save/restore, returns with retl. Arguments as used below: %o0=i32, %o1=nint, %o2=len. */
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ or %g0,%o2,%g5 /* %g5 = len, kept for the subtract loop */
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
+/* 0x0004 190 */ or %g0,-1,%g4 /* %g4 = i, preset to -1 */
+/* 0x0008 */ sll %o2,2,%g1
+/* 0x000c */ ld [%o0+%g1],%g1
+/* 0x0010 */ cmp %g1,0
+/* 0x0014 */ bleu,pn %icc,.L77000175
+/* 0x0018 */ or %g0,%o1,%o3
+/* 0x001c */ ba .L900000511
+/* 0x0020 */ cmp %g4,0
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+/* 0x0024 193 */ sub %o2,1,%g4
+/* 0x0028 */ sll %g4,2,%g1
+/* 0x002c */ cmp %g4,0
+/* 0x0030 */ bl,pt %icc,.L900000511
+/* 0x0034 */ cmp %g4,0
+/* 0x0038 */ add %o1,%g1,%g2
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+/* 0x003c 195 */ ld [%g2],%o5
+/* 0x0040 193 */ add %o0,%g1,%g3
+ .L900000510: /* scan downward from i=len-1 until i32[i] differs from nint[i] or i drops below 0 */
+/* 0x0044 195 */ ld [%g3],%o2
+/* 0x0048 */ sub %g4,1,%g1
+/* 0x004c */ sub %g2,4,%g2
+/* 0x0050 */ sub %g3,4,%g3
+/* 0x0054 */ cmp %o2,%o5
+/* 0x0058 */ bne,pn %icc,.L77000182
+/* 0x005c */ nop
+/* 0x0060 0 */ or %g0,%g1,%g4
+/* 0x0064 195 */ cmp %g1,0
+/* 0x0068 */ bge,a,pt %icc,.L900000510
+/* 0x006c */ ld [%g2],%o5
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0070 198 */ cmp %g4,0
+ .L900000511:
+/* 0x0074 198 */ bl,pn %icc,.L77000198
+/* 0x0078 */ sll %g4,2,%g2
+/* 0x007c */ ld [%o1+%g2],%g1
+/* 0x0080 */ ld [%o0+%g2],%g2
+/* 0x0084 */ cmp %g2,%g1
+/* 0x0088 */ bleu,pt %icc,.L77000191
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+/* 0x0090 201 */ cmp %g5,0
+/* 0x0094 */ ble,pt %icc,.L77000191
+/* 0x0098 */ nop
+/* 0x009c */ or %g0,%g5,%g1
+/* 0x00a0 198 */ or %g0,-1,%g2
+/* 0x00a4 */ srl %g2,0,%g3 /* %g3 = 0xffffffff, the mask of C line 204 */
+/* 0x00a8 */ sub %g5,1,%g4
+/* 0x00ac 200 */ or %g0,0,%g5
+/* 0x00b0 201 */ or %g0,0,%o5
+/* 0x00b4 198 */ or %g0,%o0,%o4
+/* 0x00b8 */ cmp %g1,3
+/* 0x00bc 201 */ bl,pn %icc,.L77000199
+/* 0x00c0 */ add %o0,8,%g1
+/* 0x00c4 */ add %o1,4,%g2
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00c8 203 */ ld [%o0],%o2
+/* 0x00cc */ ld [%o1],%o1
+/* 0x00d0 0 */ or %g0,%g1,%o4
+/* 0x00d4 */ or %g0,%g2,%o3
+/* 0x00d8 203 */ ld [%o0+4],%g1
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00dc 205 */ or %g0,2,%o5
+/* 0x00e0 201 */ sub %o2,%o1,%o2
+/* 0x00e4 */ or %g0,%o2,%g5
+/* 0x00e8 204 */ and %o2,%g3,%o2
+/* 0x00ec */ st %o2,[%o0]
+/* 0x00f0 205 */ srax %g5,32,%g5
+ .L900000505: /* subtract loop for len >= 3; %g5 carries acc>>32 from one word to the next */
+/* 0x00f4 203 */ ld [%o3],%o2
+/* 0x00f8 205 */ add %o5,1,%o5
+/* 0x00fc */ add %o3,4,%o3
+/* 0x0100 */ cmp %o5,%g4
+/* 0x0104 */ add %o4,4,%o4
+/* 0x0108 201 */ sub %g1,%o2,%g1
+/* 0x010c */ add %g1,%g5,%g5
+/* 0x0110 204 */ and %g5,%g3,%o2
+/* 0x0114 203 */ ld [%o4-4],%g1
+/* 0x0118 204 */ st %o2,[%o4-8]
+/* 0x011c 205 */ ble,pt %icc,.L900000505
+/* 0x0120 */ srax %g5,32,%g5
+ .L900000508:
+/* 0x0124 203 */ ld [%o3],%g2
+/* 0x0128 201 */ sub %g1,%g2,%g1
+/* 0x012c */ add %g1,%g5,%g1
+/* 0x0130 204 */ and %g1,%g3,%g2
+/* 0x0134 */ retl ! Result =
+/* 0x0138 */ st %g2,[%o4-4]
+ .L77000199:
+/* 0x013c 203 */ ld [%o4],%g1
+ .L900000509: /* subtract loop taken when len < 3 */
+/* 0x0140 203 */ ld [%o3],%g2
+/* 0x0144 */ add %g5,%g1,%g1
+/* 0x0148 205 */ add %o5,1,%o5
+/* 0x014c */ add %o3,4,%o3
+/* 0x0150 */ cmp %o5,%g4
+/* 0x0154 203 */ sub %g1,%g2,%g1
+/* 0x0158 204 */ and %g1,%g3,%g2
+/* 0x015c */ st %g2,[%o4]
+/* 0x0160 205 */ add %o4,4,%o4
+/* 0x0164 */ srax %g1,32,%g5
+/* 0x0168 */ ble,a,pt %icc,.L900000509
+/* 0x016c */ ld [%o4],%g1
+ .L77000191:
+/* 0x0170 */ retl ! Result =
+/* 0x0174 */ nop
+/* 0x0178 0 */ .type adjust_montf_result,2
+/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+/* 000000 */ .skip 16
+! /* See the interleaved C listing (lines 213-317) for the algorithm and the required array sizes. */
+! SUBROUTINE mont_mulf_noconv
+! /* nlen == 16 branches to .L77000279 and runs the hand-modified loop .L99999999; other sizes use the generic loop .L900000654. */
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-144,%sp
+ .L900000646: /* PIC setup: call .+8 leaves its own address in %o7; the sethi/add pair plus %o7 gives the GOT base in %o3 */
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x000c 232 */ ld [%fp+92],%o1 /* %o1 = nlen, passed on the stack (compared with 16 below) */
+/* 0x0010 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ ldd [%fp+96],%f2 /* stack-passed double, presumably dn0: it is the multiplier at C line 239 after the copy to %f16 */
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 223 */ st %i0,[%fp+68] /* spill the result pointer; reloaded at .L900000653 */
+/* 0x0024 */ add %g5,%o7,%o3
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x002c 232 */ ld [%o3+%g2],%l0 /* %l0 = address of Zero, loaded from the GOT */
+/* 0x0030 239 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 223 */ or %g0,%i2,%o2
+/* 0x0038 */ fmovd %f2,%f16
+/* 0x003c */ st %i5,[%fp+88] /* spill the nint pointer; reloaded after .L900000651 */
+/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0044 223 */ or %g0,%i1,%i2
+/* 0x0048 232 */ ldd [%l0],%f0
+/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0050 223 */ or %g0,%i3,%o0
+/* 0x0054 232 */ sll %o1,4,%g4
+/* 0x0058 239 */ ld [%o3+%g2],%g5 /* %g5 = address of TwoToMinus16 */
+/* 0x005c 223 */ or %g0,%i3,%i1
+/* 0x0060 239 */ ld [%o3+%g3],%g1 /* %g1 = address of TwoTo16 */
+/* 0x0064 232 */ or %g0,%o1,%i0
+/* 0x0068 */ or %g0,%o2,%i3
+/* 0x006c 234 */ cmp %o1,16
+/* 0x0070 */ be,pn %icc,.L77000279
+/* 0x0074 */ std %f0,[%o2+%g4]
+/* 0x0078 236 */ sll %o1,2,%g2
+/* 0x007c */ or %g0,%o0,%o3
+/* 0x0080 232 */ sll %o1,1,%o1
+/* 0x0084 236 */ add %g2,2,%o2
+/* 0x0088 */ cmp %o2,0
+/* 0x008c */ ble,a,pt %icc,.L900000660
+/* 0x0090 */ ldd [%i2],%f0
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
+/* 0x0094 281 */ add %g2,2,%o0
+/* 0x0098 236 */ add %g2,1,%o2
+/* 0x009c 281 */ cmp %o0,3
+/* 0x00a0 */ bl,pn %icc,.L77000280
+/* 0x00a4 */ or %g0,1,%o0
+/* 0x00a8 */ add %o3,8,%o3
+/* 0x00ac */ or %g0,1,%o4
+/* 0x00b0 */ std %f0,[%o3-8]
+ .L900000630: /* zero-fill of dt[] (C line 236), two stores per iteration */
+/* 0x00b4 */ std %f0,[%o3]
+/* 0x00b8 */ add %o4,2,%o4
+/* 0x00bc */ add %o3,16,%o3
+/* 0x00c0 */ cmp %o4,%g2
+/* 0x00c4 */ ble,pt %icc,.L900000630
+/* 0x00c8 */ std %f0,[%o3-8]
+ .L900000633:
+/* 0x00cc */ cmp %o4,%o2
+/* 0x00d0 */ bg,pn %icc,.L77000285
+/* 0x00d4 */ add %o4,1,%o0
+ .L77000280:
+/* 0x00d8 */ std %f0,[%o3]
+ .L900000659:
+/* 0x00dc */ ldd [%l0],%f0
+/* 0x00e0 */ cmp %o0,%o2
+/* 0x00e4 */ add %o3,8,%o3
+/* 0x00e8 */ add %o0,1,%o0
+/* 0x00ec */ ble,a,pt %icc,.L900000659
+/* 0x00f0 */ std %f0,[%o3]
+ .L77000285:
+/* 0x00f4 238 */ ldd [%i2],%f0
+ .L900000660:
+/* 0x00f8 238 */ ldd [%i3],%f2
+/* 0x00fc */ add %o1,1,%o2
+/* 0x0100 242 */ cmp %o1,0
+/* 0x0104 */ sll %o2,1,%o0
+/* 0x0108 */ sub %o1,1,%o1
+/* 0x010c 238 */ fmuld %f0,%f2,%f0
+/* 0x0110 */ std %f0,[%i1]
+/* 0x0114 0 */ or %g0,0,%l1
+/* 0x0118 */ ldd [%l0],%f6
+/* 0x011c */ or %g0,0,%g4
+/* 0x0120 */ or %g0,%o2,%i5
+/* 0x0124 */ ldd [%g5],%f2
+/* 0x0128 */ or %g0,%o1,%g3
+/* 0x012c */ or %g0,%o0,%o3
+/* 0x0130 */ fdtox %f0,%f0
+/* 0x0134 */ ldd [%g1],%f4
+/* 0x0138 246 */ add %i3,8,%o4
+/* 0x013c */ or %g0,0,%l2
+/* 0x0140 */ or %g0,%i1,%o5
+/* 0x0144 */ sub %i0,1,%o7
+/* 0x0148 */ fmovs %f6,%f0
+/* 0x014c */ fxtod %f0,%f0
+/* 0x0150 239 */ fmuld %f0,%f16,%f0
+/* 0x0154 */ fmuld %f0,%f2,%f2
+/* 0x0158 */ fdtox %f2,%f2
+/* 0x015c */ fxtod %f2,%f2
+/* 0x0160 */ fmuld %f2,%f4,%f2
+/* 0x0164 */ fsubd %f0,%f2,%f22
+/* 0x0168 242 */ ble,pt %icc,.L900000653
+/* 0x016c */ sll %i0,4,%g2
+/* 0x0170 246 */ ldd [%i4],%f0
+ .L900000654: /* generic outer j loop (C lines 242-257); %g4 is j, %l1 is jj */
+/* 0x0174 246 */ fmuld %f0,%f22,%f8
+/* 0x0178 */ ldd [%i2],%f0
+/* 0x017c 250 */ cmp %i0,1
+/* 0x0180 246 */ ldd [%o4+%l2],%f6
+/* 0x0184 */ add %i2,8,%o0
+/* 0x0188 250 */ or %g0,1,%o1
+/* 0x018c 246 */ ldd [%o5],%f2
+/* 0x0190 */ add %o5,16,%l3
+/* 0x0194 */ fmuld %f0,%f6,%f6
+/* 0x0198 */ ldd [%g5],%f4
+/* 0x019c */ faddd %f2,%f8,%f2
+/* 0x01a0 */ ldd [%o5+8],%f0
+/* 0x01a4 244 */ ldd [%i3+%l2],%f20
+/* 0x01a8 246 */ faddd %f0,%f6,%f0
+/* 0x01ac */ fmuld %f2,%f4,%f2
+/* 0x01b0 */ faddd %f0,%f2,%f18
+/* 0x01b4 247 */ std %f18,[%o5+8]
+/* 0x01b8 250 */ ble,pt %icc,.L900000658
+/* 0x01bc */ srl %g4,31,%g2
+/* 0x01c0 */ cmp %o7,7
+/* 0x01c4 246 */ add %i4,8,%g2
+/* 0x01c8 250 */ bl,pn %icc,.L77000284
+/* 0x01cc */ add %g2,24,%o2
+/* 0x01d0 252 */ ldd [%o0+24],%f12
+/* 0x01d4 */ add %o5,48,%l3
+/* 0x01d8 */ ldd [%o0],%f2
+/* 0x01dc 0 */ or %g0,%o2,%g2
+/* 0x01e0 250 */ sub %o7,2,%o2
+/* 0x01e4 252 */ ldd [%g2-24],%f0
+/* 0x01e8 */ or %g0,5,%o1
+/* 0x01ec */ ldd [%o0+8],%f6
+/* 0x01f0 */ fmuld %f2,%f20,%f2
+/* 0x01f4 */ ldd [%o0+16],%f14
+/* 0x01f8 */ fmuld %f0,%f22,%f4
+/* 0x01fc */ add %o0,32,%o0
+/* 0x0200 */ ldd [%g2-16],%f8
+/* 0x0204 */ fmuld %f6,%f20,%f10
+/* 0x0208 */ ldd [%o5+16],%f0
+/* 0x020c */ ldd [%g2-8],%f6
+/* 0x0210 */ faddd %f2,%f4,%f4
+/* 0x0214 */ ldd [%o5+32],%f2
+ .L900000642: /* inner i loop of C line 252, three elements per iteration (%o1 advances by 3) */
+/* 0x0218 252 */ ldd [%g2],%f24
+/* 0x021c */ add %o1,3,%o1
+/* 0x0220 */ add %g2,24,%g2
+/* 0x0224 */ fmuld %f8,%f22,%f8
+/* 0x0228 */ ldd [%l3],%f28
+/* 0x022c */ cmp %o1,%o2
+/* 0x0230 */ add %o0,24,%o0
+/* 0x0234 */ ldd [%o0-24],%f26
+/* 0x0238 */ faddd %f0,%f4,%f0
+/* 0x023c */ add %l3,48,%l3
+/* 0x0240 */ faddd %f10,%f8,%f10
+/* 0x0244 */ fmuld %f14,%f20,%f4
+/* 0x0248 */ std %f0,[%l3-80]
+/* 0x024c */ ldd [%g2-16],%f8
+/* 0x0250 */ fmuld %f6,%f22,%f6
+/* 0x0254 */ ldd [%l3-32],%f0
+/* 0x0258 */ ldd [%o0-16],%f14
+/* 0x025c */ faddd %f2,%f10,%f2
+/* 0x0260 */ faddd %f4,%f6,%f10
+/* 0x0264 */ fmuld %f12,%f20,%f4
+/* 0x0268 */ std %f2,[%l3-64]
+/* 0x026c */ ldd [%g2-8],%f6
+/* 0x0270 */ fmuld %f24,%f22,%f24
+/* 0x0274 */ ldd [%l3-16],%f2
+/* 0x0278 */ ldd [%o0-8],%f12
+/* 0x027c */ faddd %f28,%f10,%f10
+/* 0x0280 */ std %f10,[%l3-48]
+/* 0x0284 */ fmuld %f26,%f20,%f10
+/* 0x0288 */ ble,pt %icc,.L900000642
+/* 0x028c */ faddd %f4,%f24,%f4
+ .L900000645:
+/* 0x0290 252 */ fmuld %f8,%f22,%f28
+/* 0x0294 */ ldd [%g2],%f24
+/* 0x0298 */ faddd %f0,%f4,%f26
+/* 0x029c */ fmuld %f12,%f20,%f8
+/* 0x02a0 */ add %l3,32,%l3
+/* 0x02a4 */ cmp %o1,%o7
+/* 0x02a8 */ fmuld %f14,%f20,%f14
+/* 0x02ac */ ldd [%l3-32],%f4
+/* 0x02b0 */ add %g2,8,%g2
+/* 0x02b4 */ faddd %f10,%f28,%f12
+/* 0x02b8 */ fmuld %f6,%f22,%f6
+/* 0x02bc */ ldd [%l3-16],%f0
+/* 0x02c0 */ fmuld %f24,%f22,%f10
+/* 0x02c4 */ std %f26,[%l3-64]
+/* 0x02c8 */ faddd %f2,%f12,%f2
+/* 0x02cc */ std %f2,[%l3-48]
+/* 0x02d0 */ faddd %f14,%f6,%f6
+/* 0x02d4 */ faddd %f8,%f10,%f2
+/* 0x02d8 */ faddd %f4,%f6,%f4
+/* 0x02dc */ std %f4,[%l3-32]
+/* 0x02e0 */ faddd %f0,%f2,%f0
+/* 0x02e4 */ bg,pn %icc,.L77000213
+/* 0x02e8 */ std %f0,[%l3-16]
+ .L77000284:
+/* 0x02ec 252 */ ldd [%o0],%f0
+ .L900000657: /* inner i loop, one element per iteration */
+/* 0x02f0 252 */ ldd [%g2],%f4
+/* 0x02f4 */ fmuld %f0,%f20,%f2
+/* 0x02f8 */ add %o1,1,%o1
+/* 0x02fc */ ldd [%l3],%f0
+/* 0x0300 */ add %o0,8,%o0
+/* 0x0304 */ add %g2,8,%g2
+/* 0x0308 */ fmuld %f4,%f22,%f4
+/* 0x030c */ cmp %o1,%o7
+/* 0x0310 */ faddd %f2,%f4,%f2
+/* 0x0314 */ faddd %f0,%f2,%f0
+/* 0x0318 */ std %f0,[%l3]
+/* 0x031c */ add %l3,16,%l3
+/* 0x0320 */ ble,a,pt %icc,.L900000657
+/* 0x0324 */ ldd [%o0],%f0
+ .L77000213:
+/* 0x0328 */ srl %g4,31,%g2
+ .L900000658: /* jj == 30 test of C line 254; the code down to .L77000215 is the inlined cleanup step */
+/* 0x032c 254 */ cmp %l1,30
+/* 0x0330 */ bne,a,pt %icc,.L900000656
+/* 0x0334 */ fdtox %f18,%f0
+/* 0x0338 */ add %g4,%g2,%g2
+/* 0x033c */ sra %g2,1,%o0
+/* 0x0340 281 */ ldd [%l0],%f0
+/* 0x0344 */ sll %i5,1,%o2
+/* 0x0348 */ add %o0,1,%g2
+/* 0x034c */ sll %g2,1,%o0
+/* 0x0350 254 */ sub %o2,1,%o2
+/* 0x0354 281 */ fmovd %f0,%f2
+/* 0x0358 */ sll %g2,4,%o1
+/* 0x035c */ cmp %o0,%o3
+/* 0x0360 */ bge,pt %icc,.L77000215
+/* 0x0364 */ or %g0,0,%l1
+/* 0x0368 254 */ add %i1,%o1,%o1
+/* 0x036c 281 */ ldd [%o1],%f6
+ .L900000655:
+/* 0x0370 */ fdtox %f6,%f10
+/* 0x0374 */ ldd [%o1+8],%f4
+/* 0x0378 */ add %o0,2,%o0
+/* 0x037c */ ldd [%l0],%f12
+/* 0x0380 */ fdtox %f6,%f6
+/* 0x0384 */ cmp %o0,%o2
+/* 0x0388 */ fdtox %f4,%f8
+/* 0x038c */ fdtox %f4,%f4
+/* 0x0390 */ fmovs %f12,%f10
+/* 0x0394 */ fmovs %f12,%f8
+/* 0x0398 */ fxtod %f10,%f10
+/* 0x039c */ fxtod %f8,%f8
+/* 0x03a0 */ faddd %f10,%f2,%f2
+/* 0x03a4 */ std %f2,[%o1]
+/* 0x03a8 */ faddd %f8,%f0,%f0
+/* 0x03ac */ std %f0,[%o1+8]
+/* 0x03b0 */ add %o1,16,%o1
+/* 0x03b4 */ fitod %f6,%f2
+/* 0x03b8 */ fitod %f4,%f0
+/* 0x03bc */ ble,a,pt %icc,.L900000655
+/* 0x03c0 */ ldd [%o1],%f6
+ .L77000233:
+/* 0x03c4 */ or %g0,0,%l1
+ .L77000215:
+/* 0x03c8 */ fdtox %f18,%f0
+ .L900000656: /* recompute digit from b (C line 256) and advance j */
+/* 0x03cc */ ldd [%l0],%f6
+/* 0x03d0 256 */ add %g4,1,%g4
+/* 0x03d4 */ add %l2,8,%l2
+/* 0x03d8 */ ldd [%g5],%f2
+/* 0x03dc */ add %l1,1,%l1
+/* 0x03e0 */ add %o5,8,%o5
+/* 0x03e4 */ fmovs %f6,%f0
+/* 0x03e8 */ ldd [%g1],%f4
+/* 0x03ec */ cmp %g4,%g3
+/* 0x03f0 */ fxtod %f0,%f0
+/* 0x03f4 */ fmuld %f0,%f16,%f0
+/* 0x03f8 */ fmuld %f0,%f2,%f2
+/* 0x03fc */ fdtox %f2,%f2
+/* 0x0400 */ fxtod %f2,%f2
+/* 0x0404 */ fmuld %f2,%f4,%f2
+/* 0x0408 */ fsubd %f0,%f2,%f22
+/* 0x040c */ ble,a,pt %icc,.L900000654
+/* 0x0410 */ ldd [%i4],%f0
+ .L900000629:
+/* 0x0414 256 */ ba .L900000653
+/* 0x0418 */ sll %i0,4,%g2
+ .L77000279: /* nlen == 16 path (C lines 261-311): fully unrolled zero-fill of dt[1..65], then the hand-modified loop */
+/* 0x041c 261 */ ldd [%o2],%f6
+/* 0x0420 279 */ or %g0,%o0,%o4
+/* 0x0424 281 */ or %g0,0,%o3
+/* 0x0428 261 */ ldd [%i2],%f4
+/* 0x042c 273 */ std %f0,[%o0+8]
+/* 0x0430 */ std %f0,[%o0+16]
+/* 0x0434 261 */ fmuld %f4,%f6,%f4
+/* 0x0438 */ std %f4,[%o0]
+/* 0x043c 273 */ std %f0,[%o0+24]
+/* 0x0440 */ std %f0,[%o0+32]
+/* 0x0444 */ fdtox %f4,%f4
+/* 0x0448 */ std %f0,[%o0+40]
+/* 0x044c */ std %f0,[%o0+48]
+/* 0x0450 */ std %f0,[%o0+56]
+/* 0x0454 */ std %f0,[%o0+64]
+/* 0x0458 */ std %f0,[%o0+72]
+/* 0x045c */ std %f0,[%o0+80]
+/* 0x0460 */ std %f0,[%o0+88]
+/* 0x0464 */ std %f0,[%o0+96]
+/* 0x0468 */ std %f0,[%o0+104]
+/* 0x046c */ std %f0,[%o0+112]
+/* 0x0470 */ std %f0,[%o0+120]
+/* 0x0474 */ std %f0,[%o0+128]
+/* 0x0478 */ std %f0,[%o0+136]
+/* 0x047c */ std %f0,[%o0+144]
+/* 0x0480 */ std %f0,[%o0+152]
+/* 0x0484 */ std %f0,[%o0+160]
+/* 0x0488 */ std %f0,[%o0+168]
+/* 0x048c */ fmovs %f0,%f4
+/* 0x0490 */ std %f0,[%o0+176]
+/* 0x0494 281 */ or %g0,0,%o1
+/* 0x0498 273 */ std %f0,[%o0+184]
+/* 0x049c */ fxtod %f4,%f4
+/* 0x04a0 */ std %f0,[%o0+192]
+/* 0x04a4 */ std %f0,[%o0+200]
+/* 0x04a8 */ std %f0,[%o0+208]
+/* 0x04ac 278 */ fmuld %f4,%f2,%f2
+/* 0x04b0 273 */ std %f0,[%o0+216]
+/* 0x04b4 */ std %f0,[%o0+224]
+/* 0x04b8 */ std %f0,[%o0+232]
+/* 0x04bc */ std %f0,[%o0+240]
+/* 0x04c0 */ std %f0,[%o0+248]
+/* 0x04c4 */ std %f0,[%o0+256]
+/* 0x04c8 */ std %f0,[%o0+264]
+/* 0x04cc */ std %f0,[%o0+272]
+/* 0x04d0 */ std %f0,[%o0+280]
+/* 0x04d4 */ std %f0,[%o0+288]
+/* 0x04d8 */ std %f0,[%o0+296]
+/* 0x04dc */ std %f0,[%o0+304]
+/* 0x04e0 */ std %f0,[%o0+312]
+/* 0x04e4 */ std %f0,[%o0+320]
+/* 0x04e8 */ std %f0,[%o0+328]
+/* 0x04ec */ std %f0,[%o0+336]
+/* 0x04f0 */ std %f0,[%o0+344]
+/* 0x04f4 */ std %f0,[%o0+352]
+/* 0x04f8 */ std %f0,[%o0+360]
+/* 0x04fc */ std %f0,[%o0+368]
+/* 0x0500 */ std %f0,[%o0+376]
+/* 0x0504 */ std %f0,[%o0+384]
+/* 0x0508 */ std %f0,[%o0+392]
+/* 0x050c */ std %f0,[%o0+400]
+/* 0x0510 */ std %f0,[%o0+408]
+/* 0x0514 */ std %f0,[%o0+416]
+/* 0x0518 */ std %f0,[%o0+424]
+/* 0x051c */ std %f0,[%o0+432]
+/* 0x0520 */ std %f0,[%o0+440]
+/* 0x0524 */ std %f0,[%o0+448]
+/* 0x0528 */ std %f0,[%o0+456]
+/* 0x052c */ std %f0,[%o0+464]
+/* 0x0530 */ std %f0,[%o0+472]
+/* 0x0534 */ std %f0,[%o0+480]
+/* 0x0538 */ std %f0,[%o0+488]
+/* 0x053c */ std %f0,[%o0+496]
+/* 0x0540 */ std %f0,[%o0+504]
+/* 0x0544 */ std %f0,[%o0+512]
+/* 0x0548 */ std %f0,[%o0+520]
+/* 0x054c */ ldd [%g5],%f0
+/* 0x0550 */ ldd [%g1],%f8
+/* 0x0554 */ fmuld %f2,%f0,%f6
+/* 0x0558 275 */ ldd [%i4],%f4
+/* 0x055c 276 */ ldd [%i2],%f0
+/* 0x0560 */ fdtox %f6,%f6
+/* 0x0564 */ fxtod %f6,%f6
+/* 0x0568 */ fmuld %f6,%f8,%f6
+/* 0x056c */ fsubd %f2,%f6,%f2
+/* 0x0570 286 */ fmuld %f4,%f2,%f12
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleenup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f16,%f18 ! hand modified
+ ldd [%i4],%f2
+ ldd [%o4],%f8
+ ldd [%i2],%f10
+ ldd [%g5],%f14 ! hand modified
+ ldd [%g1],%f16 ! hand modified
+ ldd [%i3],%f24
+
+ ldd [%i2+8],%f26
+ ldd [%i2+16],%f40
+ ldd [%i2+48],%f46
+ ldd [%i2+56],%f30
+ ldd [%i2+64],%f54
+ ldd [%i2+104],%f34
+ ldd [%i2+112],%f58
+
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+ ldd [%i4+112],%f60
+
+ .L99999999: !1 /* hand-scheduled body of the j loop (C lines 281-311); %o3 counts from 0 and the loop repeats while %o3 <= 31 after the increment, i.e. 32 passes */
+ ldd [%i2+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i2+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i3+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i3,8,%i3
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i2+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o4+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o4+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o4+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o4+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o4+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i2+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o4+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o4+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o4+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o4+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o4+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i2+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o4+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%i4+120],%f44 !24
+ ldd [%o4+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i2+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o4+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o4+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o4+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o4+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o4+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o4+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o4+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o4+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i2+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i2+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o4+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o4+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o4+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o4,8,%o4
+ faddd %f42,%f44,%f42 !47
+ ldd [%o4+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o4+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o4+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o4+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o4+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %o3,1,%o3
+ std %f42,[%o4+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %o3,31
+ std %f48,[%o4+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o4+224-8] !55
+ std %f8,[%o4]
+
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x07c8 315 */ sll %i0,4,%g2
+ .L900000653: /* common tail: conv_d16_to_i32 of C line 315 is expanded inline here (no call instruction); %i1 becomes dt+2*nlen */
+/* 0x07cc 315 */ add %i1,%g2,%i1
+/* 0x07d0 242 */ ld [%fp+68],%o0
+/* 0x07d4 315 */ or %g0,0,%o4
+/* 0x07d8 */ ldd [%i1],%f0
+/* 0x07dc */ or %g0,0,%g5
+/* 0x07e0 */ cmp %i0,0
+/* 0x07e4 242 */ or %g0,%o0,%o3
+/* 0x07e8 311 */ sub %i0,1,%g1
+/* 0x07ec 315 */ fdtox %f0,%f0
+/* 0x07f0 */ std %f0,[%sp+120]
+/* 0x07f4 311 */ sethi %hi(0xfc00),%o1
+/* 0x07f8 */ add %g1,1,%g3
+/* 0x07fc */ or %g0,%o0,%g4
+/* 0x0800 315 */ ldd [%i1+8],%f0
+/* 0x0804 */ add %o1,1023,%o1 /* %o1 = 0xfc00 + 1023 = 0xffff */
+/* 0x0808 */ fdtox %f0,%f0
+/* 0x080c */ std %f0,[%sp+112]
+/* 0x0810 */ ldx [%sp+112],%o5
+/* 0x0814 */ ldx [%sp+120],%o7
+/* 0x0818 */ ble,pt %icc,.L900000651
+/* 0x081c */ sethi %hi(0xfc00),%g2
+/* 0x0820 311 */ or %g0,-1,%g2
+/* 0x0824 315 */ cmp %g3,3
+/* 0x0828 311 */ srl %g2,0,%o2 /* %o2 = 0xffffffff */
+/* 0x082c 315 */ bl,pn %icc,.L77000287
+/* 0x0830 */ or %g0,%i1,%g2
+/* 0x0834 */ ldd [%i1+16],%f0
+/* 0x0838 */ and %o5,%o1,%o0
+/* 0x083c */ add %i1,16,%g2
+/* 0x0840 */ sllx %o0,16,%g3
+/* 0x0844 */ and %o7,%o2,%o0
+/* 0x0848 */ fdtox %f0,%f0
+/* 0x084c */ std %f0,[%sp+104]
+/* 0x0850 */ add %o0,%g3,%o4
+/* 0x0854 */ ldd [%i1+24],%f2
+/* 0x0858 */ srax %o5,16,%o0
+/* 0x085c */ add %o3,4,%g4
+/* 0x0860 */ stx %o0,[%sp+128]
+/* 0x0864 */ and %o4,%o2,%o0
+/* 0x0868 */ stx %o0,[%sp+112]
+/* 0x086c */ srax %o4,32,%o0
+/* 0x0870 */ fdtox %f2,%f0
+/* 0x0874 */ stx %o0,[%sp+136]
+/* 0x0878 */ srax %o7,32,%o4
+/* 0x087c */ std %f0,[%sp+96]
+/* 0x0880 */ ldx [%sp+128],%g5
+/* 0x0884 */ ldx [%sp+136],%o7
+/* 0x0888 */ ldx [%sp+104],%g3
+/* 0x088c */ add %g5,%o7,%o0
+/* 0x0890 */ or %g0,1,%g5
+/* 0x0894 */ ldx [%sp+112],%o7
+/* 0x0898 */ add %o4,%o0,%o4
+/* 0x089c */ ldx [%sp+96],%o5
+/* 0x08a0 */ st %o7,[%o3]
+/* 0x08a4 */ or %g0,%g3,%o7
+ .L900000634: /* conversion loop: each pass turns one pair of doubles into one 32-bit result word, passing values through stack slots to move them from FP to integer registers */
+/* 0x08a8 */ ldd [%g2+16],%f0
+/* 0x08ac */ add %g5,1,%g5
+/* 0x08b0 */ add %g4,4,%g4
+/* 0x08b4 */ cmp %g5,%g1
+/* 0x08b8 */ add %g2,16,%g2
+/* 0x08bc */ fdtox %f0,%f0
+/* 0x08c0 */ std %f0,[%sp+104]
+/* 0x08c4 */ ldd [%g2+8],%f0
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+96]
+/* 0x08d0 */ and %o5,%o1,%g3
+/* 0x08d4 */ sllx %g3,16,%g3
+/* 0x08d8 */ stx %g3,[%sp+120]
+/* 0x08dc */ and %o7,%o2,%g3
+/* 0x08e0 */ stx %o7,[%sp+128]
+/* 0x08e4 */ ldx [%sp+120],%o7
+/* 0x08e8 */ add %g3,%o7,%g3
+/* 0x08ec */ ldx [%sp+128],%o7
+/* 0x08f0 */ srax %o5,16,%o5
+/* 0x08f4 */ add %g3,%o4,%g3
+/* 0x08f8 */ srax %g3,32,%o4
+/* 0x08fc */ stx %o4,[%sp+112]
+/* 0x0900 */ srax %o7,32,%o4
+/* 0x0904 */ ldx [%sp+112],%o7
+/* 0x0908 */ add %o5,%o7,%o7
+/* 0x090c */ ldx [%sp+96],%o5
+/* 0x0910 */ add %o4,%o7,%o4
+/* 0x0914 */ and %g3,%o2,%g3
+/* 0x0918 */ ldx [%sp+104],%o7
+/* 0x091c */ ble,pt %icc,.L900000634
+/* 0x0920 */ st %g3,[%g4-4]
+ .L900000637:
+/* 0x0924 */ ba .L900000651
+/* 0x0928 */ sethi %hi(0xfc00),%g2
+ .L77000287:
+/* 0x092c */ ldd [%g2+16],%f0
+ .L900000650: /* conversion loop variant taken when nlen < 3 */
+/* 0x0930 */ and %o7,%o2,%o0
+/* 0x0934 */ and %o5,%o1,%g3
+/* 0x0938 */ fdtox %f0,%f0
+/* 0x093c */ add %o4,%o0,%o0
+/* 0x0940 */ std %f0,[%sp+104]
+/* 0x0944 */ add %g5,1,%g5
+/* 0x0948 */ sllx %g3,16,%o4
+/* 0x094c */ ldd [%g2+24],%f2
+/* 0x0950 */ add %g2,16,%g2
+/* 0x0954 */ add %o0,%o4,%o4
+/* 0x0958 */ cmp %g5,%g1
+/* 0x095c */ srax %o5,16,%o0
+/* 0x0960 */ stx %o0,[%sp+112]
+/* 0x0964 */ and %o4,%o2,%g3
+/* 0x0968 */ srax %o4,32,%o5
+/* 0x096c */ fdtox %f2,%f0
+/* 0x0970 */ std %f0,[%sp+96]
+/* 0x0974 */ srax %o7,32,%o4
+/* 0x0978 */ ldx [%sp+112],%o7
+/* 0x097c */ add %o7,%o5,%o7
+/* 0x0980 */ ldx [%sp+104],%o5
+/* 0x0984 */ add %o4,%o7,%o4
+/* 0x0988 */ ldx [%sp+96],%o0
+/* 0x098c */ st %g3,[%g4]
+/* 0x0990 */ or %g0,%o5,%o7
+/* 0x0994 */ add %g4,4,%g4
+/* 0x0998 */ or %g0,%o0,%o5
+/* 0x099c */ ble,a,pt %icc,.L900000650
+/* 0x09a0 */ ldd [%g2+16],%f0
+ .L77000236:
+/* 0x09a4 */ sethi %hi(0xfc00),%g2
+ .L900000651: /* store the last result word, then fall into the inlined adjust_montf_result */
+/* 0x09a8 */ or %g0,-1,%o0
+/* 0x09ac */ add %g2,1023,%g2
+/* 0x09b0 */ ld [%fp+88],%o1
+/* 0x09b4 */ srl %o0,0,%g3
+/* 0x09b8 */ and %o5,%g2,%g2
+/* 0x09bc */ and %o7,%g3,%g4
+
+! 317 ! adjust_montf_result(result,nint,nlen); /* expanded inline below: there is no call instruction; %o3=result, %o2=nint, %i0=nlen */
+
+/* 0x09c0 317 */ or %g0,-1,%o5
+/* 0x09c4 311 */ sllx %g2,16,%g2
+/* 0x09c8 */ add %o4,%g4,%g4
+/* 0x09cc */ add %g4,%g2,%g2
+/* 0x09d0 */ sll %g5,2,%g4
+/* 0x09d4 */ and %g2,%g3,%g2
+/* 0x09d8 */ st %g2,[%o3+%g4]
+/* 0x09dc 317 */ sll %i0,2,%g2
+/* 0x09e0 */ ld [%o3+%g2],%g2
+/* 0x09e4 */ cmp %g2,0
+/* 0x09e8 */ bleu,pn %icc,.L77000241
+/* 0x09ec */ or %g0,%o1,%o2
+/* 0x09f0 */ ba .L900000649
+/* 0x09f4 */ cmp %o5,0
+ .L77000241:
+/* 0x09f8 */ sub %i0,1,%o5
+/* 0x09fc */ sll %o5,2,%g2
+/* 0x0a00 */ cmp %o5,0
+/* 0x0a04 */ bl,pt %icc,.L900000649
+/* 0x0a08 */ cmp %o5,0
+/* 0x0a0c */ add %o1,%g2,%o1
+/* 0x0a10 */ add %o3,%g2,%o4
+/* 0x0a14 */ ld [%o1],%g2
+ .L900000648: /* scan downward for the highest word where result and nint differ */
+/* 0x0a18 */ ld [%o4],%g3
+/* 0x0a1c */ sub %o5,1,%o0
+/* 0x0a20 */ sub %o1,4,%o1
+/* 0x0a24 */ sub %o4,4,%o4
+/* 0x0a28 */ cmp %g3,%g2
+/* 0x0a2c */ bne,pn %icc,.L77000244
+/* 0x0a30 */ nop
+/* 0x0a34 0 */ or %g0,%o0,%o5
+/* 0x0a38 317 */ cmp %o0,0
+/* 0x0a3c */ bge,a,pt %icc,.L900000648
+/* 0x0a40 */ ld [%o1],%g2
+ .L77000244:
+/* 0x0a44 */ cmp %o5,0
+ .L900000649:
+/* 0x0a48 */ bl,pn %icc,.L77000288
+/* 0x0a4c */ sll %o5,2,%g2
+/* 0x0a50 */ ld [%o2+%g2],%g3
+/* 0x0a54 */ ld [%o3+%g2],%g2
+/* 0x0a58 */ cmp %g2,%g3
+/* 0x0a5c */ bleu,pt %icc,.L77000224
+/* 0x0a60 */ nop
+ .L77000288:
+/* 0x0a64 */ cmp %i0,0
+/* 0x0a68 */ ble,pt %icc,.L77000224
+/* 0x0a6c */ nop
+/* 0x0a70 317 */ sub %i0,1,%o7
+/* 0x0a74 */ or %g0,-1,%g2
+/* 0x0a78 */ srl %g2,0,%o4
+/* 0x0a7c */ add %o7,1,%o0
+/* 0x0a80 315 */ or %g0,0,%o5
+/* 0x0a84 */ or %g0,0,%g1
+/* 0x0a88 */ cmp %o0,3
+/* 0x0a8c */ bl,pn %icc,.L77000289
+/* 0x0a90 */ add %o3,8,%o1
+/* 0x0a94 */ add %o2,4,%o0
+/* 0x0a98 */ ld [%o1-8],%g2
+/* 0x0a9c 0 */ or %g0,%o1,%o3
+/* 0x0aa0 315 */ ld [%o0-4],%g3
+/* 0x0aa4 0 */ or %g0,%o0,%o2
+/* 0x0aa8 315 */ or %g0,2,%g1
+/* 0x0aac */ ld [%o3-4],%o0
+/* 0x0ab0 */ sub %g2,%g3,%g2
+/* 0x0ab4 */ or %g0,%g2,%o5
+/* 0x0ab8 */ and %g2,%o4,%g2
+/* 0x0abc */ st %g2,[%o3-8]
+/* 0x0ac0 */ srax %o5,32,%o5
+ .L900000638: /* subtract nint from result word by word; %o5 carries the running value shifted right by 32 */
+/* 0x0ac4 */ ld [%o2],%g2
+/* 0x0ac8 */ add %g1,1,%g1
+/* 0x0acc */ add %o2,4,%o2
+/* 0x0ad0 */ cmp %g1,%o7
+/* 0x0ad4 */ add %o3,4,%o3
+/* 0x0ad8 */ sub %o0,%g2,%o0
+/* 0x0adc */ add %o0,%o5,%o5
+/* 0x0ae0 */ and %o5,%o4,%g2
+/* 0x0ae4 */ ld [%o3-4],%o0
+/* 0x0ae8 */ st %g2,[%o3-8]
+/* 0x0aec */ ble,pt %icc,.L900000638
+/* 0x0af0 */ srax %o5,32,%o5
+ .L900000641:
+/* 0x0af4 */ ld [%o2],%o1
+/* 0x0af8 */ sub %o0,%o1,%o0
+/* 0x0afc */ add %o0,%o5,%o0
+/* 0x0b00 */ and %o0,%o4,%o1
+/* 0x0b04 */ st %o1,[%o3-4]
+/* 0x0b08 */ ret ! Result =
+/* 0x0b0c */ restore %g0,%g0,%g0
+ .L77000289:
+/* 0x0b10 */ ld [%o3],%o0
+ .L900000647: /* subtract loop variant taken when nlen < 3 */
+/* 0x0b14 */ ld [%o2],%o1
+/* 0x0b18 */ add %o5,%o0,%o0
+/* 0x0b1c */ add %g1,1,%g1
+/* 0x0b20 */ add %o2,4,%o2
+/* 0x0b24 */ cmp %g1,%o7
+/* 0x0b28 */ sub %o0,%o1,%o0
+/* 0x0b2c */ and %o0,%o4,%o1
+/* 0x0b30 */ st %o1,[%o3]
+/* 0x0b34 */ add %o3,4,%o3
+/* 0x0b38 */ srax %o0,32,%o5
+/* 0x0b3c */ ble,a,pt %icc,.L900000647
+/* 0x0b40 */ ld [%o3],%o0
+ .L77000224:
+/* 0x0b44 */ ret ! Result =
+/* 0x0b48 */ restore %g0,%g0,%g0
+/* 0x0b4c 0 */ .type mont_mulf_noconv,2
+/* 0x0b4c */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.il b/security/nss/lib/freebl/mpi/montmulfv9.il
new file mode 100644
index 0000000000..006f47431c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.il
@@ -0,0 +1,93 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! Inline template. C prototype:
+! double upper32(double /*frs1*/);
+! Returns, as a double, the signed high 32-bit word of (long long)x. Writes %f10/%f11.
+ .inline upper32,8
+ fdtox %f0,%f10 ! %f10:%f11 = x converted to a 64-bit integer
+ fitod %f10,%f0 ! %f0 = (double) of the 32-bit word in %f10 (high half of the pair)
+ .end
+
+! Inline template. C prototype (the second argument is documented as Zero):
+! double lower32(double /*frs1*/, double /* Zero */);
+! With a 0.0 second argument, returns the low 32 bits of (long long)x as a double.
+ .inline lower32,8
+ fdtox %f0,%f10 ! %f10:%f11 = x converted to a 64-bit integer
+ fmovs %f2,%f10 ! replace the high word with the high word of the second argument
+ fxtod %f10,%f0 ! %f0 = (double) of the resulting 64-bit integer
+ .end
+
+! Inline template. C prototype:
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+! Returns x - m * trunc(x * (1/m)); x in %f0, 1/m in %f2, m in %f4. %f2 is overwritten.
+ .inline mod,12
+ fmuld %f0,%f2,%f2 ! %f2 = x * (1/m)
+ fdtox %f2,%f2 ! convert the quotient to a 64-bit integer ...
+ fxtod %f2,%f2 ! ... and back to double, dropping the fraction
+ fmuld %f2,%f4,%f2 ! %f2 = integer quotient * m
+ fsubd %f0,%f2,%f0 ! %f0 = x - quotient * m
+ .end
+
+
+! Inline template: converts four 32-bit source words to doubles (result32[0..3]) and
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */,
+! float * /*source - should be unsigned int*
+! converted to float* */);
+! splits each into 16-bit halves kept as doubles: result16[2k]=low, result16[2k+1]=high.
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22 ! 0.0, used to clear the high word of each 64-bit integer
+
+ fmovd %f22,%f6 ! zero both words of the pair %f6:%f7
+ ld [%o5],%f7 ! source[0] becomes the low word of the pair
+ fmovd %f22,%f10
+ ld [%o5+4],%f11 ! source[1]
+ fmovd %f22,%f14
+ ld [%o5+8],%f15 ! source[2]
+ fmovd %f22,%f18
+ ld [%o5+12],%f19 ! source[3]
+ fxtod %f6,%f6 ! 64-bit integer (zero-extended source word) -> double
+ std %f6,[%o4] ! result32[0]
+ fxtod %f10,%f10
+ std %f10,[%o4+8] ! result32[1]
+ fxtod %f14,%f14
+ std %f14,[%o4+16] ! result32[2]
+ fxtod %f18,%f18
+ std %f18,[%o4+24] ! result32[3]
+ fmuld %f2,%f6,%f8 ! value * 2^-16
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8 ! drop the fraction: integer high 16-bit part
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8 ! high part back to double
+ std %f8,[%o3+8] ! result16[1] = high half of source[0]
+ fxtod %f12,%f12
+ std %f12,[%o3+24] ! result16[3] = high half of source[1]
+ fxtod %f16,%f16
+ std %f16,[%o3+40] ! result16[5] = high half of source[2]
+ fxtod %f20,%f20
+ std %f20,[%o3+56] ! result16[7] = high half of source[3]
+ fmuld %f8,%f4,%f8 ! high part * 2^16
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8 ! low part = value - high part * 2^16
+ std %f8,[%o3] ! result16[0] = low half of source[0]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16] ! result16[2] = low half of source[1]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32] ! result16[4] = low half of source[2]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48] ! result16[6] = low half of source[3]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.s b/security/nss/lib/freebl/mpi/montmulfv9.s
new file mode 100644
index 0000000000..560e47f7bc
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.s
@@ -0,0 +1,2346 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+! Each constant below is one IEEE-754 double emitted as two 32-bit words, high word first.
+! CONSTANT POOL
+! (each symbol is exported with two identical .global directives)
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464 ! 0x40F00000: high word of 65536.0 (2^16)
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032 ! 0x3EF00000: high word of 1.0/65536.0 (2^-16)
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0 ! 0.0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680 ! 0x41F00000: high word of 65536.0*65536.0 (2^32)
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816 ! 0x3DF00000: high word of 1.0/(65536.0*65536.0) (2^-32)
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .register %g3,#scratch
+/* 000000 */ .register %g2,#scratch
+/* 000000 0 */ .align 8
+! Compiler listing of montmulf.c; the C source is interleaved below as '!' comments.
+! SUBROUTINE conv_d16_to_i32
+! Packs d16[2i] + (d16[2i+1] << 16) into i32[i], carrying overflow into the next word.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-208,%sp
+! FILE montmulf.c
+
+! 1 !/*
+! 2 ! * The contents of this file are subject to the Mozilla Public
+! 3 ! * License Version 1.1 (the "License"); you may not use this file
+! 4 ! * except in compliance with the License. You may obtain a copy of
+! 5 ! * the License at http://www.mozilla.org/MPL/
+! 6 ! *
+! 7 ! * Software distributed under the License is distributed on an "AS
+! 8 ! * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+! 9 ! * implied. See the License for the specific language governing
+! 10 ! * rights and limitations under the License.
+! 11 ! *
+! 12 ! * The Original Code is SPARC optimized Montgomery multiply functions.
+! 13 ! *
+! 14 ! * The Initial Developer of the Original Code is Sun Microsystems Inc.
+! 15 ! * Portions created by Sun Microsystems Inc. are
+! 16 ! * Copyright (C) 1999-2000 Sun Microsystems Inc. All Rights Reserved.
+! 17 ! *
+! 18 ! * Contributor(s):
+! 19 ! * Netscape Communications Corporation
+! 20 ! *
+! 21 ! * Alternatively, the contents of this file may be used under the
+! 22 ! * terms of the GNU General Public License Version 2 or later (the
+! 23 ! * "GPL"), in which case the provisions of the GPL are applicable
+! 24 ! * instead of those above. If you wish to allow use of your
+! 25 ! * version of this file only under the terms of the GPL and not to
+! 26 ! * allow others to use your version of this file under the MPL,
+! 27 ! * indicate your decision by deleting the provisions above and
+! 28 ! * replace them with the notice and other provisions required by
+! 29 ! * the GPL. If you do not delete the provisions above, a recipient
+! 30 ! * may use your version of this file under either the MPL or the
+! 31 ! * GPL.
+! 34 ! */
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
+/* 0x0004 115 */ ldd [%i1],%f2 ! %f2 = d16[0]
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 117 */ sub %i3,1,%o1 ! %o1 = ilen-1
+/* 0x000c 110 */ or %g0,%i0,%g1 ! %g1 = i32
+/* 0x0010 116 */ ldd [%i1+8],%f4 ! %f4 = d16[1]
+/* 0x0014 117 */ cmp %o1,0
+/* 0x0018 114 */ or %g0,0,%g5 ! t1 = 0
+/* 0x001c 115 */ fdtox %f2,%f2 ! a = (long long)d16[0]
+/* 0x0020 */ std %f2,[%sp+2247] ! store to the stack so it can be reloaded into an integer register
+/* 0x0024 117 */ or %g0,0,%o0 ! i = 0
+/* 0x0028 116 */ fdtox %f4,%f2 ! b = (long long)d16[1]
+/* 0x002c */ std %f2,[%sp+2239]
+/* 0x0030 110 */ sub %o1,1,%o7 ! %o7 = ilen-2, the last loop index
+/* 0x0034 */ or %g0,%i1,%o4
+/* 0x0038 */ sethi %hi(0xfc00),%o3
+/* 0x003c */ or %g0,-1,%o1
+/* 0x0040 */ or %g0,2,%i1
+/* 0x0044 */ srl %o1,0,%g3 ! %g3 = 0xffffffff mask
+/* 0x0048 */ or %g0,%o4,%g4 ! %g4 = d16
+/* 0x004c 116 */ ldx [%sp+2239],%i2 ! %i2 = b
+/* 0x0050 */ add %o3,1023,%o5 ! %o5 = 0xfc00 + 1023 = 0xffff mask
+/* 0x0054 117 */ sub %o7,1,%o2
+/* 0x0058 115 */ ldx [%sp+2247],%i3 ! %i3 = a
+/* 0x005c 117 */ ble,pt %icc,.L900000113 ! ilen-1 <= 0: skip the loop entirely
+/* 0x0060 */ sethi %hi(0xfc00),%g2
+/* 0x0064 */ add %o7,1,%g2 ! %g2 = ilen-1, the iteration count
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+
+/* 0x0068 123 */ and %i2,%o5,%i4
+/* 0x006c */ sllx %i4,16,%o1
+/* 0x0070 117 */ cmp %g2,6
+/* 0x0074 */ bl,pn %icc,.L77000134 ! fewer than 6 iterations: use the simple loop
+/* 0x0078 */ or %g0,3,%i0
+/* 0x007c 119 */ ldd [%o4+16],%f0
+/* 0x0080 120 */ and %i3,%g3,%o3
+
+! 124 ! t+=(b>>16)+(t1>>32);
+
+/* 0x0084 124 */ srax %i2,16,%i5
+/* 0x0088 117 */ add %o3,%o1,%i4
+/* 0x008c 121 */ srax %i3,32,%i3
+/* 0x0090 119 */ fdtox %f0,%f0
+/* 0x0094 */ std %f0,[%sp+2231]
+
+! 125 ! i32[i]=t1&0xffffffff;
+
+/* 0x0098 125 */ and %i4,%g3,%l0
+/* 0x009c 117 */ or %g0,72,%o3
+/* 0x00a0 122 */ ldd [%g4+24],%f0
+/* 0x00a4 117 */ or %g0,64,%o4
+/* 0x00a8 */ or %g0,4,%o1
+
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x00ac 128 */ or %g0,5,%i0
+/* 0x00b0 */ or %g0,4,%i1
+/* 0x00b4 119 */ ldx [%sp+2231],%g2
+/* 0x00b8 122 */ fdtox %f0,%f0
+/* 0x00bc 128 */ or %g0,4,%o0
+/* 0x00c0 122 */ std %f0,[%sp+2223]
+/* 0x00c4 */ ldd [%g4+40],%f2
+/* 0x00c8 120 */ and %g2,%g3,%i2
+/* 0x00cc 119 */ ldd [%g4+32],%f0
+/* 0x00d0 121 */ srax %g2,32,%g2
+/* 0x00d4 122 */ ldd [%g4+56],%f4
+/* 0x00d8 */ fdtox %f2,%f2
+/* 0x00dc */ ldx [%sp+2223],%g5
+/* 0x00e0 119 */ fdtox %f0,%f0
+/* 0x00e4 125 */ st %l0,[%g1]
+/* 0x00e8 124 */ srax %i4,32,%l0
+/* 0x00ec 122 */ fdtox %f4,%f4
+/* 0x00f0 */ std %f2,[%sp+2223]
+/* 0x00f4 123 */ and %g5,%o5,%i4
+/* 0x00f8 124 */ add %i5,%l0,%i5
+/* 0x00fc 119 */ std %f0,[%sp+2231]
+/* 0x0100 123 */ sllx %i4,16,%i4
+/* 0x0104 124 */ add %i3,%i5,%i3
+/* 0x0108 119 */ ldd [%g4+48],%f2
+/* 0x010c 124 */ srax %g5,16,%g5
+/* 0x0110 117 */ add %i2,%i4,%i2
+/* 0x0114 122 */ ldd [%g4+72],%f0
+/* 0x0118 117 */ add %i2,%i3,%i4
+/* 0x011c 124 */ srax %i4,32,%i5
+/* 0x0120 119 */ fdtox %f2,%f2
+/* 0x0124 125 */ and %i4,%g3,%i4
+/* 0x0128 122 */ ldx [%sp+2223],%i2
+/* 0x012c 124 */ add %g5,%i5,%g5
+/* 0x0130 119 */ ldx [%sp+2231],%i3
+/* 0x0134 124 */ add %g2,%g5,%g5
+/* 0x0138 119 */ std %f2,[%sp+2231]
+/* 0x013c 122 */ std %f4,[%sp+2223]
+/* 0x0140 119 */ ldd [%g4+64],%f2
+/* 0x0144 125 */ st %i4,[%g1+4]
+ .L900000108: ! unrolled loop body: two i32 words stored per pass
+/* 0x0148 122 */ ldx [%sp+2223],%i4
+/* 0x014c 128 */ add %o0,2,%o0
+/* 0x0150 */ add %i0,4,%i0
+/* 0x0154 119 */ ldx [%sp+2231],%l0
+/* 0x0158 117 */ add %o3,16,%o3
+/* 0x015c 123 */ and %i2,%o5,%g2
+/* 0x0160 */ sllx %g2,16,%i5
+/* 0x0164 120 */ and %i3,%g3,%g2
+/* 0x0168 122 */ ldd [%g4+%o3],%f4
+/* 0x016c */ fdtox %f0,%f0
+/* 0x0170 */ std %f0,[%sp+2223]
+/* 0x0174 124 */ srax %i2,16,%i2
+/* 0x0178 117 */ add %g2,%i5,%g2
+/* 0x017c 119 */ fdtox %f2,%f0
+/* 0x0180 117 */ add %o4,16,%o4
+/* 0x0184 119 */ std %f0,[%sp+2231]
+/* 0x0188 117 */ add %g2,%g5,%g2
+/* 0x018c 119 */ ldd [%g4+%o4],%f2
+/* 0x0190 124 */ srax %g2,32,%i5
+/* 0x0194 128 */ cmp %o0,%o2
+/* 0x0198 121 */ srax %i3,32,%g5
+/* 0x019c 124 */ add %i2,%i5,%i2
+/* 0x01a0 */ add %g5,%i2,%i5
+/* 0x01a4 117 */ add %o1,4,%o1
+/* 0x01a8 125 */ and %g2,%g3,%g2
+/* 0x01ac 127 */ or %g0,%l0,%g5
+/* 0x01b0 125 */ st %g2,[%g1+%o1]
+/* 0x01b4 128 */ add %i1,4,%i1
+/* 0x01b8 122 */ ldx [%sp+2223],%i2
+/* 0x01bc 119 */ ldx [%sp+2231],%i3
+/* 0x01c0 117 */ add %o3,16,%o3
+/* 0x01c4 123 */ and %i4,%o5,%g2
+/* 0x01c8 */ sllx %g2,16,%l0
+/* 0x01cc 120 */ and %g5,%g3,%g2
+/* 0x01d0 122 */ ldd [%g4+%o3],%f0
+/* 0x01d4 */ fdtox %f4,%f4
+/* 0x01d8 */ std %f4,[%sp+2223]
+/* 0x01dc 124 */ srax %i4,16,%i4
+/* 0x01e0 117 */ add %g2,%l0,%g2
+/* 0x01e4 119 */ fdtox %f2,%f2
+/* 0x01e8 117 */ add %o4,16,%o4
+/* 0x01ec 119 */ std %f2,[%sp+2231]
+/* 0x01f0 117 */ add %g2,%i5,%g2
+/* 0x01f4 119 */ ldd [%g4+%o4],%f2
+/* 0x01f8 124 */ srax %g2,32,%i5
+/* 0x01fc 121 */ srax %g5,32,%g5
+/* 0x0200 124 */ add %i4,%i5,%i4
+/* 0x0204 */ add %g5,%i4,%g5
+/* 0x0208 117 */ add %o1,4,%o1
+/* 0x020c 125 */ and %g2,%g3,%g2
+/* 0x0210 128 */ ble,pt %icc,.L900000108
+/* 0x0214 */ st %g2,[%g1+%o1]
+ .L900000111: ! code following the unrolled loop
+/* 0x0218 122 */ ldx [%sp+2223],%o2
+/* 0x021c 123 */ and %i2,%o5,%i4
+/* 0x0220 120 */ and %i3,%g3,%g2
+/* 0x0224 123 */ sllx %i4,16,%i4
+/* 0x0228 119 */ ldx [%sp+2231],%i5
+/* 0x022c 128 */ cmp %o0,%o7
+/* 0x0230 124 */ srax %i2,16,%i2
+/* 0x0234 117 */ add %g2,%i4,%g2
+/* 0x0238 122 */ fdtox %f0,%f4
+/* 0x023c */ std %f4,[%sp+2223]
+/* 0x0240 117 */ add %g2,%g5,%g5
+/* 0x0244 123 */ and %o2,%o5,%l0
+/* 0x0248 124 */ srax %g5,32,%l1
+/* 0x024c 120 */ and %i5,%g3,%i4
+/* 0x0250 119 */ fdtox %f2,%f0
+/* 0x0254 121 */ srax %i3,32,%g2
+/* 0x0258 119 */ std %f0,[%sp+2231]
+/* 0x025c 124 */ add %i2,%l1,%i2
+/* 0x0260 123 */ sllx %l0,16,%i3
+/* 0x0264 124 */ add %g2,%i2,%i2
+/* 0x0268 */ srax %o2,16,%o2
+/* 0x026c 117 */ add %o1,4,%g2
+/* 0x0270 */ add %i4,%i3,%o1
+/* 0x0274 125 */ and %g5,%g3,%g5
+/* 0x0278 */ st %g5,[%g1+%g2]
+/* 0x027c 119 */ ldx [%sp+2231],%i3
+/* 0x0280 117 */ add %o1,%i2,%o1
+/* 0x0284 */ add %g2,4,%g2
+/* 0x0288 124 */ srax %o1,32,%i4
+/* 0x028c 122 */ ldx [%sp+2223],%i2
+/* 0x0290 125 */ and %o1,%g3,%g5
+/* 0x0294 121 */ srax %i5,32,%o1
+/* 0x0298 124 */ add %o2,%i4,%o2
+/* 0x029c 125 */ st %g5,[%g1+%g2]
+/* 0x02a0 128 */ bg,pn %icc,.L77000127 ! i > ilen-2: all loop iterations done
+/* 0x02a4 */ add %o1,%o2,%g5
+/* 0x02a8 */ add %i0,6,%i0
+/* 0x02ac */ add %i1,6,%i1
+ .L77000134: ! simple loop: one i32 word per pass
+/* 0x02b0 119 */ sra %i1,0,%o2
+ .L900000112:
+/* 0x02b4 119 */ sllx %o2,3,%o3
+/* 0x02b8 120 */ and %i3,%g3,%o1
+/* 0x02bc 119 */ ldd [%g4+%o3],%f0
+/* 0x02c0 122 */ sra %i0,0,%o3
+/* 0x02c4 123 */ and %i2,%o5,%o2
+/* 0x02c8 122 */ sllx %o3,3,%o3
+/* 0x02cc 120 */ add %g5,%o1,%o1
+/* 0x02d0 119 */ fdtox %f0,%f0
+/* 0x02d4 */ std %f0,[%sp+2231]
+/* 0x02d8 123 */ sllx %o2,16,%o2
+/* 0x02dc */ add %o1,%o2,%o2
+/* 0x02e0 128 */ add %i1,2,%i1
+/* 0x02e4 122 */ ldd [%g4+%o3],%f0
+/* 0x02e8 124 */ srax %o2,32,%g2
+/* 0x02ec 125 */ and %o2,%g3,%o3
+/* 0x02f0 124 */ srax %i2,16,%o1
+/* 0x02f4 128 */ add %i0,2,%i0
+/* 0x02f8 122 */ fdtox %f0,%f0
+/* 0x02fc */ std %f0,[%sp+2223]
+/* 0x0300 125 */ sra %o0,0,%o2
+/* 0x0304 */ sllx %o2,2,%o2
+/* 0x0308 124 */ add %o1,%g2,%g5
+/* 0x030c 121 */ srax %i3,32,%g2
+/* 0x0310 128 */ add %o0,1,%o0
+/* 0x0314 124 */ add %g2,%g5,%g5
+/* 0x0318 128 */ cmp %o0,%o7
+/* 0x031c 119 */ ldx [%sp+2231],%o4
+/* 0x0320 122 */ ldx [%sp+2223],%i2
+/* 0x0324 125 */ st %o3,[%g1+%o2]
+/* 0x0328 127 */ or %g0,%o4,%i3
+/* 0x032c 128 */ ble,pt %icc,.L900000112
+/* 0x0330 */ sra %i1,0,%o2
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x0334 133 */ sethi %hi(0xfc00),%g2
+ .L900000113: ! final word, computed after the loop (source lines 130-133)
+/* 0x0338 133 */ or %g0,-1,%g3
+/* 0x033c */ add %g2,1023,%g2 ! %g2 = 0xffff mask
+/* 0x0340 */ srl %g3,0,%g3 ! %g3 = 0xffffffff mask
+/* 0x0344 */ and %i2,%g2,%g2 ! b & 0xffff
+/* 0x0348 */ and %i3,%g3,%g4 ! a & 0xffffffff
+/* 0x034c */ sllx %g2,16,%g2
+/* 0x0350 */ add %g5,%g4,%g4
+/* 0x0354 */ sra %o0,0,%g5
+/* 0x0358 */ add %g4,%g2,%g4
+/* 0x035c */ sllx %g5,2,%g2 ! byte offset i*4
+/* 0x0360 */ and %g4,%g3,%g3
+/* 0x0364 */ st %g3,[%g1+%g2] ! i32[i] = t1 & 0xffffffff
+/* 0x0368 */ ret ! Result =
+/* 0x036c */ restore %g0,%g0,%g0
+/* 0x0370 0 */ .type conv_d16_to_i32,2
+/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+! Stores each i32[i] as a double in d32[i]: the word is placed in the low half of a
+! SUBROUTINE conv_i32_to_d32
+! register pair whose high half comes from 2^52, then 2^52 is subtracted.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g3 ! save %o7, which the call below overwrites
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 142 */ cmp %o2,0
+ .L900000210:
+/* 0x0008 */ call .+8 ! puts the current PC in %o7 for the GOT address computation
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o3 ! i = 0
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 142 */ sub %o2,1,%o4 ! %o4 = len-1, the last index
+/* 0x001c 138 */ add %g4,%o7,%g1 ! %g1 = GOT base
+/* 0x0020 142 */ ble,pt %icc,.L77000140 ! len <= 0: nothing to do
+/* 0x0024 */ or %g0,%g3,%o7 ! restore the saved %o7
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g3
+/* 0x002c */ cmp %o2,12
+/* 0x0030 */ add %g3,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ or %g0,%o1,%g5 ! %g5 = i32
+/* 0x0038 */ ldx [%g1+%g2],%g4 ! %g4 = address of the 2^52 constant, loaded via the GOT
+/* 0x003c */ or %g0,0,%g1
+/* 0x0040 */ or %g0,24,%g2
+/* 0x0044 */ bl,pn %icc,.L77000144 ! len < 12: use the simple loop
+/* 0x0048 */ or %g0,0,%g3
+/* 0x004c */ ld [%o1],%f13
+/* 0x0050 */ or %g0,7,%o3
+/* 0x0054 */ ldd [%g4],%f8 ! %f8:%f9 = 2^52
+/* 0x0058 */ sub %o2,5,%g3
+/* 0x005c */ or %g0,8,%g1
+/* 0x0060 */ ld [%o1+4],%f11
+/* 0x0064 */ ld [%o1+8],%f7
+/* 0x0068 */ fmovs %f8,%f12
+/* 0x006c */ ld [%o1+12],%f5
+/* 0x0070 */ fmovs %f8,%f10
+/* 0x0074 */ ld [%o1+16],%f3
+/* 0x0078 */ fmovs %f8,%f6
+/* 0x007c */ ld [%o1+20],%f1
+/* 0x0080 */ fsubd %f12,%f8,%f12
+/* 0x0084 */ std %f12,[%o0]
+/* 0x0088 */ fsubd %f10,%f8,%f10
+/* 0x008c */ std %f10,[%o0+8]
+ .L900000205: ! unrolled loop body: five elements per pass
+/* 0x0090 */ ld [%o1+%g2],%f11
+/* 0x0094 */ add %g1,8,%g1
+/* 0x0098 */ add %o3,5,%o3
+/* 0x009c */ fsubd %f6,%f8,%f6
+/* 0x00a0 */ add %g2,4,%g2
+/* 0x00a4 */ std %f6,[%o0+%g1]
+/* 0x00a8 */ cmp %o3,%g3
+/* 0x00ac */ fmovs %f8,%f4
+/* 0x00b0 */ ld [%o1+%g2],%f7
+/* 0x00b4 */ fsubd %f4,%f8,%f12
+/* 0x00b8 */ add %g1,8,%g1
+/* 0x00bc */ add %g2,4,%g2
+/* 0x00c0 */ fmovs %f8,%f2
+/* 0x00c4 */ std %f12,[%o0+%g1]
+/* 0x00c8 */ ld [%o1+%g2],%f5
+/* 0x00cc */ fsubd %f2,%f8,%f12
+/* 0x00d0 */ add %g1,8,%g1
+/* 0x00d4 */ add %g2,4,%g2
+/* 0x00d8 */ fmovs %f8,%f0
+/* 0x00dc */ std %f12,[%o0+%g1]
+/* 0x00e0 */ ld [%o1+%g2],%f3
+/* 0x00e4 */ fsubd %f0,%f8,%f12
+/* 0x00e8 */ add %g1,8,%g1
+/* 0x00ec */ add %g2,4,%g2
+/* 0x00f0 */ fmovs %f8,%f10
+/* 0x00f4 */ std %f12,[%o0+%g1]
+/* 0x00f8 */ ld [%o1+%g2],%f1
+/* 0x00fc */ fsubd %f10,%f8,%f10
+/* 0x0100 */ add %g1,8,%g1
+/* 0x0104 */ add %g2,4,%g2
+/* 0x0108 */ std %f10,[%o0+%g1]
+/* 0x010c */ ble,pt %icc,.L900000205
+/* 0x0110 */ fmovs %f8,%f6
+ .L900000208: ! code following the unrolled loop
+/* 0x0114 */ fmovs %f8,%f4
+/* 0x0118 */ ld [%o1+%g2],%f11
+/* 0x011c */ add %g1,8,%g3
+/* 0x0120 */ fmovs %f8,%f2
+/* 0x0124 */ add %g1,16,%g1
+/* 0x0128 */ cmp %o3,%o4
+/* 0x012c */ fmovs %f8,%f0
+/* 0x0130 */ add %g1,8,%o1
+/* 0x0134 */ add %g1,16,%o2
+/* 0x0138 */ fmovs %f8,%f10
+/* 0x013c */ add %g1,24,%g2
+/* 0x0140 */ fsubd %f6,%f8,%f6
+/* 0x0144 */ std %f6,[%o0+%g3]
+/* 0x0148 */ fsubd %f4,%f8,%f4
+/* 0x014c */ std %f4,[%o0+%g1]
+/* 0x0150 */ sra %o3,0,%g1
+/* 0x0154 */ fsubd %f2,%f8,%f2
+/* 0x0158 */ std %f2,[%o0+%o1]
+/* 0x015c */ sllx %g1,2,%g3
+/* 0x0160 */ fsubd %f0,%f8,%f0
+/* 0x0164 */ std %f0,[%o0+%o2]
+/* 0x0168 */ fsubd %f10,%f8,%f0
+/* 0x016c */ bg,pn %icc,.L77000140 ! i > len-1: done
+/* 0x0170 */ std %f0,[%o0+%g2]
+ .L77000144: ! simple loop: one element per pass
+/* 0x0174 */ ldd [%g4],%f8 ! %f8:%f9 = 2^52
+ .L900000211:
+/* 0x0178 */ ld [%g5+%g3],%f13 ! i32[i] becomes the low word of %f12:%f13
+/* 0x017c */ sllx %g1,3,%g2 ! byte offset i*8 into d32
+/* 0x0180 */ add %o3,1,%o3
+/* 0x0184 */ sra %o3,0,%g1
+/* 0x0188 */ cmp %o3,%o4
+/* 0x018c */ fmovs %f8,%f12 ! high word of 2^52 becomes the high word
+/* 0x0190 */ sllx %g1,2,%g3 ! byte offset i*4 into i32 for the next pass
+/* 0x0194 */ fsubd %f12,%f8,%f0 ! subtract 2^52, leaving the word's value as a double
+/* 0x0198 */ std %f0,[%o0+%g2] ! d32[i]
+/* 0x019c */ ble,a,pt %icc,.L900000211
+/* 0x01a0 */ ldd [%g4],%f8
+ .L77000140:
+/* 0x01a4 */ retl ! Result =
+/* 0x01a8 */ nop
+/* 0x01ac 0 */ .type conv_i32_to_d32,2
+/* 0x01ac */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+! Splits each i32[i] into 16-bit halves stored as doubles: d16[2i] = low half,
+! SUBROUTINE conv_i32_to_d16
+! d16[2i+1] = high half. Each half is converted by subtracting 2^52 from a built pair.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000310:
+/* 0x0004 */ call .+8 ! puts the current PC in %o7 for the GOT address computation
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+
+/* 0x000c 152 */ cmp %i2,0
+/* 0x0010 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ ble,pt %icc,.L77000150 ! len <= 0: nothing to do
+/* 0x0018 */ add %g3,%o7,%o0 ! %o0 = GOT base
+
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0020 147 */ or %g0,%i2,%o1
+/* 0x0024 152 */ sethi %hi(0xfc00),%g3
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x002c 152 */ or %g0,%o1,%g4
+/* 0x0030 156 */ ldx [%o0+%g2],%o5 ! %o5 = address of the 2^52 constant, loaded via the GOT
+/* 0x0034 152 */ add %g3,1023,%g1 ! %g1 = 0xfc00 + 1023 = 0xffff mask
+/* 0x0038 147 */ or %g0,%i1,%o7 ! %o7 = i32
+/* 0x003c 152 */ or %g0,0,%i2 ! i = 0
+/* 0x0040 */ sub %o1,1,%g5 ! %g5 = len-1, the last index
+/* 0x0044 */ or %g0,0,%g3
+/* 0x0048 */ or %g0,1,%g2
+/* 0x004c 154 */ or %g0,0,%o2
+/* 0x0050 */ cmp %g4,6
+/* 0x0054 152 */ bl,pn %icc,.L77000154 ! len < 6: use the simple loop
+/* 0x0058 */ ldd [%o5],%f0 ! %f0:%f1 = 2^52
+/* 0x005c */ sub %o1,2,%o3
+/* 0x0060 */ or %g0,16,%o2
+/* 0x0064 154 */ ld [%i1],%o4
+/* 0x0068 156 */ or %g0,3,%g2
+/* 0x006c */ or %g0,2,%g3
+/* 0x0070 155 */ fmovs %f0,%f2
+/* 0x0074 156 */ or %g0,4,%i2
+/* 0x0078 155 */ and %o4,%g1,%o0
+/* 0x007c */ st %o0,[%sp+2227]
+/* 0x0080 */ fmovs %f0,%f4
+/* 0x0084 156 */ srl %o4,16,%i4
+/* 0x0088 152 */ or %g0,12,%o4
+/* 0x008c */ or %g0,24,%o0
+/* 0x0090 155 */ ld [%sp+2227],%f3
+/* 0x0094 */ fsubd %f2,%f0,%f2
+/* 0x0098 */ std %f2,[%i0]
+/* 0x009c 156 */ st %i4,[%sp+2223]
+/* 0x00a0 154 */ ld [%o7+4],%o1
+/* 0x00a4 156 */ fmovs %f0,%f2
+/* 0x00a8 155 */ and %o1,%g1,%i1
+/* 0x00ac 156 */ ld [%sp+2223],%f3
+/* 0x00b0 */ srl %o1,16,%o1
+/* 0x00b4 */ fsubd %f2,%f0,%f2
+/* 0x00b8 */ std %f2,[%i0+8]
+/* 0x00bc */ st %o1,[%sp+2223]
+/* 0x00c0 155 */ st %i1,[%sp+2227]
+/* 0x00c4 154 */ ld [%o7+8],%o1
+/* 0x00c8 156 */ fmovs %f0,%f2
+/* 0x00cc 155 */ and %o1,%g1,%g4
+/* 0x00d0 */ ld [%sp+2227],%f5
+/* 0x00d4 156 */ srl %o1,16,%o1
+/* 0x00d8 */ ld [%sp+2223],%f3
+/* 0x00dc */ st %o1,[%sp+2223]
+/* 0x00e0 155 */ fsubd %f4,%f0,%f4
+/* 0x00e4 */ st %g4,[%sp+2227]
+/* 0x00e8 156 */ fsubd %f2,%f0,%f2
+/* 0x00ec 154 */ ld [%o7+12],%o1
+/* 0x00f0 155 */ std %f4,[%i0+16]
+/* 0x00f4 156 */ std %f2,[%i0+24]
+ .L900000306: ! unrolled loop body: two source words per pass
+/* 0x00f8 155 */ ld [%sp+2227],%f5
+/* 0x00fc 156 */ add %i2,2,%i2
+/* 0x0100 */ add %g2,4,%g2
+/* 0x0104 */ ld [%sp+2223],%f3
+/* 0x0108 */ cmp %i2,%o3
+/* 0x010c */ add %g3,4,%g3
+/* 0x0110 155 */ and %o1,%g1,%g4
+/* 0x0114 156 */ srl %o1,16,%o1
+/* 0x0118 155 */ st %g4,[%sp+2227]
+/* 0x011c 156 */ st %o1,[%sp+2223]
+/* 0x0120 152 */ add %o4,4,%o1
+/* 0x0124 154 */ ld [%o7+%o1],%o4
+/* 0x0128 156 */ fmovs %f0,%f2
+/* 0x012c 155 */ fmovs %f0,%f4
+/* 0x0130 */ fsubd %f4,%f0,%f4
+/* 0x0134 152 */ add %o2,16,%o2
+/* 0x0138 156 */ fsubd %f2,%f0,%f2
+/* 0x013c 155 */ std %f4,[%i0+%o2]
+/* 0x0140 152 */ add %o0,16,%o0
+/* 0x0144 156 */ std %f2,[%i0+%o0]
+/* 0x0148 155 */ ld [%sp+2227],%f5
+/* 0x014c 156 */ ld [%sp+2223],%f3
+/* 0x0150 155 */ and %o4,%g1,%g4
+/* 0x0154 156 */ srl %o4,16,%o4
+/* 0x0158 155 */ st %g4,[%sp+2227]
+/* 0x015c 156 */ st %o4,[%sp+2223]
+/* 0x0160 152 */ add %o1,4,%o4
+/* 0x0164 154 */ ld [%o7+%o4],%o1
+/* 0x0168 156 */ fmovs %f0,%f2
+/* 0x016c 155 */ fmovs %f0,%f4
+/* 0x0170 */ fsubd %f4,%f0,%f4
+/* 0x0174 152 */ add %o2,16,%o2
+/* 0x0178 156 */ fsubd %f2,%f0,%f2
+/* 0x017c 155 */ std %f4,[%i0+%o2]
+/* 0x0180 152 */ add %o0,16,%o0
+/* 0x0184 156 */ ble,pt %icc,.L900000306
+/* 0x0188 */ std %f2,[%i0+%o0]
+ .L900000309: ! code following the unrolled loop
+/* 0x018c 155 */ ld [%sp+2227],%f5
+/* 0x0190 156 */ fmovs %f0,%f2
+/* 0x0194 */ srl %o1,16,%o3
+/* 0x0198 */ ld [%sp+2223],%f3
+/* 0x019c 155 */ and %o1,%g1,%i1
+/* 0x01a0 152 */ add %o2,16,%g4
+/* 0x01a4 155 */ fmovs %f0,%f4
+/* 0x01a8 */ st %i1,[%sp+2227]
+/* 0x01ac 152 */ add %o0,16,%o2
+/* 0x01b0 156 */ st %o3,[%sp+2223]
+/* 0x01b4 154 */ sra %i2,0,%o3
+/* 0x01b8 152 */ add %g4,16,%o1
+/* 0x01bc 155 */ fsubd %f4,%f0,%f4
+/* 0x01c0 */ std %f4,[%i0+%g4]
+/* 0x01c4 152 */ add %o0,32,%o0
+/* 0x01c8 156 */ fsubd %f2,%f0,%f2
+/* 0x01cc */ std %f2,[%i0+%o2]
+/* 0x01d0 */ sllx %o3,2,%o2
+/* 0x01d4 155 */ ld [%sp+2227],%f5
+/* 0x01d8 156 */ cmp %i2,%g5
+/* 0x01dc */ add %g2,6,%g2
+/* 0x01e0 */ ld [%sp+2223],%f3
+/* 0x01e4 */ add %g3,6,%g3
+/* 0x01e8 155 */ fmovs %f0,%f4
+/* 0x01ec 156 */ fmovs %f0,%f2
+/* 0x01f0 155 */ fsubd %f4,%f0,%f4
+/* 0x01f4 */ std %f4,[%i0+%o1]
+/* 0x01f8 156 */ fsubd %f2,%f0,%f0
+/* 0x01fc */ bg,pn %icc,.L77000150 ! i > len-1: done
+/* 0x0200 */ std %f0,[%i0+%o0]
+ .L77000154: ! simple loop: one source word per pass
+/* 0x0204 155 */ ldd [%o5],%f0 ! %f0:%f1 = 2^52
+ .L900000311:
+/* 0x0208 154 */ ld [%o7+%o2],%o0 ! a = i32[i]
+/* 0x020c 155 */ sra %g3,0,%o1
+/* 0x0210 */ fmovs %f0,%f2 ! high word of 2^52 becomes the high word of %f2:%f3
+/* 0x0214 */ sllx %o1,3,%o2
+/* 0x0218 156 */ add %i2,1,%i2
+/* 0x021c 155 */ and %o0,%g1,%o1 ! a & 0xffff
+/* 0x0220 */ st %o1,[%sp+2227] ! store to the stack so it can be reloaded into an FP register
+/* 0x0224 156 */ add %g3,2,%g3
+/* 0x0228 */ srl %o0,16,%o1 ! a >> 16
+/* 0x022c */ cmp %i2,%g5
+/* 0x0230 */ sra %g2,0,%o0
+/* 0x0234 */ add %g2,2,%g2
+/* 0x0238 */ sllx %o0,3,%o0
+/* 0x023c 155 */ ld [%sp+2227],%f3 ! low half becomes the low word of %f2:%f3
+/* 0x0240 154 */ sra %i2,0,%o3
+/* 0x0244 155 */ fsubd %f2,%f0,%f2 ! subtract 2^52, leaving the value as a double
+/* 0x0248 */ std %f2,[%i0+%o2] ! d16[2*i]
+/* 0x024c */ sllx %o3,2,%o2
+/* 0x0250 156 */ st %o1,[%sp+2223]
+/* 0x0254 */ fmovs %f0,%f2
+/* 0x0258 */ ld [%sp+2223],%f3
+/* 0x025c */ fsubd %f2,%f0,%f0
+/* 0x0260 */ std %f0,[%i0+%o0] ! d16[2*i+1]
+/* 0x0264 */ ble,a,pt %icc,.L900000311
+/* 0x0268 */ ldd [%o5],%f0
+ .L77000150:
+/* 0x026c */ ret ! Result =
+/* 0x0270 */ restore %g0,%g0,%g0
+/* 0x0274 0 */ .type conv_i32_to_d16,2
+/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000415:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x000c 172 */ sethi %hi(Zero),%g2
+/* 0x0010 163 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+/* 0x0014 */ or %g0,%i3,%g5
+/* 0x0018 */ add %g3,%o7,%o3
+/* 0x001c 172 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ ldx [%o3+%g2],%o0
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 163 */ or %g0,%i0,%i3
+/* 0x002c 169 */ sub %g5,3,%o1
+/* 0x0030 172 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 163 */ or %g0,%i2,%i0
+/* 0x0038 172 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c */ ldx [%o3+%g2],%o2
+/* 0x0040 169 */ cmp %o1,0
+/* 0x0044 164 */ or %g0,0,%i2
+/* 0x0048 169 */ ble,pt %icc,.L900000418
+/* 0x004c */ cmp %i2,%g5
+/* 0x0050 */ ldd [%o0],%f2
+/* 0x0054 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0058 */ ldx [%o3+%g3],%o1
+/* 0x005c 169 */ sub %g5,4,%o4
+/* 0x0060 */ or %g0,0,%o5
+ .L900000417:
+/* 0x0064 172 */ sra %i2,0,%g2
+/* 0x0068 */ fmovd %f2,%f14
+/* 0x006c */ ldd [%o2],%f0
+/* 0x0070 */ sllx %g2,2,%g3
+/* 0x0074 */ fmovd %f2,%f10
+/* 0x0078 */ ldd [%o1],%f16
+/* 0x007c */ ld [%g3+%i0],%f15
+/* 0x0080 */ add %i0,%g3,%g3
+/* 0x0084 */ fmovd %f2,%f6
+/* 0x0088 */ ld [%g3+4],%f11
+/* 0x008c */ sra %o5,0,%g4
+/* 0x0090 */ add %i2,4,%i2
+/* 0x0094 */ ld [%g3+8],%f7
+/* 0x0098 */ fxtod %f14,%f14
+/* 0x009c */ sllx %g2,3,%g2
+/* 0x00a0 */ ld [%g3+12],%f3
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ sllx %g4,3,%g3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ std %f14,[%g2+%i3]
+/* 0x00b4 */ add %i3,%g2,%g4
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f14,%f12
+/* 0x00c0 */ std %f2,[%g4+24]
+/* 0x00c4 */ fmuld %f0,%f10,%f8
+/* 0x00c8 */ std %f10,[%g4+8]
+/* 0x00cc */ add %i1,%g3,%g2
+/* 0x00d0 */ fmuld %f0,%f6,%f4
+/* 0x00d4 */ std %f6,[%g4+16]
+/* 0x00d8 */ cmp %i2,%o4
+/* 0x00dc */ fmuld %f0,%f2,%f0
+/* 0x00e0 */ fdtox %f12,%f12
+/* 0x00e4 */ add %o5,8,%o5
+/* 0x00e8 */ fdtox %f8,%f8
+/* 0x00ec */ fdtox %f4,%f4
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ fxtod %f12,%f12
+/* 0x00f8 */ std %f12,[%g2+8]
+/* 0x00fc */ fxtod %f8,%f8
+/* 0x0100 */ std %f8,[%g2+24]
+/* 0x0104 */ fxtod %f4,%f4
+/* 0x0108 */ std %f4,[%g2+40]
+/* 0x010c */ fxtod %f0,%f0
+/* 0x0110 */ std %f0,[%g2+56]
+/* 0x0114 */ fmuld %f12,%f16,%f12
+/* 0x0118 */ fmuld %f8,%f16,%f8
+/* 0x011c */ fmuld %f4,%f16,%f4
+/* 0x0120 */ fsubd %f14,%f12,%f12
+/* 0x0124 */ std %f12,[%g3+%i1]
+/* 0x0128 */ fmuld %f0,%f16,%f0
+/* 0x012c */ fsubd %f10,%f8,%f8
+/* 0x0130 */ std %f8,[%g2+16]
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g2+32]
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g2+48]
+/* 0x0144 */ ble,a,pt %icc,.L900000417
+/* 0x0148 */ ldd [%o0],%f2
+ .L77000159:
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %i2,%g5
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2
+/* 0x0160 175 */ sethi %hi(0xfc00),%g3
+/* 0x0164 180 */ ldx [%o3+%g2],%g1
+/* 0x0168 175 */ sll %i2,1,%i4
+/* 0x016c */ sub %g5,%i2,%g4
+/* 0x0170 177 */ sra %i2,0,%o3
+/* 0x0174 175 */ add %g3,1023,%g3
+/* 0x0178 178 */ ldd [%g1],%f2
+/* 0x017c */ sllx %o3,2,%o2
+/* 0x0180 175 */ add %i4,1,%g2
+/* 0x0184 177 */ or %g0,%o3,%o1
+/* 0x0188 */ cmp %g4,6
+/* 0x018c 175 */ bl,pn %icc,.L77000161
+/* 0x0190 */ sra %i2,0,%o3
+/* 0x0194 177 */ or %g0,%o2,%o0
+/* 0x0198 178 */ ld [%i0+%o2],%f5
+/* 0x019c 179 */ fmovs %f2,%f8
+/* 0x01a0 175 */ add %o0,4,%o3
+/* 0x01a4 177 */ ld [%i0+%o0],%o7
+/* 0x01a8 180 */ fmovs %f2,%f6
+/* 0x01ac 178 */ fmovs %f2,%f4
+/* 0x01b0 */ sllx %o1,3,%o2
+/* 0x01b4 175 */ add %o3,4,%o5
+/* 0x01b8 179 */ sra %i4,0,%o0
+/* 0x01bc 175 */ add %o3,8,%o4
+/* 0x01c0 178 */ fsubd %f4,%f2,%f4
+/* 0x01c4 */ std %f4,[%i3+%o2]
+/* 0x01c8 179 */ sllx %o0,3,%i5
+/* 0x01cc */ and %o7,%g3,%o0
+/* 0x01d0 */ st %o0,[%sp+2227]
+/* 0x01d4 175 */ add %i5,16,%o1
+/* 0x01d8 180 */ srl %o7,16,%g4
+/* 0x01dc */ add %i2,1,%i2
+/* 0x01e0 */ sra %g2,0,%o0
+/* 0x01e4 175 */ add %o2,8,%o2
+/* 0x01e8 179 */ fmovs %f2,%f4
+/* 0x01ec 180 */ sllx %o0,3,%l0
+/* 0x01f0 */ add %i4,3,%g2
+/* 0x01f4 179 */ ld [%sp+2227],%f5
+/* 0x01f8 175 */ add %l0,16,%o0
+/* 0x01fc 180 */ add %i4,2,%i4
+/* 0x0200 175 */ sub %g5,1,%o7
+/* 0x0204 180 */ add %i2,3,%i2
+/* 0x0208 179 */ fsubd %f4,%f2,%f4
+/* 0x020c */ std %f4,[%i1+%i5]
+/* 0x0210 180 */ st %g4,[%sp+2223]
+/* 0x0214 177 */ ld [%i0+%o3],%i5
+/* 0x0218 180 */ fmovs %f2,%f4
+/* 0x021c */ srl %i5,16,%g4
+/* 0x0220 179 */ and %i5,%g3,%i5
+/* 0x0224 180 */ ld [%sp+2223],%f5
+/* 0x0228 */ fsubd %f4,%f2,%f4
+/* 0x022c */ std %f4,[%i1+%l0]
+/* 0x0230 */ st %g4,[%sp+2223]
+/* 0x0234 177 */ ld [%i0+%o5],%g4
+/* 0x0238 179 */ st %i5,[%sp+2227]
+/* 0x023c 178 */ fmovs %f2,%f4
+/* 0x0240 180 */ srl %g4,16,%i5
+/* 0x0244 179 */ and %g4,%g3,%g4
+/* 0x0248 180 */ ld [%sp+2223],%f7
+/* 0x024c */ st %i5,[%sp+2223]
+/* 0x0250 178 */ ld [%i0+%o3],%f5
+/* 0x0254 180 */ fsubd %f6,%f2,%f6
+/* 0x0258 177 */ ld [%i0+%o4],%o3
+/* 0x025c 178 */ fsubd %f4,%f2,%f4
+/* 0x0260 179 */ ld [%sp+2227],%f9
+/* 0x0264 180 */ ld [%sp+2223],%f1
+/* 0x0268 179 */ st %g4,[%sp+2227]
+/* 0x026c */ fsubd %f8,%f2,%f8
+/* 0x0270 */ std %f8,[%i1+%o1]
+/* 0x0274 180 */ std %f6,[%i1+%o0]
+/* 0x0278 178 */ std %f4,[%i3+%o2]
+ .L900000411:
+/* 0x027c 179 */ ld [%sp+2227],%f13
+/* 0x0280 180 */ srl %o3,16,%g4
+/* 0x0284 */ add %i2,2,%i2
+/* 0x0288 */ st %g4,[%sp+2223]
+/* 0x028c */ cmp %i2,%o7
+/* 0x0290 */ add %g2,4,%g2
+/* 0x0294 178 */ ld [%i0+%o5],%f11
+/* 0x0298 180 */ add %i4,4,%i4
+/* 0x029c 175 */ add %o4,4,%o5
+/* 0x02a0 177 */ ld [%i0+%o5],%g4
+/* 0x02a4 179 */ and %o3,%g3,%o3
+/* 0x02a8 */ st %o3,[%sp+2227]
+/* 0x02ac 180 */ fmovs %f2,%f0
+/* 0x02b0 179 */ fmovs %f2,%f12
+/* 0x02b4 180 */ fsubd %f0,%f2,%f8
+/* 0x02b8 179 */ fsubd %f12,%f2,%f4
+/* 0x02bc 175 */ add %o1,16,%o1
+/* 0x02c0 180 */ ld [%sp+2223],%f7
+/* 0x02c4 178 */ fmovs %f2,%f10
+/* 0x02c8 179 */ std %f4,[%i1+%o1]
+/* 0x02cc 175 */ add %o0,16,%o0
+/* 0x02d0 178 */ fsubd %f10,%f2,%f4
+/* 0x02d4 175 */ add %o2,8,%o2
+/* 0x02d8 180 */ std %f8,[%i1+%o0]
+/* 0x02dc 178 */ std %f4,[%i3+%o2]
+/* 0x02e0 179 */ ld [%sp+2227],%f9
+/* 0x02e4 180 */ srl %g4,16,%o3
+/* 0x02e8 */ st %o3,[%sp+2223]
+/* 0x02ec 178 */ ld [%i0+%o4],%f5
+/* 0x02f0 175 */ add %o4,8,%o4
+/* 0x02f4 177 */ ld [%i0+%o4],%o3
+/* 0x02f8 179 */ and %g4,%g3,%g4
+/* 0x02fc */ st %g4,[%sp+2227]
+/* 0x0300 180 */ fmovs %f2,%f6
+/* 0x0304 179 */ fmovs %f2,%f8
+/* 0x0308 180 */ fsubd %f6,%f2,%f6
+/* 0x030c 179 */ fsubd %f8,%f2,%f8
+/* 0x0310 175 */ add %o1,16,%o1
+/* 0x0314 180 */ ld [%sp+2223],%f1
+/* 0x0318 178 */ fmovs %f2,%f4
+/* 0x031c 179 */ std %f8,[%i1+%o1]
+/* 0x0320 175 */ add %o0,16,%o0
+/* 0x0324 178 */ fsubd %f4,%f2,%f4
+/* 0x0328 175 */ add %o2,8,%o2
+/* 0x032c 180 */ std %f6,[%i1+%o0]
+/* 0x0330 */ bl,pt %icc,.L900000411
+/* 0x0334 */ std %f4,[%i3+%o2]
+ .L900000414:
+/* 0x0338 180 */ srl %o3,16,%o7
+/* 0x033c */ st %o7,[%sp+2223]
+/* 0x0340 179 */ fmovs %f2,%f12
+/* 0x0344 178 */ ld [%i0+%o5],%f11
+/* 0x0348 180 */ fmovs %f2,%f0
+/* 0x034c 179 */ and %o3,%g3,%g4
+/* 0x0350 180 */ fmovs %f2,%f6
+/* 0x0354 175 */ add %o1,16,%o3
+/* 0x0358 */ add %o0,16,%o7
+/* 0x035c 178 */ fmovs %f2,%f10
+/* 0x0360 175 */ add %o2,8,%o2
+/* 0x0364 */ add %o1,32,%o5
+/* 0x0368 179 */ ld [%sp+2227],%f13
+/* 0x036c 178 */ fmovs %f2,%f4
+/* 0x0370 175 */ add %o0,32,%o1
+/* 0x0374 180 */ ld [%sp+2223],%f7
+/* 0x0378 175 */ add %o2,8,%o0
+/* 0x037c 180 */ cmp %i2,%g5
+/* 0x0380 179 */ st %g4,[%sp+2227]
+/* 0x0384 */ fsubd %f12,%f2,%f8
+/* 0x0388 180 */ add %g2,6,%g2
+/* 0x038c 179 */ std %f8,[%i1+%o3]
+/* 0x0390 180 */ fsubd %f0,%f2,%f0
+/* 0x0394 177 */ sra %i2,0,%o3
+/* 0x0398 180 */ std %f0,[%i1+%o7]
+/* 0x039c 178 */ fsubd %f10,%f2,%f0
+/* 0x03a0 180 */ add %i4,6,%i4
+/* 0x03a4 178 */ std %f0,[%i3+%o2]
+/* 0x03a8 */ sllx %o3,2,%o2
+/* 0x03ac 179 */ ld [%sp+2227],%f9
+/* 0x03b0 178 */ ld [%i0+%o4],%f5
+/* 0x03b4 179 */ fmovs %f2,%f8
+/* 0x03b8 */ fsubd %f8,%f2,%f0
+/* 0x03bc */ std %f0,[%i1+%o5]
+/* 0x03c0 180 */ fsubd %f6,%f2,%f0
+/* 0x03c4 */ std %f0,[%i1+%o1]
+/* 0x03c8 178 */ fsubd %f4,%f2,%f0
+/* 0x03cc 180 */ bge,pn %icc,.L77000164
+/* 0x03d0 */ std %f0,[%i3+%o0]
+ .L77000161:
+/* 0x03d4 178 */ ldd [%g1],%f2
+ .L900000416:
+/* 0x03d8 178 */ ld [%i0+%o2],%f5
+/* 0x03dc 179 */ sra %i4,0,%o0
+/* 0x03e0 180 */ add %i2,1,%i2
+/* 0x03e4 177 */ ld [%i0+%o2],%o1
+/* 0x03e8 178 */ sllx %o3,3,%o3
+/* 0x03ec 180 */ add %i4,2,%i4
+/* 0x03f0 178 */ fmovs %f2,%f4
+/* 0x03f4 179 */ sllx %o0,3,%o4
+/* 0x03f8 180 */ cmp %i2,%g5
+/* 0x03fc 179 */ and %o1,%g3,%o0
+/* 0x0400 178 */ fsubd %f4,%f2,%f0
+/* 0x0404 */ std %f0,[%i3+%o3]
+/* 0x0408 180 */ srl %o1,16,%o1
+/* 0x040c 179 */ st %o0,[%sp+2227]
+/* 0x0410 180 */ sra %g2,0,%o0
+/* 0x0414 */ add %g2,2,%g2
+/* 0x0418 177 */ sra %i2,0,%o3
+/* 0x041c 180 */ sllx %o0,3,%o0
+/* 0x0420 179 */ fmovs %f2,%f4
+/* 0x0424 */ sllx %o3,2,%o2
+/* 0x0428 */ ld [%sp+2227],%f5
+/* 0x042c */ fsubd %f4,%f2,%f0
+/* 0x0430 */ std %f0,[%i1+%o4]
+/* 0x0434 180 */ st %o1,[%sp+2223]
+/* 0x0438 */ fmovs %f2,%f4
+/* 0x043c */ ld [%sp+2223],%f5
+/* 0x0440 */ fsubd %f4,%f2,%f0
+/* 0x0444 */ std %f0,[%i1+%o0]
+/* 0x0448 */ bl,a,pt %icc,.L900000416
+/* 0x044c */ ldd [%g1],%f2
+ .L77000164:
+/* 0x0450 */ ret ! Result =
+/* 0x0454 */ restore %g0,%g0,%g0
+/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! SUBROUTINE adjust_montf_result
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+!
+! Review notes. The "! NNN !" lines below are the compiler's listing of the
+! C source this code was generated from:
+!   void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! After the save: %i0 = i32, %i1 = nint, %i2 = len.
+! If i32[len] is non-zero, or if i32[0..len-1] is equal to or greater than
+! nint[0..len-1] (words compared from index len-1 downwards, unsigned),
+! nint[] is subtracted from i32[] in place, from index 0 upwards, with the
+! borrow carried in a 64-bit accumulator. Otherwise i32[] is left untouched.
+! No value is returned.
+! The instruction following each branch is its delay slot; no branch in this
+! routine uses the annul (",a") bit, so every delay slot always executes.
+
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ save %sp,-176,%sp
+! Copy the arguments: %o1 = len, %i2 = i32 (nint stays in %i1 for now).
+/* 0x0004 */ or %g0,%i2,%o1
+/* 0x0008 */ or %g0,%i0,%i2
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
+! %g2 = i32[len] (len sign-extended and scaled by 4); %o2 = i, preset to -1.
+! The unsigned bleu after "cmp %g2,0" is taken only when i32[len] == 0.
+! Its delay slot copies nint into %i0 on both paths, because %i1 is reused
+! as the loop counter in the subtraction code further down.
+/* 0x000c 190 */ sra %o1,0,%g2
+/* 0x0010 */ or %g0,-1,%o2
+/* 0x0014 */ sllx %g2,2,%g2
+/* 0x0018 */ ld [%i2+%g2],%g2
+/* 0x001c */ cmp %g2,0
+/* 0x0020 */ bleu,pn %icc,.L77000175
+/* 0x0024 */ or %g0,%i1,%i0
+! i32[len] != 0: keep i = -1. The delay-slot cmp sets the flags tested by
+! the bl at .L900000511, which then branches to .L77000198.
+/* 0x0028 */ ba .L900000511
+/* 0x002c */ cmp %o2,0
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+! i = len-1; the compare loop is skipped entirely when len-1 < 0.
+/* 0x0030 193 */ sub %o1,1,%o2
+/* 0x0034 */ cmp %o2,0
+/* 0x0038 */ bl,pn %icc,.L77000182
+/* 0x003c */ sra %o2,0,%g2
+ .L900000510:
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+! %g3 = nint[i], %g2 = i32[i]. Leave the loop at the first index where they
+! differ; otherwise i = i-1 (precomputed in %o0) and repeat while i >= 0.
+/* 0x0040 195 */ sllx %g2,2,%g2
+/* 0x0044 */ sub %o2,1,%o0
+/* 0x0048 */ ld [%i1+%g2],%g3
+/* 0x004c */ ld [%i2+%g2],%g2
+/* 0x0050 */ cmp %g2,%g3
+/* 0x0054 */ bne,pn %icc,.L77000182
+/* 0x0058 */ nop
+/* 0x005c 0 */ or %g0,%o0,%o2
+/* 0x0060 195 */ cmp %o0,0
+/* 0x0064 */ bge,pt %icc,.L900000510
+/* 0x0068 */ sra %o2,0,%g2
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+! Go on to the subtraction when i < 0, or when i32[i] > nint[i] (unsigned
+! compare). Otherwise (bleu taken) return without modifying i32[].
+/* 0x006c 198 */ cmp %o2,0
+ .L900000511:
+/* 0x0070 198 */ bl,pn %icc,.L77000198
+/* 0x0074 */ sra %o2,0,%g2
+/* 0x0078 */ sllx %g2,2,%g2
+/* 0x007c */ ld [%i1+%g2],%g3
+/* 0x0080 */ ld [%i2+%g2],%g2
+/* 0x0084 */ cmp %g2,%g3
+/* 0x0088 */ bleu,pt %icc,.L77000191
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+! Return at once when len <= 0. Otherwise set up the subtraction:
+! %g2 = 0xffffffff mask, %g3 = len, %g4 = len-1, %o7 = len-4 (delay slot),
+! %i1 = i = 0, %g5 = acc = 0, %o1 = byte offset 0.
+! For len < 9 the one-word-per-pass loop at .L77000199 does all the work.
+/* 0x0090 201 */ cmp %o1,0
+/* 0x0094 */ ble,pt %icc,.L77000191
+/* 0x0098 */ nop
+/* 0x009c 198 */ or %g0,-1,%g2
+/* 0x00a0 201 */ or %g0,%o1,%g3
+/* 0x00a4 198 */ srl %g2,0,%g2
+/* 0x00a8 */ sub %o1,1,%g4
+/* 0x00ac */ cmp %o1,9
+/* 0x00b0 201 */ or %g0,0,%i1
+/* 0x00b4 200 */ or %g0,0,%g5
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b8 203 */ or %g0,0,%o1
+/* 0x00bc 201 */ bl,pn %icc,.L77000199
+/* 0x00c0 */ sub %g3,4,%o7
+! len >= 9: prologue of the unrolled loop. Result words 0 and 1 are computed
+! and stored here, and the operands for words 2 and 3 are loaded in advance.
+! %i1 starts at 5; %o5, %o4 and %g1 hold byte offsets 8, 12 and 16.
+/* 0x00c4 203 */ ld [%i2],%o1
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00c8 205 */ or %g0,5,%i1
+/* 0x00cc 203 */ ld [%i0],%o2
+/* 0x00d0 201 */ or %g0,8,%o5
+/* 0x00d4 */ or %g0,12,%o4
+/* 0x00d8 203 */ ld [%i0+4],%o3
+/* 0x00dc 201 */ or %g0,16,%g1
+/* 0x00e0 203 */ ld [%i2+4],%o0
+/* 0x00e4 201 */ sub %o1,%o2,%o1
+/* 0x00e8 203 */ ld [%i0+8],%i3
+/* 0x00ec 204 */ and %o1,%g2,%g5
+/* 0x00f0 */ st %g5,[%i2]
+/* 0x00f4 205 */ srax %o1,32,%g5
+/* 0x00f8 201 */ sub %o0,%o3,%o0
+/* 0x00fc 203 */ ld [%i0+12],%o2
+/* 0x0100 201 */ add %o0,%g5,%o0
+/* 0x0104 204 */ and %o0,%g2,%g5
+/* 0x0108 */ st %g5,[%i2+4]
+/* 0x010c 205 */ srax %o0,32,%o0
+/* 0x0110 203 */ ld [%i2+8],%o1
+/* 0x0114 */ ld [%i2+12],%o3
+/* 0x0118 201 */ sub %o1,%i3,%o1
+! Unrolled loop: each pass stores four result words, advances %g1 by 16 and
+! %i1 by 4, and issues the loads for later words ahead of their use. The
+! running borrow (acc >> 32, via srax) rotates through %o0, %o2 and %g5.
+! Repeats while %i1 <= %o7 (len-4).
+ .L900000505:
+/* 0x011c */ add %g1,4,%g3
+/* 0x0120 203 */ ld [%g1+%i2],%g5
+/* 0x0124 201 */ add %o1,%o0,%o0
+/* 0x0128 203 */ ld [%i0+%g1],%i3
+/* 0x012c 201 */ sub %o3,%o2,%o1
+/* 0x0130 204 */ and %o0,%g2,%o2
+/* 0x0134 */ st %o2,[%o5+%i2]
+/* 0x0138 205 */ srax %o0,32,%o2
+/* 0x013c */ add %i1,4,%i1
+/* 0x0140 201 */ add %g1,8,%o5
+/* 0x0144 203 */ ld [%g3+%i2],%o0
+/* 0x0148 201 */ add %o1,%o2,%o1
+/* 0x014c 203 */ ld [%i0+%g3],%o3
+/* 0x0150 201 */ sub %g5,%i3,%o2
+/* 0x0154 204 */ and %o1,%g2,%g5
+/* 0x0158 */ st %g5,[%o4+%i2]
+/* 0x015c 205 */ srax %o1,32,%g5
+/* 0x0160 */ cmp %i1,%o7
+/* 0x0164 201 */ add %g1,12,%o4
+/* 0x0168 203 */ ld [%o5+%i2],%o1
+/* 0x016c 201 */ add %o2,%g5,%o2
+/* 0x0170 203 */ ld [%i0+%o5],%i3
+/* 0x0174 201 */ sub %o0,%o3,%o0
+/* 0x0178 204 */ and %o2,%g2,%o3
+/* 0x017c */ st %o3,[%g1+%i2]
+/* 0x0180 205 */ srax %o2,32,%g5
+/* 0x0184 203 */ ld [%o4+%i2],%o3
+/* 0x0188 201 */ add %g1,16,%g1
+/* 0x018c */ add %o0,%g5,%o0
+/* 0x0190 203 */ ld [%i0+%o4],%o2
+/* 0x0194 201 */ sub %o1,%i3,%o1
+/* 0x0198 204 */ and %o0,%g2,%g5
+/* 0x019c */ st %g5,[%g3+%i2]
+/* 0x01a0 205 */ ble,pt %icc,.L900000505
+/* 0x01a4 */ srax %o0,32,%o0
+! Drain of the unrolled loop: stores three more result words and leaves the
+! running borrow in %g5. Returns when %i1 > %g4 (len-1); otherwise falls
+! through with %o1 = %i1*4 (set in the delay slot) to finish the rest.
+ .L900000508:
+/* 0x01a8 */ add %o1,%o0,%g3
+/* 0x01ac */ sub %o3,%o2,%o1
+/* 0x01b0 203 */ ld [%g1+%i2],%o0
+/* 0x01b4 */ ld [%i0+%g1],%o2
+/* 0x01b8 205 */ srax %g3,32,%o7
+/* 0x01bc 204 */ and %g3,%g2,%o3
+/* 0x01c0 201 */ add %o1,%o7,%o1
+/* 0x01c4 204 */ st %o3,[%o5+%i2]
+/* 0x01c8 205 */ cmp %i1,%g4
+/* 0x01cc 201 */ sub %o0,%o2,%o0
+/* 0x01d0 204 */ and %o1,%g2,%o2
+/* 0x01d4 */ st %o2,[%o4+%i2]
+/* 0x01d8 205 */ srax %o1,32,%o1
+/* 0x01dc 203 */ sra %i1,0,%o2
+/* 0x01e0 201 */ add %o0,%o1,%o0
+/* 0x01e4 205 */ srax %o0,32,%g5
+/* 0x01e8 204 */ and %o0,%g2,%o1
+/* 0x01ec */ st %o1,[%g1+%i2]
+/* 0x01f0 205 */ bg,pn %icc,.L77000191
+/* 0x01f4 */ sllx %o2,2,%o1
+! One word per pass: acc (%g5) = acc + i32[i] - nint[i]; the low 32 bits go
+! back to i32[i] and %g5 = acc >> 32 (arithmetic shift). %g1 keeps the store
+! offset because %o1 is overwritten by the nint load. Repeats while
+! i (%i1) <= %g4 (len-1).
+ .L77000199:
+/* 0x01f8 0 */ or %g0,%o1,%g1
+ .L900000509:
+/* 0x01fc 203 */ ld [%o1+%i2],%o0
+/* 0x0200 205 */ add %i1,1,%i1
+/* 0x0204 203 */ ld [%i0+%o1],%o1
+/* 0x0208 */ sra %i1,0,%o2
+/* 0x020c 205 */ cmp %i1,%g4
+/* 0x0210 203 */ add %g5,%o0,%o0
+/* 0x0214 */ sub %o0,%o1,%o0
+/* 0x0218 205 */ srax %o0,32,%g5
+/* 0x021c 204 */ and %o0,%g2,%o1
+/* 0x0220 */ st %o1,[%g1+%i2]
+/* 0x0224 */ sllx %o2,2,%o1
+/* 0x0228 205 */ ble,pt %icc,.L900000509
+/* 0x022c */ or %g0,%o1,%g1
+! Common exit: return to the caller and pop the register window.
+ .L77000191:
+/* 0x0230 */ ret ! Result =
+/* 0x0234 */ restore %g0,%g0,%g0
+/* 0x0238 0 */ .type adjust_montf_result,2
+/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+/* 000000 */ .skip 24
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-224,%sp
+ .L900000643:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x000c */ ldx [%fp+2223],%l0
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x0010 232 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ fmovd %f14,%f30
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ sll %l0,1,%o3
+/* 0x0024 223 */ add %g5,%o7,%o4
+/* 0x0028 232 */ sra %o3,0,%g5
+/* 0x002c */ ldx [%o4+%g2],%o7
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0034 */ sethi %hi(TwoTo16),%g4
+/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c 232 */ ldd [%o7],%f0
+/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0044 223 */ or %g0,%i4,%o0
+/* 0x0048 232 */ sllx %g5,3,%g4
+/* 0x004c 239 */ ldx [%o4+%g2],%o5
+/* 0x0050 223 */ or %g0,%i5,%l3
+/* 0x0054 */ or %g0,%i0,%l2
+/* 0x0058 239 */ ldx [%o4+%g3],%o4
+/* 0x005c 234 */ cmp %l0,16
+/* 0x0060 232 */ std %f0,[%i2+%g4]
+/* 0x0064 234 */ be,pn %icc,.L77000279
+/* 0x0068 */ or %g0,%i3,%l4
+/* 0x006c 236 */ sll %l0,2,%g2
+/* 0x0070 223 */ or %g0,%o0,%i5
+/* 0x0074 236 */ add %g2,2,%o0
+/* 0x0078 223 */ or %g0,%i1,%i4
+/* 0x007c 236 */ cmp %o0,0
+/* 0x0080 223 */ or %g0,%i2,%l1
+/* 0x0084 236 */ ble,a,pt %icc,.L900000657
+/* 0x0088 */ ldd [%i1],%f6
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
+/* 0x008c 281 */ or %g0,%o0,%o1
+/* 0x0090 236 */ sub %o0,1,%g1
+/* 0x0094 */ or %g0,0,%g2
+/* 0x0098 281 */ cmp %o1,5
+/* 0x009c */ bl,pn %icc,.L77000280
+/* 0x00a0 */ or %g0,8,%o0
+/* 0x00a4 */ std %f0,[%i3]
+/* 0x00a8 */ or %g0,2,%g2
+/* 0x00ac */ sub %g1,2,%o1
+ .L900000627:
+/* 0x00b0 */ add %o0,8,%g3
+/* 0x00b4 */ std %f0,[%i3+%o0]
+/* 0x00b8 */ add %g2,3,%g2
+/* 0x00bc */ add %o0,16,%o2
+/* 0x00c0 */ std %f0,[%i3+%g3]
+/* 0x00c4 */ cmp %g2,%o1
+/* 0x00c8 */ add %o0,24,%o0
+/* 0x00cc */ ble,pt %icc,.L900000627
+/* 0x00d0 */ std %f0,[%i3+%o2]
+ .L900000630:
+/* 0x00d4 */ cmp %g2,%g1
+/* 0x00d8 */ bg,pn %icc,.L77000285
+/* 0x00dc */ std %f0,[%i3+%o0]
+ .L77000280:
+/* 0x00e0 */ ldd [%o7],%f0
+ .L900000656:
+/* 0x00e4 */ sra %g2,0,%o0
+/* 0x00e8 */ add %g2,1,%g2
+/* 0x00ec */ sllx %o0,3,%o0
+/* 0x00f0 */ cmp %g2,%g1
+/* 0x00f4 */ std %f0,[%i3+%o0]
+/* 0x00f8 */ ble,a,pt %icc,.L900000656
+/* 0x00fc */ ldd [%o7],%f0
+ .L77000285:
+/* 0x0100 238 */ ldd [%i1],%f6
+ .L900000657:
+/* 0x0104 238 */ ldd [%i2],%f8
+/* 0x0108 242 */ cmp %o3,0
+/* 0x010c */ sub %o3,1,%o1
+/* 0x0110 239 */ ldd [%o7],%f10
+/* 0x0114 */ add %o3,1,%o2
+/* 0x0118 0 */ or %g0,0,%i2
+/* 0x011c 238 */ fmuld %f6,%f8,%f6
+/* 0x0120 */ std %f6,[%i3]
+/* 0x0124 0 */ or %g0,0,%g3
+/* 0x0128 239 */ ldd [%o5],%f8
+/* 0x012c 0 */ or %g0,%o2,%g1
+/* 0x0130 236 */ sub %l0,1,%i1
+/* 0x0134 239 */ ldd [%o4],%f12
+/* 0x0138 236 */ or %g0,1,%g4
+/* 0x013c */ fdtox %f6,%f0
+/* 0x0140 */ fmovs %f10,%f0
+/* 0x0144 */ fxtod %f0,%f6
+/* 0x0148 239 */ fmuld %f6,%f14,%f6
+/* 0x014c */ fmuld %f6,%f8,%f8
+/* 0x0150 */ fdtox %f8,%f8
+/* 0x0154 */ fxtod %f8,%f8
+/* 0x0158 */ fmuld %f8,%f12,%f8
+/* 0x015c */ fsubd %f6,%f8,%f20
+/* 0x0160 242 */ ble,pt %icc,.L900000650
+/* 0x0164 */ sllx %g5,3,%g2
+/* 0x0168 0 */ st %o1,[%sp+2223]
+/* 0x016c 246 */ ldd [%i5],%f6
+ .L900000651:
+/* 0x0170 246 */ sra %g4,0,%g2
+/* 0x0174 */ fmuld %f6,%f20,%f6
+/* 0x0178 */ ldd [%i3],%f12
+/* 0x017c */ sllx %g2,3,%g2
+/* 0x0180 */ ldd [%i4],%f8
+/* 0x0184 250 */ cmp %l0,1
+/* 0x0188 246 */ ldd [%l1+%g2],%f10
+/* 0x018c 244 */ sra %i2,0,%g2
+/* 0x0190 */ add %i2,1,%i0
+/* 0x0194 246 */ faddd %f12,%f6,%f6
+/* 0x0198 */ ldd [%o5],%f12
+/* 0x019c 244 */ sllx %g2,3,%g2
+/* 0x01a0 246 */ fmuld %f8,%f10,%f8
+/* 0x01a4 */ ldd [%i3+8],%f10
+/* 0x01a8 */ srl %i2,31,%o3
+/* 0x01ac 244 */ ldd [%l1+%g2],%f18
+/* 0x01b0 0 */ or %g0,1,%l5
+/* 0x01b4 236 */ or %g0,2,%g2
+/* 0x01b8 246 */ fmuld %f6,%f12,%f6
+/* 0x01bc 250 */ or %g0,32,%o1
+/* 0x01c0 */ or %g0,48,%o2
+/* 0x01c4 246 */ faddd %f10,%f8,%f8
+/* 0x01c8 */ faddd %f8,%f6,%f16
+/* 0x01cc 250 */ ble,pn %icc,.L77000213
+/* 0x01d0 */ std %f16,[%i3+8]
+/* 0x01d4 */ cmp %i1,8
+/* 0x01d8 */ sub %l0,3,%o3
+/* 0x01dc */ bl,pn %icc,.L77000284
+/* 0x01e0 */ or %g0,8,%o0
+/* 0x01e4 252 */ ldd [%i5+8],%f0
+/* 0x01e8 */ or %g0,6,%l5
+/* 0x01ec */ ldd [%i4+8],%f2
+/* 0x01f0 */ or %g0,4,%g2
+/* 0x01f4 250 */ or %g0,40,%o0
+/* 0x01f8 252 */ ldd [%i5+16],%f8
+/* 0x01fc */ fmuld %f0,%f20,%f10
+/* 0x0200 */ ldd [%i4+16],%f4
+/* 0x0204 */ fmuld %f2,%f18,%f2
+/* 0x0208 */ ldd [%i3+16],%f0
+/* 0x020c */ fmuld %f8,%f20,%f12
+/* 0x0210 */ ldd [%i4+24],%f6
+/* 0x0214 */ fmuld %f4,%f18,%f4
+/* 0x0218 */ ldd [%i5+24],%f8
+/* 0x021c */ faddd %f2,%f10,%f2
+/* 0x0220 */ ldd [%i4+32],%f14
+/* 0x0224 */ fmuld %f6,%f18,%f10
+/* 0x0228 */ ldd [%i5+32],%f6
+/* 0x022c */ faddd %f4,%f12,%f4
+/* 0x0230 */ ldd [%i4+40],%f12
+/* 0x0234 */ faddd %f0,%f2,%f0
+/* 0x0238 */ std %f0,[%i3+16]
+/* 0x023c */ ldd [%i3+32],%f0
+/* 0x0240 */ ldd [%i3+48],%f2
+ .L900000639:
+/* 0x0244 */ add %o2,16,%l6
+/* 0x0248 252 */ ldd [%i5+%o0],%f22
+/* 0x024c */ add %l5,3,%l5
+/* 0x0250 */ fmuld %f8,%f20,%f8
+/* 0x0254 250 */ add %o0,8,%o0
+/* 0x0258 252 */ ldd [%l6+%i3],%f26
+/* 0x025c */ cmp %l5,%o3
+/* 0x0260 */ ldd [%i4+%o0],%f24
+/* 0x0264 */ faddd %f0,%f4,%f0
+/* 0x0268 */ add %g2,6,%g2
+/* 0x026c */ faddd %f10,%f8,%f10
+/* 0x0270 */ fmuld %f14,%f18,%f4
+/* 0x0274 */ std %f0,[%o1+%i3]
+/* 0x0278 250 */ add %o2,32,%o1
+/* 0x027c 252 */ ldd [%i5+%o0],%f8
+/* 0x0280 */ fmuld %f6,%f20,%f6
+/* 0x0284 250 */ add %o0,8,%o0
+/* 0x0288 252 */ ldd [%o1+%i3],%f0
+/* 0x028c */ ldd [%i4+%o0],%f14
+/* 0x0290 */ faddd %f2,%f10,%f2
+/* 0x0294 */ faddd %f4,%f6,%f10
+/* 0x0298 */ fmuld %f12,%f18,%f4
+/* 0x029c */ std %f2,[%o2+%i3]
+/* 0x02a0 250 */ add %o2,48,%o2
+/* 0x02a4 252 */ ldd [%i5+%o0],%f6
+/* 0x02a8 */ fmuld %f22,%f20,%f22
+/* 0x02ac 250 */ add %o0,8,%o0
+/* 0x02b0 252 */ ldd [%o2+%i3],%f2
+/* 0x02b4 */ ldd [%i4+%o0],%f12
+/* 0x02b8 */ faddd %f26,%f10,%f10
+/* 0x02bc */ std %f10,[%l6+%i3]
+/* 0x02c0 */ fmuld %f24,%f18,%f10
+/* 0x02c4 */ ble,pt %icc,.L900000639
+/* 0x02c8 */ faddd %f4,%f22,%f4
+ .L900000642:
+/* 0x02cc 252 */ fmuld %f8,%f20,%f24
+/* 0x02d0 */ faddd %f0,%f4,%f8
+/* 0x02d4 250 */ add %o2,16,%o3
+/* 0x02d8 252 */ ldd [%o3+%i3],%f4
+/* 0x02dc */ fmuld %f14,%f18,%f0
+/* 0x02e0 */ cmp %l5,%i1
+/* 0x02e4 */ std %f8,[%o1+%i3]
+/* 0x02e8 */ fmuld %f12,%f18,%f8
+/* 0x02ec 250 */ add %o2,32,%o1
+/* 0x02f0 252 */ faddd %f10,%f24,%f12
+/* 0x02f4 */ ldd [%i5+%o0],%f22
+/* 0x02f8 */ fmuld %f6,%f20,%f6
+/* 0x02fc */ add %g2,8,%g2
+/* 0x0300 */ fmuld %f22,%f20,%f10
+/* 0x0304 */ faddd %f2,%f12,%f2
+/* 0x0308 */ faddd %f0,%f6,%f6
+/* 0x030c */ ldd [%o1+%i3],%f0
+/* 0x0310 */ std %f2,[%o2+%i3]
+/* 0x0314 */ faddd %f8,%f10,%f2
+/* 0x0318 */ sra %l5,0,%o2
+/* 0x031c */ sllx %o2,3,%o0
+/* 0x0320 */ faddd %f4,%f6,%f4
+/* 0x0324 */ std %f4,[%o3+%i3]
+/* 0x0328 */ faddd %f0,%f2,%f0
+/* 0x032c */ std %f0,[%o1+%i3]
+/* 0x0330 */ bg,a,pn %icc,.L77000213
+/* 0x0334 */ srl %i2,31,%o3
+ .L77000284:
+/* 0x0338 252 */ ldd [%i4+%o0],%f2
+ .L900000655:
+/* 0x033c 252 */ ldd [%i5+%o0],%f0
+/* 0x0340 */ fmuld %f2,%f18,%f2
+/* 0x0344 */ sra %g2,0,%o0
+/* 0x0348 */ sllx %o0,3,%o1
+/* 0x034c */ add %l5,1,%l5
+/* 0x0350 */ fmuld %f0,%f20,%f4
+/* 0x0354 */ ldd [%o1+%i3],%f0
+/* 0x0358 */ sra %l5,0,%o2
+/* 0x035c */ sllx %o2,3,%o0
+/* 0x0360 */ add %g2,2,%g2
+/* 0x0364 */ cmp %l5,%i1
+/* 0x0368 */ faddd %f2,%f4,%f2
+/* 0x036c */ faddd %f0,%f2,%f0
+/* 0x0370 */ std %f0,[%o1+%i3]
+/* 0x0374 */ ble,a,pt %icc,.L900000655
+/* 0x0378 */ ldd [%i4+%o0],%f2
+ .L900000626:
+/* 0x037c */ srl %i2,31,%o3
+/* 0x0380 252 */ ba .L900000654
+/* 0x0384 */ cmp %g3,30
+ .L77000213:
+/* 0x0388 254 */ cmp %g3,30
+ .L900000654:
+/* 0x038c */ add %i2,%o3,%o0
+/* 0x0390 254 */ bne,a,pt %icc,.L900000653
+/* 0x0394 */ fdtox %f16,%f0
+/* 0x0398 281 */ sra %o0,1,%g2
+/* 0x039c */ add %g2,1,%g2
+/* 0x03a0 */ ldd [%o7],%f0
+/* 0x03a4 */ sll %g2,1,%o1
+/* 0x03a8 */ sll %g1,1,%g2
+/* 0x03ac */ or %g0,%o1,%o2
+/* 0x03b0 */ fmovd %f0,%f2
+/* 0x03b4 */ or %g0,%g2,%o0
+/* 0x03b8 */ cmp %o1,%o0
+/* 0x03bc */ sub %g2,1,%o0
+/* 0x03c0 */ bge,pt %icc,.L77000215
+/* 0x03c4 */ or %g0,0,%g3
+/* 0x03c8 254 */ add %o1,1,%o1
+/* 0x03cc 281 */ sra %o2,0,%g2
+ .L900000652:
+/* 0x03d0 */ sllx %g2,3,%g2
+/* 0x03d4 */ ldd [%o7],%f6
+/* 0x03d8 */ add %o2,2,%o2
+/* 0x03dc */ sra %o1,0,%g3
+/* 0x03e0 */ ldd [%g2+%l4],%f8
+/* 0x03e4 */ cmp %o2,%o0
+/* 0x03e8 */ sllx %g3,3,%g3
+/* 0x03ec */ add %o1,2,%o1
+/* 0x03f0 */ ldd [%l4+%g3],%f10
+/* 0x03f4 */ fdtox %f8,%f12
+/* 0x03f8 */ fdtox %f10,%f4
+/* 0x03fc */ fmovd %f12,%f8
+/* 0x0400 */ fmovs %f6,%f12
+/* 0x0404 */ fmovs %f6,%f4
+/* 0x0408 */ fxtod %f12,%f6
+/* 0x040c */ fxtod %f4,%f12
+/* 0x0410 */ fdtox %f10,%f4
+/* 0x0414 */ faddd %f6,%f2,%f6
+/* 0x0418 */ std %f6,[%g2+%l4]
+/* 0x041c */ faddd %f12,%f0,%f6
+/* 0x0420 */ std %f6,[%l4+%g3]
+/* 0x0424 */ fitod %f8,%f2
+/* 0x0428 */ fitod %f4,%f0
+/* 0x042c */ ble,pt %icc,.L900000652
+/* 0x0430 */ sra %o2,0,%g2
+ .L77000233:
+/* 0x0434 */ or %g0,0,%g3
+ .L77000215:
+/* 0x0438 */ fdtox %f16,%f0
+ .L900000653:
+/* 0x043c 256 */ ldd [%o7],%f6
+/* 0x0440 */ add %g4,1,%g4
+/* 0x0444 */ or %g0,%i0,%i2
+/* 0x0448 */ ldd [%o5],%f8
+/* 0x044c */ add %g3,1,%g3
+/* 0x0450 */ add %i3,8,%i3
+/* 0x0454 */ fmovs %f6,%f0
+/* 0x0458 */ ldd [%o4],%f10
+/* 0x045c */ ld [%sp+2223],%o0
+/* 0x0460 */ fxtod %f0,%f6
+/* 0x0464 */ cmp %i0,%o0
+/* 0x0468 */ fmuld %f6,%f30,%f6
+/* 0x046c */ fmuld %f6,%f8,%f8
+/* 0x0470 */ fdtox %f8,%f8
+/* 0x0474 */ fxtod %f8,%f8
+/* 0x0478 */ fmuld %f8,%f10,%f8
+/* 0x047c */ fsubd %f6,%f8,%f20
+/* 0x0480 */ ble,a,pt %icc,.L900000651
+/* 0x0484 */ ldd [%i5],%f6
+ .L900000625:
+/* 0x0488 256 */ ba .L900000650
+/* 0x048c */ sllx %g5,3,%g2
+ .L77000279:
+/* 0x0490 261 */ ldd [%i1],%f4
+/* 0x0494 */ ldd [%i2],%f6
+/* 0x0498 273 */ std %f0,[%i3+8]
+/* 0x049c */ std %f0,[%i3+16]
+/* 0x04a0 261 */ fmuld %f4,%f6,%f6
+/* 0x04a4 */ std %f6,[%i3]
+/* 0x04a8 273 */ std %f0,[%i3+24]
+/* 0x04ac */ std %f0,[%i3+32]
+/* 0x04b0 */ fdtox %f6,%f2
+/* 0x04b4 */ std %f0,[%i3+40]
+/* 0x04b8 */ std %f0,[%i3+48]
+/* 0x04bc */ std %f0,[%i3+56]
+/* 0x04c0 */ std %f0,[%i3+64]
+/* 0x04c4 */ fmovs %f0,%f2
+/* 0x04c8 */ std %f0,[%i3+72]
+/* 0x04cc */ std %f0,[%i3+80]
+/* 0x04d0 */ std %f0,[%i3+88]
+/* 0x04d4 */ std %f0,[%i3+96]
+/* 0x04d8 */ std %f0,[%i3+104]
+/* 0x04dc */ std %f0,[%i3+112]
+/* 0x04e0 */ std %f0,[%i3+120]
+/* 0x04e4 */ std %f0,[%i3+128]
+/* 0x04e8 */ std %f0,[%i3+136]
+/* 0x04ec */ std %f0,[%i3+144]
+/* 0x04f0 */ std %f0,[%i3+152]
+/* 0x04f4 */ std %f0,[%i3+160]
+/* 0x04f8 */ std %f0,[%i3+168]
+/* 0x04fc */ fxtod %f2,%f6
+/* 0x0500 */ std %f0,[%i3+176]
+/* 0x0504 281 */ or %g0,1,%o2
+/* 0x0508 273 */ std %f0,[%i3+184]
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+/* 0x050c 286 */ sra %o2,0,%g2
+/* 0x0510 279 */ or %g0,%i3,%o3
+/* 0x0514 273 */ std %f0,[%i3+192]
+/* 0x0518 278 */ fmuld %f6,%f14,%f6
+/* 0x051c 281 */ or %g0,0,%g1
+/* 0x0520 273 */ std %f0,[%i3+200]
+/* 0x0524 */ std %f0,[%i3+208]
+/* 0x0528 */ std %f0,[%i3+216]
+/* 0x052c */ std %f0,[%i3+224]
+/* 0x0530 */ std %f0,[%i3+232]
+/* 0x0534 */ std %f0,[%i3+240]
+/* 0x0538 */ std %f0,[%i3+248]
+/* 0x053c */ std %f0,[%i3+256]
+/* 0x0540 */ std %f0,[%i3+264]
+/* 0x0544 */ std %f0,[%i3+272]
+/* 0x0548 */ std %f0,[%i3+280]
+/* 0x054c */ std %f0,[%i3+288]
+/* 0x0550 */ std %f0,[%i3+296]
+/* 0x0554 */ std %f0,[%i3+304]
+/* 0x0558 */ std %f0,[%i3+312]
+/* 0x055c */ std %f0,[%i3+320]
+/* 0x0560 */ std %f0,[%i3+328]
+/* 0x0564 */ std %f0,[%i3+336]
+/* 0x0568 */ std %f0,[%i3+344]
+/* 0x056c */ std %f0,[%i3+352]
+/* 0x0570 */ std %f0,[%i3+360]
+/* 0x0574 */ std %f0,[%i3+368]
+/* 0x0578 */ std %f0,[%i3+376]
+/* 0x057c */ std %f0,[%i3+384]
+/* 0x0580 */ std %f0,[%i3+392]
+/* 0x0584 */ std %f0,[%i3+400]
+/* 0x0588 */ std %f0,[%i3+408]
+/* 0x058c */ std %f0,[%i3+416]
+/* 0x0590 */ std %f0,[%i3+424]
+/* 0x0594 */ std %f0,[%i3+432]
+/* 0x0598 */ std %f0,[%i3+440]
+/* 0x059c */ std %f0,[%i3+448]
+/* 0x05a0 */ std %f0,[%i3+456]
+/* 0x05a4 */ std %f0,[%i3+464]
+/* 0x05a8 */ std %f0,[%i3+472]
+/* 0x05ac */ std %f0,[%i3+480]
+/* 0x05b0 */ std %f0,[%i3+488]
+/* 0x05b4 */ std %f0,[%i3+496]
+/* 0x05b8 278 */ ldd [%o5],%f8
+/* 0x05bc */ ldd [%o4],%f10
+/* 0x05c0 */ fmuld %f6,%f8,%f8
+/* 0x05c4 273 */ std %f0,[%i3+504]
+/* 0x05c8 */ std %f0,[%i3+512]
+/* 0x05cc */ std %f0,[%i3+520]
+/* 0x05d0 */ fdtox %f8,%f8
+/* 0x05d4 275 */ ldd [%o0],%f0
+/* 0x05d8 */ fxtod %f8,%f8
+/* 0x05dc */ fmuld %f8,%f10,%f8
+/* 0x05e0 */ fsubd %f6,%f8,%f2
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleanup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f30,%f18 ! hand modified
+ ldd [%o0],%f2
+ ldd [%o3],%f8
+ ldd [%i1],%f10
+ ldd [%o5],%f14 ! hand modified
+ ldd [%o4],%f16 ! hand modified
+ ldd [%i2],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%o0+8],%f28
+ ldd [%o0+104],%f38
+ ldd [%o0+112],%f60
+
+ .L99999999: !1
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%o0+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%o0+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i2+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i2,8,%i2
+ ldd [%o0+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o3+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%o0+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o3+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o3+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o3+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o3+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%o0+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o3+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o3+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o3+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o3+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o3+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o3+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%o0+120],%f44 !24
+ ldd [%o3+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%o0+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%o0+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o3+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o3+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%o0+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o3+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o3+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o3+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%o0+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o3+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o3+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o3+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%o0+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%o0+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o3+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o3+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o3+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o3,8,%o3
+ faddd %f42,%f44,%f42 !47
+ ldd [%o3+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o3+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o3+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o3+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o3+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %g1,1,%g1
+ std %f42,[%o3+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %g1,31
+ std %f48,[%o3+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o3+224-8] !55
+ std %f8,[%o3]
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x0844 315 */ sllx %g5,3,%g2
+ .L900000650:
+/* 0x0848 315 */ ldd [%g2+%l4],%f2
+/* 0x084c */ add %l4,%g2,%o0
+/* 0x0850 */ or %g0,0,%g1
+/* 0x0854 */ ldd [%o0+8],%f4
+/* 0x0858 */ or %g0,0,%i2
+/* 0x085c */ cmp %l0,0
+/* 0x0860 */ fdtox %f2,%f2
+/* 0x0864 */ std %f2,[%sp+2255]
+/* 0x0868 311 */ sethi %hi(0xfc00),%o3
+/* 0x086c 315 */ fdtox %f4,%f2
+/* 0x0870 */ std %f2,[%sp+2247]
+/* 0x0874 311 */ or %g0,-1,%o2
+/* 0x0878 */ srl %o2,0,%o5
+/* 0x087c */ or %g0,2,%g5
+/* 0x0880 */ sub %l0,1,%g3
+/* 0x0884 */ or %g0,%o0,%o7
+/* 0x0888 */ add %o3,1023,%o4
+/* 0x088c 315 */ or %g0,64,%o3
+/* 0x0890 */ ldx [%sp+2255],%i0
+/* 0x0894 */ sub %l0,2,%o1
+/* 0x0898 */ ldx [%sp+2247],%i1
+/* 0x089c */ ble,pt %icc,.L900000648
+/* 0x08a0 */ sethi %hi(0xfc00),%g2
+/* 0x08a4 */ cmp %l0,6
+/* 0x08a8 */ and %i0,%o5,%o2
+/* 0x08ac */ bl,pn %icc,.L77000287
+/* 0x08b0 */ or %g0,3,%g4
+/* 0x08b4 */ ldd [%o7+16],%f0
+/* 0x08b8 */ and %i1,%o4,%i3
+/* 0x08bc */ sllx %i3,16,%o0
+/* 0x08c0 */ or %g0,5,%g4
+/* 0x08c4 */ srax %i1,16,%i4
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+2239]
+/* 0x08d0 */ srax %i0,32,%i1
+/* 0x08d4 */ add %o2,%o0,%i5
+/* 0x08d8 */ ldd [%o7+24],%f0
+/* 0x08dc */ and %i5,%o5,%l1
+/* 0x08e0 */ or %g0,72,%o2
+/* 0x08e4 */ or %g0,4,%o0
+/* 0x08e8 */ or %g0,4,%g5
+/* 0x08ec */ ldx [%sp+2239],%g1
+/* 0x08f0 */ fdtox %f0,%f0
+/* 0x08f4 */ or %g0,4,%i2
+/* 0x08f8 */ std %f0,[%sp+2231]
+/* 0x08fc */ ldd [%o7+40],%f2
+/* 0x0900 */ and %g1,%o5,%i3
+/* 0x0904 */ ldd [%o7+32],%f0
+/* 0x0908 */ srax %g1,32,%g1
+/* 0x090c */ ldd [%o7+56],%f4
+/* 0x0910 */ fdtox %f2,%f2
+/* 0x0914 */ ldx [%sp+2231],%g2
+/* 0x0918 */ fdtox %f0,%f0
+/* 0x091c */ st %l1,[%l2]
+/* 0x0920 */ srax %i5,32,%l1
+/* 0x0924 */ fdtox %f4,%f4
+/* 0x0928 */ std %f2,[%sp+2231]
+/* 0x092c */ and %g2,%o4,%i5
+/* 0x0930 */ add %i4,%l1,%i4
+/* 0x0934 */ std %f0,[%sp+2239]
+/* 0x0938 */ sllx %i5,16,%i0
+/* 0x093c */ add %i1,%i4,%i1
+/* 0x0940 */ ldd [%o7+48],%f2
+/* 0x0944 */ srax %g2,16,%g2
+/* 0x0948 */ add %i3,%i0,%i0
+/* 0x094c */ ldd [%o7+72],%f0
+/* 0x0950 */ add %i0,%i1,%i3
+/* 0x0954 */ srax %i3,32,%i4
+/* 0x0958 */ fdtox %f2,%f2
+/* 0x095c */ and %i3,%o5,%i3
+/* 0x0960 */ ldx [%sp+2231],%i1
+/* 0x0964 */ add %g2,%i4,%g2
+/* 0x0968 */ ldx [%sp+2239],%i0
+/* 0x096c */ add %g1,%g2,%g1
+/* 0x0970 */ std %f2,[%sp+2239]
+/* 0x0974 */ std %f4,[%sp+2231]
+/* 0x0978 */ ldd [%o7+64],%f2
+/* 0x097c */ st %i3,[%l2+4]
+ .L900000631:
+/* 0x0980 */ ldx [%sp+2231],%i3
+/* 0x0984 */ add %i2,2,%i2
+/* 0x0988 */ add %g4,4,%g4
+/* 0x098c */ ldx [%sp+2239],%i5
+/* 0x0990 */ add %o2,16,%o2
+/* 0x0994 */ and %i1,%o4,%g2
+/* 0x0998 */ sllx %g2,16,%i4
+/* 0x099c */ and %i0,%o5,%g2
+/* 0x09a0 */ ldd [%o7+%o2],%f4
+/* 0x09a4 */ fdtox %f0,%f0
+/* 0x09a8 */ std %f0,[%sp+2231]
+/* 0x09ac */ srax %i1,16,%i1
+/* 0x09b0 */ add %g2,%i4,%g2
+/* 0x09b4 */ fdtox %f2,%f0
+/* 0x09b8 */ add %o3,16,%o3
+/* 0x09bc */ std %f0,[%sp+2239]
+/* 0x09c0 */ add %g2,%g1,%g1
+/* 0x09c4 */ ldd [%o7+%o3],%f2
+/* 0x09c8 */ srax %g1,32,%i4
+/* 0x09cc */ cmp %i2,%o1
+/* 0x09d0 */ srax %i0,32,%g2
+/* 0x09d4 */ add %i1,%i4,%i0
+/* 0x09d8 */ add %g2,%i0,%i4
+/* 0x09dc */ add %o0,4,%o0
+/* 0x09e0 */ and %g1,%o5,%g2
+/* 0x09e4 */ or %g0,%i5,%g1
+/* 0x09e8 */ st %g2,[%l2+%o0]
+/* 0x09ec */ add %g5,4,%g5
+/* 0x09f0 */ ldx [%sp+2231],%i1
+/* 0x09f4 */ ldx [%sp+2239],%i0
+/* 0x09f8 */ add %o2,16,%o2
+/* 0x09fc */ and %i3,%o4,%g2
+/* 0x0a00 */ sllx %g2,16,%i5
+/* 0x0a04 */ and %g1,%o5,%g2
+/* 0x0a08 */ ldd [%o7+%o2],%f0
+/* 0x0a0c */ fdtox %f4,%f4
+/* 0x0a10 */ std %f4,[%sp+2231]
+/* 0x0a14 */ srax %i3,16,%i3
+/* 0x0a18 */ add %g2,%i5,%g2
+/* 0x0a1c */ fdtox %f2,%f2
+/* 0x0a20 */ add %o3,16,%o3
+/* 0x0a24 */ std %f2,[%sp+2239]
+/* 0x0a28 */ add %g2,%i4,%g2
+/* 0x0a2c */ ldd [%o7+%o3],%f2
+/* 0x0a30 */ srax %g2,32,%i4
+/* 0x0a34 */ srax %g1,32,%g1
+/* 0x0a38 */ add %i3,%i4,%i3
+/* 0x0a3c */ add %g1,%i3,%g1
+/* 0x0a40 */ add %o0,4,%o0
+/* 0x0a44 */ and %g2,%o5,%g2
+/* 0x0a48 */ ble,pt %icc,.L900000631
+/* 0x0a4c */ st %g2,[%l2+%o0]
+ .L900000634:
+/* 0x0a50 */ srax %i1,16,%i5
+/* 0x0a54 */ ldx [%sp+2231],%o1
+/* 0x0a58 */ and %i1,%o4,%i3
+/* 0x0a5c */ sllx %i3,16,%i3
+/* 0x0a60 */ ldx [%sp+2239],%i4
+/* 0x0a64 */ and %i0,%o5,%g2
+/* 0x0a68 */ add %g2,%i3,%g2
+/* 0x0a6c */ and %o1,%o4,%i3
+/* 0x0a70 */ fdtox %f0,%f4
+/* 0x0a74 */ sllx %i3,16,%i3
+/* 0x0a78 */ std %f4,[%sp+2231]
+/* 0x0a7c */ add %g2,%g1,%g2
+/* 0x0a80 */ srax %g2,32,%l1
+/* 0x0a84 */ and %i4,%o5,%i1
+/* 0x0a88 */ fdtox %f2,%f0
+/* 0x0a8c */ srax %i0,32,%g1
+/* 0x0a90 */ std %f0,[%sp+2239]
+/* 0x0a94 */ add %i5,%l1,%i0
+/* 0x0a98 */ srax %o1,16,%o1
+/* 0x0a9c */ add %g1,%i0,%i0
+/* 0x0aa0 */ add %o0,4,%g1
+/* 0x0aa4 */ add %i1,%i3,%o0
+/* 0x0aa8 */ and %g2,%o5,%g2
+/* 0x0aac */ st %g2,[%l2+%g1]
+/* 0x0ab0 */ add %o0,%i0,%o0
+/* 0x0ab4 */ srax %o0,32,%i3
+/* 0x0ab8 */ ldx [%sp+2231],%i1
+/* 0x0abc */ add %g1,4,%g1
+/* 0x0ac0 */ ldx [%sp+2239],%i0
+/* 0x0ac4 */ and %o0,%o5,%g2
+/* 0x0ac8 */ add %o1,%i3,%o1
+/* 0x0acc */ srax %i4,32,%o0
+/* 0x0ad0 */ cmp %i2,%g3
+/* 0x0ad4 */ st %g2,[%l2+%g1]
+/* 0x0ad8 */ bg,pn %icc,.L77000236
+/* 0x0adc */ add %o0,%o1,%g1
+/* 0x0ae0 */ add %g4,6,%g4
+/* 0x0ae4 */ add %g5,6,%g5
+ .L77000287:
+/* 0x0ae8 */ sra %g5,0,%o1
+ .L900000647:
+/* 0x0aec */ sllx %o1,3,%o2
+/* 0x0af0 */ and %i0,%o5,%o0
+/* 0x0af4 */ ldd [%o7+%o2],%f0
+/* 0x0af8 */ sra %g4,0,%o2
+/* 0x0afc */ and %i1,%o4,%o1
+/* 0x0b00 */ sllx %o2,3,%o2
+/* 0x0b04 */ add %g1,%o0,%o0
+/* 0x0b08 */ fdtox %f0,%f0
+/* 0x0b0c */ std %f0,[%sp+2239]
+/* 0x0b10 */ sllx %o1,16,%o1
+/* 0x0b14 */ add %o0,%o1,%o1
+/* 0x0b18 */ add %g5,2,%g5
+/* 0x0b1c */ ldd [%o7+%o2],%f0
+/* 0x0b20 */ srax %o1,32,%g1
+/* 0x0b24 */ and %o1,%o5,%o2
+/* 0x0b28 */ srax %i1,16,%o0
+/* 0x0b2c */ add %g4,2,%g4
+/* 0x0b30 */ fdtox %f0,%f0
+/* 0x0b34 */ std %f0,[%sp+2231]
+/* 0x0b38 */ sra %i2,0,%o1
+/* 0x0b3c */ sllx %o1,2,%o1
+/* 0x0b40 */ add %o0,%g1,%g2
+/* 0x0b44 */ srax %i0,32,%g1
+/* 0x0b48 */ add %i2,1,%i2
+/* 0x0b4c */ add %g1,%g2,%g1
+/* 0x0b50 */ cmp %i2,%g3
+/* 0x0b54 */ ldx [%sp+2239],%o3
+/* 0x0b58 */ ldx [%sp+2231],%i1
+/* 0x0b5c */ st %o2,[%l2+%o1]
+/* 0x0b60 */ or %g0,%o3,%i0
+/* 0x0b64 */ ble,pt %icc,.L900000647
+/* 0x0b68 */ sra %g5,0,%o1
+ .L77000236:
+/* 0x0b6c */ sethi %hi(0xfc00),%g2
+ .L900000648:
+/* 0x0b70 */ or %g0,-1,%o0
+/* 0x0b74 */ add %g2,1023,%g2
+/* 0x0b78 */ srl %o0,0,%g3
+/* 0x0b7c */ and %i1,%g2,%g2
+/* 0x0b80 */ and %i0,%g3,%g4
+/* 0x0b84 */ sllx %g2,16,%g2
+/* 0x0b88 */ add %g1,%g4,%g4
+/* 0x0b8c */ sra %i2,0,%g5
+/* 0x0b90 */ add %g4,%g2,%g4
+/* 0x0b94 */ sllx %g5,2,%g2
+/* 0x0b98 */ and %g4,%g3,%g3
+/* 0x0b9c */ st %g3,[%l2+%g2]
+
+! 317 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x0ba0 317 */ sra %l0,0,%g4
+/* 0x0ba4 */ sllx %g4,2,%g2
+/* 0x0ba8 */ ld [%l2+%g2],%g2
+/* 0x0bac */ cmp %g2,0
+/* 0x0bb0 */ bleu,pn %icc,.L77000241
+/* 0x0bb4 */ or %g0,-1,%o1
+/* 0x0bb8 */ ba .L900000646
+/* 0x0bbc */ cmp %o1,0
+ .L77000241:
+/* 0x0bc0 */ sub %l0,1,%o1
+/* 0x0bc4 */ cmp %o1,0
+/* 0x0bc8 */ bl,pn %icc,.L77000244
+/* 0x0bcc */ sra %o1,0,%g2
+ .L900000645:
+/* 0x0bd0 */ sllx %g2,2,%g2
+/* 0x0bd4 */ sub %o1,1,%o0
+/* 0x0bd8 */ ld [%l3+%g2],%g3
+/* 0x0bdc */ ld [%l2+%g2],%g2
+/* 0x0be0 */ cmp %g2,%g3
+/* 0x0be4 */ bne,pn %icc,.L77000244
+/* 0x0be8 */ nop
+/* 0x0bec 0 */ or %g0,%o0,%o1
+/* 0x0bf0 317 */ cmp %o0,0
+/* 0x0bf4 */ bge,pt %icc,.L900000645
+/* 0x0bf8 */ sra %o1,0,%g2
+ .L77000244:
+/* 0x0bfc */ cmp %o1,0
+ .L900000646:
+/* 0x0c00 */ bl,pn %icc,.L77000288
+/* 0x0c04 */ sra %o1,0,%g2
+/* 0x0c08 */ sllx %g2,2,%g2
+/* 0x0c0c */ ld [%l3+%g2],%g3
+/* 0x0c10 */ ld [%l2+%g2],%g2
+/* 0x0c14 */ cmp %g2,%g3
+/* 0x0c18 */ bleu,pt %icc,.L77000224
+/* 0x0c1c */ nop
+ .L77000288:
+/* 0x0c20 */ cmp %l0,0
+/* 0x0c24 */ ble,pt %icc,.L77000224
+/* 0x0c28 */ nop
+/* 0x0c2c 317 */ or %g0,-1,%g2
+/* 0x0c30 315 */ or %g0,0,%i0
+/* 0x0c34 317 */ srl %g2,0,%g2
+/* 0x0c38 315 */ or %g0,0,%g4
+/* 0x0c3c */ or %g0,0,%o1
+/* 0x0c40 317 */ sub %l0,1,%g5
+/* 0x0c44 */ cmp %l0,9
+/* 0x0c48 315 */ or %g0,8,%o5
+/* 0x0c4c */ bl,pn %icc,.L77000289
+/* 0x0c50 */ sub %l0,4,%o7
+/* 0x0c54 */ ld [%l2],%o1
+/* 0x0c58 */ or %g0,5,%i0
+/* 0x0c5c */ ld [%l3],%o2
+/* 0x0c60 */ or %g0,12,%o4
+/* 0x0c64 */ or %g0,16,%g1
+/* 0x0c68 */ ld [%l3+4],%o3
+/* 0x0c6c */ ld [%l2+4],%o0
+/* 0x0c70 */ sub %o1,%o2,%o1
+/* 0x0c74 */ ld [%l3+8],%i1
+/* 0x0c78 */ and %o1,%g2,%g4
+/* 0x0c7c */ st %g4,[%l2]
+/* 0x0c80 */ srax %o1,32,%g4
+/* 0x0c84 */ sub %o0,%o3,%o0
+/* 0x0c88 */ ld [%l3+12],%o2
+/* 0x0c8c */ add %o0,%g4,%o0
+/* 0x0c90 */ and %o0,%g2,%g4
+/* 0x0c94 */ st %g4,[%l2+4]
+/* 0x0c98 */ srax %o0,32,%o0
+/* 0x0c9c */ ld [%l2+8],%o1
+/* 0x0ca0 */ ld [%l2+12],%o3
+/* 0x0ca4 */ sub %o1,%i1,%o1
+ .L900000635:
+/* 0x0ca8 */ add %g1,4,%g3
+/* 0x0cac */ ld [%g1+%l2],%g4
+/* 0x0cb0 */ add %o1,%o0,%o0
+/* 0x0cb4 */ ld [%l3+%g1],%i1
+/* 0x0cb8 */ sub %o3,%o2,%o1
+/* 0x0cbc */ and %o0,%g2,%o2
+/* 0x0cc0 */ st %o2,[%o5+%l2]
+/* 0x0cc4 */ srax %o0,32,%o2
+/* 0x0cc8 */ add %i0,4,%i0
+/* 0x0ccc */ add %g1,8,%o5
+/* 0x0cd0 */ ld [%g3+%l2],%o0
+/* 0x0cd4 */ add %o1,%o2,%o1
+/* 0x0cd8 */ ld [%l3+%g3],%o3
+/* 0x0cdc */ sub %g4,%i1,%o2
+/* 0x0ce0 */ and %o1,%g2,%g4
+/* 0x0ce4 */ st %g4,[%o4+%l2]
+/* 0x0ce8 */ srax %o1,32,%g4
+/* 0x0cec */ cmp %i0,%o7
+/* 0x0cf0 */ add %g1,12,%o4
+/* 0x0cf4 */ ld [%o5+%l2],%o1
+/* 0x0cf8 */ add %o2,%g4,%o2
+/* 0x0cfc */ ld [%l3+%o5],%i1
+/* 0x0d00 */ sub %o0,%o3,%o0
+/* 0x0d04 */ and %o2,%g2,%o3
+/* 0x0d08 */ st %o3,[%g1+%l2]
+/* 0x0d0c */ srax %o2,32,%g4
+/* 0x0d10 */ ld [%o4+%l2],%o3
+/* 0x0d14 */ add %g1,16,%g1
+/* 0x0d18 */ add %o0,%g4,%o0
+/* 0x0d1c */ ld [%l3+%o4],%o2
+/* 0x0d20 */ sub %o1,%i1,%o1
+/* 0x0d24 */ and %o0,%g2,%g4
+/* 0x0d28 */ st %g4,[%g3+%l2]
+/* 0x0d2c */ ble,pt %icc,.L900000635
+/* 0x0d30 */ srax %o0,32,%o0
+ .L900000638:
+/* 0x0d34 */ add %o1,%o0,%g3
+/* 0x0d38 */ sub %o3,%o2,%o1
+/* 0x0d3c */ ld [%g1+%l2],%o0
+/* 0x0d40 */ ld [%l3+%g1],%o2
+/* 0x0d44 */ srax %g3,32,%o7
+/* 0x0d48 */ and %g3,%g2,%o3
+/* 0x0d4c */ add %o1,%o7,%o1
+/* 0x0d50 */ st %o3,[%o5+%l2]
+/* 0x0d54 */ cmp %i0,%g5
+/* 0x0d58 */ sub %o0,%o2,%o0
+/* 0x0d5c */ and %o1,%g2,%o2
+/* 0x0d60 */ st %o2,[%o4+%l2]
+/* 0x0d64 */ srax %o1,32,%o1
+/* 0x0d68 */ sra %i0,0,%o2
+/* 0x0d6c */ add %o0,%o1,%o0
+/* 0x0d70 */ srax %o0,32,%g4
+/* 0x0d74 */ and %o0,%g2,%o1
+/* 0x0d78 */ st %o1,[%g1+%l2]
+/* 0x0d7c */ bg,pn %icc,.L77000224
+/* 0x0d80 */ sllx %o2,2,%o1
+ .L77000289:
+/* 0x0d84 0 */ or %g0,%o1,%g1
+ .L900000644:
+/* 0x0d88 */ ld [%o1+%l2],%o0
+/* 0x0d8c */ add %i0,1,%i0
+/* 0x0d90 */ ld [%l3+%o1],%o1
+/* 0x0d94 */ sra %i0,0,%o2
+/* 0x0d98 */ cmp %i0,%g5
+/* 0x0d9c */ add %g4,%o0,%o0
+/* 0x0da0 */ sub %o0,%o1,%o0
+/* 0x0da4 */ srax %o0,32,%g4
+/* 0x0da8 */ and %o0,%g2,%o1
+/* 0x0dac */ st %o1,[%g1+%l2]
+/* 0x0db0 */ sllx %o2,2,%o1
+/* 0x0db4 */ ble,pt %icc,.L900000644
+/* 0x0db8 */ or %g0,%o1,%g1
+ .L77000224:
+/* 0x0dbc */ ret ! Result =
+/* 0x0dc0 */ restore %g0,%g0,%g0
+/* 0x0dc4 0 */ .type mont_mulf_noconv,2
+/* 0x0dc4 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/mp_comba.c b/security/nss/lib/freebl/mpi/mp_comba.c
new file mode 100644
index 0000000000..3b4937b98a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba.c
@@ -0,0 +1,3235 @@
+/*
+ * The below file is derived from TFM v0.03.
+ * It contains code from fp_mul_comba.c and
+ * fp_sqr_comba.c, which contained the following license.
+ *
+ * Right now, the assembly in this file limits
+ * this code to AMD 64.
+ *
+ * This file is public domain.
+ */
+
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+#include "mpi-priv.h"
+
+/* clamp digits: lower (a)->used past any high-order zero digits, and set the
+ * sign to ZPOS once no digits remain. The argument is expanded several times,
+ * so it should be free of side effects. */
+#define mp_clamp(a) \
+ { \
+ while ((a)->used && (a)->dp[(a)->used - 1] == 0) \
+ --((a)->used); \
+ (a)->sign = (a)->used ? (a)->sign : ZPOS; \
+ }
+
+/* anything you need at the start: hook expanded at the top of each comba
+ * routine; it is empty here */
+#define COMBA_START
+
+/* clear the chaining variables: zero the three-word accumulator c2:c1:c0 */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit: c0 <- c1, c1 <- c2, c2 <- 0.
+ * NOTE(review): the expansion already ends in ';', so "COMBA_FORWARD;" leaves
+ * an empty statement behind and would break an unbraced if ... else written
+ * around it. */
+#define COMBA_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+
+/* anything you need at the end: hook expanded at the bottom of each comba
+ * routine; it is empty here */
+#define COMBA_FINI
+
+/* MULADD(i, j): add the product i * j into the three-word accumulator
+ * c2:c1:c0. mulq leaves the double-width product in rdx:rax; rax is added to
+ * c0, rdx plus carry to c1, and the last carry to c2. rax, rdx and the flags
+ * are declared clobbered.
+ * NOTE(review): "g" also admits an immediate operand, which mulq cannot
+ * encode; the callers in view pass array elements only. */
+#define MULADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
+/* Originally labelled "sqr macros only", but COMBA_STORE and COMBA_STORE2
+ * are also used by the s_mp_mul_comba_* functions in this file.
+ * CLEAR_CARRY zeroes the accumulator c2:c1:c0, exactly like COMBA_CLEAR. */
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+/* store the low accumulator word c0 into x */
+#define COMBA_STORE(x) \
+ x = c0;
+/* store the middle accumulator word c1 into x */
+#define COMBA_STORE2(x) \
+ x = c1;
+/* same expansion as COMBA_FORWARD: c0 <- c1, c1 <- c2, c2 <- 0 */
+#define CARRY_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+/* repeats the empty COMBA_FINI definition given earlier in this file */
+#define COMBA_FINI
+/* SQRADD(i, j): add the square i * i into the accumulator c2:c1:c0.
+ * Only i appears in the operand list; j is accepted but never referenced. */
+#define SQRADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %%rax \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i) \
+ : "%rax", "%rdx", "cc");
+/* SQRADD2(i, j): add 2 * i * j into the accumulator c2:c1:c0 by adding the
+ * product that mulq leaves in rdx:rax twice in a row. */
+#define SQRADD2(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+/* SQRADDSC(i, j): start the secondary accumulator sc2:sc1:sc0 with the
+ * product i * j (sc0 = low word, sc1 = high word, sc2 = 0). */
+#define SQRADDSC(i, j) \
+ __asm__( \
+ "movq %3,%%rax \n\t" \
+ "mulq %4 \n\t" \
+ "movq %%rax,%0 \n\t" \
+ "movq %%rdx,%1 \n\t" \
+ "xorq %2,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+/* SQRADDAC(i, j): add the product i * j into the secondary accumulator
+ * sc2:sc1:sc0, using the same carry chain MULADD applies to c2:c1:c0. */
+#define SQRADDAC(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+/* SQRADDDB: add the secondary accumulator sc2:sc1:sc0 into c2:c1:c0 twice.
+ * The outputs are early-clobber ("=&r"), so they are not given the same
+ * registers as the sc0/sc1/sc2 inputs. */
+#define SQRADDDB \
+ __asm__( \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ : "=&r"(c0), "=&r"(c1), "=&r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
+ : "cc");
+/*
+ * s_mp_mul_comba_4: multiply two 4-digit values column by column.
+ *
+ * Reads A->dp[0..3] and B->dp[0..3], writes C->dp[0..7], sets C->used to 8
+ * and C->sign to A->sign ^ B->sign, then clamps C. Nothing here checks or
+ * grows C->dp, so the caller must supply room for 8 digits.
+ *
+ * Each numbered comment starts result digit k: COMBA_FORWARD shifts the
+ * accumulator down one word, the MULADD lines add every at[i] * at[4 + j]
+ * with i + j == k, and COMBA_STORE writes the low word to C->dp[k].
+ */
+void
+s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[8];
+ /* local copies: at[0..3] = digits of A, at[4..7] = digits of B */
+ memcpy(at, A->dp, 4 * sizeof(mp_digit));
+ memcpy(at + 4, B->dp, 4 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[4]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[5]);
+ MULADD(at[1], at[4]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[6]);
+ MULADD(at[1], at[5]);
+ MULADD(at[2], at[4]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[7]);
+ MULADD(at[1], at[6]);
+ MULADD(at[2], at[5]);
+ MULADD(at[3], at[4]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[7]);
+ MULADD(at[2], at[6]);
+ MULADD(at[3], at[5]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[7]);
+ MULADD(at[3], at[6]);
+ COMBA_STORE(C->dp[5]);
+ /* 6, plus digit 7 taken from the remaining carry word c1 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[7]);
+ COMBA_STORE(C->dp[6]);
+ COMBA_STORE2(C->dp[7]);
+ C->used = 8;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+/*
+ * s_mp_mul_comba_8: multiply two 8-digit values column by column.
+ *
+ * Reads A->dp[0..7] and B->dp[0..7], writes C->dp[0..15], sets C->used to 16
+ * and C->sign to A->sign ^ B->sign, then clamps C. Nothing here checks or
+ * grows C->dp, so the caller must supply room for 16 digits.
+ *
+ * Each numbered comment starts result digit k: COMBA_FORWARD shifts the
+ * accumulator down one word, the MULADD lines add every at[i] * at[8 + j]
+ * with i + j == k, and COMBA_STORE writes the low word to C->dp[k].
+ */
+void
+s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[16];
+ /* local copies: at[0..7] = digits of A, at[8..15] = digits of B */
+ memcpy(at, A->dp, 8 * sizeof(mp_digit));
+ memcpy(at + 8, B->dp, 8 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[8]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[9]);
+ MULADD(at[1], at[8]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[10]);
+ MULADD(at[1], at[9]);
+ MULADD(at[2], at[8]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[11]);
+ MULADD(at[1], at[10]);
+ MULADD(at[2], at[9]);
+ MULADD(at[3], at[8]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[12]);
+ MULADD(at[1], at[11]);
+ MULADD(at[2], at[10]);
+ MULADD(at[3], at[9]);
+ MULADD(at[4], at[8]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[13]);
+ MULADD(at[1], at[12]);
+ MULADD(at[2], at[11]);
+ MULADD(at[3], at[10]);
+ MULADD(at[4], at[9]);
+ MULADD(at[5], at[8]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[14]);
+ MULADD(at[1], at[13]);
+ MULADD(at[2], at[12]);
+ MULADD(at[3], at[11]);
+ MULADD(at[4], at[10]);
+ MULADD(at[5], at[9]);
+ MULADD(at[6], at[8]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[15]);
+ MULADD(at[1], at[14]);
+ MULADD(at[2], at[13]);
+ MULADD(at[3], at[12]);
+ MULADD(at[4], at[11]);
+ MULADD(at[5], at[10]);
+ MULADD(at[6], at[9]);
+ MULADD(at[7], at[8]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[15]);
+ MULADD(at[2], at[14]);
+ MULADD(at[3], at[13]);
+ MULADD(at[4], at[12]);
+ MULADD(at[5], at[11]);
+ MULADD(at[6], at[10]);
+ MULADD(at[7], at[9]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[15]);
+ MULADD(at[3], at[14]);
+ MULADD(at[4], at[13]);
+ MULADD(at[5], at[12]);
+ MULADD(at[6], at[11]);
+ MULADD(at[7], at[10]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[15]);
+ MULADD(at[4], at[14]);
+ MULADD(at[5], at[13]);
+ MULADD(at[6], at[12]);
+ MULADD(at[7], at[11]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[15]);
+ MULADD(at[5], at[14]);
+ MULADD(at[6], at[13]);
+ MULADD(at[7], at[12]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[15]);
+ MULADD(at[6], at[14]);
+ MULADD(at[7], at[13]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[15]);
+ MULADD(at[7], at[14]);
+ COMBA_STORE(C->dp[13]);
+ /* 14, plus digit 15 taken from the remaining carry word c1 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[15]);
+ COMBA_STORE(C->dp[14]);
+ COMBA_STORE2(C->dp[15]);
+ C->used = 16;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+/*
+ * s_mp_mul_comba_16: multiply two 16-digit values column by column.
+ *
+ * Reads A->dp[0..15] and B->dp[0..15], writes C->dp[0..31], sets C->used to
+ * 32 and C->sign to A->sign ^ B->sign, then clamps C. Nothing here checks or
+ * grows C->dp, so the caller must supply room for 32 digits.
+ *
+ * Each numbered comment starts result digit k: COMBA_FORWARD shifts the
+ * accumulator down one word, the MULADD lines add every at[i] * at[16 + j]
+ * with i + j == k, and COMBA_STORE writes the low word to C->dp[k].
+ */
+void
+s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[32];
+ /* local copies: at[0..15] = digits of A, at[16..31] = digits of B */
+ memcpy(at, A->dp, 16 * sizeof(mp_digit));
+ memcpy(at + 16, B->dp, 16 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[16]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[17]);
+ MULADD(at[1], at[16]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[18]);
+ MULADD(at[1], at[17]);
+ MULADD(at[2], at[16]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[19]);
+ MULADD(at[1], at[18]);
+ MULADD(at[2], at[17]);
+ MULADD(at[3], at[16]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[20]);
+ MULADD(at[1], at[19]);
+ MULADD(at[2], at[18]);
+ MULADD(at[3], at[17]);
+ MULADD(at[4], at[16]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[21]);
+ MULADD(at[1], at[20]);
+ MULADD(at[2], at[19]);
+ MULADD(at[3], at[18]);
+ MULADD(at[4], at[17]);
+ MULADD(at[5], at[16]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[22]);
+ MULADD(at[1], at[21]);
+ MULADD(at[2], at[20]);
+ MULADD(at[3], at[19]);
+ MULADD(at[4], at[18]);
+ MULADD(at[5], at[17]);
+ MULADD(at[6], at[16]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[23]);
+ MULADD(at[1], at[22]);
+ MULADD(at[2], at[21]);
+ MULADD(at[3], at[20]);
+ MULADD(at[4], at[19]);
+ MULADD(at[5], at[18]);
+ MULADD(at[6], at[17]);
+ MULADD(at[7], at[16]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[24]);
+ MULADD(at[1], at[23]);
+ MULADD(at[2], at[22]);
+ MULADD(at[3], at[21]);
+ MULADD(at[4], at[20]);
+ MULADD(at[5], at[19]);
+ MULADD(at[6], at[18]);
+ MULADD(at[7], at[17]);
+ MULADD(at[8], at[16]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[25]);
+ MULADD(at[1], at[24]);
+ MULADD(at[2], at[23]);
+ MULADD(at[3], at[22]);
+ MULADD(at[4], at[21]);
+ MULADD(at[5], at[20]);
+ MULADD(at[6], at[19]);
+ MULADD(at[7], at[18]);
+ MULADD(at[8], at[17]);
+ MULADD(at[9], at[16]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[26]);
+ MULADD(at[1], at[25]);
+ MULADD(at[2], at[24]);
+ MULADD(at[3], at[23]);
+ MULADD(at[4], at[22]);
+ MULADD(at[5], at[21]);
+ MULADD(at[6], at[20]);
+ MULADD(at[7], at[19]);
+ MULADD(at[8], at[18]);
+ MULADD(at[9], at[17]);
+ MULADD(at[10], at[16]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[27]);
+ MULADD(at[1], at[26]);
+ MULADD(at[2], at[25]);
+ MULADD(at[3], at[24]);
+ MULADD(at[4], at[23]);
+ MULADD(at[5], at[22]);
+ MULADD(at[6], at[21]);
+ MULADD(at[7], at[20]);
+ MULADD(at[8], at[19]);
+ MULADD(at[9], at[18]);
+ MULADD(at[10], at[17]);
+ MULADD(at[11], at[16]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[28]);
+ MULADD(at[1], at[27]);
+ MULADD(at[2], at[26]);
+ MULADD(at[3], at[25]);
+ MULADD(at[4], at[24]);
+ MULADD(at[5], at[23]);
+ MULADD(at[6], at[22]);
+ MULADD(at[7], at[21]);
+ MULADD(at[8], at[20]);
+ MULADD(at[9], at[19]);
+ MULADD(at[10], at[18]);
+ MULADD(at[11], at[17]);
+ MULADD(at[12], at[16]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[29]);
+ MULADD(at[1], at[28]);
+ MULADD(at[2], at[27]);
+ MULADD(at[3], at[26]);
+ MULADD(at[4], at[25]);
+ MULADD(at[5], at[24]);
+ MULADD(at[6], at[23]);
+ MULADD(at[7], at[22]);
+ MULADD(at[8], at[21]);
+ MULADD(at[9], at[20]);
+ MULADD(at[10], at[19]);
+ MULADD(at[11], at[18]);
+ MULADD(at[12], at[17]);
+ MULADD(at[13], at[16]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[30]);
+ MULADD(at[1], at[29]);
+ MULADD(at[2], at[28]);
+ MULADD(at[3], at[27]);
+ MULADD(at[4], at[26]);
+ MULADD(at[5], at[25]);
+ MULADD(at[6], at[24]);
+ MULADD(at[7], at[23]);
+ MULADD(at[8], at[22]);
+ MULADD(at[9], at[21]);
+ MULADD(at[10], at[20]);
+ MULADD(at[11], at[19]);
+ MULADD(at[12], at[18]);
+ MULADD(at[13], at[17]);
+ MULADD(at[14], at[16]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[31]);
+ MULADD(at[1], at[30]);
+ MULADD(at[2], at[29]);
+ MULADD(at[3], at[28]);
+ MULADD(at[4], at[27]);
+ MULADD(at[5], at[26]);
+ MULADD(at[6], at[25]);
+ MULADD(at[7], at[24]);
+ MULADD(at[8], at[23]);
+ MULADD(at[9], at[22]);
+ MULADD(at[10], at[21]);
+ MULADD(at[11], at[20]);
+ MULADD(at[12], at[19]);
+ MULADD(at[13], at[18]);
+ MULADD(at[14], at[17]);
+ MULADD(at[15], at[16]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[31]);
+ MULADD(at[2], at[30]);
+ MULADD(at[3], at[29]);
+ MULADD(at[4], at[28]);
+ MULADD(at[5], at[27]);
+ MULADD(at[6], at[26]);
+ MULADD(at[7], at[25]);
+ MULADD(at[8], at[24]);
+ MULADD(at[9], at[23]);
+ MULADD(at[10], at[22]);
+ MULADD(at[11], at[21]);
+ MULADD(at[12], at[20]);
+ MULADD(at[13], at[19]);
+ MULADD(at[14], at[18]);
+ MULADD(at[15], at[17]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[31]);
+ MULADD(at[3], at[30]);
+ MULADD(at[4], at[29]);
+ MULADD(at[5], at[28]);
+ MULADD(at[6], at[27]);
+ MULADD(at[7], at[26]);
+ MULADD(at[8], at[25]);
+ MULADD(at[9], at[24]);
+ MULADD(at[10], at[23]);
+ MULADD(at[11], at[22]);
+ MULADD(at[12], at[21]);
+ MULADD(at[13], at[20]);
+ MULADD(at[14], at[19]);
+ MULADD(at[15], at[18]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[31]);
+ MULADD(at[4], at[30]);
+ MULADD(at[5], at[29]);
+ MULADD(at[6], at[28]);
+ MULADD(at[7], at[27]);
+ MULADD(at[8], at[26]);
+ MULADD(at[9], at[25]);
+ MULADD(at[10], at[24]);
+ MULADD(at[11], at[23]);
+ MULADD(at[12], at[22]);
+ MULADD(at[13], at[21]);
+ MULADD(at[14], at[20]);
+ MULADD(at[15], at[19]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[31]);
+ MULADD(at[5], at[30]);
+ MULADD(at[6], at[29]);
+ MULADD(at[7], at[28]);
+ MULADD(at[8], at[27]);
+ MULADD(at[9], at[26]);
+ MULADD(at[10], at[25]);
+ MULADD(at[11], at[24]);
+ MULADD(at[12], at[23]);
+ MULADD(at[13], at[22]);
+ MULADD(at[14], at[21]);
+ MULADD(at[15], at[20]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[31]);
+ MULADD(at[6], at[30]);
+ MULADD(at[7], at[29]);
+ MULADD(at[8], at[28]);
+ MULADD(at[9], at[27]);
+ MULADD(at[10], at[26]);
+ MULADD(at[11], at[25]);
+ MULADD(at[12], at[24]);
+ MULADD(at[13], at[23]);
+ MULADD(at[14], at[22]);
+ MULADD(at[15], at[21]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[31]);
+ MULADD(at[7], at[30]);
+ MULADD(at[8], at[29]);
+ MULADD(at[9], at[28]);
+ MULADD(at[10], at[27]);
+ MULADD(at[11], at[26]);
+ MULADD(at[12], at[25]);
+ MULADD(at[13], at[24]);
+ MULADD(at[14], at[23]);
+ MULADD(at[15], at[22]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[31]);
+ MULADD(at[8], at[30]);
+ MULADD(at[9], at[29]);
+ MULADD(at[10], at[28]);
+ MULADD(at[11], at[27]);
+ MULADD(at[12], at[26]);
+ MULADD(at[13], at[25]);
+ MULADD(at[14], at[24]);
+ MULADD(at[15], at[23]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[31]);
+ MULADD(at[9], at[30]);
+ MULADD(at[10], at[29]);
+ MULADD(at[11], at[28]);
+ MULADD(at[12], at[27]);
+ MULADD(at[13], at[26]);
+ MULADD(at[14], at[25]);
+ MULADD(at[15], at[24]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[31]);
+ MULADD(at[10], at[30]);
+ MULADD(at[11], at[29]);
+ MULADD(at[12], at[28]);
+ MULADD(at[13], at[27]);
+ MULADD(at[14], at[26]);
+ MULADD(at[15], at[25]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[31]);
+ MULADD(at[11], at[30]);
+ MULADD(at[12], at[29]);
+ MULADD(at[13], at[28]);
+ MULADD(at[14], at[27]);
+ MULADD(at[15], at[26]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[31]);
+ MULADD(at[12], at[30]);
+ MULADD(at[13], at[29]);
+ MULADD(at[14], at[28]);
+ MULADD(at[15], at[27]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[31]);
+ MULADD(at[13], at[30]);
+ MULADD(at[14], at[29]);
+ MULADD(at[15], at[28]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[31]);
+ MULADD(at[14], at[30]);
+ MULADD(at[15], at[29]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[31]);
+ MULADD(at[15], at[30]);
+ COMBA_STORE(C->dp[29]);
+ /* 30, plus digit 31 taken from the remaining carry word c1 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[31]);
+ COMBA_STORE(C->dp[30]);
+ COMBA_STORE2(C->dp[31]);
+ C->used = 32;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[64];
+
+ memcpy(at, A->dp, 32 * sizeof(mp_digit));
+ memcpy(at + 32, B->dp, 32 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[32]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[33]);
+ MULADD(at[1], at[32]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[34]);
+ MULADD(at[1], at[33]);
+ MULADD(at[2], at[32]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[35]);
+ MULADD(at[1], at[34]);
+ MULADD(at[2], at[33]);
+ MULADD(at[3], at[32]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[36]);
+ MULADD(at[1], at[35]);
+ MULADD(at[2], at[34]);
+ MULADD(at[3], at[33]);
+ MULADD(at[4], at[32]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[37]);
+ MULADD(at[1], at[36]);
+ MULADD(at[2], at[35]);
+ MULADD(at[3], at[34]);
+ MULADD(at[4], at[33]);
+ MULADD(at[5], at[32]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[38]);
+ MULADD(at[1], at[37]);
+ MULADD(at[2], at[36]);
+ MULADD(at[3], at[35]);
+ MULADD(at[4], at[34]);
+ MULADD(at[5], at[33]);
+ MULADD(at[6], at[32]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[39]);
+ MULADD(at[1], at[38]);
+ MULADD(at[2], at[37]);
+ MULADD(at[3], at[36]);
+ MULADD(at[4], at[35]);
+ MULADD(at[5], at[34]);
+ MULADD(at[6], at[33]);
+ MULADD(at[7], at[32]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[40]);
+ MULADD(at[1], at[39]);
+ MULADD(at[2], at[38]);
+ MULADD(at[3], at[37]);
+ MULADD(at[4], at[36]);
+ MULADD(at[5], at[35]);
+ MULADD(at[6], at[34]);
+ MULADD(at[7], at[33]);
+ MULADD(at[8], at[32]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[41]);
+ MULADD(at[1], at[40]);
+ MULADD(at[2], at[39]);
+ MULADD(at[3], at[38]);
+ MULADD(at[4], at[37]);
+ MULADD(at[5], at[36]);
+ MULADD(at[6], at[35]);
+ MULADD(at[7], at[34]);
+ MULADD(at[8], at[33]);
+ MULADD(at[9], at[32]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[42]);
+ MULADD(at[1], at[41]);
+ MULADD(at[2], at[40]);
+ MULADD(at[3], at[39]);
+ MULADD(at[4], at[38]);
+ MULADD(at[5], at[37]);
+ MULADD(at[6], at[36]);
+ MULADD(at[7], at[35]);
+ MULADD(at[8], at[34]);
+ MULADD(at[9], at[33]);
+ MULADD(at[10], at[32]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[43]);
+ MULADD(at[1], at[42]);
+ MULADD(at[2], at[41]);
+ MULADD(at[3], at[40]);
+ MULADD(at[4], at[39]);
+ MULADD(at[5], at[38]);
+ MULADD(at[6], at[37]);
+ MULADD(at[7], at[36]);
+ MULADD(at[8], at[35]);
+ MULADD(at[9], at[34]);
+ MULADD(at[10], at[33]);
+ MULADD(at[11], at[32]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[44]);
+ MULADD(at[1], at[43]);
+ MULADD(at[2], at[42]);
+ MULADD(at[3], at[41]);
+ MULADD(at[4], at[40]);
+ MULADD(at[5], at[39]);
+ MULADD(at[6], at[38]);
+ MULADD(at[7], at[37]);
+ MULADD(at[8], at[36]);
+ MULADD(at[9], at[35]);
+ MULADD(at[10], at[34]);
+ MULADD(at[11], at[33]);
+ MULADD(at[12], at[32]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[45]);
+ MULADD(at[1], at[44]);
+ MULADD(at[2], at[43]);
+ MULADD(at[3], at[42]);
+ MULADD(at[4], at[41]);
+ MULADD(at[5], at[40]);
+ MULADD(at[6], at[39]);
+ MULADD(at[7], at[38]);
+ MULADD(at[8], at[37]);
+ MULADD(at[9], at[36]);
+ MULADD(at[10], at[35]);
+ MULADD(at[11], at[34]);
+ MULADD(at[12], at[33]);
+ MULADD(at[13], at[32]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[46]);
+ MULADD(at[1], at[45]);
+ MULADD(at[2], at[44]);
+ MULADD(at[3], at[43]);
+ MULADD(at[4], at[42]);
+ MULADD(at[5], at[41]);
+ MULADD(at[6], at[40]);
+ MULADD(at[7], at[39]);
+ MULADD(at[8], at[38]);
+ MULADD(at[9], at[37]);
+ MULADD(at[10], at[36]);
+ MULADD(at[11], at[35]);
+ MULADD(at[12], at[34]);
+ MULADD(at[13], at[33]);
+ MULADD(at[14], at[32]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[47]);
+ MULADD(at[1], at[46]);
+ MULADD(at[2], at[45]);
+ MULADD(at[3], at[44]);
+ MULADD(at[4], at[43]);
+ MULADD(at[5], at[42]);
+ MULADD(at[6], at[41]);
+ MULADD(at[7], at[40]);
+ MULADD(at[8], at[39]);
+ MULADD(at[9], at[38]);
+ MULADD(at[10], at[37]);
+ MULADD(at[11], at[36]);
+ MULADD(at[12], at[35]);
+ MULADD(at[13], at[34]);
+ MULADD(at[14], at[33]);
+ MULADD(at[15], at[32]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[48]);
+ MULADD(at[1], at[47]);
+ MULADD(at[2], at[46]);
+ MULADD(at[3], at[45]);
+ MULADD(at[4], at[44]);
+ MULADD(at[5], at[43]);
+ MULADD(at[6], at[42]);
+ MULADD(at[7], at[41]);
+ MULADD(at[8], at[40]);
+ MULADD(at[9], at[39]);
+ MULADD(at[10], at[38]);
+ MULADD(at[11], at[37]);
+ MULADD(at[12], at[36]);
+ MULADD(at[13], at[35]);
+ MULADD(at[14], at[34]);
+ MULADD(at[15], at[33]);
+ MULADD(at[16], at[32]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[49]);
+ MULADD(at[1], at[48]);
+ MULADD(at[2], at[47]);
+ MULADD(at[3], at[46]);
+ MULADD(at[4], at[45]);
+ MULADD(at[5], at[44]);
+ MULADD(at[6], at[43]);
+ MULADD(at[7], at[42]);
+ MULADD(at[8], at[41]);
+ MULADD(at[9], at[40]);
+ MULADD(at[10], at[39]);
+ MULADD(at[11], at[38]);
+ MULADD(at[12], at[37]);
+ MULADD(at[13], at[36]);
+ MULADD(at[14], at[35]);
+ MULADD(at[15], at[34]);
+ MULADD(at[16], at[33]);
+ MULADD(at[17], at[32]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[50]);
+ MULADD(at[1], at[49]);
+ MULADD(at[2], at[48]);
+ MULADD(at[3], at[47]);
+ MULADD(at[4], at[46]);
+ MULADD(at[5], at[45]);
+ MULADD(at[6], at[44]);
+ MULADD(at[7], at[43]);
+ MULADD(at[8], at[42]);
+ MULADD(at[9], at[41]);
+ MULADD(at[10], at[40]);
+ MULADD(at[11], at[39]);
+ MULADD(at[12], at[38]);
+ MULADD(at[13], at[37]);
+ MULADD(at[14], at[36]);
+ MULADD(at[15], at[35]);
+ MULADD(at[16], at[34]);
+ MULADD(at[17], at[33]);
+ MULADD(at[18], at[32]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[51]);
+ MULADD(at[1], at[50]);
+ MULADD(at[2], at[49]);
+ MULADD(at[3], at[48]);
+ MULADD(at[4], at[47]);
+ MULADD(at[5], at[46]);
+ MULADD(at[6], at[45]);
+ MULADD(at[7], at[44]);
+ MULADD(at[8], at[43]);
+ MULADD(at[9], at[42]);
+ MULADD(at[10], at[41]);
+ MULADD(at[11], at[40]);
+ MULADD(at[12], at[39]);
+ MULADD(at[13], at[38]);
+ MULADD(at[14], at[37]);
+ MULADD(at[15], at[36]);
+ MULADD(at[16], at[35]);
+ MULADD(at[17], at[34]);
+ MULADD(at[18], at[33]);
+ MULADD(at[19], at[32]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[52]);
+ MULADD(at[1], at[51]);
+ MULADD(at[2], at[50]);
+ MULADD(at[3], at[49]);
+ MULADD(at[4], at[48]);
+ MULADD(at[5], at[47]);
+ MULADD(at[6], at[46]);
+ MULADD(at[7], at[45]);
+ MULADD(at[8], at[44]);
+ MULADD(at[9], at[43]);
+ MULADD(at[10], at[42]);
+ MULADD(at[11], at[41]);
+ MULADD(at[12], at[40]);
+ MULADD(at[13], at[39]);
+ MULADD(at[14], at[38]);
+ MULADD(at[15], at[37]);
+ MULADD(at[16], at[36]);
+ MULADD(at[17], at[35]);
+ MULADD(at[18], at[34]);
+ MULADD(at[19], at[33]);
+ MULADD(at[20], at[32]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[53]);
+ MULADD(at[1], at[52]);
+ MULADD(at[2], at[51]);
+ MULADD(at[3], at[50]);
+ MULADD(at[4], at[49]);
+ MULADD(at[5], at[48]);
+ MULADD(at[6], at[47]);
+ MULADD(at[7], at[46]);
+ MULADD(at[8], at[45]);
+ MULADD(at[9], at[44]);
+ MULADD(at[10], at[43]);
+ MULADD(at[11], at[42]);
+ MULADD(at[12], at[41]);
+ MULADD(at[13], at[40]);
+ MULADD(at[14], at[39]);
+ MULADD(at[15], at[38]);
+ MULADD(at[16], at[37]);
+ MULADD(at[17], at[36]);
+ MULADD(at[18], at[35]);
+ MULADD(at[19], at[34]);
+ MULADD(at[20], at[33]);
+ MULADD(at[21], at[32]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[54]);
+ MULADD(at[1], at[53]);
+ MULADD(at[2], at[52]);
+ MULADD(at[3], at[51]);
+ MULADD(at[4], at[50]);
+ MULADD(at[5], at[49]);
+ MULADD(at[6], at[48]);
+ MULADD(at[7], at[47]);
+ MULADD(at[8], at[46]);
+ MULADD(at[9], at[45]);
+ MULADD(at[10], at[44]);
+ MULADD(at[11], at[43]);
+ MULADD(at[12], at[42]);
+ MULADD(at[13], at[41]);
+ MULADD(at[14], at[40]);
+ MULADD(at[15], at[39]);
+ MULADD(at[16], at[38]);
+ MULADD(at[17], at[37]);
+ MULADD(at[18], at[36]);
+ MULADD(at[19], at[35]);
+ MULADD(at[20], at[34]);
+ MULADD(at[21], at[33]);
+ MULADD(at[22], at[32]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[55]);
+ MULADD(at[1], at[54]);
+ MULADD(at[2], at[53]);
+ MULADD(at[3], at[52]);
+ MULADD(at[4], at[51]);
+ MULADD(at[5], at[50]);
+ MULADD(at[6], at[49]);
+ MULADD(at[7], at[48]);
+ MULADD(at[8], at[47]);
+ MULADD(at[9], at[46]);
+ MULADD(at[10], at[45]);
+ MULADD(at[11], at[44]);
+ MULADD(at[12], at[43]);
+ MULADD(at[13], at[42]);
+ MULADD(at[14], at[41]);
+ MULADD(at[15], at[40]);
+ MULADD(at[16], at[39]);
+ MULADD(at[17], at[38]);
+ MULADD(at[18], at[37]);
+ MULADD(at[19], at[36]);
+ MULADD(at[20], at[35]);
+ MULADD(at[21], at[34]);
+ MULADD(at[22], at[33]);
+ MULADD(at[23], at[32]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[56]);
+ MULADD(at[1], at[55]);
+ MULADD(at[2], at[54]);
+ MULADD(at[3], at[53]);
+ MULADD(at[4], at[52]);
+ MULADD(at[5], at[51]);
+ MULADD(at[6], at[50]);
+ MULADD(at[7], at[49]);
+ MULADD(at[8], at[48]);
+ MULADD(at[9], at[47]);
+ MULADD(at[10], at[46]);
+ MULADD(at[11], at[45]);
+ MULADD(at[12], at[44]);
+ MULADD(at[13], at[43]);
+ MULADD(at[14], at[42]);
+ MULADD(at[15], at[41]);
+ MULADD(at[16], at[40]);
+ MULADD(at[17], at[39]);
+ MULADD(at[18], at[38]);
+ MULADD(at[19], at[37]);
+ MULADD(at[20], at[36]);
+ MULADD(at[21], at[35]);
+ MULADD(at[22], at[34]);
+ MULADD(at[23], at[33]);
+ MULADD(at[24], at[32]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[57]);
+ MULADD(at[1], at[56]);
+ MULADD(at[2], at[55]);
+ MULADD(at[3], at[54]);
+ MULADD(at[4], at[53]);
+ MULADD(at[5], at[52]);
+ MULADD(at[6], at[51]);
+ MULADD(at[7], at[50]);
+ MULADD(at[8], at[49]);
+ MULADD(at[9], at[48]);
+ MULADD(at[10], at[47]);
+ MULADD(at[11], at[46]);
+ MULADD(at[12], at[45]);
+ MULADD(at[13], at[44]);
+ MULADD(at[14], at[43]);
+ MULADD(at[15], at[42]);
+ MULADD(at[16], at[41]);
+ MULADD(at[17], at[40]);
+ MULADD(at[18], at[39]);
+ MULADD(at[19], at[38]);
+ MULADD(at[20], at[37]);
+ MULADD(at[21], at[36]);
+ MULADD(at[22], at[35]);
+ MULADD(at[23], at[34]);
+ MULADD(at[24], at[33]);
+ MULADD(at[25], at[32]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[58]);
+ MULADD(at[1], at[57]);
+ MULADD(at[2], at[56]);
+ MULADD(at[3], at[55]);
+ MULADD(at[4], at[54]);
+ MULADD(at[5], at[53]);
+ MULADD(at[6], at[52]);
+ MULADD(at[7], at[51]);
+ MULADD(at[8], at[50]);
+ MULADD(at[9], at[49]);
+ MULADD(at[10], at[48]);
+ MULADD(at[11], at[47]);
+ MULADD(at[12], at[46]);
+ MULADD(at[13], at[45]);
+ MULADD(at[14], at[44]);
+ MULADD(at[15], at[43]);
+ MULADD(at[16], at[42]);
+ MULADD(at[17], at[41]);
+ MULADD(at[18], at[40]);
+ MULADD(at[19], at[39]);
+ MULADD(at[20], at[38]);
+ MULADD(at[21], at[37]);
+ MULADD(at[22], at[36]);
+ MULADD(at[23], at[35]);
+ MULADD(at[24], at[34]);
+ MULADD(at[25], at[33]);
+ MULADD(at[26], at[32]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[59]);
+ MULADD(at[1], at[58]);
+ MULADD(at[2], at[57]);
+ MULADD(at[3], at[56]);
+ MULADD(at[4], at[55]);
+ MULADD(at[5], at[54]);
+ MULADD(at[6], at[53]);
+ MULADD(at[7], at[52]);
+ MULADD(at[8], at[51]);
+ MULADD(at[9], at[50]);
+ MULADD(at[10], at[49]);
+ MULADD(at[11], at[48]);
+ MULADD(at[12], at[47]);
+ MULADD(at[13], at[46]);
+ MULADD(at[14], at[45]);
+ MULADD(at[15], at[44]);
+ MULADD(at[16], at[43]);
+ MULADD(at[17], at[42]);
+ MULADD(at[18], at[41]);
+ MULADD(at[19], at[40]);
+ MULADD(at[20], at[39]);
+ MULADD(at[21], at[38]);
+ MULADD(at[22], at[37]);
+ MULADD(at[23], at[36]);
+ MULADD(at[24], at[35]);
+ MULADD(at[25], at[34]);
+ MULADD(at[26], at[33]);
+ MULADD(at[27], at[32]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[60]);
+ MULADD(at[1], at[59]);
+ MULADD(at[2], at[58]);
+ MULADD(at[3], at[57]);
+ MULADD(at[4], at[56]);
+ MULADD(at[5], at[55]);
+ MULADD(at[6], at[54]);
+ MULADD(at[7], at[53]);
+ MULADD(at[8], at[52]);
+ MULADD(at[9], at[51]);
+ MULADD(at[10], at[50]);
+ MULADD(at[11], at[49]);
+ MULADD(at[12], at[48]);
+ MULADD(at[13], at[47]);
+ MULADD(at[14], at[46]);
+ MULADD(at[15], at[45]);
+ MULADD(at[16], at[44]);
+ MULADD(at[17], at[43]);
+ MULADD(at[18], at[42]);
+ MULADD(at[19], at[41]);
+ MULADD(at[20], at[40]);
+ MULADD(at[21], at[39]);
+ MULADD(at[22], at[38]);
+ MULADD(at[23], at[37]);
+ MULADD(at[24], at[36]);
+ MULADD(at[25], at[35]);
+ MULADD(at[26], at[34]);
+ MULADD(at[27], at[33]);
+ MULADD(at[28], at[32]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[61]);
+ MULADD(at[1], at[60]);
+ MULADD(at[2], at[59]);
+ MULADD(at[3], at[58]);
+ MULADD(at[4], at[57]);
+ MULADD(at[5], at[56]);
+ MULADD(at[6], at[55]);
+ MULADD(at[7], at[54]);
+ MULADD(at[8], at[53]);
+ MULADD(at[9], at[52]);
+ MULADD(at[10], at[51]);
+ MULADD(at[11], at[50]);
+ MULADD(at[12], at[49]);
+ MULADD(at[13], at[48]);
+ MULADD(at[14], at[47]);
+ MULADD(at[15], at[46]);
+ MULADD(at[16], at[45]);
+ MULADD(at[17], at[44]);
+ MULADD(at[18], at[43]);
+ MULADD(at[19], at[42]);
+ MULADD(at[20], at[41]);
+ MULADD(at[21], at[40]);
+ MULADD(at[22], at[39]);
+ MULADD(at[23], at[38]);
+ MULADD(at[24], at[37]);
+ MULADD(at[25], at[36]);
+ MULADD(at[26], at[35]);
+ MULADD(at[27], at[34]);
+ MULADD(at[28], at[33]);
+ MULADD(at[29], at[32]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[62]);
+ MULADD(at[1], at[61]);
+ MULADD(at[2], at[60]);
+ MULADD(at[3], at[59]);
+ MULADD(at[4], at[58]);
+ MULADD(at[5], at[57]);
+ MULADD(at[6], at[56]);
+ MULADD(at[7], at[55]);
+ MULADD(at[8], at[54]);
+ MULADD(at[9], at[53]);
+ MULADD(at[10], at[52]);
+ MULADD(at[11], at[51]);
+ MULADD(at[12], at[50]);
+ MULADD(at[13], at[49]);
+ MULADD(at[14], at[48]);
+ MULADD(at[15], at[47]);
+ MULADD(at[16], at[46]);
+ MULADD(at[17], at[45]);
+ MULADD(at[18], at[44]);
+ MULADD(at[19], at[43]);
+ MULADD(at[20], at[42]);
+ MULADD(at[21], at[41]);
+ MULADD(at[22], at[40]);
+ MULADD(at[23], at[39]);
+ MULADD(at[24], at[38]);
+ MULADD(at[25], at[37]);
+ MULADD(at[26], at[36]);
+ MULADD(at[27], at[35]);
+ MULADD(at[28], at[34]);
+ MULADD(at[29], at[33]);
+ MULADD(at[30], at[32]);
+ COMBA_STORE(C->dp[30]);
+ /* 31 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[63]);
+ MULADD(at[1], at[62]);
+ MULADD(at[2], at[61]);
+ MULADD(at[3], at[60]);
+ MULADD(at[4], at[59]);
+ MULADD(at[5], at[58]);
+ MULADD(at[6], at[57]);
+ MULADD(at[7], at[56]);
+ MULADD(at[8], at[55]);
+ MULADD(at[9], at[54]);
+ MULADD(at[10], at[53]);
+ MULADD(at[11], at[52]);
+ MULADD(at[12], at[51]);
+ MULADD(at[13], at[50]);
+ MULADD(at[14], at[49]);
+ MULADD(at[15], at[48]);
+ MULADD(at[16], at[47]);
+ MULADD(at[17], at[46]);
+ MULADD(at[18], at[45]);
+ MULADD(at[19], at[44]);
+ MULADD(at[20], at[43]);
+ MULADD(at[21], at[42]);
+ MULADD(at[22], at[41]);
+ MULADD(at[23], at[40]);
+ MULADD(at[24], at[39]);
+ MULADD(at[25], at[38]);
+ MULADD(at[26], at[37]);
+ MULADD(at[27], at[36]);
+ MULADD(at[28], at[35]);
+ MULADD(at[29], at[34]);
+ MULADD(at[30], at[33]);
+ MULADD(at[31], at[32]);
+ COMBA_STORE(C->dp[31]);
+ /* 32 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[63]);
+ MULADD(at[2], at[62]);
+ MULADD(at[3], at[61]);
+ MULADD(at[4], at[60]);
+ MULADD(at[5], at[59]);
+ MULADD(at[6], at[58]);
+ MULADD(at[7], at[57]);
+ MULADD(at[8], at[56]);
+ MULADD(at[9], at[55]);
+ MULADD(at[10], at[54]);
+ MULADD(at[11], at[53]);
+ MULADD(at[12], at[52]);
+ MULADD(at[13], at[51]);
+ MULADD(at[14], at[50]);
+ MULADD(at[15], at[49]);
+ MULADD(at[16], at[48]);
+ MULADD(at[17], at[47]);
+ MULADD(at[18], at[46]);
+ MULADD(at[19], at[45]);
+ MULADD(at[20], at[44]);
+ MULADD(at[21], at[43]);
+ MULADD(at[22], at[42]);
+ MULADD(at[23], at[41]);
+ MULADD(at[24], at[40]);
+ MULADD(at[25], at[39]);
+ MULADD(at[26], at[38]);
+ MULADD(at[27], at[37]);
+ MULADD(at[28], at[36]);
+ MULADD(at[29], at[35]);
+ MULADD(at[30], at[34]);
+ MULADD(at[31], at[33]);
+ COMBA_STORE(C->dp[32]);
+ /* 33 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[63]);
+ MULADD(at[3], at[62]);
+ MULADD(at[4], at[61]);
+ MULADD(at[5], at[60]);
+ MULADD(at[6], at[59]);
+ MULADD(at[7], at[58]);
+ MULADD(at[8], at[57]);
+ MULADD(at[9], at[56]);
+ MULADD(at[10], at[55]);
+ MULADD(at[11], at[54]);
+ MULADD(at[12], at[53]);
+ MULADD(at[13], at[52]);
+ MULADD(at[14], at[51]);
+ MULADD(at[15], at[50]);
+ MULADD(at[16], at[49]);
+ MULADD(at[17], at[48]);
+ MULADD(at[18], at[47]);
+ MULADD(at[19], at[46]);
+ MULADD(at[20], at[45]);
+ MULADD(at[21], at[44]);
+ MULADD(at[22], at[43]);
+ MULADD(at[23], at[42]);
+ MULADD(at[24], at[41]);
+ MULADD(at[25], at[40]);
+ MULADD(at[26], at[39]);
+ MULADD(at[27], at[38]);
+ MULADD(at[28], at[37]);
+ MULADD(at[29], at[36]);
+ MULADD(at[30], at[35]);
+ MULADD(at[31], at[34]);
+ COMBA_STORE(C->dp[33]);
+ /* 34 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[63]);
+ MULADD(at[4], at[62]);
+ MULADD(at[5], at[61]);
+ MULADD(at[6], at[60]);
+ MULADD(at[7], at[59]);
+ MULADD(at[8], at[58]);
+ MULADD(at[9], at[57]);
+ MULADD(at[10], at[56]);
+ MULADD(at[11], at[55]);
+ MULADD(at[12], at[54]);
+ MULADD(at[13], at[53]);
+ MULADD(at[14], at[52]);
+ MULADD(at[15], at[51]);
+ MULADD(at[16], at[50]);
+ MULADD(at[17], at[49]);
+ MULADD(at[18], at[48]);
+ MULADD(at[19], at[47]);
+ MULADD(at[20], at[46]);
+ MULADD(at[21], at[45]);
+ MULADD(at[22], at[44]);
+ MULADD(at[23], at[43]);
+ MULADD(at[24], at[42]);
+ MULADD(at[25], at[41]);
+ MULADD(at[26], at[40]);
+ MULADD(at[27], at[39]);
+ MULADD(at[28], at[38]);
+ MULADD(at[29], at[37]);
+ MULADD(at[30], at[36]);
+ MULADD(at[31], at[35]);
+ COMBA_STORE(C->dp[34]);
+ /* 35 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[63]);
+ MULADD(at[5], at[62]);
+ MULADD(at[6], at[61]);
+ MULADD(at[7], at[60]);
+ MULADD(at[8], at[59]);
+ MULADD(at[9], at[58]);
+ MULADD(at[10], at[57]);
+ MULADD(at[11], at[56]);
+ MULADD(at[12], at[55]);
+ MULADD(at[13], at[54]);
+ MULADD(at[14], at[53]);
+ MULADD(at[15], at[52]);
+ MULADD(at[16], at[51]);
+ MULADD(at[17], at[50]);
+ MULADD(at[18], at[49]);
+ MULADD(at[19], at[48]);
+ MULADD(at[20], at[47]);
+ MULADD(at[21], at[46]);
+ MULADD(at[22], at[45]);
+ MULADD(at[23], at[44]);
+ MULADD(at[24], at[43]);
+ MULADD(at[25], at[42]);
+ MULADD(at[26], at[41]);
+ MULADD(at[27], at[40]);
+ MULADD(at[28], at[39]);
+ MULADD(at[29], at[38]);
+ MULADD(at[30], at[37]);
+ MULADD(at[31], at[36]);
+ COMBA_STORE(C->dp[35]);
+ /* 36 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[63]);
+ MULADD(at[6], at[62]);
+ MULADD(at[7], at[61]);
+ MULADD(at[8], at[60]);
+ MULADD(at[9], at[59]);
+ MULADD(at[10], at[58]);
+ MULADD(at[11], at[57]);
+ MULADD(at[12], at[56]);
+ MULADD(at[13], at[55]);
+ MULADD(at[14], at[54]);
+ MULADD(at[15], at[53]);
+ MULADD(at[16], at[52]);
+ MULADD(at[17], at[51]);
+ MULADD(at[18], at[50]);
+ MULADD(at[19], at[49]);
+ MULADD(at[20], at[48]);
+ MULADD(at[21], at[47]);
+ MULADD(at[22], at[46]);
+ MULADD(at[23], at[45]);
+ MULADD(at[24], at[44]);
+ MULADD(at[25], at[43]);
+ MULADD(at[26], at[42]);
+ MULADD(at[27], at[41]);
+ MULADD(at[28], at[40]);
+ MULADD(at[29], at[39]);
+ MULADD(at[30], at[38]);
+ MULADD(at[31], at[37]);
+ COMBA_STORE(C->dp[36]);
+ /* 37 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[63]);
+ MULADD(at[7], at[62]);
+ MULADD(at[8], at[61]);
+ MULADD(at[9], at[60]);
+ MULADD(at[10], at[59]);
+ MULADD(at[11], at[58]);
+ MULADD(at[12], at[57]);
+ MULADD(at[13], at[56]);
+ MULADD(at[14], at[55]);
+ MULADD(at[15], at[54]);
+ MULADD(at[16], at[53]);
+ MULADD(at[17], at[52]);
+ MULADD(at[18], at[51]);
+ MULADD(at[19], at[50]);
+ MULADD(at[20], at[49]);
+ MULADD(at[21], at[48]);
+ MULADD(at[22], at[47]);
+ MULADD(at[23], at[46]);
+ MULADD(at[24], at[45]);
+ MULADD(at[25], at[44]);
+ MULADD(at[26], at[43]);
+ MULADD(at[27], at[42]);
+ MULADD(at[28], at[41]);
+ MULADD(at[29], at[40]);
+ MULADD(at[30], at[39]);
+ MULADD(at[31], at[38]);
+ COMBA_STORE(C->dp[37]);
+ /* 38 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[63]);
+ MULADD(at[8], at[62]);
+ MULADD(at[9], at[61]);
+ MULADD(at[10], at[60]);
+ MULADD(at[11], at[59]);
+ MULADD(at[12], at[58]);
+ MULADD(at[13], at[57]);
+ MULADD(at[14], at[56]);
+ MULADD(at[15], at[55]);
+ MULADD(at[16], at[54]);
+ MULADD(at[17], at[53]);
+ MULADD(at[18], at[52]);
+ MULADD(at[19], at[51]);
+ MULADD(at[20], at[50]);
+ MULADD(at[21], at[49]);
+ MULADD(at[22], at[48]);
+ MULADD(at[23], at[47]);
+ MULADD(at[24], at[46]);
+ MULADD(at[25], at[45]);
+ MULADD(at[26], at[44]);
+ MULADD(at[27], at[43]);
+ MULADD(at[28], at[42]);
+ MULADD(at[29], at[41]);
+ MULADD(at[30], at[40]);
+ MULADD(at[31], at[39]);
+ COMBA_STORE(C->dp[38]);
+ /* 39 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[63]);
+ MULADD(at[9], at[62]);
+ MULADD(at[10], at[61]);
+ MULADD(at[11], at[60]);
+ MULADD(at[12], at[59]);
+ MULADD(at[13], at[58]);
+ MULADD(at[14], at[57]);
+ MULADD(at[15], at[56]);
+ MULADD(at[16], at[55]);
+ MULADD(at[17], at[54]);
+ MULADD(at[18], at[53]);
+ MULADD(at[19], at[52]);
+ MULADD(at[20], at[51]);
+ MULADD(at[21], at[50]);
+ MULADD(at[22], at[49]);
+ MULADD(at[23], at[48]);
+ MULADD(at[24], at[47]);
+ MULADD(at[25], at[46]);
+ MULADD(at[26], at[45]);
+ MULADD(at[27], at[44]);
+ MULADD(at[28], at[43]);
+ MULADD(at[29], at[42]);
+ MULADD(at[30], at[41]);
+ MULADD(at[31], at[40]);
+ COMBA_STORE(C->dp[39]);
+ /* 40 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[63]);
+ MULADD(at[10], at[62]);
+ MULADD(at[11], at[61]);
+ MULADD(at[12], at[60]);
+ MULADD(at[13], at[59]);
+ MULADD(at[14], at[58]);
+ MULADD(at[15], at[57]);
+ MULADD(at[16], at[56]);
+ MULADD(at[17], at[55]);
+ MULADD(at[18], at[54]);
+ MULADD(at[19], at[53]);
+ MULADD(at[20], at[52]);
+ MULADD(at[21], at[51]);
+ MULADD(at[22], at[50]);
+ MULADD(at[23], at[49]);
+ MULADD(at[24], at[48]);
+ MULADD(at[25], at[47]);
+ MULADD(at[26], at[46]);
+ MULADD(at[27], at[45]);
+ MULADD(at[28], at[44]);
+ MULADD(at[29], at[43]);
+ MULADD(at[30], at[42]);
+ MULADD(at[31], at[41]);
+ COMBA_STORE(C->dp[40]);
+ /* 41 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[63]);
+ MULADD(at[11], at[62]);
+ MULADD(at[12], at[61]);
+ MULADD(at[13], at[60]);
+ MULADD(at[14], at[59]);
+ MULADD(at[15], at[58]);
+ MULADD(at[16], at[57]);
+ MULADD(at[17], at[56]);
+ MULADD(at[18], at[55]);
+ MULADD(at[19], at[54]);
+ MULADD(at[20], at[53]);
+ MULADD(at[21], at[52]);
+ MULADD(at[22], at[51]);
+ MULADD(at[23], at[50]);
+ MULADD(at[24], at[49]);
+ MULADD(at[25], at[48]);
+ MULADD(at[26], at[47]);
+ MULADD(at[27], at[46]);
+ MULADD(at[28], at[45]);
+ MULADD(at[29], at[44]);
+ MULADD(at[30], at[43]);
+ MULADD(at[31], at[42]);
+ COMBA_STORE(C->dp[41]);
+ /* 42 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[63]);
+ MULADD(at[12], at[62]);
+ MULADD(at[13], at[61]);
+ MULADD(at[14], at[60]);
+ MULADD(at[15], at[59]);
+ MULADD(at[16], at[58]);
+ MULADD(at[17], at[57]);
+ MULADD(at[18], at[56]);
+ MULADD(at[19], at[55]);
+ MULADD(at[20], at[54]);
+ MULADD(at[21], at[53]);
+ MULADD(at[22], at[52]);
+ MULADD(at[23], at[51]);
+ MULADD(at[24], at[50]);
+ MULADD(at[25], at[49]);
+ MULADD(at[26], at[48]);
+ MULADD(at[27], at[47]);
+ MULADD(at[28], at[46]);
+ MULADD(at[29], at[45]);
+ MULADD(at[30], at[44]);
+ MULADD(at[31], at[43]);
+ COMBA_STORE(C->dp[42]);
+ /* 43 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[63]);
+ MULADD(at[13], at[62]);
+ MULADD(at[14], at[61]);
+ MULADD(at[15], at[60]);
+ MULADD(at[16], at[59]);
+ MULADD(at[17], at[58]);
+ MULADD(at[18], at[57]);
+ MULADD(at[19], at[56]);
+ MULADD(at[20], at[55]);
+ MULADD(at[21], at[54]);
+ MULADD(at[22], at[53]);
+ MULADD(at[23], at[52]);
+ MULADD(at[24], at[51]);
+ MULADD(at[25], at[50]);
+ MULADD(at[26], at[49]);
+ MULADD(at[27], at[48]);
+ MULADD(at[28], at[47]);
+ MULADD(at[29], at[46]);
+ MULADD(at[30], at[45]);
+ MULADD(at[31], at[44]);
+ COMBA_STORE(C->dp[43]);
+ /* 44 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[63]);
+ MULADD(at[14], at[62]);
+ MULADD(at[15], at[61]);
+ MULADD(at[16], at[60]);
+ MULADD(at[17], at[59]);
+ MULADD(at[18], at[58]);
+ MULADD(at[19], at[57]);
+ MULADD(at[20], at[56]);
+ MULADD(at[21], at[55]);
+ MULADD(at[22], at[54]);
+ MULADD(at[23], at[53]);
+ MULADD(at[24], at[52]);
+ MULADD(at[25], at[51]);
+ MULADD(at[26], at[50]);
+ MULADD(at[27], at[49]);
+ MULADD(at[28], at[48]);
+ MULADD(at[29], at[47]);
+ MULADD(at[30], at[46]);
+ MULADD(at[31], at[45]);
+ COMBA_STORE(C->dp[44]);
+ /* 45 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[63]);
+ MULADD(at[15], at[62]);
+ MULADD(at[16], at[61]);
+ MULADD(at[17], at[60]);
+ MULADD(at[18], at[59]);
+ MULADD(at[19], at[58]);
+ MULADD(at[20], at[57]);
+ MULADD(at[21], at[56]);
+ MULADD(at[22], at[55]);
+ MULADD(at[23], at[54]);
+ MULADD(at[24], at[53]);
+ MULADD(at[25], at[52]);
+ MULADD(at[26], at[51]);
+ MULADD(at[27], at[50]);
+ MULADD(at[28], at[49]);
+ MULADD(at[29], at[48]);
+ MULADD(at[30], at[47]);
+ MULADD(at[31], at[46]);
+ COMBA_STORE(C->dp[45]);
+ /* 46 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[63]);
+ MULADD(at[16], at[62]);
+ MULADD(at[17], at[61]);
+ MULADD(at[18], at[60]);
+ MULADD(at[19], at[59]);
+ MULADD(at[20], at[58]);
+ MULADD(at[21], at[57]);
+ MULADD(at[22], at[56]);
+ MULADD(at[23], at[55]);
+ MULADD(at[24], at[54]);
+ MULADD(at[25], at[53]);
+ MULADD(at[26], at[52]);
+ MULADD(at[27], at[51]);
+ MULADD(at[28], at[50]);
+ MULADD(at[29], at[49]);
+ MULADD(at[30], at[48]);
+ MULADD(at[31], at[47]);
+ COMBA_STORE(C->dp[46]);
+ /* 47 */
+ COMBA_FORWARD;
+ MULADD(at[16], at[63]);
+ MULADD(at[17], at[62]);
+ MULADD(at[18], at[61]);
+ MULADD(at[19], at[60]);
+ MULADD(at[20], at[59]);
+ MULADD(at[21], at[58]);
+ MULADD(at[22], at[57]);
+ MULADD(at[23], at[56]);
+ MULADD(at[24], at[55]);
+ MULADD(at[25], at[54]);
+ MULADD(at[26], at[53]);
+ MULADD(at[27], at[52]);
+ MULADD(at[28], at[51]);
+ MULADD(at[29], at[50]);
+ MULADD(at[30], at[49]);
+ MULADD(at[31], at[48]);
+ COMBA_STORE(C->dp[47]);
+ /* 48 */
+ COMBA_FORWARD;
+ MULADD(at[17], at[63]);
+ MULADD(at[18], at[62]);
+ MULADD(at[19], at[61]);
+ MULADD(at[20], at[60]);
+ MULADD(at[21], at[59]);
+ MULADD(at[22], at[58]);
+ MULADD(at[23], at[57]);
+ MULADD(at[24], at[56]);
+ MULADD(at[25], at[55]);
+ MULADD(at[26], at[54]);
+ MULADD(at[27], at[53]);
+ MULADD(at[28], at[52]);
+ MULADD(at[29], at[51]);
+ MULADD(at[30], at[50]);
+ MULADD(at[31], at[49]);
+ COMBA_STORE(C->dp[48]);
+ /* 49 */
+ COMBA_FORWARD;
+ MULADD(at[18], at[63]);
+ MULADD(at[19], at[62]);
+ MULADD(at[20], at[61]);
+ MULADD(at[21], at[60]);
+ MULADD(at[22], at[59]);
+ MULADD(at[23], at[58]);
+ MULADD(at[24], at[57]);
+ MULADD(at[25], at[56]);
+ MULADD(at[26], at[55]);
+ MULADD(at[27], at[54]);
+ MULADD(at[28], at[53]);
+ MULADD(at[29], at[52]);
+ MULADD(at[30], at[51]);
+ MULADD(at[31], at[50]);
+ COMBA_STORE(C->dp[49]);
+ /* 50 */
+ COMBA_FORWARD;
+ MULADD(at[19], at[63]);
+ MULADD(at[20], at[62]);
+ MULADD(at[21], at[61]);
+ MULADD(at[22], at[60]);
+ MULADD(at[23], at[59]);
+ MULADD(at[24], at[58]);
+ MULADD(at[25], at[57]);
+ MULADD(at[26], at[56]);
+ MULADD(at[27], at[55]);
+ MULADD(at[28], at[54]);
+ MULADD(at[29], at[53]);
+ MULADD(at[30], at[52]);
+ MULADD(at[31], at[51]);
+ COMBA_STORE(C->dp[50]);
+ /* 51 */
+ COMBA_FORWARD;
+ MULADD(at[20], at[63]);
+ MULADD(at[21], at[62]);
+ MULADD(at[22], at[61]);
+ MULADD(at[23], at[60]);
+ MULADD(at[24], at[59]);
+ MULADD(at[25], at[58]);
+ MULADD(at[26], at[57]);
+ MULADD(at[27], at[56]);
+ MULADD(at[28], at[55]);
+ MULADD(at[29], at[54]);
+ MULADD(at[30], at[53]);
+ MULADD(at[31], at[52]);
+ COMBA_STORE(C->dp[51]);
+ /* 52 */
+ COMBA_FORWARD;
+ MULADD(at[21], at[63]);
+ MULADD(at[22], at[62]);
+ MULADD(at[23], at[61]);
+ MULADD(at[24], at[60]);
+ MULADD(at[25], at[59]);
+ MULADD(at[26], at[58]);
+ MULADD(at[27], at[57]);
+ MULADD(at[28], at[56]);
+ MULADD(at[29], at[55]);
+ MULADD(at[30], at[54]);
+ MULADD(at[31], at[53]);
+ COMBA_STORE(C->dp[52]);
+ /* 53 */
+ COMBA_FORWARD;
+ MULADD(at[22], at[63]);
+ MULADD(at[23], at[62]);
+ MULADD(at[24], at[61]);
+ MULADD(at[25], at[60]);
+ MULADD(at[26], at[59]);
+ MULADD(at[27], at[58]);
+ MULADD(at[28], at[57]);
+ MULADD(at[29], at[56]);
+ MULADD(at[30], at[55]);
+ MULADD(at[31], at[54]);
+ COMBA_STORE(C->dp[53]);
+ /* 54 */
+ COMBA_FORWARD;
+ MULADD(at[23], at[63]);
+ MULADD(at[24], at[62]);
+ MULADD(at[25], at[61]);
+ MULADD(at[26], at[60]);
+ MULADD(at[27], at[59]);
+ MULADD(at[28], at[58]);
+ MULADD(at[29], at[57]);
+ MULADD(at[30], at[56]);
+ MULADD(at[31], at[55]);
+ COMBA_STORE(C->dp[54]);
+ /* 55 */
+ COMBA_FORWARD;
+ MULADD(at[24], at[63]);
+ MULADD(at[25], at[62]);
+ MULADD(at[26], at[61]);
+ MULADD(at[27], at[60]);
+ MULADD(at[28], at[59]);
+ MULADD(at[29], at[58]);
+ MULADD(at[30], at[57]);
+ MULADD(at[31], at[56]);
+ COMBA_STORE(C->dp[55]);
+ /* 56 */
+ COMBA_FORWARD;
+ MULADD(at[25], at[63]);
+ MULADD(at[26], at[62]);
+ MULADD(at[27], at[61]);
+ MULADD(at[28], at[60]);
+ MULADD(at[29], at[59]);
+ MULADD(at[30], at[58]);
+ MULADD(at[31], at[57]);
+ COMBA_STORE(C->dp[56]);
+ /* 57 */
+ COMBA_FORWARD;
+ MULADD(at[26], at[63]);
+ MULADD(at[27], at[62]);
+ MULADD(at[28], at[61]);
+ MULADD(at[29], at[60]);
+ MULADD(at[30], at[59]);
+ MULADD(at[31], at[58]);
+ COMBA_STORE(C->dp[57]);
+ /* 58 */
+ COMBA_FORWARD;
+ MULADD(at[27], at[63]);
+ MULADD(at[28], at[62]);
+ MULADD(at[29], at[61]);
+ MULADD(at[30], at[60]);
+ MULADD(at[31], at[59]);
+ COMBA_STORE(C->dp[58]);
+ /* 59 */
+ COMBA_FORWARD;
+ MULADD(at[28], at[63]);
+ MULADD(at[29], at[62]);
+ MULADD(at[30], at[61]);
+ MULADD(at[31], at[60]);
+ COMBA_STORE(C->dp[59]);
+ /* 60 */
+ COMBA_FORWARD;
+ MULADD(at[29], at[63]);
+ MULADD(at[30], at[62]);
+ MULADD(at[31], at[61]);
+ COMBA_STORE(C->dp[60]);
+ /* 61 */
+ COMBA_FORWARD;
+ MULADD(at[30], at[63]);
+ MULADD(at[31], at[62]);
+ COMBA_STORE(C->dp[61]);
+ /* 62 */
+ COMBA_FORWARD;
+ MULADD(at[31], at[63]);
+ COMBA_STORE(C->dp[62]);
+ COMBA_STORE2(C->dp[63]);
+ C->used = 64;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+/*
+ * s_mp_sqr_comba_4: fixed-size squaring of a 4-digit value, unrolled
+ * column by column.
+ *
+ * A - input. Only A->dp is read, and only digits a[0]..a[3]; A->used and
+ *     A->sign are not consulted in this function.
+ * B - output. Receives 8 digits in B->dp, with B->used = 8 and
+ *     B->sign = ZPOS, and is then passed to mp_clamp().
+ *
+ * Each "output k" section lists every pair a[i], a[j] with i + j == k and
+ * i <= j, then stores one digit into b[k]. SQRADD is used for the i == j
+ * pairs and SQRADD2 for the i < j pairs (presumably "add product" and
+ * "add product twice"; the macros are defined elsewhere -- confirm there).
+ * c0, c1, c2 are never named directly below; presumably the macros use
+ * them as the column accumulator.
+ *
+ * The digits are staged in the local array b[] and copied to B->dp only
+ * after the last read of A->dp. Nothing here checks sizes: the code reads
+ * a[0]..a[3] and copies 8 digits into B->dp, so callers presumably
+ * guarantee both capacities -- verify at the call sites.
+ */
+void
+s_mp_sqr_comba_4(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[8], c0, c1, c2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADD2(a[2], a[3]);
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+ COMBA_STORE2(b[7]); /* final digit; presumably the leftover carry -- confirm in macro */
+ COMBA_FINI;
+
+ B->used = 8;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 8 * sizeof(mp_digit)); /* publish the staged digits */
+ mp_clamp(B);
+}
+/*
+ * s_mp_sqr_comba_8: fixed-size squaring of an 8-digit value, unrolled
+ * column by column.
+ *
+ * A - input. Only A->dp is read, and only digits a[0]..a[7]; A->used and
+ *     A->sign are not consulted in this function.
+ * B - output. Receives 16 digits in B->dp, with B->used = 16 and
+ *     B->sign = ZPOS, and is then passed to mp_clamp().
+ *
+ * Each "output k" section lists every pair a[i], a[j] with i + j == k and
+ * i <= j, then stores one digit into b[k]. Two forms are used for the
+ * i < j pairs:
+ *   - columns with at most two such pairs (0..4 and 10..14) call SQRADD2
+ *     once per pair;
+ *   - columns with three or more (5..9) call SQRADDSC for the first pair,
+ *     SQRADDAC for each further pair, and SQRADDDB once at the end
+ *     (presumably start / accumulate / double-and-add using sc0, sc1, sc2;
+ *     the macros are defined elsewhere -- confirm there).
+ * The i == j pair of an even column is always added with SQRADD, after
+ * SQRADDDB where that sequence is used.
+ *
+ * c0..c2 and sc0..sc2 are never named directly below; presumably the
+ * macros use them as accumulators.
+ *
+ * The digits are staged in the local array b[] and copied to B->dp only
+ * after the last read of A->dp. Nothing here checks sizes: the code reads
+ * a[0]..a[7] and copies 16 digits into B->dp, so callers presumably
+ * guarantee both capacities -- verify at the call sites.
+ */
+void
+s_mp_sqr_comba_8(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 (first column using the SQRADDSC/SQRADDAC/SQRADDDB sequence) */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 (back to per-pair SQRADD2: only two i < j pairs remain) */
+ CARRY_FORWARD;
+ SQRADD2(a[3], a[7]);
+ SQRADD2(a[4], a[6]);
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADD2(a[4], a[7]);
+ SQRADD2(a[5], a[6]);
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADD2(a[5], a[7]);
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADD2(a[6], a[7]);
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+ COMBA_STORE2(b[15]); /* final digit; presumably the leftover carry -- confirm in macro */
+ COMBA_FINI;
+
+ B->used = 16;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 16 * sizeof(mp_digit)); /* publish the staged digits */
+ mp_clamp(B);
+}
+/*
+ * s_mp_sqr_comba_16: fixed-size squaring of a 16-digit value, unrolled
+ * column by column.
+ *
+ * A - input. Only A->dp is read, and only digits a[0]..a[15]; A->used and
+ *     A->sign are not consulted in this function.
+ * B - output. Receives 32 digits in B->dp, with B->used = 32 and
+ *     B->sign = ZPOS, and is then passed to mp_clamp().
+ *
+ * Each "output k" section lists every pair a[i], a[j] with i + j == k and
+ * i <= j, then stores one digit into b[k]. Two forms are used for the
+ * i < j pairs:
+ *   - columns with at most two such pairs (0..4 and 26..30) call SQRADD2
+ *     once per pair;
+ *   - columns with three or more (5..25) call SQRADDSC for the first pair,
+ *     SQRADDAC for each further pair, and SQRADDDB once at the end
+ *     (presumably start / accumulate / double-and-add using sc0, sc1, sc2;
+ *     the macros are defined elsewhere -- confirm there).
+ * The i == j pair of an even column is always added with SQRADD, after
+ * SQRADDDB where that sequence is used.
+ *
+ * c0..c2 and sc0..sc2 are never named directly below; presumably the
+ * macros use them as accumulators.
+ *
+ * The digits are staged in the local array b[] and copied to B->dp only
+ * after the last read of A->dp. Nothing here checks sizes: the code reads
+ * a[0]..a[15] and copies 32 digits into B->dp, so callers presumably
+ * guarantee both capacities -- verify at the call sites.
+ */
+void
+s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 (first column using the SQRADDSC/SQRADDAC/SQRADDDB sequence) */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 (widest column: all eight i < j pairs) */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 (back to per-pair SQRADD2: only two i < j pairs remain) */
+ CARRY_FORWARD;
+ SQRADD2(a[11], a[15]);
+ SQRADD2(a[12], a[14]);
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADD2(a[12], a[15]);
+ SQRADD2(a[13], a[14]);
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADD2(a[13], a[15]);
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADD2(a[14], a[15]);
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+ COMBA_STORE2(b[31]); /* final digit; presumably the leftover carry -- confirm in macro */
+ COMBA_FINI;
+
+ B->used = 32;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 32 * sizeof(mp_digit)); /* publish the staged digits */
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_32(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]);
+ SQRADDAC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[17]);
+ SQRADDAC(a[1], a[16]);
+ SQRADDAC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[18]);
+ SQRADDAC(a[1], a[17]);
+ SQRADDAC(a[2], a[16]);
+ SQRADDAC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[19]);
+ SQRADDAC(a[1], a[18]);
+ SQRADDAC(a[2], a[17]);
+ SQRADDAC(a[3], a[16]);
+ SQRADDAC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[20]);
+ SQRADDAC(a[1], a[19]);
+ SQRADDAC(a[2], a[18]);
+ SQRADDAC(a[3], a[17]);
+ SQRADDAC(a[4], a[16]);
+ SQRADDAC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[21]);
+ SQRADDAC(a[1], a[20]);
+ SQRADDAC(a[2], a[19]);
+ SQRADDAC(a[3], a[18]);
+ SQRADDAC(a[4], a[17]);
+ SQRADDAC(a[5], a[16]);
+ SQRADDAC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[22]);
+ SQRADDAC(a[1], a[21]);
+ SQRADDAC(a[2], a[20]);
+ SQRADDAC(a[3], a[19]);
+ SQRADDAC(a[4], a[18]);
+ SQRADDAC(a[5], a[17]);
+ SQRADDAC(a[6], a[16]);
+ SQRADDAC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[23]);
+ SQRADDAC(a[1], a[22]);
+ SQRADDAC(a[2], a[21]);
+ SQRADDAC(a[3], a[20]);
+ SQRADDAC(a[4], a[19]);
+ SQRADDAC(a[5], a[18]);
+ SQRADDAC(a[6], a[17]);
+ SQRADDAC(a[7], a[16]);
+ SQRADDAC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[24]);
+ SQRADDAC(a[1], a[23]);
+ SQRADDAC(a[2], a[22]);
+ SQRADDAC(a[3], a[21]);
+ SQRADDAC(a[4], a[20]);
+ SQRADDAC(a[5], a[19]);
+ SQRADDAC(a[6], a[18]);
+ SQRADDAC(a[7], a[17]);
+ SQRADDAC(a[8], a[16]);
+ SQRADDAC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[25]);
+ SQRADDAC(a[1], a[24]);
+ SQRADDAC(a[2], a[23]);
+ SQRADDAC(a[3], a[22]);
+ SQRADDAC(a[4], a[21]);
+ SQRADDAC(a[5], a[20]);
+ SQRADDAC(a[6], a[19]);
+ SQRADDAC(a[7], a[18]);
+ SQRADDAC(a[8], a[17]);
+ SQRADDAC(a[9], a[16]);
+ SQRADDAC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[26]);
+ SQRADDAC(a[1], a[25]);
+ SQRADDAC(a[2], a[24]);
+ SQRADDAC(a[3], a[23]);
+ SQRADDAC(a[4], a[22]);
+ SQRADDAC(a[5], a[21]);
+ SQRADDAC(a[6], a[20]);
+ SQRADDAC(a[7], a[19]);
+ SQRADDAC(a[8], a[18]);
+ SQRADDAC(a[9], a[17]);
+ SQRADDAC(a[10], a[16]);
+ SQRADDAC(a[11], a[15]);
+ SQRADDAC(a[12], a[14]);
+ SQRADDDB;
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[27]);
+ SQRADDAC(a[1], a[26]);
+ SQRADDAC(a[2], a[25]);
+ SQRADDAC(a[3], a[24]);
+ SQRADDAC(a[4], a[23]);
+ SQRADDAC(a[5], a[22]);
+ SQRADDAC(a[6], a[21]);
+ SQRADDAC(a[7], a[20]);
+ SQRADDAC(a[8], a[19]);
+ SQRADDAC(a[9], a[18]);
+ SQRADDAC(a[10], a[17]);
+ SQRADDAC(a[11], a[16]);
+ SQRADDAC(a[12], a[15]);
+ SQRADDAC(a[13], a[14]);
+ SQRADDDB;
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[28]);
+ SQRADDAC(a[1], a[27]);
+ SQRADDAC(a[2], a[26]);
+ SQRADDAC(a[3], a[25]);
+ SQRADDAC(a[4], a[24]);
+ SQRADDAC(a[5], a[23]);
+ SQRADDAC(a[6], a[22]);
+ SQRADDAC(a[7], a[21]);
+ SQRADDAC(a[8], a[20]);
+ SQRADDAC(a[9], a[19]);
+ SQRADDAC(a[10], a[18]);
+ SQRADDAC(a[11], a[17]);
+ SQRADDAC(a[12], a[16]);
+ SQRADDAC(a[13], a[15]);
+ SQRADDDB;
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[29]);
+ SQRADDAC(a[1], a[28]);
+ SQRADDAC(a[2], a[27]);
+ SQRADDAC(a[3], a[26]);
+ SQRADDAC(a[4], a[25]);
+ SQRADDAC(a[5], a[24]);
+ SQRADDAC(a[6], a[23]);
+ SQRADDAC(a[7], a[22]);
+ SQRADDAC(a[8], a[21]);
+ SQRADDAC(a[9], a[20]);
+ SQRADDAC(a[10], a[19]);
+ SQRADDAC(a[11], a[18]);
+ SQRADDAC(a[12], a[17]);
+ SQRADDAC(a[13], a[16]);
+ SQRADDAC(a[14], a[15]);
+ SQRADDDB;
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[30]);
+ SQRADDAC(a[1], a[29]);
+ SQRADDAC(a[2], a[28]);
+ SQRADDAC(a[3], a[27]);
+ SQRADDAC(a[4], a[26]);
+ SQRADDAC(a[5], a[25]);
+ SQRADDAC(a[6], a[24]);
+ SQRADDAC(a[7], a[23]);
+ SQRADDAC(a[8], a[22]);
+ SQRADDAC(a[9], a[21]);
+ SQRADDAC(a[10], a[20]);
+ SQRADDAC(a[11], a[19]);
+ SQRADDAC(a[12], a[18]);
+ SQRADDAC(a[13], a[17]);
+ SQRADDAC(a[14], a[16]);
+ SQRADDDB;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+
+ /* output 31 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[31]);
+ SQRADDAC(a[1], a[30]);
+ SQRADDAC(a[2], a[29]);
+ SQRADDAC(a[3], a[28]);
+ SQRADDAC(a[4], a[27]);
+ SQRADDAC(a[5], a[26]);
+ SQRADDAC(a[6], a[25]);
+ SQRADDAC(a[7], a[24]);
+ SQRADDAC(a[8], a[23]);
+ SQRADDAC(a[9], a[22]);
+ SQRADDAC(a[10], a[21]);
+ SQRADDAC(a[11], a[20]);
+ SQRADDAC(a[12], a[19]);
+ SQRADDAC(a[13], a[18]);
+ SQRADDAC(a[14], a[17]);
+ SQRADDAC(a[15], a[16]);
+ SQRADDDB;
+ COMBA_STORE(b[31]);
+
+ /* output 32 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[31]);
+ SQRADDAC(a[2], a[30]);
+ SQRADDAC(a[3], a[29]);
+ SQRADDAC(a[4], a[28]);
+ SQRADDAC(a[5], a[27]);
+ SQRADDAC(a[6], a[26]);
+ SQRADDAC(a[7], a[25]);
+ SQRADDAC(a[8], a[24]);
+ SQRADDAC(a[9], a[23]);
+ SQRADDAC(a[10], a[22]);
+ SQRADDAC(a[11], a[21]);
+ SQRADDAC(a[12], a[20]);
+ SQRADDAC(a[13], a[19]);
+ SQRADDAC(a[14], a[18]);
+ SQRADDAC(a[15], a[17]);
+ SQRADDDB;
+ SQRADD(a[16], a[16]);
+ COMBA_STORE(b[32]);
+
+ /* output 33 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[31]);
+ SQRADDAC(a[3], a[30]);
+ SQRADDAC(a[4], a[29]);
+ SQRADDAC(a[5], a[28]);
+ SQRADDAC(a[6], a[27]);
+ SQRADDAC(a[7], a[26]);
+ SQRADDAC(a[8], a[25]);
+ SQRADDAC(a[9], a[24]);
+ SQRADDAC(a[10], a[23]);
+ SQRADDAC(a[11], a[22]);
+ SQRADDAC(a[12], a[21]);
+ SQRADDAC(a[13], a[20]);
+ SQRADDAC(a[14], a[19]);
+ SQRADDAC(a[15], a[18]);
+ SQRADDAC(a[16], a[17]);
+ SQRADDDB;
+ COMBA_STORE(b[33]);
+
+ /* output 34 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[31]);
+ SQRADDAC(a[4], a[30]);
+ SQRADDAC(a[5], a[29]);
+ SQRADDAC(a[6], a[28]);
+ SQRADDAC(a[7], a[27]);
+ SQRADDAC(a[8], a[26]);
+ SQRADDAC(a[9], a[25]);
+ SQRADDAC(a[10], a[24]);
+ SQRADDAC(a[11], a[23]);
+ SQRADDAC(a[12], a[22]);
+ SQRADDAC(a[13], a[21]);
+ SQRADDAC(a[14], a[20]);
+ SQRADDAC(a[15], a[19]);
+ SQRADDAC(a[16], a[18]);
+ SQRADDDB;
+ SQRADD(a[17], a[17]);
+ COMBA_STORE(b[34]);
+
+ /* output 35 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[31]);
+ SQRADDAC(a[5], a[30]);
+ SQRADDAC(a[6], a[29]);
+ SQRADDAC(a[7], a[28]);
+ SQRADDAC(a[8], a[27]);
+ SQRADDAC(a[9], a[26]);
+ SQRADDAC(a[10], a[25]);
+ SQRADDAC(a[11], a[24]);
+ SQRADDAC(a[12], a[23]);
+ SQRADDAC(a[13], a[22]);
+ SQRADDAC(a[14], a[21]);
+ SQRADDAC(a[15], a[20]);
+ SQRADDAC(a[16], a[19]);
+ SQRADDAC(a[17], a[18]);
+ SQRADDDB;
+ COMBA_STORE(b[35]);
+
+ /* output 36 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[31]);
+ SQRADDAC(a[6], a[30]);
+ SQRADDAC(a[7], a[29]);
+ SQRADDAC(a[8], a[28]);
+ SQRADDAC(a[9], a[27]);
+ SQRADDAC(a[10], a[26]);
+ SQRADDAC(a[11], a[25]);
+ SQRADDAC(a[12], a[24]);
+ SQRADDAC(a[13], a[23]);
+ SQRADDAC(a[14], a[22]);
+ SQRADDAC(a[15], a[21]);
+ SQRADDAC(a[16], a[20]);
+ SQRADDAC(a[17], a[19]);
+ SQRADDDB;
+ SQRADD(a[18], a[18]);
+ COMBA_STORE(b[36]);
+
+ /* output 37 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[31]);
+ SQRADDAC(a[7], a[30]);
+ SQRADDAC(a[8], a[29]);
+ SQRADDAC(a[9], a[28]);
+ SQRADDAC(a[10], a[27]);
+ SQRADDAC(a[11], a[26]);
+ SQRADDAC(a[12], a[25]);
+ SQRADDAC(a[13], a[24]);
+ SQRADDAC(a[14], a[23]);
+ SQRADDAC(a[15], a[22]);
+ SQRADDAC(a[16], a[21]);
+ SQRADDAC(a[17], a[20]);
+ SQRADDAC(a[18], a[19]);
+ SQRADDDB;
+ COMBA_STORE(b[37]);
+
+ /* output 38 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[31]);
+ SQRADDAC(a[8], a[30]);
+ SQRADDAC(a[9], a[29]);
+ SQRADDAC(a[10], a[28]);
+ SQRADDAC(a[11], a[27]);
+ SQRADDAC(a[12], a[26]);
+ SQRADDAC(a[13], a[25]);
+ SQRADDAC(a[14], a[24]);
+ SQRADDAC(a[15], a[23]);
+ SQRADDAC(a[16], a[22]);
+ SQRADDAC(a[17], a[21]);
+ SQRADDAC(a[18], a[20]);
+ SQRADDDB;
+ SQRADD(a[19], a[19]);
+ COMBA_STORE(b[38]);
+
+ /* output 39 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[31]);
+ SQRADDAC(a[9], a[30]);
+ SQRADDAC(a[10], a[29]);
+ SQRADDAC(a[11], a[28]);
+ SQRADDAC(a[12], a[27]);
+ SQRADDAC(a[13], a[26]);
+ SQRADDAC(a[14], a[25]);
+ SQRADDAC(a[15], a[24]);
+ SQRADDAC(a[16], a[23]);
+ SQRADDAC(a[17], a[22]);
+ SQRADDAC(a[18], a[21]);
+ SQRADDAC(a[19], a[20]);
+ SQRADDDB;
+ COMBA_STORE(b[39]);
+
+ /* output 40 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[31]);
+ SQRADDAC(a[10], a[30]);
+ SQRADDAC(a[11], a[29]);
+ SQRADDAC(a[12], a[28]);
+ SQRADDAC(a[13], a[27]);
+ SQRADDAC(a[14], a[26]);
+ SQRADDAC(a[15], a[25]);
+ SQRADDAC(a[16], a[24]);
+ SQRADDAC(a[17], a[23]);
+ SQRADDAC(a[18], a[22]);
+ SQRADDAC(a[19], a[21]);
+ SQRADDDB;
+ SQRADD(a[20], a[20]);
+ COMBA_STORE(b[40]);
+
+ /* output 41 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[31]);
+ SQRADDAC(a[11], a[30]);
+ SQRADDAC(a[12], a[29]);
+ SQRADDAC(a[13], a[28]);
+ SQRADDAC(a[14], a[27]);
+ SQRADDAC(a[15], a[26]);
+ SQRADDAC(a[16], a[25]);
+ SQRADDAC(a[17], a[24]);
+ SQRADDAC(a[18], a[23]);
+ SQRADDAC(a[19], a[22]);
+ SQRADDAC(a[20], a[21]);
+ SQRADDDB;
+ COMBA_STORE(b[41]);
+
+ /* output 42 */
+ CARRY_FORWARD;
+ SQRADDSC(a[11], a[31]);
+ SQRADDAC(a[12], a[30]);
+ SQRADDAC(a[13], a[29]);
+ SQRADDAC(a[14], a[28]);
+ SQRADDAC(a[15], a[27]);
+ SQRADDAC(a[16], a[26]);
+ SQRADDAC(a[17], a[25]);
+ SQRADDAC(a[18], a[24]);
+ SQRADDAC(a[19], a[23]);
+ SQRADDAC(a[20], a[22]);
+ SQRADDDB;
+ SQRADD(a[21], a[21]);
+ COMBA_STORE(b[42]);
+
+ /* output 43 */
+ CARRY_FORWARD;
+ SQRADDSC(a[12], a[31]);
+ SQRADDAC(a[13], a[30]);
+ SQRADDAC(a[14], a[29]);
+ SQRADDAC(a[15], a[28]);
+ SQRADDAC(a[16], a[27]);
+ SQRADDAC(a[17], a[26]);
+ SQRADDAC(a[18], a[25]);
+ SQRADDAC(a[19], a[24]);
+ SQRADDAC(a[20], a[23]);
+ SQRADDAC(a[21], a[22]);
+ SQRADDDB;
+ COMBA_STORE(b[43]);
+
+ /* output 44 */
+ CARRY_FORWARD;
+ SQRADDSC(a[13], a[31]);
+ SQRADDAC(a[14], a[30]);
+ SQRADDAC(a[15], a[29]);
+ SQRADDAC(a[16], a[28]);
+ SQRADDAC(a[17], a[27]);
+ SQRADDAC(a[18], a[26]);
+ SQRADDAC(a[19], a[25]);
+ SQRADDAC(a[20], a[24]);
+ SQRADDAC(a[21], a[23]);
+ SQRADDDB;
+ SQRADD(a[22], a[22]);
+ COMBA_STORE(b[44]);
+
+ /* output 45 */
+ CARRY_FORWARD;
+ SQRADDSC(a[14], a[31]);
+ SQRADDAC(a[15], a[30]);
+ SQRADDAC(a[16], a[29]);
+ SQRADDAC(a[17], a[28]);
+ SQRADDAC(a[18], a[27]);
+ SQRADDAC(a[19], a[26]);
+ SQRADDAC(a[20], a[25]);
+ SQRADDAC(a[21], a[24]);
+ SQRADDAC(a[22], a[23]);
+ SQRADDDB;
+ COMBA_STORE(b[45]);
+
+ /* output 46 */
+ CARRY_FORWARD;
+ SQRADDSC(a[15], a[31]);
+ SQRADDAC(a[16], a[30]);
+ SQRADDAC(a[17], a[29]);
+ SQRADDAC(a[18], a[28]);
+ SQRADDAC(a[19], a[27]);
+ SQRADDAC(a[20], a[26]);
+ SQRADDAC(a[21], a[25]);
+ SQRADDAC(a[22], a[24]);
+ SQRADDDB;
+ SQRADD(a[23], a[23]);
+ COMBA_STORE(b[46]);
+
+ /* output 47 */
+ CARRY_FORWARD;
+ SQRADDSC(a[16], a[31]);
+ SQRADDAC(a[17], a[30]);
+ SQRADDAC(a[18], a[29]);
+ SQRADDAC(a[19], a[28]);
+ SQRADDAC(a[20], a[27]);
+ SQRADDAC(a[21], a[26]);
+ SQRADDAC(a[22], a[25]);
+ SQRADDAC(a[23], a[24]);
+ SQRADDDB;
+ COMBA_STORE(b[47]);
+
+ /* output 48 */
+ CARRY_FORWARD;
+ SQRADDSC(a[17], a[31]);
+ SQRADDAC(a[18], a[30]);
+ SQRADDAC(a[19], a[29]);
+ SQRADDAC(a[20], a[28]);
+ SQRADDAC(a[21], a[27]);
+ SQRADDAC(a[22], a[26]);
+ SQRADDAC(a[23], a[25]);
+ SQRADDDB;
+ SQRADD(a[24], a[24]);
+ COMBA_STORE(b[48]);
+
+ /* output 49 */
+ CARRY_FORWARD;
+ SQRADDSC(a[18], a[31]);
+ SQRADDAC(a[19], a[30]);
+ SQRADDAC(a[20], a[29]);
+ SQRADDAC(a[21], a[28]);
+ SQRADDAC(a[22], a[27]);
+ SQRADDAC(a[23], a[26]);
+ SQRADDAC(a[24], a[25]);
+ SQRADDDB;
+ COMBA_STORE(b[49]);
+
+ /* output 50 */
+ CARRY_FORWARD;
+ SQRADDSC(a[19], a[31]);
+ SQRADDAC(a[20], a[30]);
+ SQRADDAC(a[21], a[29]);
+ SQRADDAC(a[22], a[28]);
+ SQRADDAC(a[23], a[27]);
+ SQRADDAC(a[24], a[26]);
+ SQRADDDB;
+ SQRADD(a[25], a[25]);
+ COMBA_STORE(b[50]);
+
+ /* output 51 */
+ CARRY_FORWARD;
+ SQRADDSC(a[20], a[31]);
+ SQRADDAC(a[21], a[30]);
+ SQRADDAC(a[22], a[29]);
+ SQRADDAC(a[23], a[28]);
+ SQRADDAC(a[24], a[27]);
+ SQRADDAC(a[25], a[26]);
+ SQRADDDB;
+ COMBA_STORE(b[51]);
+
+ /* output 52 */
+ CARRY_FORWARD;
+ SQRADDSC(a[21], a[31]);
+ SQRADDAC(a[22], a[30]);
+ SQRADDAC(a[23], a[29]);
+ SQRADDAC(a[24], a[28]);
+ SQRADDAC(a[25], a[27]);
+ SQRADDDB;
+ SQRADD(a[26], a[26]);
+ COMBA_STORE(b[52]);
+
+ /* output 53 */
+ CARRY_FORWARD;
+ SQRADDSC(a[22], a[31]);
+ SQRADDAC(a[23], a[30]);
+ SQRADDAC(a[24], a[29]);
+ SQRADDAC(a[25], a[28]);
+ SQRADDAC(a[26], a[27]);
+ SQRADDDB;
+ COMBA_STORE(b[53]);
+
+ /* output 54 */
+ CARRY_FORWARD;
+ SQRADDSC(a[23], a[31]);
+ SQRADDAC(a[24], a[30]);
+ SQRADDAC(a[25], a[29]);
+ SQRADDAC(a[26], a[28]);
+ SQRADDDB;
+ SQRADD(a[27], a[27]);
+ COMBA_STORE(b[54]);
+
+ /* output 55 */
+ CARRY_FORWARD;
+ SQRADDSC(a[24], a[31]);
+ SQRADDAC(a[25], a[30]);
+ SQRADDAC(a[26], a[29]);
+ SQRADDAC(a[27], a[28]);
+ SQRADDDB;
+ COMBA_STORE(b[55]);
+
+ /* output 56 */
+ CARRY_FORWARD;
+ SQRADDSC(a[25], a[31]);
+ SQRADDAC(a[26], a[30]);
+ SQRADDAC(a[27], a[29]);
+ SQRADDDB;
+ SQRADD(a[28], a[28]);
+ COMBA_STORE(b[56]);
+
+ /* output 57 */
+ CARRY_FORWARD;
+ SQRADDSC(a[26], a[31]);
+ SQRADDAC(a[27], a[30]);
+ SQRADDAC(a[28], a[29]);
+ SQRADDDB;
+ COMBA_STORE(b[57]);
+
+ /* output 58 */
+ CARRY_FORWARD;
+ SQRADD2(a[27], a[31]);
+ SQRADD2(a[28], a[30]);
+ SQRADD(a[29], a[29]);
+ COMBA_STORE(b[58]);
+
+ /* output 59 */
+ CARRY_FORWARD;
+ SQRADD2(a[28], a[31]);
+ SQRADD2(a[29], a[30]);
+ COMBA_STORE(b[59]);
+
+ /* output 60 */
+ CARRY_FORWARD;
+ SQRADD2(a[29], a[31]);
+ SQRADD(a[30], a[30]);
+ COMBA_STORE(b[60]);
+
+ /* output 61 */
+ CARRY_FORWARD;
+ SQRADD2(a[30], a[31]);
+ COMBA_STORE(b[61]);
+
+ /* output 62 */
+ CARRY_FORWARD;
+ SQRADD(a[31], a[31]);
+ COMBA_STORE(b[62]);
+ COMBA_STORE2(b[63]);
+ COMBA_FINI;
+
+ B->used = 64;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 64 * sizeof(mp_digit));
+ mp_clamp(B);
+}
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
new file mode 100644
index 0000000000..e50efa8de3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
@@ -0,0 +1,13066 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;/* TomsFastMath, a fast ISO C bignum library.
+; *
+; * This project is meant to fill in where LibTomMath
+; * falls short. That is speed ;-)
+; *
+; * This project is public domain and free for all purposes.
+; *
+; * Tom St Denis, tomstdenis@iahu.ca
+; */
+
+;/*
+; * The source file from which this assembly was derived
+; * comes from TFM v0.03, which has the above license.
+; * This source was derived from mp_comba_amd64.sun.s and converted to
+; * MASM syntax.
+; */
+
+.CODE ; everything that follows is assembled into the code segment
+
+externdef memcpy:PROC ; memcpy is an external procedure, defined outside this file
+
+public s_mp_mul_comba_4 ; the public list exports the routines defined in this file
+public s_mp_mul_comba_8
+public s_mp_mul_comba_16
+public s_mp_mul_comba_32
+public s_mp_sqr_comba_8
+public s_mp_sqr_comba_16
+public s_mp_sqr_comba_32
+
+
+; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+; Fully unrolled 4x4-digit column-wise multiply: stores 8 result digits through the pointer at C+16, then trims the digit count and sets the dword at C+0.
+ ALIGN 16
+s_mp_mul_comba_4 PROC
+; Entry: A in rcx, B in rdx, C in r8 (NOTE(review): presumably the Microsoft x64 calling convention -- confirm). The body uses rdi/rsi/rbx for A/B/C.
+ push rdi ; rdi and rsi are overwritten below; both are restored before ret
+ push rsi
+; Fields read from each mp_int: dword at +0, dword at +8, pointer at +16 (NOTE(review): presumably sign, used and dp -- confirm against the mp_int definition).
+ mov rdi, rcx ; rdi = A
+ mov rsi, rdx ; rsi = B
+ mov rdx, r8 ; rdx = C (copied to rbx below)
+; Locals after "sub rsp, 64": slots -64..-40 (+64+rsp) hold copies of A[0..3], slots -32..-8 hold copies of B[0..3].
+ push r12
+ push rbp
+ push rbx
+ sub rsp, 64 ; eight qword slots of local storage
+ mov r9, qword ptr [16+rdi] ; r9 = digit pointer loaded from A+16
+ mov rbx, rdx ; rbx = C for the rest of the routine
+ mov rdx, qword ptr [16+rsi] ; rdx = digit pointer loaded from B+16
+ mov rax, qword ptr [r9]
+ mov qword ptr [-64+64+rsp], rax ; slot -64 = A[0]
+ mov r8, qword ptr [8+r9]
+ mov qword ptr [-56+64+rsp], r8 ; slot -56 = A[1]
+ mov rbp, qword ptr [16+r9]
+ mov qword ptr [-48+64+rsp], rbp ; slot -48 = A[2]
+ mov r12, qword ptr [24+r9]
+ mov qword ptr [-40+64+rsp], r12 ; slot -40 = A[3]
+ mov rcx, qword ptr [rdx]
+ mov qword ptr [-32+64+rsp], rcx ; slot -32 = B[0]
+ mov r10, qword ptr [8+rdx]
+ mov qword ptr [-24+64+rsp], r10 ; slot -24 = B[1]
+ mov r11, qword ptr [16+rdx]
+ xor r10d, r10d ; r10 = 0; copied into accumulator registers whenever one is reset
+ mov r8, r10 ; clear the three accumulator registers r8, r9, rbp
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-16+64+rsp], r11 ; slot -16 = B[2]
+ mov r11, qword ptr [16+rbx] ; r11 = digit pointer loaded from C+16; all result digits are stored through it
+ mov rax, qword ptr [24+rdx]
+ mov qword ptr [-8+64+rsp], rax ; slot -8 = B[3]
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-32+64+rsp] ; column 0: A[0]*B[0]
+ add r8, rax ; every product rdx:rax is added into a three-register accumulator via add/adc/adc
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8 ; store result digit 0
+ mov r8, rbp ; the two upper accumulator words carry into the next column
+ mov rbp, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-24+64+rsp] ; column 1: A[0]*B[1]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-32+64+rsp] ; A[1]*B[0]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9 ; store result digit 1
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-16+64+rsp] ; column 2: A[0]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-24+64+rsp] ; A[1]*B[1]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-32+64+rsp] ; A[2]*B[0]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8 ; store result digit 2
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-8+64+rsp] ; column 3: A[0]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-16+64+rsp] ; A[1]*B[2]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-24+64+rsp] ; A[2]*B[1]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-32+64+rsp] ; A[3]*B[0]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx ; store result digit 3
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-8+64+rsp] ; column 4: A[1]*B[3]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-16+64+rsp] ; A[2]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-24+64+rsp] ; A[3]*B[1]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8 ; store result digit 4
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-8+64+rsp] ; column 5: A[2]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-16+64+rsp] ; A[3]*B[2]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx ; store result digit 5
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-8+64+rsp] ; column 6: A[3]*B[3]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0 ; r10 is not read again before it is overwritten in the trim loop
+ mov qword ptr [48+r11], r8 ; store result digit 6
+ mov esi, dword ptr [rsi] ; esi = dword at B+0 ...
+ xor esi, dword ptr [rdi] ; ... XOR dword at A+0 (NOTE(review): presumably the two sign fields -- confirm)
+ test rcx, rcx ; is the top digit zero? The mov stores below leave the flags intact for the jne
+ mov qword ptr [56+r11], rcx ; store result digit 7
+ mov dword ptr [8+rbx], 8 ; dword at C+8 = 8 (NOTE(review): presumably the used-digit count -- confirm)
+ jne L9 ; top digit is non-zero: nothing to trim
+ ALIGN 16
+L18:
+ mov edx, dword ptr [8+rbx] ; trim loop: edx = current count
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi ; count = count - 1
+ je L9 ; count reached zero: stop trimming
+ lea r10d, dword ptr [-2+rdx] ; index of the highest digit under the new count
+ cmp qword ptr [r11+r10*8], 0
+ je L18 ; that digit is zero too: trim again
+L9:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi ; r11d = XORed dword when count != 0, otherwise 0
+ mov dword ptr [rbx], r11d ; write it to the dword at C+0
+ add rsp, 64 ; release the locals
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_4 ENDP
+
+
+; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+; Fully unrolled 8x8-digit column-wise multiply: stores 16 result digits through the pointer at C+16, then trims the digit count and sets the dword at C+0.
+ ALIGN 16
+s_mp_mul_comba_8 PROC
+; Entry: A in rcx, B in rdx, C in r8 (NOTE(review): presumably the Microsoft x64 calling convention -- confirm). The body uses rdi/rsi/rbx for A/B/C.
+ push rdi ; rdi and rsi are overwritten below; both are restored before ret
+ push rsi
+; Fields read from each mp_int: dword at +0, dword at +8, pointer at +16 (NOTE(review): presumably sign, used and dp -- confirm against the mp_int definition).
+ mov rdi, rcx ; rdi = A
+ mov rsi, rdx ; rsi = B
+ mov rdx, r8 ; rdx = C (copied to rbx below)
+; Locals (offsets are relative to 128+rsp): slots -120..-64 hold copies of A[0..7], slots -56..-8 hold B[0..6], slot 0 holds B[7].
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx ; rbx = C for the rest of the routine
+ sub rsp, 8+128 ; 136 bytes of locals; the qword at [rsp] itself is not referenced in this routine
+ mov rdx, qword ptr [16+rdi] ; rdx = digit pointer loaded from A+16
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-120+128+rsp], r8 ; slot -120 = A[0]
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-112+128+rsp], rbp ; slot -112 = A[1]
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-104+128+rsp], r9 ; slot -104 = A[2]
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-96+128+rsp], r12 ; slot -96 = A[3]
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-88+128+rsp], rcx ; slot -88 = A[4]
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-80+128+rsp], r10 ; slot -80 = A[5]
+ mov r11, qword ptr [48+rdx]
+ mov qword ptr [-72+128+rsp], r11 ; slot -72 = A[6]
+ mov rax, qword ptr [56+rdx]
+ mov rdx, qword ptr [16+rsi] ; rdx = digit pointer loaded from B+16
+ mov qword ptr [-64+128+rsp], rax ; slot -64 = A[7]
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-56+128+rsp], r8 ; slot -56 = B[0]
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-48+128+rsp], rbp ; slot -48 = B[1]
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-40+128+rsp], r9 ; slot -40 = B[2]
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-32+128+rsp], r12 ; slot -32 = B[3]
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-24+128+rsp], rcx ; slot -24 = B[4]
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-16+128+rsp], r10 ; slot -16 = B[5]
+ mov r11, qword ptr [48+rdx]
+ xor r10d, r10d ; r10 = 0; copied into accumulator registers whenever one is reset
+ mov r8, r10 ; clear the three accumulator registers r8, r9, rbp
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-8+128+rsp], r11 ; slot -8 = B[6]
+ mov r11, qword ptr [16+rbx] ; r11 = digit pointer loaded from C+16; all result digits are stored through it
+ mov rax, qword ptr [56+rdx]
+ mov qword ptr [128+rsp], rax ; slot 0 = B[7]
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-56+128+rsp] ; column 0: A[0]*B[0]
+ add r8, rax ; every product rdx:rax is added into a three-register accumulator via add/adc/adc
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8 ; store result digit 0
+ mov r8, rbp ; the two upper accumulator words carry into the next column
+ mov rbp, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-48+128+rsp] ; column 1: A[0]*B[1]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[1]*B[0]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9 ; store result digit 1
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-40+128+rsp] ; column 2: A[0]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[1]*B[1]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[2]*B[0]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8 ; store result digit 2
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-32+128+rsp] ; column 3: A[0]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[1]*B[2]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[2]*B[1]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[3]*B[0]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx ; store result digit 3
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-24+128+rsp] ; column 4: A[0]*B[4]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[1]*B[3]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[2]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[3]*B[1]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[4]*B[0]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8 ; store result digit 4
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-16+128+rsp] ; column 5: A[0]*B[5]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[1]*B[4]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[2]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[3]*B[2]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[4]*B[1]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[5]*B[0]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx ; store result digit 5
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-8+128+rsp] ; column 6: A[0]*B[6]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[1]*B[5]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[2]*B[4]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[3]*B[3]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[4]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[5]*B[1]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[6]*B[0]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8 ; store result digit 6
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+rsp] ; column 7: A[0]*B[7]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[1]*B[6]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[2]*B[5]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[3]*B[4]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[4]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[5]*B[2]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[6]*B[1]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-56+128+rsp] ; A[7]*B[0]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx ; store result digit 7
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+rsp] ; column 8: A[1]*B[7]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[2]*B[6]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[3]*B[5]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[4]*B[4]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[5]*B[3]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[6]*B[2]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-48+128+rsp] ; A[7]*B[1]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8 ; store result digit 8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+rsp] ; column 9: A[2]*B[7]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[3]*B[6]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[4]*B[5]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[5]*B[4]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[6]*B[3]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-40+128+rsp] ; A[7]*B[2]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx ; store result digit 9
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+rsp] ; column 10: A[3]*B[7]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[4]*B[6]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[5]*B[5]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[6]*B[4]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-32+128+rsp] ; A[7]*B[3]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8 ; store result digit 10
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+rsp] ; column 11: A[4]*B[7]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[5]*B[6]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[6]*B[5]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-24+128+rsp] ; A[7]*B[4]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx ; store result digit 11
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+rsp] ; column 12: A[5]*B[7]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[6]*B[6]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-16+128+rsp] ; A[7]*B[5]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8 ; store result digit 12
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+rsp] ; column 13: A[6]*B[7]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-8+128+rsp] ; A[7]*B[6]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx ; store result digit 13
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+rsp] ; column 14: A[7]*B[7]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0 ; r10 is not read again in this routine
+ mov qword ptr [112+r11], r8 ; store result digit 14
+ mov esi, dword ptr [rsi] ; esi = dword at B+0 ...
+ xor esi, dword ptr [rdi] ; ... XOR dword at A+0 (NOTE(review): presumably the two sign fields -- confirm)
+ test rcx, rcx ; is the top digit zero? The mov stores below leave the flags intact for the jne
+ mov qword ptr [120+r11], rcx ; store result digit 15
+ mov dword ptr [8+rbx], 16 ; dword at C+8 = 16 (NOTE(review): presumably the used-digit count -- confirm)
+ jne L35 ; top digit is non-zero: nothing to trim
+ ALIGN 16
+L43:
+ mov edx, dword ptr [8+rbx] ; trim loop: edx = current count
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi ; count = count - 1
+ je L35 ; count reached zero: stop trimming
+ lea eax, dword ptr [-2+rdx] ; index of the highest digit under the new count
+ cmp qword ptr [r11+rax*8], 0
+ je L43 ; that digit is zero too: trim again
+L35:
+ mov r11d, dword ptr [8+rbx]
+ xor edx, edx
+ test r11d, r11d
+ cmovne edx, esi ; edx = XORed dword when count != 0, otherwise 0
+ mov dword ptr [rbx], edx ; write it to the dword at C+0
+ add rsp, 8+128 ; release the locals
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_8 ENDP
+
+
+; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+ ; Fully unrolled column-by-column multiply of two 16-word operands into a 32-word result: c[k] = sum of a[i]*b[j] over i+j == k, plus carries.
+ ALIGN 16
+s_mp_mul_comba_16 PROC
+ ; Touches the dword at offset 0, the dword at offset 8 and the qword pointer at offset 16 of the structs (presumably mp_int sign / used / digit pointer -- TODO confirm against the mp_int layout).
+ push rdi ; rdi and rsi are overwritten below, so the caller's values are saved here and restored before ret
+ push rsi
+ ; The three arguments are taken from rcx, rdx, r8 (A, B, C in prototype order) and moved to rdi, rsi, rdx.
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx ; rbx = C for the rest of the function
+ sub rsp, 136+128 ; 264 bytes of locals: private copies of the 16 words of A and the 16 words of B
+ mov rax, qword ptr [16+rdi] ; rax = pointer to A's words
+ mov r8, qword ptr [rax]
+ mov qword ptr [-120+128+rsp], r8 ; a[0]; a[i] is kept at [-120+128+8*i+rsp], up to a[15] at [128+rsp]
+ mov rbp, qword ptr [8+rax]
+ mov qword ptr [-112+128+rsp], rbp
+ mov r9, qword ptr [16+rax]
+ mov qword ptr [-104+128+rsp], r9
+ mov r12, qword ptr [24+rax]
+ mov qword ptr [-96+128+rsp], r12
+ mov rcx, qword ptr [32+rax]
+ mov qword ptr [-88+128+rsp], rcx
+ mov r10, qword ptr [40+rax]
+ mov qword ptr [-80+128+rsp], r10
+ mov rdx, qword ptr [48+rax]
+ mov qword ptr [-72+128+rsp], rdx
+ mov r11, qword ptr [56+rax]
+ mov qword ptr [-64+128+rsp], r11
+ mov r8, qword ptr [64+rax]
+ mov qword ptr [-56+128+rsp], r8
+ mov rbp, qword ptr [72+rax]
+ mov qword ptr [-48+128+rsp], rbp
+ mov r9, qword ptr [80+rax]
+ mov qword ptr [-40+128+rsp], r9
+ mov r12, qword ptr [88+rax]
+ mov qword ptr [-32+128+rsp], r12
+ mov rcx, qword ptr [96+rax]
+ mov qword ptr [-24+128+rsp], rcx
+ mov r10, qword ptr [104+rax]
+ mov qword ptr [-16+128+rsp], r10
+ mov rdx, qword ptr [112+rax]
+ mov qword ptr [-8+128+rsp], rdx
+ mov r11, qword ptr [120+rax]
+ mov qword ptr [128+rsp], r11 ; a[15]
+ mov r11, qword ptr [16+rsi] ; r11 = pointer to B's words
+ mov r8, qword ptr [r11]
+ mov qword ptr [8+128+rsp], r8 ; b[0]; b[j] is kept at [8+128+8*j+rsp], up to b[15] at [128+128+rsp]
+ mov rbp, qword ptr [8+r11]
+ mov qword ptr [16+128+rsp], rbp
+ mov r9, qword ptr [16+r11]
+ mov qword ptr [24+128+rsp], r9
+ mov r12, qword ptr [24+r11]
+ mov qword ptr [32+128+rsp], r12
+ mov rcx, qword ptr [32+r11]
+ mov qword ptr [40+128+rsp], rcx
+ mov r10, qword ptr [40+r11]
+ mov qword ptr [48+128+rsp], r10
+ mov rdx, qword ptr [48+r11]
+ mov qword ptr [56+128+rsp], rdx
+ mov rax, qword ptr [56+r11]
+ mov qword ptr [64+128+rsp], rax
+ mov r8, qword ptr [64+r11]
+ mov qword ptr [72+128+rsp], r8
+ mov rbp, qword ptr [72+r11]
+ mov qword ptr [80+128+rsp], rbp
+ mov r9, qword ptr [80+r11]
+ mov qword ptr [88+128+rsp], r9
+ mov r12, qword ptr [88+r11]
+ mov qword ptr [96+128+rsp], r12
+ mov rcx, qword ptr [96+r11]
+ mov qword ptr [104+128+rsp], rcx
+ mov r10, qword ptr [104+r11]
+ mov qword ptr [112+128+rsp], r10
+ mov rdx, qword ptr [112+r11]
+ xor r10d, r10d ; r10 = 0; it is the zero source used to reset the accumulator's top word for every column
+ mov r8, r10 ; three-word accumulator (low, middle, top) starts at zero
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [120+128+rsp], rdx
+ mov rax, qword ptr [120+r11]
+ mov qword ptr [128+128+rsp], rax ; b[15]
+ mov r11, qword ptr [16+rbx] ; r11 = pointer to C's words (output); all inputs were copied to the stack before the first store
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [8+128+rsp] ; rdx:rax = a[0]*b[0]
+ add r8, rax ; each product is added into the accumulator as: low += rax, middle += rdx + carry, top += carry
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8 ; c[0] = low word of column 0
+ mov r8, rbp ; accumulator shifts down one word: old middle becomes low, old top becomes middle...
+ mov rbp, r10 ; ...and the new top word is zero
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9 ; c[1] = low word of column 1
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8 ; c[2] = low word of column 2
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx ; c[3] = low word of column 3
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8 ; c[4] = low word of column 4
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx ; c[5] = low word of column 5
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8 ; c[6] = low word of column 6
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx ; c[7] = low word of column 7
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8 ; c[8] = low word of column 8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx ; c[9] = low word of column 9
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8 ; c[10] = low word of column 10
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx ; c[11] = low word of column 11
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8 ; c[12] = low word of column 12
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx ; c[13] = low word of column 13
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [112+r11], r8 ; c[14] = low word of column 14
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [120+r11], rcx ; c[15] = low word of column 15 (the longest column, 16 products); later columns start at a[k-15]
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [128+r11], r8 ; c[16] = low word of column 16
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [136+r11], rcx ; c[17] = low word of column 17
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [144+r11], r8 ; c[18] = low word of column 18
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [152+r11], rcx ; c[19] = low word of column 19
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [160+r11], r8 ; c[20] = low word of column 20
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [168+r11], rcx ; c[21] = low word of column 21
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [176+r11], r8 ; c[22] = low word of column 22
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [184+r11], rcx ; c[23] = low word of column 23
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [192+r11], r8 ; c[24] = low word of column 24
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [200+r11], rcx ; c[25] = low word of column 25
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [208+r11], r8 ; c[26] = low word of column 26
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [216+r11], rcx ; c[27] = low word of column 27
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [224+r11], r8 ; c[28] = low word of column 28
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [232+r11], rcx ; c[29] = low word of column 29
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [128+128+rsp] ; last product: a[15]*b[15]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0 ; carry out of the top word lands in r10, which is not read again
+ mov qword ptr [240+r11], r8 ; c[30] = low word of column 30
+ mov esi, dword ptr [rsi] ; esi = dword at [B]...
+ xor esi, dword ptr [rdi] ; ...XOR dword at [A] (presumably the two sign fields -- TODO confirm)
+ test rcx, rcx ; is the top result word zero? (flags are consumed by the jne below; the two mov stores do not change them)
+ mov qword ptr [248+r11], rcx ; c[31] = remaining accumulator word
+ mov dword ptr [8+rbx], 32 ; dword at [C+8] = 32 result words (presumably the used count -- TODO confirm)
+ jne L76 ; top word nonzero: nothing to trim
+ ALIGN 16
+L84: ; trim loop: the current top word is zero, so drop it
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi ; count = count - 1
+ je L76 ; count reached 0: the whole result is zero
+ lea eax, dword ptr [-2+rdx]
+ cmp qword ptr [r11+rax*8], 0 ; is the new top word (index old count - 2) zero as well?
+ je L84
+L76:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi ; nonzero count: use the XOR computed above; zero count: 0
+ mov dword ptr [rbx], r11d ; store into the dword at [C]
+ add rsp, 136+128 ; release the 264 bytes of locals
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_16 ENDP
+
+; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+
+ ALIGN 16
+s_mp_mul_comba_32 PROC ; a "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push rbp
+ mov rbp, rsp
+ push r13
+ mov r13, rdx
+; mov edx, 256
+ mov r8d, 256
+ push r12
+ mov r12, rsi
+ push rbx
+ mov rbx, rdi
+ sub rsp, 520+32 ; +32 for "home" storage
+; mov rsi, qword ptr [16+rdi]
+; lea rdi, qword ptr [-544+rbp]
+ mov rdx, qword ptr [16+rdi]
+ lea rcx, qword ptr [-544+rbp]
+ call memcpy
+; mov rsi, qword ptr [16+r12]
+; lea rdi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rdx, qword ptr [16+r12]
+ lea rcx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
+ mov r9, qword ptr [16+r13]
+ xor r8d, r8d
+ mov rsi, r8
+ mov rdi, r8
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [r9], rsi
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-280+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov r11, r10
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-288+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r11, 0
+ mov qword ptr [8+r9], rdi
+ mov rdi, r11
+ mov r11, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rcx, r11
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [16+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [24+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [32+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [40+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [48+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [56+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [64+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [72+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [80+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [88+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [96+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [104+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [112+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [120+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [128+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [136+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [144+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [152+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [160+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [168+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [176+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [184+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [192+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [200+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [208+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [216+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [224+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [232+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [240+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [248+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [256+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [264+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [272+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [280+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [288+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [296+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [304+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [312+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [320+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [328+r9], rcx
+ mov rdi, r11
+ mov r11, r10
+ mov r10, r8
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-40+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-48+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-56+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-64+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-72+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-80+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-88+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-96+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-104+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-112+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-120+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-128+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-136+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-144+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-152+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-160+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-168+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-176+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-184+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-192+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-200+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [336+r9], r11
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-40+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov rcx, r10
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-48+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-56+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-64+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-72+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-80+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-88+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-96+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-104+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-112+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-120+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-128+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-136+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-144+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-152+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-160+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-168+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-176+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-184+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov r11, rsi
+ mov r10, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-192+rbp]
+ add rdi, rax
+ adc r11, rdx
+ adc r10, 0
+ mov qword ptr [344+r9], rdi
+ mov rcx, r11
+ mov rdi, r10
+ mov r11, r8
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rsi, r11
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [352+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [360+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [368+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [376+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [384+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [392+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [400+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [408+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [416+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [424+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [432+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [440+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [448+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [456+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [464+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [472+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [480+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r11, rcx
+ mov r10, rdi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [488+r9], rsi
+ mov rcx, r10
+ mov rsi, r11
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rsi, rdx
+ adc r8, 0
+ mov qword ptr [496+r9], rcx
+ mov ecx, dword ptr [r12]
+ xor ecx, dword ptr [rbx]
+ test rsi, rsi
+ mov qword ptr [504+r9], rsi
+ mov dword ptr [8+r13], 64
+ jne L149
+ ALIGN 16
+L157:
+ mov edx, dword ptr [8+r13]
+ lea ebx, dword ptr [-1+rdx]
+ test ebx, ebx
+ mov dword ptr [8+r13], ebx
+ je L149
+ lea r12d, dword ptr [-2+rdx]
+ cmp qword ptr [r9+r12*8], 0
+ je L157
+L149:
+ mov r9d, dword ptr [8+r13]
+ xor edx, edx
+ test r9d, r9d
+ cmovne edx, ecx
+ mov dword ptr [r13], edx
+ add rsp, 520+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_32 ENDP
+
+
+; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+;
+; Fully unrolled 4-digit squaring. A arrives in rcx and B in rdx.
+; Four 64-bit digits a0..a3 are read through the pointer stored at [16+A];
+; the eight-digit square is written through the pointer stored at [16+B].
+; The dword at [8+B] is set to 8 and the dword at [B] to 0.
+; NOTE(review): offsets 0 / 8 / 16 are presumably mp_int.sign / .used / .dp
+; -- confirm against the mp_int definition.
+;
+; Result columns are produced from least to most significant. Each
+; off-diagonal product a[i]*a[j] (i < j) is added twice, each diagonal
+; product a[i]*a[i] once, into a three-register carry chain (add/adc/adc).
+; Finished columns are staged in eight qword stack slots and copied to B's
+; digit array afterwards. Finally the count at [8+B] is decremented past
+; any zero top digits; if it reaches 0 the dword at [B] is stored as 0.
+;
+; rdi, rsi, rbp and rbx are saved on entry and restored before returning.
+
+ ALIGN 16
+s_mp_sqr_comba_4 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx ; rdi = A
+ mov rsi, rdx ; rsi = B
+
+ push rbp
+ push rbx
+ sub rsp, 80 ; scratch for the eight result digits
+ mov r11, rsi ; r11 = B for the rest of the routine
+ xor esi, esi ; rsi = 0, used to clear accumulator registers
+ mov r10, rsi
+ mov rbp, rsi
+ mov r8, rsi
+ mov rbx, rsi
+ mov rcx, qword ptr [16+rdi] ; rcx = pointer to A's digits
+ mov rdi, rsi
+ mov rax, qword ptr [rcx]
+ mul rax ; a0*a0
+ add r10, rax
+ adc rbx, rdx
+ adc rdi, 0
+ mov qword ptr [-72+80+rsp], r10 ; column 0 -> scratch[0] (also kept in r10)
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx] ; a0*a1, accumulated twice below
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ mov qword ptr [-64+80+rsp], rbx ; column 1 -> scratch[1]
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx] ; a0*a2, accumulated twice below
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ mov rbx, rbp
+ mov rbp, r8
+ mov rax, qword ptr [8+rcx]
+ mul rax ; a1*a1, accumulated once
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-56+80+rsp], rdi ; column 2 -> scratch[2]
+ mov r9, rbp
+ mov r8, rbx
+ mov rdi, rsi
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx] ; a0*a3, accumulated twice below
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ mov rbx, r9
+ mov rbp, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx] ; a1*a2, accumulated twice below
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-48+80+rsp], r8 ; column 3 -> scratch[3]
+ mov r9, rbp
+ mov rdi, rbx
+ mov r8, rsi
+ mov dword ptr [8+r11], 8 ; B's count field = 8 digits
+ mov dword ptr [r11], 0 ; B's first dword field = 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx] ; a1*a3, accumulated twice below
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbx, r9
+ mov rbp, r8
+ mov rax, qword ptr [16+rcx]
+ mul rax ; a2*a2, accumulated once
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov rax, rbp
+ mov qword ptr [-40+80+rsp], rdi ; column 4 -> scratch[4]
+ mov rbp, rbx
+ mov rdi, rax
+ mov rbx, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx] ; a2*a3, accumulated twice below
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ mov qword ptr [-32+80+rsp], rbp ; column 5 -> scratch[5]
+ mov r9, rbx
+ mov rax, qword ptr [24+rcx]
+ mul rax ; a3*a3, accumulated once
+ add rdi, rax
+ adc r9, rdx
+ adc rsi, 0 ; rsi is not read again before it is reloaded below
+ mov rdx, qword ptr [16+r11] ; rdx = pointer to B's digits
+ mov qword ptr [-24+80+rsp], rdi ; column 6 -> scratch[6]
+ mov qword ptr [-16+80+rsp], r9 ; column 7 -> scratch[7]
+ mov qword ptr [rdx], r10 ; copy the eight staged digits into B
+ mov r8, qword ptr [-64+80+rsp]
+ mov qword ptr [8+rdx], r8
+ mov rbp, qword ptr [-56+80+rsp]
+ mov qword ptr [16+rdx], rbp
+ mov rdi, qword ptr [-48+80+rsp]
+ mov qword ptr [24+rdx], rdi
+ mov rsi, qword ptr [-40+80+rsp]
+ mov qword ptr [32+rdx], rsi
+ mov rbx, qword ptr [-32+80+rsp]
+ mov qword ptr [40+rdx], rbx
+ mov rcx, qword ptr [-24+80+rsp]
+ mov qword ptr [48+rdx], rcx
+ mov rax, qword ptr [-16+80+rsp]
+ mov qword ptr [56+rdx], rax
+ mov edx, dword ptr [8+r11] ; edx = digit count
+ test edx, edx
+ je L168
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r11]
+ mov r10d, ecx
+ cmp qword ptr [rsi+r10*8], 0 ; top digit already nonzero?
+ jne L166
+ mov edx, ecx
+ ALIGN 16
+L167: ; walk down over zero top digits
+ test edx, edx
+ mov ecx, edx
+ je L171 ; every digit was zero
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L167
+ mov dword ptr [8+r11], ecx ; store trimmed count (index of top nonzero digit + 1)
+ mov edx, ecx
+L166:
+ test edx, edx
+ je L168
+ mov eax, dword ptr [r11] ; nonzero count: keep the existing value at [B]
+ jmp L169
+
+L171:
+ mov dword ptr [8+r11], edx ; count = 0
+L168:
+ xor eax, eax ; zero count: value stored at [B] is 0
+L169:
+ add rsp, 80
+ pop rbx
+ pop rbp
+ mov dword ptr [r11], eax
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_4 ENDP
+
+
+; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+;
+; Fully unrolled 8-digit squaring. A arrives in rcx and B in rdx.
+; Eight 64-bit digits a0..a7 are read through the pointer stored at [16+A];
+; the sixteen-digit square is written through the pointer stored at [16+B].
+; The dword at [8+B] is set to 16 and the dword at [B] to 0.
+; NOTE(review): offsets 0 / 8 / 16 are presumably mp_int.sign / .used / .dp
+; -- confirm against the mp_int definition.
+;
+; Result columns are produced from least to most significant with a
+; three-register carry chain. Columns 0-4 and 10-13 add each off-diagonal
+; product a[i]*a[j] twice as it is computed; columns 5-9 first sum the
+; off-diagonal products into a separate three-register total and then add
+; that total twice. Diagonal products a[i]*a[i] are added once.
+; Finished columns are staged in sixteen qword stack slots and copied to
+; B's digit array afterwards. Finally the count at [8+B] is decremented
+; past any zero top digits; if it reaches 0 the dword at [B] is stored as 0.
+;
+; rdi, rsi, r12, r13, r14, rbp and rbx are saved on entry and restored
+; before returning.
+
+ ALIGN 16
+s_mp_sqr_comba_8 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx ; rdi = A
+ mov rsi, rdx ; rsi = B
+ mov rdx, r8 ; rdx is overwritten by the first mul before being read
+ mov rcx, r9 ; rcx is reloaded from [16+rdi] before being read
+
+ push r14
+ xor r9d, r9d ; r9 = 0, used to clear accumulator registers
+ mov r14, r9
+ mov r10, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbp
+ mov rbp, rsi ; rbp = B for the rest of the routine
+ mov rsi, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 8+128 ; scratch for the sixteen result digits
+ mov rcx, qword ptr [16+rdi] ; rcx = pointer to A's digits
+ mov rax, qword ptr [rcx]
+ mul rax ; a0*a0
+ add r14, rax
+ adc rbx, rdx
+ adc r12, 0
+ mov qword ptr [-120+128+rsp], r14 ; column 0 -> scratch[0] (also kept in r14)
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx] ; a0*a1, accumulated twice below
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ mov qword ptr [-112+128+rsp], rbx ; column 1 -> scratch[1]
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx] ; a0*a2, accumulated twice below
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ mov rbx, r10
+ mov r10, r13
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul rax ; a1*a1, accumulated once
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-104+128+rsp], r12 ; column 2 -> scratch[2]
+ mov rdi, r10
+ mov r11, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx] ; a0*a3, accumulated twice below
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov r10, rsi
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx] ; a1*a2, accumulated twice below
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov rsi, r9
+ mov qword ptr [-96+128+rsp], r11 ; column 3 -> scratch[3]
+ mov r8, r10
+ mov r12, rbx
+ mov r11, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx] ; a0*a4, accumulated twice below
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx] ; a1*a3, accumulated twice below
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rbx, r8
+ mov r10, r13
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax ; a2*a2, accumulated once
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-88+128+rsp], r12 ; column 4 -> scratch[4]
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx] ; a0*a5 starts the cross-product total rsi:rdi:r8
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx] ; a1*a4
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx] ; a2*a3
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8 ; add the cross-product total twice
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-80+128+rsp], rbx ; column 5 -> scratch[5]
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx] ; a0*a6 starts the cross-product total
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx] ; a1*a5
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx] ; a2*a4
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8 ; add the cross-product total twice
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax ; a3*a3, accumulated once
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-72+128+rsp], r10 ; column 6 -> scratch[6]
+ mov r10, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx] ; a0*a7 starts the cross-product total
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx] ; a1*a6
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx] ; a2*a5
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx] ; a3*a4
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ add rbx, r8 ; add the cross-product total twice
+ adc r10, rdi
+ adc rax, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rax, rsi
+ mov qword ptr [-64+128+rsp], rbx ; column 7 -> scratch[7]
+ mov r11, rax
+ mov rbx, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx] ; a1*a7 starts the cross-product total
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx] ; a2*a6
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx] ; a3*a5
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8 ; add the cross-product total twice
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rsi, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov r13, r12
+ mov r11, rsi
+ mov rax, qword ptr [32+rcx]
+ mul rax ; a4*a4, accumulated once
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-56+128+rsp], r10 ; column 8 -> scratch[8]
+ mov r10, r9
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx] ; a2*a7 starts the cross-product total r13:rdi:r8
+ mov r8, rax
+ mov rdi, rdx
+ xor r13, r13
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx] ; a3*a6
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx] ; a4*a5
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov r12, rdi
+ mov rax, r13
+ add rbx, r8 ; add the cross-product total twice
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov qword ptr [-48+128+rsp], rbx ; column 9 -> scratch[9]
+ mov r12, r11
+ mov rsi, r10
+ mov rbx, r9
+ mov r11, r9
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx] ; a3*a7, accumulated twice below
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov r13, rbx
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx] ; a4*a6, accumulated twice below
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov r13, r9
+ mov rax, qword ptr [40+rcx]
+ mul rax ; a5*a5, accumulated once
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-40+128+rsp], r12 ; column 10 -> scratch[10]
+ mov r8, rbx
+ mov rdi, r10
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx] ; a4*a7, accumulated twice below
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r10, r8
+ mov rbx, r11
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx] ; a5*a6, accumulated twice below
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-32+128+rsp], rdi ; column 11 -> scratch[11]
+ mov rsi, rbx
+ mov r12, r10
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx] ; a5*a7, accumulated twice below
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov rax, qword ptr [48+rcx]
+ mul rax ; a6*a6, accumulated once
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-24+128+rsp], r12 ; column 12 -> scratch[12]
+ mov rdi, r10
+ mov rsi, rbx
+ mov r10, r9
+ mov dword ptr [8+rbp], 16 ; B's count field = 16 digits
+ mov dword ptr [rbp], 0 ; B's first dword field = 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx] ; a6*a7, accumulated twice below
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov qword ptr [-16+128+rsp], rdi ; column 13 -> scratch[13]
+ mov r8, r10
+ mov rax, qword ptr [56+rcx]
+ mul rax ; a7*a7, accumulated once
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0 ; r9 is not read again before it is reloaded below
+ mov rax, qword ptr [16+rbp] ; rax = pointer to B's digits
+ mov qword ptr [-8+128+rsp], rsi ; column 14 -> scratch[14]
+ mov qword ptr [128+rsp], r8 ; column 15 -> scratch[15]
+ mov qword ptr [rax], r14 ; copy the sixteen staged digits into B
+ mov rbx, qword ptr [-112+128+rsp]
+ mov qword ptr [8+rax], rbx
+ mov rcx, qword ptr [-104+128+rsp]
+ mov qword ptr [16+rax], rcx
+ mov rdx, qword ptr [-96+128+rsp]
+ mov qword ptr [24+rax], rdx
+ mov r14, qword ptr [-88+128+rsp]
+ mov qword ptr [32+rax], r14
+ mov r13, qword ptr [-80+128+rsp]
+ mov qword ptr [40+rax], r13
+ mov r12, qword ptr [-72+128+rsp]
+ mov qword ptr [48+rax], r12
+ mov r11, qword ptr [-64+128+rsp]
+ mov qword ptr [56+rax], r11
+ mov r10, qword ptr [-56+128+rsp]
+ mov qword ptr [64+rax], r10
+ mov r9, qword ptr [-48+128+rsp]
+ mov qword ptr [72+rax], r9
+ mov r8, qword ptr [-40+128+rsp]
+ mov qword ptr [80+rax], r8
+ mov rdi, qword ptr [-32+128+rsp]
+ mov qword ptr [88+rax], rdi
+ mov rsi, qword ptr [-24+128+rsp]
+ mov qword ptr [96+rax], rsi
+ mov rbx, qword ptr [-16+128+rsp]
+ mov qword ptr [104+rax], rbx
+ mov rcx, qword ptr [-8+128+rsp]
+ mov qword ptr [112+rax], rcx
+ mov rdx, qword ptr [128+rsp]
+ mov qword ptr [120+rax], rdx
+ mov edx, dword ptr [8+rbp] ; edx = digit count
+ test edx, edx
+ je L192
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+rbp]
+ mov r14d, ecx
+ cmp qword ptr [rsi+r14*8], 0 ; top digit already nonzero?
+ jne L190
+ mov edx, ecx
+ ALIGN 16
+L191: ; walk down over zero top digits
+ test edx, edx
+ mov ecx, edx
+ je L195 ; every digit was zero
+ dec edx
+ mov r9d, edx
+ cmp qword ptr [rsi+r9*8], 0
+ je L191
+ mov dword ptr [8+rbp], ecx ; store trimmed count (index of top nonzero digit + 1)
+ mov edx, ecx
+L190:
+ test edx, edx
+ je L192
+ mov eax, dword ptr [rbp] ; nonzero count: keep the existing value at [B]
+ jmp L193
+
+L195:
+ mov dword ptr [8+rbp], edx ; count = 0
+L192:
+ xor eax, eax ; zero count: value stored at [B] is 0
+L193:
+ mov dword ptr [rbp], eax
+ add rsp, 8+128
+ pop rbx
+ pop rbp
+ pop r12
+ pop r13
+ pop r14
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_8 ENDP
+
+
+; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+
+ ALIGN 16
+s_mp_sqr_comba_16 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ xor r9d, r9d
+ mov r8, r9
+ mov r11, r9
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 256+32 ; +32 for "home" storage
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-288+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-280+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r10, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-272+rbp], rsi
+ mov rdi, r10
+ mov rsi, r9
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov rbx, r11
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov r11, r9
+ mov qword ptr [-264+rbp], r10
+ mov r8, rbx
+ mov r13, r12
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r10, r12
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-256+rbp], r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-248+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-240+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-232+rbp], rbx
+ mov rbx, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-224+rbp], r10
+ mov rsi, r12
+ mov r10, rbx
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-216+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-208+rbp], r10
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-200+rbp], r11
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rdx, rdi
+ mov r11, rsi
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ mov rbx, rdx
+ mov r13, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-192+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-184+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r10
+ mov r11, r9
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r10, r13
+ mov rbx, r9
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r9
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r10
+ mov r10, r9
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r12, rdi
+ mov rax, rsi
+ mov rsi, r9
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov r12, r9
+ mov qword ptr [-88+rbp], rbx
+ mov r13, r11
+ mov r11, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ mov rdi, r12
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ mov rbx, r11
+ mov r10, rdi
+ mov r11, r9
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-80+rbp], r13
+ mov r8, r10
+ mov r10, rbx
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ mov r12, r8
+ mov rbx, rsi
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov qword ptr [-72+rbp], r10
+ mov r13, rbx
+ mov rbx, r12
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ mov r12, r11
+ mov r10, r13
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add rbx, rax
+ adc r10, rdx
+ adc r12, 0
+ mov qword ptr [-64+rbp], rbx
+ mov rdi, r10
+ mov rbx, r9
+ mov rsi, r12
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, rbx
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
+ mov dword ptr [8+r14], 32
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L232
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r9d, ecx
+ cmp qword ptr [rsi+r9*8], 0
+ jne L230
+ mov edx, ecx
+ ALIGN 16
+L231:
+ test edx, edx
+ mov ecx, edx
+ je L235
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L231
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L230:
+ test edx, edx
+ je L232
+ mov eax, dword ptr [r14]
+ jmp L233
+
+L235:
+ mov dword ptr [8+r14], edx
+L232:
+ xor eax, eax
+L233:
+ mov dword ptr [r14], eax
+ add rsp, 256+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_16 ENDP
+
+
+; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+ ALIGN 16
+s_mp_sqr_comba_32 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ xor r10d, r10d
+ mov r8, r10
+ mov r11, r10
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r10
+ push r13
+ mov r13, r10
+ push r12
+ mov r12, r10
+ push rbx
+ mov rbx, r10
+ sub rsp, 512+32 ; +32 for "home" storage
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-544+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-536+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r9, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-528+rbp], rsi
+ mov rdi, r9
+ mov rsi, r10
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov r13, r11
+ mov rdi, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-520+rbp], r9
+ mov r8, r13
+ mov r13, r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r9, r12
+ mov r8, r10
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-512+rbp], r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ mov qword ptr [-504+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-496+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-488+rbp], rbx
+ mov rbx, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-480+rbp], r9
+ mov rsi, r12
+ mov r9, rbx
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-472+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-464+rbp], r9
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-456+rbp], r11
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, rdi
+ mov r11, rsi
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ mov rbx, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-448+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-440+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r13
+ mov rbx, rdi
+ mov r13, rsi
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rsi, r11
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-432+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-424+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [128+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-416+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [136+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-408+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [144+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-400+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [152+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-392+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [160+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-384+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [168+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-376+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [176+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-368+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [184+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov rdi, rdx
+ mov qword ptr [-360+rbp], r12
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [192+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rbx, r8
+ mov rax, rdi
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov r8, rbx
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r11
+ mov qword ptr [-352+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [200+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-344+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [208+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-336+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [216+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-328+rbp], r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [224+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-320+rbp], r9
+ mov rbx, r13
+ mov r9, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [232+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ mov qword ptr [-312+rbp], r12
+ mov r13, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [240+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov r11, r8
+ mov rdx, rdi
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ mov r9, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov r8, r11
+ mov r13, rdx
+ mov rdi, r9
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-304+rbp], rbx
+ mov rbx, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ mov qword ptr [-296+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov r11, r8
+ mov rax, rdi
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ mov r9, rax
+ mov r8, r11
+ mov rax, qword ptr [128+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r9
+ mov qword ptr [-288+rbp], rbx
+ mov r9, r13
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-280+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [136+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-272+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-264+rbp], r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [144+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-256+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ mov qword ptr [-248+rbp], r12
+ mov r13, r11
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov rdx, rsi
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ mov r11, r12
+ mov r8, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov rdi, rbx
+ mov r13, rdx
+ mov rsi, r11
+ mov rax, qword ptr [152+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-240+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-232+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [160+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-224+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-216+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [168+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-208+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-200+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [176+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-192+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-184+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [184+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov rax, rsi
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov rdi, rbx
+ mov rbx, r10
+ mov rax, qword ptr [192+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ mov rax, rdx
+ mov rdi, r13
+ mov rsi, r12
+ mov r11, rax
+ mov r12, r10
+ mov rax, qword ptr [200+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [208+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r10
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r10
+ mov rax, qword ptr [216+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov r12, r10
+ mov rax, qword ptr [224+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r9
+ mov r9, r10
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov rax, rsi
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ mov qword ptr [-88+rbp], rbx
+ mov rsi, r11
+ mov r8, r9
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [248+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ mov r11, r12
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r13, r8
+ mov rbx, r11
+ mov rax, qword ptr [232+rcx]
+ mul rax
+ add rsi, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-80+rbp], rsi
+ mov r12, rbx
+ mov rdi, r13
+ mov r13, r10
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r9, r12
+ mov r12, r13
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [240+rcx]
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ mov qword ptr [-72+rbp], rdi
+ mov r11, r9
+ mov rbx, r12
+ mov r9, r10
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [248+rcx]
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov r13, rbx
+ mov rbx, r9
+ mov r9, r10
+ mov rax, qword ptr [240+rcx]
+ mul rax
+ add r11, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-64+rbp], r11
+ mov rdi, r13
+ mov rsi, rbx
+ mov rax, qword ptr [240+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, r9
+ mov rax, qword ptr [248+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r10, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
+ mov dword ptr [8+r14], 64
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-544+rbp]
+; mov edx, 512
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-544+rbp]
+ mov r8d, 512
+ call memcpy
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L304
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r10d, ecx
+ cmp qword ptr [rsi+r10*8], 0
+ jne L302
+ mov edx, ecx
+ ALIGN 16
+L303:
+ test edx, edx
+ mov ecx, edx
+ je L307
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L303
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L302:
+ test edx, edx
+ je L304
+ mov eax, dword ptr [r14]
+ jmp L305
+
+L307:
+ mov dword ptr [8+r14], edx
+L304:
+ xor eax, eax
+L305:
+ mov dword ptr [r14], eax
+ add rsp, 512+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_32 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
new file mode 100644
index 0000000000..a5181df332
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
@@ -0,0 +1,16097 @@
+//* TomsFastMath, a fast ISO C bignum library.
+/ *
+/ * This project is meant to fill in where LibTomMath
+/ * falls short. That is speed ;-)
+/ *
+/ * This project is public domain and free for all purposes.
+/ *
+/ * Tom St Denis, tomstdenis@iahu.ca
+/ */
+
+//*
+/ * The source file from which this assembly was derived
+/ * comes from TFM v0.03, which has the above license.
+/ * This source was compiled with an unnamed compiler at
+/ * the highest optimization level. Afterwards, the
+/ * trailing .section was removed because it causes errors
+/ * in the Studio 10 compiler on AMD 64.
+/ */
+
+ .file "mp_comba.c"
+ .text
+ .align 16
+/
+/ s_mp_mul_comba_4 -- fully unrolled 4-word by 4-word multiply.
+/
+/ Arguments (as used by the code below):
+/   %rdi  first operand;  its four 64-bit words are read through the
+/         pointer stored at 16(%rdi)
+/   %rsi  second operand; its four 64-bit words are read through the
+/         pointer stored at 16(%rsi)
+/   %rdx  result; eight 64-bit words are written through the pointer
+/         stored at 16(%rdx)
+/
+/ In the comments below a[i] / b[j] denote the i-th / j-th word copied
+/ from the first / second operand.  Result word k ("column k") is the
+/ low word of the running sum of every a[i]*b[j] with i+j == k; the
+/ upper words of that sum carry into the following columns.
+/
+/ After the words are stored, the 32-bit field at 8(result) is set to 8
+/ and then decremented while the topmost counted word is zero.  The
+/ 32-bit field at 0(result) receives the XOR of the same field of the
+/ two operands, or 0 if the count reached 0.
+/
+/ NOTE(review): offsets 0 / 8 / 16 presumably are the sign / used /
+/ digit-pointer members of mp_int -- confirm against the C headers.
+/
+/ Both operands are copied to stack slots -64(%rsp)..-8(%rsp) before any
+/ result word is stored.  %rsp is not adjusted for those slots, so the
+/ code appears to rely on the System V AMD64 red zone.
+/
+.globl s_mp_mul_comba_4
+ .type s_mp_mul_comba_4, @function
+s_mp_mul_comba_4:
+.LFB2:
+ pushq %r12
+.LCFI0:
+ pushq %rbp
+.LCFI1:
+ pushq %rbx
+.LCFI2:
+/ %rbx = result argument (frees %rdx for mulq).
+/ a[0..3] -> -64..-40(%rsp), b[0..3] -> -32..-8(%rsp).
+/ %r10 is zeroed and kept as a constant 0 for resetting accumulators.
+/ %r11 = pointer to the result words.
+ movq 16(%rdi), %r9
+ movq %rdx, %rbx
+ movq 16(%rsi), %rdx
+ movq (%r9), %rax
+ movq %rax, -64(%rsp)
+ movq 8(%r9), %r8
+ movq %r8, -56(%rsp)
+ movq 16(%r9), %rbp
+ movq %rbp, -48(%rsp)
+ movq 24(%r9), %r12
+ movq %r12, -40(%rsp)
+ movq (%rdx), %rcx
+ movq %rcx, -32(%rsp)
+ movq 8(%rdx), %r10
+ movq %r10, -24(%rsp)
+ movq 16(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -16(%rsp)
+ movq 16(%rbx), %r11
+ movq 24(%rdx), %rax
+ movq %rax, -8(%rsp)
+/ Column 0 -> (%r11): a[0]*b[0]
+/ Every product group has the same shape: mulq leaves the 128-bit
+/ product in %rdx:%rax, which is added into a three-register accumulator
+/ (low, high, carry) with addq / adcq / adcq $0.
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/ Column 1 -> 8(%r11): a[0]*b[1] + a[1]*b[0]
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/ Column 2 -> 16(%r11): a[0]*b[2] + a[1]*b[1] + a[2]*b[0]
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -56(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 3 -> 24(%r11): a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 4 -> 32(%r11): a[1]*b[3] + a[2]*b[2] + a[3]*b[1]
+/APP
+ movq -56(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 5 -> 40(%r11): a[2]*b[3] + a[3]*b[2]
+/APP
+ movq -48(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -40(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/ Column 6 -> 48(%r11): a[3]*b[3]; the remaining high word (%rcx) is
+/ stored to 56(%r11).  The carry added into %r10 here is not read again.
+/APP
+ movq -40(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 48(%r11)
+/ %esi = 0(second operand) XOR 0(first operand), read before the
+/ result's own header fields are written.
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+/ The flags set by testq survive the two stores (mov does not modify
+/ flags), so jne skips the trimming loop when the top word is non-zero.
+ testq %rcx, %rcx
+ movq %rcx, 56(%r11)
+ movl $8, 8(%rbx)
+ jne .L9
+ .align 16
+/ Trimming loop: decrement the count at 8(%rbx); stop when it reaches 0
+/ or when the word that is now topmost (old count - 2) is non-zero.
+.L18:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L9
+ leal -2(%rdx), %r10d
+ cmpq $0, (%r11,%r10,8)
+ je .L18
+/ Store the XORed field to 0(%rbx), or 0 if the count ended up 0.
+.L9:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE2:
+ .size s_mp_mul_comba_4, .-s_mp_mul_comba_4
+ .align 16
+/
+/ s_mp_mul_comba_8 -- fully unrolled 8-word by 8-word multiply.
+/
+/ Arguments (as used by the code below):
+/   %rdi  first operand;  its eight 64-bit words are read through the
+/         pointer stored at 16(%rdi)
+/   %rsi  second operand; its eight 64-bit words are read through the
+/         pointer stored at 16(%rsi)
+/   %rdx  result; sixteen 64-bit words are written through the pointer
+/         stored at 16(%rdx)
+/
+/ In the comments below a[i] / b[j] denote the i-th / j-th word copied
+/ from the first / second operand.  Result word k ("column k") is the
+/ low word of the running sum of every a[i]*b[j] with i+j == k; the
+/ upper words of that sum carry into the following columns.
+/
+/ After the words are stored, the 32-bit field at 8(result) is set to 16
+/ and then decremented while the topmost counted word is zero.  The
+/ 32-bit field at 0(result) receives the XOR of the same field of the
+/ two operands, or 0 if the count reached 0.
+/
+/ NOTE(review): offsets 0 / 8 / 16 presumably are the sign / used /
+/ digit-pointer members of mp_int -- confirm against the C headers.
+/
+/ Stack layout after "subq $8, %rsp": a[0..7] at -120..-64(%rsp),
+/ b[0..7] at -56..0(%rsp).  Only the slot at (%rsp) is reserved; the
+/ others lie below %rsp, so the code appears to rely on the System V
+/ AMD64 red zone.  Both operands are copied before any result word is
+/ stored.
+/
+.globl s_mp_mul_comba_8
+ .type s_mp_mul_comba_8, @function
+s_mp_mul_comba_8:
+.LFB3:
+ pushq %r12
+.LCFI3:
+ pushq %rbp
+.LCFI4:
+ pushq %rbx
+.LCFI5:
+/ %rbx = result argument (frees %rdx for mulq).
+ movq %rdx, %rbx
+ subq $8, %rsp
+.LCFI6:
+/ Copy a[0..7] to the stack.
+ movq 16(%rdi), %rdx
+ movq (%rdx), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rdx), %r11
+ movq %r11, -72(%rsp)
+ movq 56(%rdx), %rax
+/ Copy b[0..7] to the stack; %r10 is zeroed and kept as a constant 0 for
+/ resetting accumulators; %r11 = pointer to the result words.
+ movq 16(%rsi), %rdx
+ movq %rax, -64(%rsp)
+ movq (%rdx), %r8
+ movq %r8, -56(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -48(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -40(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -32(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -24(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -16(%rsp)
+ movq 48(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -8(%rsp)
+ movq 16(%rbx), %r11
+ movq 56(%rdx), %rax
+ movq %rax, (%rsp)
+/ Column 0 -> (%r11): a[0]*b[0]
+/ Every product group has the same shape: mulq leaves the 128-bit
+/ product in %rdx:%rax, which is added into a three-register accumulator
+/ (low, high, carry) with addq / adcq / adcq $0.
+/APP
+ movq -120(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/ Column 1 -> 8(%r11): a[0]*b[1] + a[1]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/ Column 2 -> 16(%r11): a[0]*b[2] + a[1]*b[1] + a[2]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 3 -> 24(%r11): a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 4 -> 32(%r11): a[0]*b[4] + a[1]*b[3] + ... + a[4]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 5 -> 40(%r11): a[0]*b[5] + a[1]*b[4] + ... + a[5]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 6 -> 48(%r11): a[0]*b[6] + a[1]*b[5] + ... + a[6]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 7 -> 56(%r11): a[0]*b[7] + a[1]*b[6] + ... + a[7]*b[0]
+/APP
+ movq -120(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 8 -> 64(%r11): a[1]*b[7] + a[2]*b[6] + ... + a[7]*b[1]
+/APP
+ movq -112(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 9 -> 72(%r11): a[2]*b[7] + a[3]*b[6] + ... + a[7]*b[2]
+/APP
+ movq -104(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 10 -> 80(%r11): a[3]*b[7] + a[4]*b[6] + ... + a[7]*b[3]
+/APP
+ movq -96(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 11 -> 88(%r11): a[4]*b[7] + a[5]*b[6] + a[6]*b[5] + a[7]*b[4]
+/APP
+ movq -88(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/ Column 12 -> 96(%r11): a[5]*b[7] + a[6]*b[6] + a[7]*b[5]
+/APP
+ movq -80(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/ Column 13 -> 104(%r11): a[6]*b[7] + a[7]*b[6]
+/APP
+ movq -72(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 104(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/ Column 14 -> 112(%r11): a[7]*b[7]; the remaining high word (%rcx) is
+/ stored to 120(%r11).  The carry added into %r10 here is not read again.
+/APP
+ movq -64(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 112(%r11)
+/ %esi = 0(second operand) XOR 0(first operand), read before the
+/ result's own header fields are written.
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+/ The flags set by testq survive the two stores (mov does not modify
+/ flags), so jne skips the trimming loop when the top word is non-zero.
+ testq %rcx, %rcx
+ movq %rcx, 120(%r11)
+ movl $16, 8(%rbx)
+ jne .L35
+ .align 16
+/ Trimming loop: decrement the count at 8(%rbx); stop when it reaches 0
+/ or when the word that is now topmost (old count - 2) is non-zero.
+.L43:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L35
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L43
+/ Store the XORed field to 0(%rbx), or 0 if the count ended up 0, then
+/ release the 8-byte stack slot and restore the saved registers.
+.L35:
+ movl 8(%rbx), %r11d
+ xorl %edx, %edx
+ testl %r11d, %r11d
+ cmovne %esi, %edx
+ movl %edx, (%rbx)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE3:
+ .size s_mp_mul_comba_8, .-s_mp_mul_comba_8
+ .align 16
+.globl s_mp_mul_comba_16
+ .type s_mp_mul_comba_16, @function
+s_mp_mul_comba_16:
+.LFB4:
+ pushq %r12
+.LCFI7:
+ pushq %rbp
+.LCFI8:
+ pushq %rbx
+.LCFI9:
+ movq %rdx, %rbx
+ subq $136, %rsp
+.LCFI10:
+ movq 16(%rdi), %rax
+ movq (%rax), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rax), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rax), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rax), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rax), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rax), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rax), %rdx
+ movq %rdx, -72(%rsp)
+ movq 56(%rax), %r11
+ movq %r11, -64(%rsp)
+ movq 64(%rax), %r8
+ movq %r8, -56(%rsp)
+ movq 72(%rax), %rbp
+ movq %rbp, -48(%rsp)
+ movq 80(%rax), %r9
+ movq %r9, -40(%rsp)
+ movq 88(%rax), %r12
+ movq %r12, -32(%rsp)
+ movq 96(%rax), %rcx
+ movq %rcx, -24(%rsp)
+ movq 104(%rax), %r10
+ movq %r10, -16(%rsp)
+ movq 112(%rax), %rdx
+ movq %rdx, -8(%rsp)
+ movq 120(%rax), %r11
+ movq %r11, (%rsp)
+ movq 16(%rsi), %r11
+ movq (%r11), %r8
+ movq %r8, 8(%rsp)
+ movq 8(%r11), %rbp
+ movq %rbp, 16(%rsp)
+ movq 16(%r11), %r9
+ movq %r9, 24(%rsp)
+ movq 24(%r11), %r12
+ movq %r12, 32(%rsp)
+ movq 32(%r11), %rcx
+ movq %rcx, 40(%rsp)
+ movq 40(%r11), %r10
+ movq %r10, 48(%rsp)
+ movq 48(%r11), %rdx
+ movq %rdx, 56(%rsp)
+ movq 56(%r11), %rax
+ movq %rax, 64(%rsp)
+ movq 64(%r11), %r8
+ movq %r8, 72(%rsp)
+ movq 72(%r11), %rbp
+ movq %rbp, 80(%rsp)
+ movq 80(%r11), %r9
+ movq %r9, 88(%rsp)
+ movq 88(%r11), %r12
+ movq %r12, 96(%rsp)
+ movq 96(%r11), %rcx
+ movq %rcx, 104(%rsp)
+ movq 104(%r11), %r10
+ movq %r10, 112(%rsp)
+ movq 112(%r11), %rdx
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %rdx, 120(%rsp)
+ movq 120(%r11), %rax
+ movq %rax, 128(%rsp)
+ movq 16(%rbx), %r11
+/APP
+ movq -120(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -120(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -120(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -32(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -24(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -16(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 104(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -8(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 112(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 120(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 128(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -104(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 136(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -96(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 144(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -88(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 152(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -80(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 160(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -72(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 168(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -64(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 176(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -56(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 184(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -48(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 192(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -40(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 200(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -32(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 208(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -24(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 216(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -16(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 224(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -8(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq (%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 232(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq (%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 240(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 248(%r11)
+ movl $32, 8(%rbx)
+ jne .L76
+ .align 16
+.L84:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L76
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L84
+.L76:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ addq $136, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE4:
+ .size s_mp_mul_comba_16, .-s_mp_mul_comba_16
+ .align 16
+.globl s_mp_mul_comba_32
+ .type s_mp_mul_comba_32, @function
+s_mp_mul_comba_32:
+.LFB5:
+ pushq %rbp
+.LCFI11:
+ movq %rsp, %rbp
+.LCFI12:
+ pushq %r13
+.LCFI13:
+ movq %rdx, %r13
+ movl $256, %edx
+ pushq %r12
+.LCFI14:
+ movq %rsi, %r12
+ pushq %rbx
+.LCFI15:
+ movq %rdi, %rbx
+ subq $520, %rsp
+.LCFI16:
+ movq 16(%rdi), %rsi
+ leaq -544(%rbp), %rdi
+ call memcpy@PLT
+ movq 16(%r12), %rsi
+ leaq -288(%rbp), %rdi
+ movl $256, %edx
+ call memcpy@PLT
+ movq 16(%r13), %r9
+ xorl %r8d, %r8d
+ movq %r8, %rsi
+ movq %r8, %rdi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, (%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %r11
+/APP
+ movq -536(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, 8(%r9)
+ movq %r11, %rdi
+ movq %r8, %r11
+/APP
+ movq -544(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rcx
+/APP
+ movq -536(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -528(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 16(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -520(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 24(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -512(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 32(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -504(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 40(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -496(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 48(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -488(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 56(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -480(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 64(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -472(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 72(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -464(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 80(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -456(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 88(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -448(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 96(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 104(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -432(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 112(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -424(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 120(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -416(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 128(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -408(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 136(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -400(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 144(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -392(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 152(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -384(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 160(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -376(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 168(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -368(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 176(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -360(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 184(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -352(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 192(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -344(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 200(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -336(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 208(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -328(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 216(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -320(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 224(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -312(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 232(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -304(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 240(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 248(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -536(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 256(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -528(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 264(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -520(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 272(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -512(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 280(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -504(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 288(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -496(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 296(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -488(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 304(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -480(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 312(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -472(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 320(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -464(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 328(%r9)
+ movq %r11, %rdi
+ movq %r10, %r11
+ movq %r8, %r10
+/APP
+ movq -456(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -448(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -440(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -432(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -424(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -416(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -408(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -400(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -392(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -384(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -376(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -368(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -360(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -352(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -344(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -336(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -328(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -320(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -312(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -304(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -296(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r11, 336(%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -448(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %rcx
+/APP
+ movq -440(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rsi, %r11
+ movq %rcx, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%r11
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, 344(%r9)
+ movq %r11, %rcx
+ movq %r10, %rdi
+ movq %r8, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rsi
+/APP
+ movq -432(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 352(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -432(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 360(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -424(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 368(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -416(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 376(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -408(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 384(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -400(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 392(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -392(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 400(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -384(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 408(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -376(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 416(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -368(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 424(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -360(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 432(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -352(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 440(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -344(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 448(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -336(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 456(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -328(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 464(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -320(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 472(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -312(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 480(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -304(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rcx, %r11
+ movq %rdi, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 488(%r9)
+ movq %r10, %rcx
+ movq %r11, %rsi
+/APP
+ movq -296(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rsi
+ adcq $0,%r8
+
+/NO_APP
+ movq %rcx, 496(%r9)
+ movl (%r12), %ecx
+ xorl (%rbx), %ecx
+ testq %rsi, %rsi
+ movq %rsi, 504(%r9)
+ movl $64, 8(%r13)
+ jne .L149
+ .align 16
+.L157:
+ movl 8(%r13), %edx
+ leal -1(%rdx), %ebx
+ testl %ebx, %ebx
+ movl %ebx, 8(%r13)
+ je .L149
+ leal -2(%rdx), %r12d
+ cmpq $0, (%r9,%r12,8)
+ je .L157
+.L149:
+ movl 8(%r13), %r9d
+ xorl %edx, %edx
+ testl %r9d, %r9d
+ cmovne %ecx, %edx
+ movl %edx, (%r13)
+ addq $520, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ leave
+ ret
+.LFE5:
+ .size s_mp_mul_comba_32, .-s_mp_mul_comba_32
+ .align 16
+/ s_mp_sqr_comba_4
+/ Fully unrolled square of a 4-word (4 x 64-bit) operand, producing 8 words.
+/   %rdi : input object; only the pointer at offset 16 is read, plus the
+/          four 64-bit words it points to (kept in %rcx).
+/   %rsi : output object (kept in %r11); the 32-bit field at offset 0 is
+/          set to 0, the 32-bit field at offset 8 to 8, and eight 64-bit
+/          words are stored through the pointer at offset 16.
+/ NOTE(review): offsets 0/8/16 presumably are the sign, used and dp fields
+/ of mp_int -- confirm against the mp_int declaration.
+/ Column k sums the products of input words i,j with i+j == k into a
+/ three-register carry chain (addq / adcq / adcq $0); a product with
+/ i != j is added twice. The low word of column k is staged in temp[k],
+/ the slot at -72+8k(%rsp), i.e. below the stack pointer (assumes the
+/ SysV AMD64 red zone -- TODO confirm the target ABI). The output words
+/ are written only after the last input word has been read.
+/ After the copy, the count at offset 8 is reduced while the highest
+/ counted word is zero; if the count ends at 0, offset 0 is set to 0.
+.globl s_mp_sqr_comba_4
+ .type s_mp_sqr_comba_4, @function
+s_mp_sqr_comba_4:
+.LFB6:
+ pushq %rbp
+.LCFI17:
+/ %r11 = output object; %rsi = 0, used to clear the accumulator registers.
+ movq %rsi, %r11
+ xorl %esi, %esi
+ movq %rsi, %r10
+ movq %rsi, %rbp
+ movq %rsi, %r8
+ pushq %rbx
+.LCFI18:
+ movq %rsi, %rbx
+/ %rcx = pointer to the input words.
+ movq 16(%rdi), %rcx
+ movq %rsi, %rdi
+/ Column 0: word0 * word0.
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%rdi
+
+/NO_APP
+/ temp[0] <- column 0 (also kept in %r10 for the final copy).
+ movq %r10, -72(%rsp)
+/ Column 1: word0 * word1, added twice.
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+
+/NO_APP
+/ temp[1] <- column 1.
+ movq %rbx, -64(%rsp)
+/ Column 2: word0 * word2 twice, then word1 * word1 once.
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+
+/NO_APP
+ movq %rbp, %rbx
+ movq %r8, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+/ temp[2] <- column 2; %rdi is then cleared for reuse as a carry word.
+ movq %rdi, -56(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %r8
+ movq %rsi, %rdi
+/ Column 3: word0 * word3 twice, word1 * word2 twice.
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r9, %rbx
+ movq %rdi, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+/ temp[3] <- column 3.
+ movq %r8, -48(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %rdi
+ movq %rsi, %r8
+/ Output header: count at offset 8 = 8, field at offset 0 = 0.
+ movl $8, 8(%r11)
+ movl $0, (%r11)
+/ Column 4: word1 * word3 twice, then word2 * word2 once.
+/APP
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbx
+ movq %r8, %rbp
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %rax
+/ temp[4] <- column 4.
+ movq %rdi, -40(%rsp)
+ movq %rbx, %rbp
+ movq %rax, %rdi
+ movq %rsi, %rbx
+/ Column 5: word2 * word3, added twice.
+/APP
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[5] <- column 5.
+ movq %rbp, -32(%rsp)
+ movq %rbx, %r9
+/ Column 6: word3 * word3; the high part left in %r9 is column 7.
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%rsi
+
+/NO_APP
+/ %rdx = pointer to the output words; stage temp[6] and temp[7], then
+/ copy all eight staged words out (word 0 comes straight from %r10).
+ movq 16(%r11), %rdx
+ movq %rdi, -24(%rsp)
+ movq %r9, -16(%rsp)
+ movq %r10, (%rdx)
+ movq -64(%rsp), %r8
+ movq %r8, 8(%rdx)
+ movq -56(%rsp), %rbp
+ movq %rbp, 16(%rdx)
+ movq -48(%rsp), %rdi
+ movq %rdi, 24(%rdx)
+ movq -40(%rsp), %rsi
+ movq %rsi, 32(%rdx)
+ movq -32(%rsp), %rbx
+ movq %rbx, 40(%rdx)
+ movq -24(%rsp), %rcx
+ movq %rcx, 48(%rdx)
+ movq -16(%rsp), %rax
+ movq %rax, 56(%rdx)
+/ Trim the count: %edx = count at offset 8; a zero count goes to .L168.
+ movl 8(%r11), %edx
+ testl %edx, %edx
+ je .L168
+/ If word[count-1] is non-zero the count is kept as is (.L166).
+ leal -1(%rdx), %ecx
+ movq 16(%r11), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L166
+ movl %ecx, %edx
+ .align 16
+/ Loop: %ecx = candidate count; stop when word[candidate-1] is non-zero,
+/ or leave through .L171 when the candidate reaches 0.
+.L167:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L171
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L167
+ movl %ecx, 8(%r11)
+ movl %ecx, %edx
+/ Non-zero count: restore registers; the offset-0 field is read and
+/ written back unchanged.
+.L166:
+ testl %edx, %edx
+ je .L168
+ popq %rbx
+ popq %rbp
+ movl (%r11), %eax
+ movl %eax, (%r11)
+ ret
+/ Every word was zero: store the zero count, then fall into .L168.
+.L171:
+ movl %edx, 8(%r11)
+ .align 16
+/ Zero count: restore registers and store 0 to the offset-0 field.
+.L168:
+ popq %rbx
+ popq %rbp
+ xorl %eax, %eax
+ movl %eax, (%r11)
+ ret
+.LFE6:
+ .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4
+ .align 16
+/ s_mp_sqr_comba_8
+/ Fully unrolled square of an 8-word (8 x 64-bit) operand, producing 16 words.
+/   %rdi : input object; only the pointer at offset 16 is read, plus the
+/          eight 64-bit words it points to (kept in %rcx).
+/   %rsi : output object (kept in %rbp); the 32-bit field at offset 0 is
+/          set to 0, the 32-bit field at offset 8 to 16, and sixteen
+/          64-bit words are stored through the pointer at offset 16.
+/ NOTE(review): offsets 0/8/16 presumably are the sign, used and dp fields
+/ of mp_int -- confirm against the mp_int declaration.
+/ Column k sums the products of input words i,j with i+j == k into a
+/ three-register carry chain. A product with i != j is counted twice:
+/ either it is added twice directly, or (columns 5 to 9) the cross
+/ products are first summed in a separate three-register total and that
+/ total is added twice. The low word of column k is staged in temp[k],
+/ the slot at -120+8k(%rsp) after the 8-byte stack adjustment; all but
+/ temp[15] lie below the stack pointer (assumes the SysV AMD64 red zone
+/ -- TODO confirm the target ABI). The output words are written only
+/ after the last input word has been read.
+/ After the copy, the count at offset 8 is reduced while the highest
+/ counted word is zero; if the count ends at 0, offset 0 is set to 0.
+.globl s_mp_sqr_comba_8
+ .type s_mp_sqr_comba_8, @function
+s_mp_sqr_comba_8:
+.LFB7:
+ pushq %r14
+.LCFI19:
+/ %r9 = 0, used to clear the accumulator registers.
+ xorl %r9d, %r9d
+ movq %r9, %r14
+ movq %r9, %r10
+ pushq %r13
+.LCFI20:
+ movq %r9, %r13
+ pushq %r12
+.LCFI21:
+ movq %r9, %r12
+ pushq %rbp
+.LCFI22:
+/ %rbp = output object.
+ movq %rsi, %rbp
+ movq %r9, %rsi
+ pushq %rbx
+.LCFI23:
+ movq %r9, %rbx
+ subq $8, %rsp
+.LCFI24:
+/ %rcx = pointer to the input words.
+ movq 16(%rdi), %rcx
+/ Column 0: word0 * word0.
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r14
+ adcq %rdx,%rbx
+ adcq $0,%r12
+
+/NO_APP
+/ temp[0] <- column 0 (also kept in %r14 for the final copy).
+ movq %r14, -120(%rsp)
+/ Column 1.
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+
+/NO_APP
+/ temp[1] <- column 1.
+ movq %rbx, -112(%rsp)
+/ Column 2.
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %rbx
+ movq %r13, %r10
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+/ temp[2] <- column 2.
+ movq %r12, -104(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %r11
+/ Column 3.
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %r10
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r9, %rsi
+/ temp[3] <- column 3.
+ movq %r11, -96(%rsp)
+ movq %r10, %r8
+ movq %rbx, %r12
+ movq %r9, %r11
+/ Column 4.
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r13, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+/ temp[4] <- column 4.
+ movq %r12, -88(%rsp)
+/ Column 5: cross products summed in %r8/%rdi/%rsi, then added twice.
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+/ temp[5] <- column 5.
+ movq %rbx, -80(%rsp)
+/ Column 6.
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[6] <- column 6.
+ movq %r10, -72(%rsp)
+ movq %r11, %r10
+/ Column 7.
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+
+/NO_APP
+/ temp[7] <- column 7.
+ movq %rbx, -64(%rsp)
+ movq %rax, %r11
+ movq %r9, %rbx
+/ Column 8.
+/APP
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rsi
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %r13
+ movq %rsi, %r11
+/APP
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[8] <- column 8.
+ movq %r10, -56(%rsp)
+ movq %r9, %r10
+/ Column 9: cross products summed in %r8/%rdi/%r13, then added twice.
+/APP
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %r13,%r13
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r13, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+/ temp[9] <- column 9.
+ movq %rbx, -48(%rsp)
+ movq %r11, %r12
+ movq %r10, %rsi
+ movq %r9, %rbx
+ movq %r9, %r11
+/ Column 10.
+/APP
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbx, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+ movq %r9, %r13
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[10] <- column 10.
+ movq %r12, -40(%rsp)
+ movq %rbx, %r8
+ movq %r10, %rdi
+/ Column 11.
+/APP
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r10
+ movq %r11, %rbx
+/APP
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[11] <- column 11.
+ movq %rdi, -32(%rsp)
+ movq %rbx, %rsi
+ movq %r10, %r12
+/ Column 12.
+/APP
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[12] <- column 12.
+ movq %r12, -24(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %rsi
+ movq %r9, %r10
+/ Output header: count at offset 8 = 16, field at offset 0 = 0.
+ movl $16, 8(%rbp)
+ movl $0, (%rbp)
+/ Column 13.
+/APP
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+/ temp[13] <- column 13.
+ movq %rdi, -16(%rsp)
+ movq %r10, %r8
+/ Column 14: word7 * word7; the high part left in %r8 is column 15.
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+/ %rax = pointer to the output words; stage temp[14] and temp[15], then
+/ copy all sixteen staged words out (word 0 comes straight from %r14).
+ movq 16(%rbp), %rax
+ movq %rsi, -8(%rsp)
+ movq %r8, (%rsp)
+ movq %r14, (%rax)
+ movq -112(%rsp), %rbx
+ movq %rbx, 8(%rax)
+ movq -104(%rsp), %rcx
+ movq %rcx, 16(%rax)
+ movq -96(%rsp), %rdx
+ movq %rdx, 24(%rax)
+ movq -88(%rsp), %r14
+ movq %r14, 32(%rax)
+ movq -80(%rsp), %r13
+ movq %r13, 40(%rax)
+ movq -72(%rsp), %r12
+ movq %r12, 48(%rax)
+ movq -64(%rsp), %r11
+ movq %r11, 56(%rax)
+ movq -56(%rsp), %r10
+ movq %r10, 64(%rax)
+ movq -48(%rsp), %r9
+ movq %r9, 72(%rax)
+ movq -40(%rsp), %r8
+ movq %r8, 80(%rax)
+ movq -32(%rsp), %rdi
+ movq %rdi, 88(%rax)
+ movq -24(%rsp), %rsi
+ movq %rsi, 96(%rax)
+ movq -16(%rsp), %rbx
+ movq %rbx, 104(%rax)
+ movq -8(%rsp), %rcx
+ movq %rcx, 112(%rax)
+ movq (%rsp), %rdx
+ movq %rdx, 120(%rax)
+/ Trim the count: %edx = count at offset 8; a zero count goes to .L192.
+ movl 8(%rbp), %edx
+ testl %edx, %edx
+ je .L192
+/ If word[count-1] is non-zero the count is kept as is (.L190).
+ leal -1(%rdx), %ecx
+ movq 16(%rbp), %rsi
+ mov %ecx, %r14d
+ cmpq $0, (%rsi,%r14,8)
+ jne .L190
+ movl %ecx, %edx
+ .align 16
+/ Loop: %ecx = candidate count; stop when word[candidate-1] is non-zero,
+/ or leave through .L195 when the candidate reaches 0.
+.L191:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L195
+ decl %edx
+ mov %edx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ je .L191
+ movl %ecx, 8(%rbp)
+ movl %ecx, %edx
+/ Non-zero count: the offset-0 field is read and written back unchanged,
+/ then the stack and saved registers are restored.
+.L190:
+ testl %edx, %edx
+ je .L192
+ movl (%rbp), %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+/ Every word was zero: store the zero count, then fall into .L192.
+.L195:
+ movl %edx, 8(%rbp)
+ .align 16
+/ Zero count: store 0 to the offset-0 field and return.
+.L192:
+ xorl %eax, %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.LFE7:
+ .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8
+ .align 16
+/ s_mp_sqr_comba_16
+/ Fully unrolled square of a 16-word (16 x 64-bit) operand, producing 32 words.
+/   %rdi : input object; only the pointer at offset 16 is read, plus the
+/          sixteen 64-bit words it points to (kept in %rcx).
+/   %rsi : output object (kept in %r14); the 32-bit field at offset 0 is
+/          set to 0, the 32-bit field at offset 8 to 32, and 256 bytes
+/          are copied with memcpy to the pointer at offset 16.
+/ NOTE(review): offsets 0/8/16 presumably are the sign, used and dp fields
+/ of mp_int -- confirm against the mp_int declaration.
+/ Column k sums the products of input words i,j with i+j == k into a
+/ three-register carry chain. A product with i != j is counted twice:
+/ either it is added twice directly, or the cross products of the column
+/ are first summed in a separate three-register total and that total is
+/ added twice. The low word of column k is staged in temp[k], the slot
+/ at -288+8k(%rbp) inside the 256-byte frame reserved below the four
+/ saved registers. The output words are written only after the last
+/ input word has been read.
+/ After the copy, the count at offset 8 is reduced while the highest
+/ counted word is zero; if the count ends at 0, offset 0 is set to 0.
+.globl s_mp_sqr_comba_16
+ .type s_mp_sqr_comba_16, @function
+s_mp_sqr_comba_16:
+.LFB8:
+ pushq %rbp
+.LCFI25:
+/ %r9 = 0, used to clear the accumulator registers.
+ xorl %r9d, %r9d
+ movq %r9, %r8
+ movq %r9, %r11
+ movq %rsp, %rbp
+.LCFI26:
+ pushq %r14
+.LCFI27:
+/ %r14 = output object.
+ movq %rsi, %r14
+ movq %r9, %rsi
+ pushq %r13
+.LCFI28:
+ movq %r9, %r13
+ pushq %r12
+.LCFI29:
+ movq %r9, %r12
+ pushq %rbx
+.LCFI30:
+ movq %r9, %rbx
+/ Reserve the 256-byte staging area temp[0..31].
+ subq $256, %rsp
+.LCFI31:
+/ %rcx = pointer to the input words.
+ movq 16(%rdi), %rcx
+/ Column 0: word0 * word0.
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+/ temp[0] <- column 0.
+ movq %r8, -288(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+/ temp[1] <- column 1.
+ movq %rbx, -280(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r10
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+/ temp[2] <- column 2.
+ movq %rsi, -272(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rsi
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %rbx
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, %r11
+/ temp[3] <- column 3.
+ movq %r10, -264(%rbp)
+ movq %rbx, %r8
+ movq %r12, %r13
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+/ temp[4] <- column 4.
+ movq %r13, -256(%rbp)
+/ Column 5: first column whose cross products are summed separately in
+/ %r8/%rdi/%rsi and then added twice to the running accumulator.
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+/ temp[5] <- column 5.
+ movq %rbx, -248(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[6] <- column 6.
+ movq %r10, -240(%rbp)
+ movq %r11, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+/ temp[7] <- column 7.
+ movq %rbx, -232(%rbp)
+ movq %r9, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+/ temp[8] <- column 8.
+ movq %r10, -224(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r10
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+/ temp[9] <- column 9.
+ movq %r11, -216(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[10] <- column 10.
+ movq %r10, -208(%rbp)
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+/ temp[11] <- column 11.
+ movq %r11, -200(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rdx
+ movq %rsi, %r11
+/APP
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+
+/NO_APP
+ movq %rdx, %rbx
+ movq %rax, %r13
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+/ temp[12] <- column 12.
+ movq %r10, -192(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+/ temp[13] <- column 13.
+ movq %r12, -184(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+/ temp[14] <- column 14.
+ movq %r10, -176(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+/ temp[15] <- column 15.
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+/APP
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+/ temp[16] <- column 16.
+ movq %r10, -160(%rbp)
+ movq %r9, %r11
+/APP
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r13, %r10
+ movq %r9, %rbx
+/APP
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+/ temp[17] <- column 17.
+ movq %r12, -152(%rbp)
+/APP
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[18] <- column 18.
+ movq %r10, -144(%rbp)
+ movq %r11, %r10
+/APP
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+/ temp[19] <- column 19.
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[20] <- column 20.
+ movq %r10, -128(%rbp)
+ movq %r11, %r10
+/APP
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+/ temp[21] <- column 21.
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r9, %rbx
+/APP
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[22] <- column 22.
+ movq %r10, -112(%rbp)
+ movq %r11, %r10
+/APP
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+/ temp[23] <- column 23.
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+/ temp[24] <- column 24.
+ movq %r10, -96(%rbp)
+ movq %r9, %r10
+/APP
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r12
+ movq %rsi, %rax
+ movq %r9, %rsi
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %r9, %r12
+/ temp[25] <- column 25.
+ movq %rbx, -88(%rbp)
+ movq %r11, %r13
+ movq %r10, %r11
+/ From column 26 on, each cross product is again added twice directly.
+/APP
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rdi
+/APP
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r11, %rbx
+ movq %rdi, %r10
+ movq %r9, %r11
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+/ temp[26] <- column 26.
+ movq %r13, -80(%rbp)
+ movq %r10, %r8
+ movq %rbx, %r10
+/APP
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %r12
+ movq %rsi, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[27] <- column 27.
+ movq %r10, -72(%rbp)
+ movq %rbx, %r13
+ movq %r12, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %r12
+ movq %r13, %r10
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r10
+ adcq $0,%r12
+
+/NO_APP
+/ temp[28] <- column 28.
+ movq %rbx, -64(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rbx
+ movq %r12, %rsi
+/APP
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+/ temp[29] <- column 29.
+ movq %rdi, -56(%rbp)
+ movq %rbx, %r8
+/ Column 30: word15 * word15; the high part left in %r8 is column 31.
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+/ Stage temp[30] and temp[31], fill in the output header (count at
+/ offset 8 = 32, offset-0 field = 0) and copy the 256 staged bytes to
+/ the output word pointer: memcpy(dest = 16(%r14), src = temp, 256).
+ movq %rsi, -48(%rbp)
+ movq 16(%r14), %rdi
+ leaq -288(%rbp), %rsi
+ movl $256, %edx
+ movq %r8, -40(%rbp)
+ movl $32, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
+/ Trim the count: %edx = count at offset 8; a zero count goes to .L232.
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L232
+/ If word[count-1] is non-zero the count is kept as is (.L230).
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ jne .L230
+ movl %ecx, %edx
+ .align 16
+/ Loop: %ecx = candidate count; stop when word[candidate-1] is non-zero,
+/ or leave through .L235 when the candidate reaches 0.
+.L231:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L235
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L231
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+/ Non-zero count: the offset-0 field is read and written back unchanged,
+/ then the frame and saved registers are restored.
+.L230:
+ testl %edx, %edx
+ je .L232
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+/ Every word was zero: store the zero count, then fall into .L232.
+.L235:
+ movl %edx, 8(%r14)
+ .align 16
+/ Zero count: store 0 to the offset-0 field and return.
+.L232:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE8:
+ .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16
+ .align 16
+.globl s_mp_sqr_comba_32
+ .type s_mp_sqr_comba_32, @function
+s_mp_sqr_comba_32:
+.LFB9:
+ pushq %rbp
+.LCFI32:
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r11
+ movq %rsp, %rbp
+.LCFI33:
+ pushq %r14
+.LCFI34:
+ movq %rsi, %r14
+ movq %r10, %rsi
+ pushq %r13
+.LCFI35:
+ movq %r10, %r13
+ pushq %r12
+.LCFI36:
+ movq %r10, %r12
+ pushq %rbx
+.LCFI37:
+ movq %r10, %rbx
+ subq $512, %rsp
+.LCFI38:
+ movq 16(%rdi), %rcx
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -544(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -536(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r9
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rsi, -528(%rbp)
+ movq %r9, %rdi
+ movq %r10, %rsi
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %r13
+ movq %r10, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -520(%rbp)
+ movq %r13, %r8
+ movq %r12, %r13
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r9
+ movq %r10, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %r13, -512(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -504(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -496(%rbp)
+ movq %r11, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rbx, -488(%rbp)
+ movq %r10, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+ movq %r9, -480(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r9
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %r11, -472(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, -464(%rbp)
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+ movq %r11, -456(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rax
+ movq %rsi, %r11
+/APP
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+
+/NO_APP
+ movq %rax, %rbx
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+ movq %r9, -448(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -440(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %rdi, %rbx
+ movq %rsi, %r13
+/APP
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %rdi
+ movq %r11, %rsi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -432(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -424(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 128(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -416(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 136(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -408(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 144(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -400(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 152(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -392(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 160(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -384(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 168(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -376(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 176(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -368(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 184(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, -360(%rbp)
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 192(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %r8
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rdi
+ movq %r9, -352(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 200(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -344(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 208(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -336(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 216(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -328(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 224(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -320(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 232(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+
+/NO_APP
+ movq %r12, -312(%rbp)
+ movq %r9, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 240(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %r8, %r11
+ movq %rdi, %rdx
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r9
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %r11, %r8
+ movq %rdx, %r13
+ movq %r9, %rdi
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, -304(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -296(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %r11
+ movq %rdi, %rax
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r9
+ movq %r11, %r8
+/APP
+ movq 128(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, %rdi
+ movq %rbx, -288(%rbp)
+ movq %r13, %r9
+/APP
+ movq 16(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 24(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -280(%rbp)
+ movq %r10, %r12
+/APP
+ movq 24(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 136(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -272(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -264(%rbp)
+/APP
+ movq 40(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 144(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -256(%rbp)
+ movq %r13, %r9
+/APP
+ movq 48(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+
+/NO_APP
+ movq %r12, -248(%rbp)
+ movq %r11, %r13
+/APP
+ movq 56(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rsi, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+
+/NO_APP
+ movq %r12, %r11
+ movq %rdx, %r8
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %rbx, %rdi
+ movq %rdx, %r13
+ movq %r11, %rsi
+/APP
+ movq 152(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -240(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 64(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -232(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 72(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 160(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -224(%rbp)
+ movq %r13, %r9
+/APP
+ movq 80(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -216(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 168(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -208(%rbp)
+ movq %r13, %r9
+/APP
+ movq 96(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -200(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 176(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -192(%rbp)
+ movq %r13, %r9
+/APP
+ movq 112(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -184(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 176(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 184(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -176(%rbp)
+ movq %r13, %r9
+/APP
+ movq 128(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %rax
+/APP
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %rdi
+ movq %r10, %rbx
+/APP
+ movq 192(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -160(%rbp)
+ movq %r13, %r9
+/APP
+ movq 144(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+
+/NO_APP
+ movq %r12, -152(%rbp)
+/APP
+ movq 152(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rax
+ movq %r13, %rdi
+ movq %r12, %rsi
+ movq %rax, %r11
+ movq %r10, %r12
+/APP
+ movq 200(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -144(%rbp)
+ movq %r11, %r9
+/APP
+ movq 160(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 168(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 208(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -128(%rbp)
+ movq %r11, %r9
+/APP
+ movq 176(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r10, %rbx
+/APP
+ movq 184(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r10, %r12
+/APP
+ movq 216(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -112(%rbp)
+ movq %r11, %r9
+/APP
+ movq 192(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 200(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+ movq %r10, %r12
+/APP
+ movq 224(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -96(%rbp)
+ movq %r10, %r9
+/APP
+ movq 208(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 224(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+
+/NO_APP
+ movq %rbx, -88(%rbp)
+ movq %r11, %rsi
+ movq %r9, %r8
+/APP
+ movq 216(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %r11
+/APP
+ movq 224(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r13
+ movq %r11, %rbx
+/APP
+ movq 232(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rsi, -80(%rbp)
+ movq %rbx, %r12
+ movq %r13, %rdi
+ movq %r10, %r13
+/APP
+ movq 224(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %r9
+ movq %r13, %r12
+/APP
+ movq 232(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %rdi, -72(%rbp)
+ movq %r9, %r11
+ movq %r12, %rbx
+ movq %r10, %r9
+/APP
+ movq 232(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rbx, %r13
+ movq %r9, %rbx
+ movq %r10, %r9
+/APP
+ movq 240(%rcx),%rax
+ mulq %rax
+ addq %rax,%r11
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r11, -64(%rbp)
+ movq %r13, %rdi
+ movq %rbx, %rsi
+/APP
+ movq 240(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+
+/NO_APP
+ movq %rdi, -56(%rbp)
+ movq %r9, %r8
+/APP
+ movq 248(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -48(%rbp)
+ movq 16(%r14), %rdi
+ leaq -544(%rbp), %rsi
+ movl $512, %edx
+ movq %r8, -40(%rbp)
+ movl $64, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L304
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L302
+ movl %ecx, %edx
+ .align 16
+.L303:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L307
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L303
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+.L302:
+ testl %edx, %edx
+ je .L304
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.L307:
+ movl %edx, 8(%r14)
+ .align 16
+.L304:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE9:
+ .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
new file mode 100644
index 0000000000..5be4da4bf2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_PRIV_H_
+#define _MP_GF2M_PRIV_H_
+
+#include "mpi-priv.h"
+
+/* 16-entry lookup table indexed by a 4-bit nibble; it is consulted by the
+ * gf2m_SQR0/gf2m_SQR1 squaring macros defined later in this header. */
+extern const mp_digit mp_gf2m_sqr_tb[16];
+
+/* Digit width assumed by the GF(2^m) code: 32 bits when MP_USE_UINT_DIGIT
+ * is defined, 64 bits otherwise.  MP_DIGIT_BITS_LOG_2 and MP_DIGIT_BITS_MASK
+ * let callers replace "x / MP_DIGIT_BITS" with "x >> MP_DIGIT_BITS_LOG_2"
+ * and "x % MP_DIGIT_BITS" with "x & MP_DIGIT_BITS_MASK". */
+#if defined(MP_USE_UINT_DIGIT)
+#define MP_DIGIT_BITS 32
+/* enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 5
+#define MP_DIGIT_BITS_MASK 0x1f
+#else
+#define MP_DIGIT_BITS 64
+/* enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 6
+#define MP_DIGIT_BITS_MASK 0x3f
+#endif
+
+/* Platform-specific macros for fast binary polynomial squaring.
+ * One digit w squares into two digits: gf2m_SQR0(w) is assembled from the
+ * nibbles of the low half of w and gf2m_SQR1(w) from the nibbles of the
+ * high half, one mp_gf2m_sqr_tb lookup (one result byte) per nibble.
+ * Each expansion is wrapped in parentheses so it binds as a single operand
+ * when used inside a larger expression.  The argument w is evaluated
+ * several times, so it must not have side effects.
+ */
+#if MP_DIGIT_BITS == 32
+#define gf2m_SQR1(w) \
+    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF])
+#define gf2m_SQR0(w) \
+    (mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
+#else
+#define gf2m_SQR1(w) \
+    (mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
+     mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
+     mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF])
+#define gf2m_SQR0(w) \
+    (mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
+     mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
+     mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+     mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF])
+#endif
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl: *rh receives the high digit of the
+ * product and *rl the low digit.
+ */
+void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);
+
+/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
+ * result is a binary polynomial in 4 mp_digits r[4], with r[0] the least
+ * significant digit.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+ const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[6], with r[0] the least
+ * significant digit.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+ const mp_digit b2, const mp_digit b1, const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[8], with r[0] the least
+ * significant digit.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+ const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+ const mp_digit b0);
+
+#endif /* _MP_GF2M_PRIV_H_ */
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c
new file mode 100644
index 0000000000..878b7cae8c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.c
@@ -0,0 +1,677 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mp_gf2m.h"
+#include "mp_gf2m-priv.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Nibble squaring table for binary polynomials: entry n holds the four bits
+ * of n spread out to the even bit positions of a byte (bit i of n becomes
+ * bit 2*i of the entry). */
+const mp_digit mp_gf2m_sqr_tb[16] = {
+    0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15,
+    0x40, 0x41, 0x44, 0x45, 0x50, 0x51, 0x54, 0x55
+};
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl.
+ */
+#if MP_DIGIT_BITS == 32
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+    /* 32-bit digit variant.  b is consumed three bits at a time; tab[i]
+     * holds the low 30 bits of a multiplied by the 3-bit polynomial i.
+     * The top two bits of a are left out of the table (so the shifted
+     * entries fit in one digit) and are folded in at the end. */
+    const mp_digit top2b = a >> 30;
+    const mp_digit a1 = a & (0x3FFFFFFF);
+    const mp_digit a2 = a1 << 1;
+    const mp_digit a4 = a2 << 1;
+    mp_digit tab[8];
+    mp_digit hi = 0, lo, part;
+    unsigned int i, shift;
+
+    /* tab[i] is the XOR of a1, a2, a4 selected by the bits of i */
+    for (i = 0; i < 8; i++) {
+        part = 0;
+        if (i & 1) {
+            part ^= a1;
+        }
+        if (i & 2) {
+            part ^= a2;
+        }
+        if (i & 4) {
+            part ^= a4;
+        }
+        tab[i] = part;
+    }
+
+    /* window at bit 0 contributes to the low digit only */
+    lo = tab[b & 0x7];
+
+    /* windows at bits 3, 6, ..., 30 straddle both result digits; the last
+     * window only has two bits, which the mask leaves untouched */
+    for (shift = 3; shift < 32; shift += 3) {
+        part = tab[b >> shift & 0x7];
+        lo ^= part << shift;
+        hi ^= part >> (32 - shift);
+    }
+
+    /* compensate for the top two bits of a */
+    if (top2b & 01) {
+        lo ^= b << 30;
+        hi ^= b >> 2;
+    }
+    if (top2b & 02) {
+        lo ^= b << 31;
+        hi ^= b >> 1;
+    }
+
+    *rh = hi;
+    *rl = lo;
+}
+#else
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+    /* 64-bit digit variant.  b is consumed one nibble at a time; tab[i]
+     * holds the low 61 bits of a multiplied by the 4-bit polynomial i.
+     * The top three bits of a are left out of the table (so the shifted
+     * entries fit in one digit) and are folded in at the end. */
+    const mp_digit top3b = a >> 61;
+    const mp_digit a1 = a & (0x1FFFFFFFFFFFFFFFULL);
+    const mp_digit a2 = a1 << 1;
+    const mp_digit a4 = a2 << 1;
+    const mp_digit a8 = a4 << 1;
+    mp_digit tab[16];
+    mp_digit hi = 0, lo, part;
+    unsigned int i, shift;
+
+    /* tab[i] is the XOR of a1, a2, a4, a8 selected by the bits of i */
+    for (i = 0; i < 16; i++) {
+        part = 0;
+        if (i & 1) {
+            part ^= a1;
+        }
+        if (i & 2) {
+            part ^= a2;
+        }
+        if (i & 4) {
+            part ^= a4;
+        }
+        if (i & 8) {
+            part ^= a8;
+        }
+        tab[i] = part;
+    }
+
+    /* nibble 0 contributes to the low digit only */
+    lo = tab[b & 0xF];
+
+    /* nibbles at bits 4, 8, ..., 60 straddle both result digits */
+    for (shift = 4; shift < 64; shift += 4) {
+        part = tab[b >> shift & 0xF];
+        lo ^= part << shift;
+        hi ^= part >> (64 - shift);
+    }
+
+    /* compensate for the top three bits of a */
+    if (top3b & 01) {
+        lo ^= b << 61;
+        hi ^= b >> 3;
+    }
+    if (top3b & 02) {
+        lo ^= b << 62;
+        hi ^= b >> 2;
+    }
+    if (top3b & 04) {
+        lo ^= b << 63;
+        hi ^= b >> 1;
+    }
+
+    *rh = hi;
+    *rl = lo;
+}
+#endif
+
+/* Xor-multiply the two-digit binary polynomials (a1, a0) and (b1, b0).
+ * The four-digit product is written to r[0..3], r[0] being the least
+ * significant digit.  Three single-digit products are combined
+ * Karatsuba-style.
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+           const mp_digit b0)
+{
+    mp_digit h1, h0; /* a1 * b1 */
+    mp_digit l1, l0; /* a0 * b0 */
+    mp_digit m1, m0; /* (a0 ^ a1) * (b0 ^ b1) */
+
+    s_bmul_1x1(&h1, &h0, a1, b1);
+    s_bmul_1x1(&l1, &l0, a0, b0);
+    s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1);
+
+    /* the middle term m ^ h ^ l overlaps digits 1 and 2 of the result */
+    r[3] = h1;
+    r[2] = h0 ^ m1 ^ l1 ^ h1;
+    r[1] = l1 ^ l0 ^ h0 ^ m0;
+    r[0] = l0;
+}
+
+/* Xor-multiply the three-digit binary polynomials (a2, a1, a0) and
+ * (b2, b1, b0).  The six-digit product is written to r[0..5].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+           const mp_digit b2, const mp_digit b1, const mp_digit b0)
+{
+    mp_digit mid[4];
+    int i;
+
+    s_bmul_1x1(r + 5, r + 4, a2, b2);          /* top two digits */
+    s_bmul_2x2(mid, a1, a2 ^ a0, b1, b2 ^ b0); /* middle four digits */
+    s_bmul_2x2(r, a1, a0, b1, b0);             /* bottom four digits */
+
+    /* remove the bottom product (r[0..3]) and the top product (r[4..5])
+     * from the middle term; every read of r happens before r is updated */
+    mid[0] ^= r[0] ^ r[4];
+    mid[1] ^= r[1] ^ r[5];
+    mid[2] ^= r[2];
+    mid[3] ^= r[3];
+
+    /* add the corrected middle term at digit offset 2 */
+    for (i = 0; i < 4; i++) {
+        r[i + 2] ^= mid[i];
+    }
+}
+
+/* Xor-multiply the four-digit binary polynomials (a3, a2, a1, a0) and
+ * (b3, b2, b1, b0).  The eight-digit product is written to r[0..7].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+           const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+           const mp_digit b0)
+{
+    mp_digit mid[4];
+    int i;
+
+    s_bmul_2x2(r + 4, a3, a2, b3, b2);                   /* top four digits */
+    s_bmul_2x2(mid, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* middle four digits */
+    s_bmul_2x2(r, a1, a0, b1, b0);                       /* bottom four digits */
+
+    /* remove the bottom (r[0..3]) and top (r[4..7]) products from the
+     * middle term; every read of r happens before r is updated */
+    for (i = 0; i < 4; i++) {
+        mid[i] ^= r[i] ^ r[i + 4];
+    }
+
+    /* add the corrected middle term at digit offset 2 */
+    for (i = 0; i < 4; i++) {
+        r[i + 2] ^= mid[i];
+    }
+}
+
+/* Compute addition of two binary polynomials a and b,
+ * store result in c; c could be a or b, a and b could be equal;
+ * c is the bitwise XOR of a and b.
+ * Returns MP_OKAY on success, or the error reported by s_mp_pad when c
+ * cannot be extended to the length of the longer operand.
+ */
+mp_err
+mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    const mp_int *longer, *shorter;
+    const mp_digit *pa, *pb;
+    mp_digit *pc;
+    mp_size ix;
+    mp_size used_pa, used_pb;
+    mp_err res = MP_OKAY;
+
+    /* Add all digits up to the precision of the shorter operand, then
+     * copy the remaining digits of the longer one. */
+    if (MP_USED(a) >= MP_USED(b)) {
+        longer = a;
+        shorter = b;
+    } else {
+        longer = b;
+        shorter = a;
+    }
+
+    /* Record the lengths now: if c aliases the shorter operand, padding c
+     * changes that operand's used count. */
+    used_pa = MP_USED(longer);
+    used_pb = MP_USED(shorter);
+
+    /* Make sure c has enough precision for the output value */
+    MP_CHECKOK(s_mp_pad(c, used_pa));
+
+    /* Fetch the digit pointers only after padding.  c may alias a or b,
+     * and s_mp_pad may have to grow c's digit buffer (presumably by
+     * reallocating it -- confirm in mpi.c), which would leave pointers
+     * taken earlier dangling. */
+    pa = MP_DIGITS(longer);
+    pb = MP_DIGITS(shorter);
+    pc = MP_DIGITS(c);
+
+    /* Do word-by-word xor */
+    for (ix = 0; ix < used_pb; ix++) {
+        (*pc++) = (*pa++) ^ (*pb++);
+    }
+
+    /* Finish the rest of digits until we're actually done */
+    for (; ix < used_pa; ++ix) {
+        *pc++ = *pa++;
+    }
+
+    MP_USED(c) = used_pa;
+    MP_SIGN(c) = ZPOS;
+    s_mp_clamp(c);
+
+CLEANUP:
+    return res;
+}
+
+/* Shift a right by one bit in place.  Expands to an MP_CHECKOK statement,
+ * so it can only be used where MP_CHECKOK can (in this file: functions with
+ * a local `res` and a CLEANUP label).  The expansion has no trailing
+ * semicolon; the caller writes it, as with any other statement. */
+#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1))
+
+/* Binary polynomial multiply of the a_len-digit polynomial a by the single
+ * digit b.  Writes a_len + 1 digits to d. */
+static void
+s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    mp_digit hi, lo;
+    mp_digit carry = 0; /* high digit of the previous partial product */
+    mp_size i;
+
+    for (i = 0; i < a_len; i++) {
+        s_bmul_1x1(&hi, &lo, a[i], b);
+        d[i] = lo ^ carry;
+        carry = hi;
+    }
+    d[a_len] = carry;
+}
+
+/* Binary polynomial multiply-accumulate: XORs the product of the
+ * a_len-digit polynomial a and the single digit b into d[0..a_len]. */
+static void
+s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    mp_digit hi, lo;
+    mp_digit carry = 0; /* high digit of the previous partial product */
+    mp_size i;
+
+    for (i = 0; i < a_len; i++) {
+        s_bmul_1x1(&hi, &lo, a[i], b);
+        d[i] ^= lo ^ carry;
+        carry = hi;
+    }
+    d[a_len] ^= carry;
+}
+
+/* Binary polynomial (xor) multiplication, c = a * b, without reduction.
+ * a, b and c may all refer to the same mp_int: an operand that aliases c
+ * is copied to a temporary first.
+ * Returns MP_BADARG for a NULL argument, otherwise MP_OKAY or the error
+ * reported by mp_init_copy / s_mp_pad.
+ */
+mp_err
+mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    const mp_digit *b_digits;
+    mp_int scratch;
+    mp_size row, a_used, b_used;
+    mp_err res = MP_OKAY;
+
+    MP_DIGITS(&scratch) = 0;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* detach any operand that shares storage with the output */
+    if (a == c) {
+        MP_CHECKOK(mp_init_copy(&scratch, a));
+        if (a == b) {
+            b = &scratch;
+        }
+        a = &scratch;
+    } else if (b == c) {
+        MP_CHECKOK(mp_init_copy(&scratch, b));
+        b = &scratch;
+    }
+
+    /* keep the longer operand in a so the inner products are long */
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *longer = b;
+        b = a;
+        a = longer;
+    }
+    a_used = MP_USED(a);
+    b_used = MP_USED(b);
+
+    /* reset c to zero and make room for the full product */
+    MP_USED(c) = 1;
+    MP_DIGIT(c, 0) = 0;
+    MP_CHECKOK(s_mp_pad(c, a_used + b_used));
+
+    /* first row: c = a * b[0] */
+    b_digits = MP_DIGITS(b);
+    s_bmul_d(MP_DIGITS(a), a_used, b_digits[0], MP_DIGITS(c));
+    MP_USED(c) = a_used + b_used;
+
+    /* remaining rows: c ^= (a * b[row]) shifted up by row digits */
+    for (row = 1; row < b_used; row++) {
+        const mp_digit b_i = b_digits[row];
+
+        if (b_i != 0) {
+            s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + row);
+        } else {
+            /* a zero digit adds nothing; just clear this row's top digit */
+            MP_DIGIT(c, row + a_used) = 0;
+        }
+    }
+
+    s_mp_clamp(c);
+
+    SIGN(c) = ZPOS;
+
+CLEANUP:
+    mp_clear(&scratch);
+    return res;
+}
+
+/* Compute modular reduction of a and store result in r.
+ * r could be a.
+ * For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of int[], where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
+ * The loops below stop at the first p[k] that is 0, so the array must end
+ * with that 0 entry.
+ * Returns MP_OKAY, or the error from mp_copy when a has to be copied into r.
+ */
+mp_err
+mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+ int j, k;
+ int n, dN, d0, d1;
+ mp_digit zz, *z, tmp;
+ mp_size used;
+ mp_err res = MP_OKAY;
+
+ /* The algorithm does the reduction in place in r,
+ * if a != r, copy a into r first so reduction can be done in r
+ */
+ if (a != r) {
+ MP_CHECKOK(mp_copy(a, r));
+ }
+ z = MP_DIGITS(r);
+
+ /* start reduction */
+ /*dN = p[0] / MP_DIGIT_BITS; */
+ /* dN is the index of the digit that holds bit p[0] */
+ dN = p[0] >> MP_DIGIT_BITS_LOG_2;
+ used = MP_USED(r);
+
+ /* Walk down from the top digit to the one just above dN.  Each nonzero
+ * digit is cleared and its bits are XORed back in at the positions of
+ * the lower-order terms of f(t).  j is not decremented after a fold, so
+ * the same digit is re-examined in case bits were folded back into it. */
+ for (j = used - 1; j > dN;) {
+
+ zz = z[j];
+ if (zz == 0) {
+ j--;
+ continue;
+ }
+ z[j] = 0;
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k] */
+ n = p[0] - p[k];
+ /*d0 = n % MP_DIGIT_BITS; */
+ d0 = n & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ /*n /= MP_DIGIT_BITS; */
+ n >>= MP_DIGIT_BITS_LOG_2;
+ z[j - n] ^= (zz >> d0);
+ /* when d0 == 0 the shift by d1 would be a full digit width, so
+ * the spill into the next lower digit is skipped */
+ if (d0)
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* reducing component t^0 */
+ n = dN;
+ /*d0 = p[0] % MP_DIGIT_BITS;*/
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[j - n] ^= (zz >> d0);
+ if (d0)
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* final round of reduction */
+ /* Only digit dN can still hold bits at or above t^p[0]; repeat until
+ * none remain.  This loop is not entered when j != dN. */
+ while (j == dN) {
+
+ /* d0 = p[0] % MP_DIGIT_BITS; */
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ /* zz = the bits of digit dN at or above bit position p[0] */
+ zz = z[dN] >> d0;
+ if (zz == 0)
+ break;
+ d1 = MP_DIGIT_BITS - d0;
+
+ /* clear up the top d1 bits */
+ if (d0) {
+ z[dN] = (z[dN] << d1) >> d1;
+ } else {
+ z[dN] = 0;
+ }
+ *z ^= zz; /* reduction t^0 component */
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k]*/
+ /* n = p[k] / MP_DIGIT_BITS; */
+ n = p[k] >> MP_DIGIT_BITS_LOG_2;
+ /* d0 = p[k] % MP_DIGIT_BITS; */
+ d0 = p[k] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[n] ^= (zz << d0);
+ tmp = zz >> d1;
+ if (d0 && tmp)
+ z[n + 1] ^= tmp;
+ }
+ }
+
+ /* drop leading zero digits left behind by the reduction */
+ s_mp_clamp(r);
+CLEANUP:
+ return res;
+}
+
+/* Multiply the binary polynomials a and b and reduce the product modulo
+ * the polynomial described by p, storing the result in r.
+ * r may be a or b, and a may be b (that case is handed to mp_bsqrmod).
+ * Returns the first error reported by the multiply or the reduction.
+ */
+mp_err
+mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
+{
+    mp_err res;
+
+    if (a == b) {
+        return mp_bsqrmod(a, p, r);
+    }
+
+    res = mp_bmul(a, b, r);
+    if (res == MP_OKAY) {
+        res = mp_bmod(r, p, r);
+    }
+    return res;
+}
+
+/* Compute binary polynomial squaring r = a*a mod p, where p is the
+ * exponent-array form of the reduction polynomial accepted by mp_bmod.
+ * Parameter r and a can be identical.
+ * Returns MP_BADARG for a NULL a or r, otherwise MP_OKAY or the first
+ * error reported by mp_init_copy, s_mp_pad or mp_bmod.
+ */
+
+mp_err
+mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+    mp_digit *pa, *pr, a_i;
+    mp_int tmp;
+    mp_size ia, a_used;
+    mp_err res = MP_OKAY; /* initialized like every sibling routine */
+
+    ARGCHK(a != NULL && r != NULL, MP_BADARG);
+    MP_DIGITS(&tmp) = 0;
+
+    /* r is overwritten while a is still being read, so square a copy
+     * when the two alias */
+    if (a == r) {
+        MP_CHECKOK(mp_init_copy(&tmp, a));
+        a = &tmp;
+    }
+
+    /* reset r to zero and make room for twice as many digits as a */
+    MP_USED(r) = 1;
+    MP_DIGIT(r, 0) = 0;
+    MP_CHECKOK(s_mp_pad(r, 2 * USED(a)));
+
+    pa = MP_DIGITS(a);
+    pr = MP_DIGITS(r);
+    a_used = MP_USED(a);
+    MP_USED(r) = 2 * a_used;
+
+    /* each input digit expands into two output digits, low half first */
+    for (ia = 0; ia < a_used; ia++) {
+        a_i = *pa++;
+        *pr++ = gf2m_SQR0(a_i);
+        *pr++ = gf2m_SQR1(a_i);
+    }
+
+    MP_CHECKOK(mp_bmod(r, p, r));
+    s_mp_clamp(r);
+    SIGN(r) = ZPOS;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+}
+
+/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
+ * Store the result in r. r could be x or y, and x could equal y.
+ * pp is the reduction polynomial as an mp_int and p is the same polynomial
+ * in the exponent-array form accepted by mp_bmod.
+ * Uses algorithm Modular_Division_GF(2^m) from
+ * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
+ * the Great Divide".
+ * Returns MP_OKAY on success, MP_RANGE when x reduces to zero modulo p
+ * (division by zero), or the first error reported by a helper.
+ */
+mp_err
+mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+           const unsigned int p[], mp_int *r)
+{
+    mp_int aa, bb, uu;
+    mp_int *a, *b, *u, *v;
+    mp_err res = MP_OKAY;
+
+    /* mark the temporaries as unallocated so CLEANUP is safe on any path */
+    MP_DIGITS(&aa) = 0;
+    MP_DIGITS(&bb) = 0;
+    MP_DIGITS(&uu) = 0;
+
+    /* copy the inputs before r is touched: r may alias x or y */
+    MP_CHECKOK(mp_init_copy(&aa, x));
+    MP_CHECKOK(mp_init_copy(&uu, y));
+    MP_CHECKOK(mp_init_copy(&bb, pp));
+    MP_CHECKOK(s_mp_pad(r, USED(pp)));
+    MP_USED(r) = 1;
+    MP_DIGIT(r, 0) = 0;
+
+    a = &aa;
+    b = &bb;
+    u = &uu;
+    v = r;
+    /* reduce x and y mod p */
+    MP_CHECKOK(mp_bmod(a, p, a));
+    MP_CHECKOK(mp_bmod(u, p, u));
+
+    /* A zero divisor never becomes odd, so the halving loop below would
+     * never terminate; report it instead. */
+    if (mp_cmp_z(a) == MP_EQ) {
+        res = MP_RANGE;
+        goto CLEANUP;
+    }
+
+    /* strip factors of t from a, dividing u by t modulo pp in step */
+    while (!mp_isodd(a)) {
+        s_mp_div2(a);
+        if (mp_isodd(u)) {
+            MP_CHECKOK(mp_badd(u, pp, u));
+        }
+        s_mp_div2(u);
+    }
+
+    do {
+        if (mp_cmp_mag(b, a) > 0) {
+            MP_CHECKOK(mp_badd(b, a, b));
+            MP_CHECKOK(mp_badd(v, u, v));
+            do {
+                s_mp_div2(b);
+                if (mp_isodd(v)) {
+                    MP_CHECKOK(mp_badd(v, pp, v));
+                }
+                s_mp_div2(v);
+            } while (!mp_isodd(b));
+        } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1))
+            break; /* a == 1: u now holds the quotient */
+        else {
+            MP_CHECKOK(mp_badd(a, b, a));
+            MP_CHECKOK(mp_badd(u, v, u));
+            do {
+                s_mp_div2(a);
+                if (mp_isodd(u)) {
+                    MP_CHECKOK(mp_badd(u, pp, u));
+                }
+                s_mp_div2(u);
+            } while (!mp_isodd(a));
+        }
+    } while (1);
+
+    MP_CHECKOK(mp_copy(u, r));
+
+CLEANUP:
+    mp_clear(&aa);
+    mp_clear(&bb);
+    mp_clear(&uu);
+    return res;
+}
+
+/* Convert the bit-string form of the polynomial a into a list of the bit
+ * positions that are set, highest position first.
+ * At most max entries of p are written; all max entries are zeroed first.
+ * Returns the total number of set bits in a, which can exceed max when the
+ * array is too small.
+ */
+int
+mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
+{
+    int digit_ix, bit_ix, slot;
+    int count = 0;
+
+    for (slot = 0; slot < max; slot++) {
+        p[slot] = 0;
+    }
+
+    /* scan from the most significant digit and bit downwards */
+    for (digit_ix = MP_USED(a) - 1; digit_ix >= 0; digit_ix--) {
+        const mp_digit word = MP_DIGITS(a)[digit_ix];
+
+        for (bit_ix = MP_DIGIT_BIT - 1; bit_ix >= 0; bit_ix--) {
+            if (((word >> bit_ix) & 1) == 0) {
+                continue;
+            }
+            if (count < max) {
+                p[count] = MP_DIGIT_BIT * digit_ix + bit_ix;
+            }
+            count++;
+        }
+    }
+
+    return count;
+}
+
+/* Build the bit-string form of a polynomial from its exponent array p.
+ * The array is read up to its terminating 0 entry; bit 0 (the constant
+ * term) is always set.  Returns the first error from mpl_set_bit, if any.
+ */
+mp_err
+mp_barr2poly(const unsigned int p[], mp_int *a)
+{
+    const unsigned int *term;
+    mp_err res = MP_OKAY;
+
+    mp_zero(a);
+    for (term = p; *term > 0; term++) {
+        MP_CHECKOK(mpl_set_bit(a, *term, 1));
+    }
+    MP_CHECKOK(mpl_set_bit(a, 0, 1));
+
+CLEANUP:
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h
new file mode 100644
index 0000000000..ed2c85493c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_H_
+#define _MP_GF2M_H_
+
+#include "mpi.h"
+
+/* c = a + b for binary polynomials (bitwise XOR); c may be a or b. */
+mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);
+/* c = a * b for binary polynomials, without reduction; all parameters may
+ * be identical. */
+mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);
+
+/* For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of int[], where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
+ */
+/* r = a mod f(t); r may be a. */
+mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);
+/* r = a * b mod f(t); r may be a or b, and a may be b. */
+mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
+ mp_int *r);
+/* r = a * a mod f(t); r may be a. */
+mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);
+/* r = y / x mod f(t); pp is f(t) as an mp_int and p the same polynomial in
+ * array form.  r may be x or y. */
+mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+ const unsigned int p[], mp_int *r);
+
+/* Convert between the mp_int bit-string form and the exponent-array form.
+ * mp_bpoly2arr fills at most max entries of p and returns the total number
+ * of nonzero coefficients; mp_barr2poly expects p to be terminated by 0. */
+int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
+mp_err mp_barr2poly(const unsigned int p[], mp_int *a);
+
+#endif /* _MP_GF2M_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c
new file mode 100644
index 0000000000..ddc21ec1cb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache.c
@@ -0,0 +1,788 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "prtypes.h"
+
+/*
+ * This file implements a single function: s_mpi_getProcessorLineSize();
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes.
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+/* x86-64 / GCC flavour: execute CPUID with `op` in EAX and ECX cleared,
+ * and hand the resulting EAX, EBX, ECX and EDX back through the four
+ * output pointers. */
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ /* the "0" input constraint places op in the same register as output
+ * operand 0, i.e. EAX */
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "cpuid\n\t"
+ : "=a"(*eax),
+ "=b"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+/* x86-64 / MSVC flavour: query CPUID leaf `op` through the __cpuid
+ * intrinsic and copy the four returned registers to the output pointers. */
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    /* slot order in which the registers are read back from __cpuid */
+    enum { REG_EAX = 0,
+           REG_EBX,
+           REG_ECX,
+           REG_EDX,
+           REG_COUNT };
+    int regs[REG_COUNT];
+
+    __cpuid(regs, op);
+    *eax = regs[REG_EAX];
+    *ebx = regs[REG_EBX];
+    *ecx = regs[REG_ECX];
+    *edx = regs[REG_EDX];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+/* 32-bit x86 / GCC flavour: execute CPUID with `op` in EAX and ECX cleared,
+ * and hand the resulting EAX, EBX, ECX and EDX back through the four
+ * output pointers. */
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ /* Some older processors don't fill the ecx register with cpuid, so clobber it
+ * before calling cpuid, so that there's no risk of picking random bits that
+ * erroneously indicate that absent CPU features are present.
+ * Also, GCC isn't smart enough to save the ebx PIC register on its own
+ * in this case, so do it by hand. Use edi to store ebx and pass the
+ * value returned in ebx from cpuid through edi. */
+ /* after the xchgl, EBX holds its saved value again and EDI (the "=D"
+ * output) carries the EBX value produced by cpuid */
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "mov %%ebx,%%edi\n\t"
+ "cpuid\n\t"
+ "xchgl %%ebx,%%edi\n\t"
+ : "=a"(*eax),
+ "=D"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
+ *
+ * XORs `flag` into the flags register, reads the register back, restores
+ * the original value and returns the bits that actually changed. A return
+ * value of 0 means the processor did not let the flag be flipped.
+ */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm__("pushfl\n\t" /* get the flags */
+ "popl %0\n\t"
+ "movl %0,%1\n\t" /* save the original flags */
+ "xorl %2,%0\n\t" /* flip the bit */
+ "pushl %0\n\t" /* set the flags */
+ "popfl\n\t"
+ "pushfl\n\t" /* get the flags again (for return) */
+ "popl %0\n\t"
+ "pushl %1\n\t" /* restore the original flags */
+ "popfl\n\t"
+ : "=r"(changedFlags),
+ "=r"(originalFlags),
+ "=r"(flag)
+ : "2"(flag));
+ /* bits that differ between the re-read and the original flags */
+ return changedFlags ^ originalFlags;
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ */
+/* wcpuid emits the two opcode bytes 0F A2 (the CPUID instruction encoding)
+ * directly instead of using a mnemonic */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+/* 32-bit x86 / MSVC flavour: execute CPUID with `op` in EAX and ECX cleared,
+ * and hand the resulting EAX, EBX, ECX and EDX back through the four
+ * output pointers. */
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+ unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+ /* the results are staged in locals because popad restores every
+ * general-purpose register before the asm block ends */
+ unsigned long Leax, Lebx, Lecx, Ledx;
+ __asm {
+ pushad
+ xor ecx,ecx
+ mov eax,op
+ wcpuid
+ mov Leax,eax
+ mov Lebx,ebx
+ mov Lecx,ecx
+ mov Ledx,edx
+ popad
+ }
+ *Reax = Leax;
+ *Rebx = Lebx;
+ *Recx = Lecx;
+ *Redx = Ledx;
+}
+
+/* MSVC version of changeFlag: XORs `flag` into the flags register, reads
+ * the register back, restores the original value and returns the bits that
+ * actually changed (0 when the flag could not be flipped). */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm {
+ push eax
+ push ebx
+ pushfd /* get the flags */
+ pop eax
+ push eax /* save the flags on the stack */
+ mov originalFlags,eax /* save the original flags */
+ mov ebx,flag
+ xor eax,ebx /* flip the bit */
+ push eax /* set the flags */
+ popfd
+ pushfd /* get the flags again (for return) */
+ pop eax
+ popfd /* restore the original flags */
+ mov changedFlags,eax
+ pop ebx
+ pop eax
+ }
+ /* bits that differ between the re-read and the original flags */
+ return changedFlags ^ originalFlags;
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485.
+ * Returns nonzero when changeFlag reports that AC_FLAG did not change. */
+static int
+is386(void)
+{
+    return changeFlag(AC_FLAG) == 0;
+}
+
+/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485.
+ * Returns nonzero when changeFlag reports that ID_FLAG did not change. */
+static int
+is486(void)
+{
+    return changeFlag(ID_FLAG) == 0;
+}
+#endif
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information
+ */
+
+/* storage type for a CacheType value inside struct _cache (one byte) */
+typedef unsigned char CacheTypeEntry;
+
+/* Classification of a cache descriptor byte as used by CacheMap.  The
+ * suffixes presumably mean instruction (i) and data (d) -- confirm against
+ * AP-485. */
+typedef enum {
+ Cache_NONE = 0,
+ Cache_UNKNOWN = 1,
+ Cache_TLB = 2,
+ Cache_TLBi = 3,
+ Cache_TLBd = 4,
+ Cache_Trace = 5,
+ Cache_L1 = 6,
+ Cache_L1i = 7,
+ Cache_L1d = 8,
+ Cache_L2 = 9,
+ Cache_L2i = 10,
+ Cache_L2d = 11,
+ Cache_L3 = 12,
+ Cache_L3i = 13,
+ Cache_L3d = 14
+} CacheType;
+
+/* one CacheMap entry: what a descriptor byte describes and its line size */
+struct _cache {
+ CacheTypeEntry type; /* a CacheType value */
+ unsigned char lineSize; /* 0 for entries that carry no line size */
+};
+/* Descriptor table indexed by a one-byte cache descriptor value: each entry
+ * gives the kind of cache/TLB the descriptor denotes and its line size
+ * (0 where no line size is recorded).  Every entry spells out both fields. */
+static const struct _cache CacheMap[256] = {
+    /* 00 */ { Cache_NONE, 0 },
+    /* 01 */ { Cache_TLBi, 0 },
+    /* 02 */ { Cache_TLBi, 0 },
+    /* 03 */ { Cache_TLBd, 0 },
+    /* 04 */ { Cache_TLBd, 0 },
+    /* 05 */ { Cache_UNKNOWN, 0 },
+    /* 06 */ { Cache_L1i, 32 },
+    /* 07 */ { Cache_UNKNOWN, 0 },
+    /* 08 */ { Cache_L1i, 32 },
+    /* 09 */ { Cache_UNKNOWN, 0 },
+    /* 0a */ { Cache_L1d, 32 },
+    /* 0b */ { Cache_UNKNOWN, 0 },
+    /* 0c */ { Cache_L1d, 32 },
+    /* 0d */ { Cache_UNKNOWN, 0 },
+    /* 0e */ { Cache_UNKNOWN, 0 },
+    /* 0f */ { Cache_UNKNOWN, 0 },
+    /* 10 */ { Cache_UNKNOWN, 0 },
+    /* 11 */ { Cache_UNKNOWN, 0 },
+    /* 12 */ { Cache_UNKNOWN, 0 },
+    /* 13 */ { Cache_UNKNOWN, 0 },
+    /* 14 */ { Cache_UNKNOWN, 0 },
+    /* 15 */ { Cache_UNKNOWN, 0 },
+    /* 16 */ { Cache_UNKNOWN, 0 },
+    /* 17 */ { Cache_UNKNOWN, 0 },
+    /* 18 */ { Cache_UNKNOWN, 0 },
+    /* 19 */ { Cache_UNKNOWN, 0 },
+    /* 1a */ { Cache_UNKNOWN, 0 },
+    /* 1b */ { Cache_UNKNOWN, 0 },
+    /* 1c */ { Cache_UNKNOWN, 0 },
+    /* 1d */ { Cache_UNKNOWN, 0 },
+    /* 1e */ { Cache_UNKNOWN, 0 },
+    /* 1f */ { Cache_UNKNOWN, 0 },
+    /* 20 */ { Cache_UNKNOWN, 0 },
+    /* 21 */ { Cache_UNKNOWN, 0 },
+    /* 22 */ { Cache_L3, 64 },
+    /* 23 */ { Cache_L3, 64 },
+    /* 24 */ { Cache_UNKNOWN, 0 },
+    /* 25 */ { Cache_L3, 64 },
+    /* 26 */ { Cache_UNKNOWN, 0 },
+    /* 27 */ { Cache_UNKNOWN, 0 },
+    /* 28 */ { Cache_UNKNOWN, 0 },
+    /* 29 */ { Cache_L3, 64 },
+    /* 2a */ { Cache_UNKNOWN, 0 },
+    /* 2b */ { Cache_UNKNOWN, 0 },
+    /* 2c */ { Cache_L1d, 64 },
+    /* 2d */ { Cache_UNKNOWN, 0 },
+    /* 2e */ { Cache_UNKNOWN, 0 },
+    /* 2f */ { Cache_UNKNOWN, 0 },
+    /* 30 */ { Cache_L1i, 64 },
+    /* 31 */ { Cache_UNKNOWN, 0 },
+    /* 32 */ { Cache_UNKNOWN, 0 },
+    /* 33 */ { Cache_UNKNOWN, 0 },
+    /* 34 */ { Cache_UNKNOWN, 0 },
+    /* 35 */ { Cache_UNKNOWN, 0 },
+    /* 36 */ { Cache_UNKNOWN, 0 },
+    /* 37 */ { Cache_UNKNOWN, 0 },
+    /* 38 */ { Cache_UNKNOWN, 0 },
+    /* 39 */ { Cache_L2, 64 },
+    /* 3a */ { Cache_UNKNOWN, 0 },
+    /* 3b */ { Cache_L2, 64 },
+    /* 3c */ { Cache_L2, 64 },
+    /* 3d */ { Cache_UNKNOWN, 0 },
+    /* 3e */ { Cache_UNKNOWN, 0 },
+    /* 3f */ { Cache_UNKNOWN, 0 },
+    /* 40 */ { Cache_L2, 0 },
+    /* 41 */ { Cache_L2, 32 },
+    /* 42 */ { Cache_L2, 32 },
+    /* 43 */ { Cache_L2, 32 },
+    /* 44 */ { Cache_L2, 32 },
+    /* 45 */ { Cache_L2, 32 },
+    /* 46 */ { Cache_UNKNOWN, 0 },
+    /* 47 */ { Cache_UNKNOWN, 0 },
+    /* 48 */ { Cache_UNKNOWN, 0 },
+    /* 49 */ { Cache_UNKNOWN, 0 },
+    /* 4a */ { Cache_UNKNOWN, 0 },
+    /* 4b */ { Cache_UNKNOWN, 0 },
+    /* 4c */ { Cache_UNKNOWN, 0 },
+    /* 4d */ { Cache_UNKNOWN, 0 },
+    /* 4e */ { Cache_UNKNOWN, 0 },
+    /* 4f */ { Cache_UNKNOWN, 0 },
+    /* 50 */ { Cache_TLBi, 0 },
+    /* 51 */ { Cache_TLBi, 0 },
+    /* 52 */ { Cache_TLBi, 0 },
+    /* 53 */ { Cache_UNKNOWN, 0 },
+    /* 54 */ { Cache_UNKNOWN, 0 },
+    /* 55 */ { Cache_UNKNOWN, 0 },
+    /* 56 */ { Cache_UNKNOWN, 0 },
+    /* 57 */ { Cache_UNKNOWN, 0 },
+    /* 58 */ { Cache_UNKNOWN, 0 },
+    /* 59 */ { Cache_UNKNOWN, 0 },
+    /* 5a */ { Cache_UNKNOWN, 0 },
+    /* 5b */ { Cache_TLBd, 0 },
+    /* 5c */ { Cache_TLBd, 0 },
+    /* 5d */ { Cache_TLBd, 0 },
+    /* 5e */ { Cache_UNKNOWN, 0 },
+    /* 5f */ { Cache_UNKNOWN, 0 },
+    /* 60 */ { Cache_UNKNOWN, 0 },
+    /* 61 */ { Cache_UNKNOWN, 0 },
+    /* 62 */ { Cache_UNKNOWN, 0 },
+    /* 63 */ { Cache_UNKNOWN, 0 },
+    /* 64 */ { Cache_UNKNOWN, 0 },
+    /* 65 */ { Cache_UNKNOWN, 0 },
+    /* 66 */ { Cache_L1d, 64 },
+    /* 67 */ { Cache_L1d, 64 },
+    /* 68 */ { Cache_L1d, 64 },
+    /* 69 */ { Cache_UNKNOWN, 0 },
+    /* 6a */ { Cache_UNKNOWN, 0 },
+    /* 6b */ { Cache_UNKNOWN, 0 },
+    /* 6c */ { Cache_UNKNOWN, 0 },
+    /* 6d */ { Cache_UNKNOWN, 0 },
+    /* 6e */ { Cache_UNKNOWN, 0 },
+    /* 6f */ { Cache_UNKNOWN, 0 },
+    /* 70 */ { Cache_Trace, 1 },
+    /* 71 */ { Cache_Trace, 1 },
+    /* 72 */ { Cache_Trace, 1 },
+    /* 73 */ { Cache_UNKNOWN, 0 },
+    /* 74 */ { Cache_UNKNOWN, 0 },
+    /* 75 */ { Cache_UNKNOWN, 0 },
+    /* 76 */ { Cache_UNKNOWN, 0 },
+    /* 77 */ { Cache_UNKNOWN, 0 },
+    /* 78 */ { Cache_UNKNOWN, 0 },
+    /* 79 */ { Cache_L2, 64 },
+    /* 7a */ { Cache_L2, 64 },
+    /* 7b */ { Cache_L2, 64 },
+    /* 7c */ { Cache_L2, 64 },
+    /* 7d */ { Cache_UNKNOWN, 0 },
+    /* 7e */ { Cache_UNKNOWN, 0 },
+    /* 7f */ { Cache_UNKNOWN, 0 },
+    /* 80 */ { Cache_UNKNOWN, 0 },
+    /* 81 */ { Cache_UNKNOWN, 0 },
+    /* 82 */ { Cache_L2, 32 },
+    /* 83 */ { Cache_L2, 32 },
+    /* 84 */ { Cache_L2, 32 },
+    /* 85 */ { Cache_L2, 32 },
+    /* 86 */ { Cache_L2, 64 },
+    /* 87 */ { Cache_L2, 64 },
+    /* 88 */ { Cache_UNKNOWN, 0 },
+    /* 89 */ { Cache_UNKNOWN, 0 },
+    /* 8a */ { Cache_UNKNOWN, 0 },
+    /* 8b */ { Cache_UNKNOWN, 0 },
+    /* 8c */ { Cache_UNKNOWN, 0 },
+    /* 8d */ { Cache_UNKNOWN, 0 },
+    /* 8e */ { Cache_UNKNOWN, 0 },
+    /* 8f */ { Cache_UNKNOWN, 0 },
+    /* 90 */ { Cache_UNKNOWN, 0 },
+    /* 91 */ { Cache_UNKNOWN, 0 },
+    /* 92 */ { Cache_UNKNOWN, 0 },
+    /* 93 */ { Cache_UNKNOWN, 0 },
+    /* 94 */ { Cache_UNKNOWN, 0 },
+    /* 95 */ { Cache_UNKNOWN, 0 },
+    /* 96 */ { Cache_UNKNOWN, 0 },
+    /* 97 */ { Cache_UNKNOWN, 0 },
+    /* 98 */ { Cache_UNKNOWN, 0 },
+    /* 99 */ { Cache_UNKNOWN, 0 },
+    /* 9a */ { Cache_UNKNOWN, 0 },
+    /* 9b */ { Cache_UNKNOWN, 0 },
+    /* 9c */ { Cache_UNKNOWN, 0 },
+    /* 9d */ { Cache_UNKNOWN, 0 },
+    /* 9e */ { Cache_UNKNOWN, 0 },
+    /* 9f */ { Cache_UNKNOWN, 0 },
+    /* a0 */ { Cache_UNKNOWN, 0 },
+    /* a1 */ { Cache_UNKNOWN, 0 },
+    /* a2 */ { Cache_UNKNOWN, 0 },
+    /* a3 */ { Cache_UNKNOWN, 0 },
+    /* a4 */ { Cache_UNKNOWN, 0 },
+    /* a5 */ { Cache_UNKNOWN, 0 },
+    /* a6 */ { Cache_UNKNOWN, 0 },
+    /* a7 */ { Cache_UNKNOWN, 0 },
+    /* a8 */ { Cache_UNKNOWN, 0 },
+    /* a9 */ { Cache_UNKNOWN, 0 },
+    /* aa */ { Cache_UNKNOWN, 0 },
+    /* ab */ { Cache_UNKNOWN, 0 },
+    /* ac */ { Cache_UNKNOWN, 0 },
+    /* ad */ { Cache_UNKNOWN, 0 },
+    /* ae */ { Cache_UNKNOWN, 0 },
+    /* af */ { Cache_UNKNOWN, 0 },
+    /* b0 */ { Cache_TLBi, 0 },
+    /* b1 */ { Cache_UNKNOWN, 0 },
+    /* b2 */ { Cache_UNKNOWN, 0 },
+    /* b3 */ { Cache_TLBd, 0 },
+    /* b4 */ { Cache_UNKNOWN, 0 },
+    /* b5 */ { Cache_UNKNOWN, 0 },
+    /* b6 */ { Cache_UNKNOWN, 0 },
+    /* b7 */ { Cache_UNKNOWN, 0 },
+    /* b8 */ { Cache_UNKNOWN, 0 },
+    /* b9 */ { Cache_UNKNOWN, 0 },
+    /* ba */ { Cache_UNKNOWN, 0 },
+    /* bb */ { Cache_UNKNOWN, 0 },
+    /* bc */ { Cache_UNKNOWN, 0 },
+    /* bd */ { Cache_UNKNOWN, 0 },
+    /* be */ { Cache_UNKNOWN, 0 },
+    /* bf */ { Cache_UNKNOWN, 0 },
+    /* c0 */ { Cache_UNKNOWN, 0 },
+    /* c1 */ { Cache_UNKNOWN, 0 },
+    /* c2 */ { Cache_UNKNOWN, 0 },
+    /* c3 */ { Cache_UNKNOWN, 0 },
+    /* c4 */ { Cache_UNKNOWN, 0 },
+    /* c5 */ { Cache_UNKNOWN, 0 },
+    /* c6 */ { Cache_UNKNOWN, 0 },
+    /* c7 */ { Cache_UNKNOWN, 0 },
+    /* c8 */ { Cache_UNKNOWN, 0 },
+    /* c9 */ { Cache_UNKNOWN, 0 },
+    /* ca */ { Cache_UNKNOWN, 0 },
+    /* cb */ { Cache_UNKNOWN, 0 },
+    /* cc */ { Cache_UNKNOWN, 0 },
+    /* cd */ { Cache_UNKNOWN, 0 },
+    /* ce */ { Cache_UNKNOWN, 0 },
+    /* cf */ { Cache_UNKNOWN, 0 },
+    /* d0 */ { Cache_UNKNOWN, 0 },
+    /* d1 */ { Cache_UNKNOWN, 0 },
+    /* d2 */ { Cache_UNKNOWN, 0 },
+    /* d3 */ { Cache_UNKNOWN, 0 },
+    /* d4 */ { Cache_UNKNOWN, 0 },
+    /* d5 */ { Cache_UNKNOWN, 0 },
+    /* d6 */ { Cache_UNKNOWN, 0 },
+    /* d7 */ { Cache_UNKNOWN, 0 },
+    /* d8 */ { Cache_UNKNOWN, 0 },
+    /* d9 */ { Cache_UNKNOWN, 0 },
+    /* da */ { Cache_UNKNOWN, 0 },
+    /* db */ { Cache_UNKNOWN, 0 },
+    /* dc */ { Cache_UNKNOWN, 0 },
+    /* dd */ { Cache_UNKNOWN, 0 },
+    /* de */ { Cache_UNKNOWN, 0 },
+    /* df */ { Cache_UNKNOWN, 0 },
+    /* e0 */ { Cache_UNKNOWN, 0 },
+    /* e1 */ { Cache_UNKNOWN, 0 },
+    /* e2 */ { Cache_UNKNOWN, 0 },
+    /* e3 */ { Cache_UNKNOWN, 0 },
+    /* e4 */ { Cache_UNKNOWN, 0 },
+    /* e5 */ { Cache_UNKNOWN, 0 },
+    /* e6 */ { Cache_UNKNOWN, 0 },
+    /* e7 */ { Cache_UNKNOWN, 0 },
+    /* e8 */ { Cache_UNKNOWN, 0 },
+    /* e9 */ { Cache_UNKNOWN, 0 },
+    /* ea */ { Cache_UNKNOWN, 0 },
+    /* eb */ { Cache_UNKNOWN, 0 },
+    /* ec */ { Cache_UNKNOWN, 0 },
+    /* ed */ { Cache_UNKNOWN, 0 },
+    /* ee */ { Cache_UNKNOWN, 0 },
+    /* ef */ { Cache_UNKNOWN, 0 },
+    /* f0 */ { Cache_UNKNOWN, 0 },
+    /* f1 */ { Cache_UNKNOWN, 0 },
+    /* f2 */ { Cache_UNKNOWN, 0 },
+    /* f3 */ { Cache_UNKNOWN, 0 },
+    /* f4 */ { Cache_UNKNOWN, 0 },
+    /* f5 */ { Cache_UNKNOWN, 0 },
+    /* f6 */ { Cache_UNKNOWN, 0 },
+    /* f7 */ { Cache_UNKNOWN, 0 },
+    /* f8 */ { Cache_UNKNOWN, 0 },
+    /* f9 */ { Cache_UNKNOWN, 0 },
+    /* fa */ { Cache_UNKNOWN, 0 },
+    /* fb */ { Cache_UNKNOWN, 0 },
+    /* fc */ { Cache_UNKNOWN, 0 },
+    /* fd */ { Cache_UNKNOWN, 0 },
+    /* fe */ { Cache_UNKNOWN, 0 },
+    /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * Fold a single CPUID(2) descriptor byte into the running result, using
+ * the CacheMap table above.
+ *
+ * 'val' is used directly as the CacheMap index. 'level' and 'lineSize' are
+ * in/out values: they are rewritten only when the descriptor is an L1/L1d,
+ * L2/L2d or L3/L3d entry and no lower level cache has been recorded yet.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+                           unsigned long *lineSize)
+{
+    CacheType kind = CacheMap[val].type;
+    int entryLevel = 0;
+
+    /* only interested in data caches */
+    /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
+     * Its table entry has a zero line size, so this check rejects it as
+     * a side effect. If that wasn't the case, it would have to be
+     * rejected explicitly */
+    if (CacheMap[val].lineSize == 0) {
+        return;
+    }
+
+    /* classify the entry; anything that is not L1/L2/L3 is ignored */
+    if ((kind == Cache_L1) || (kind == Cache_L1d)) {
+        entryLevel = 1;
+    } else if ((kind == Cache_L2) || (kind == Cache_L2d)) {
+        entryLevel = 2;
+    } else if ((kind == Cache_L3) || (kind == Cache_L3d)) {
+        entryLevel = 3;
+    }
+    if (entryLevel == 0) {
+        return;
+    }
+
+    /* an L1 entry always wins; L2 and L3 entries are taken only while
+     * nothing from a lower level has been stored */
+    if ((entryLevel == 1) || (*level >= entryLevel)) {
+        *level = entryLevel;
+        *lineSize = CacheMap[val].lineSize;
+    }
+}
+
+/*
+ * Break the low 32 bits of 'val' into four descriptor bytes and hand each
+ * one, most significant byte first, to getIntelCacheEntryLineSize.
+ */
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+                              int *level, unsigned long *lineSize)
+{
+    int shift;
+
+    for (shift = 24; shift >= 0; shift -= 8) {
+        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
+    }
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is not supported by this processor (cpuidLevel < 2)
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    int level = 4; /* higher than any level the entry parser stores */
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+    int repeat, count;
+
+    if (cpuidLevel < 2) {
+        return 0;
+    }
+
+    /* command '2' of the cpuid is intel's cache info call. Each byte of the
+     * 4 registers contain a potential descriptor for the cache. The CacheMap
+     * table maps the cache entry with the processor cache. Register 'al'
+     * contains a count value that cpuid '2' needs to be called in order to
+     * find all the cache descriptors. Only registers with the high bit set
+     * to 'zero' have valid descriptors. This code loops through all the
+     * required calls to cpuid '2' and passes any valid descriptors it finds
+     * to the getIntelRegisterCacheLineSize code, which breaks the registers
+     * down into their component descriptors. In the end the lineSize of the
+     * lowest level cache data cache is returned. */
+    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+    repeat = eax & 0xf;
+    count = 0;
+    while (count < repeat) {
+        unsigned long regs[4];
+        int r;
+
+        regs[0] = eax & 0xffffff00; /* 'al' is the count, not a descriptor */
+        regs[1] = ebx;
+        regs[2] = ecx;
+        regs[3] = edx;
+        for (r = 0; r < 4; r++) {
+            if ((regs[r] & 0x80000000) == 0) {
+                getIntelRegisterCacheLineSize(regs[r], &level, &lineSize);
+            }
+        }
+
+        count++;
+        /* fetch the next set of descriptors unless this was the last pass */
+        if (count != repeat) {
+            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+        }
+    }
+    return lineSize;
+}
+
+/*
+ * Cache line size lookup through the extended cpuid commands.
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ *
+ * The value passed in 'cpuidLevel' is not consulted; it is replaced by the
+ * Extended CPUID level before it is tested.
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+    unsigned long eax, ebx, ecx, edx;
+
+    /* get the Extended CPUID level */
+    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    if (cpuidLevel < 0x80000005) {
+        return 0;
+    }
+
+    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+    return ecx & 0xff; /* line Size, L1 Data Cache */
+}
+
+/*
+ * CPUID manufacturer ID strings. The macro defined next to each string is
+ * that string's index in manMap. MAN_UNKNOWN is one past the last valid
+ * index and stands for "no entry matched".
+ */
+static const char *const manMap[] = {
+#define INTEL 0
+    "GenuineIntel",
+#define AMD 1
+    "AuthenticAMD",
+#define CYRIX 2
+    "CyrixInstead",
+#define CENTAUR 3
+    "CentaurHauls",
+#define NEXGEN 4
+    "NexGenDriven",
+#define TRANSMETA 5
+    "GenuineTMx86",
+#define RISE 6
+    "RiseRiseRise",
+#define UMC 7
+    "UMC UMC UMC ",
+#define SIS 8
+    "Sis Sis Sis ",
+#define NATIONAL 9
+    "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+
+/* must not collide with any index above */
+#define MAN_UNKNOWN 10
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+/*
+ * Returns 1 when cpuid command '1' reports SSE2_FLAG in edx, 0 otherwise.
+ * Processors for which is386() or is486() is true, and processors whose
+ * cpuid command '0' returns a maximum level of 0, always yield 0.
+ */
+unsigned long
+s_mpi_is_sse2()
+{
+    unsigned long eax, ebx, ecx, edx;
+    unsigned long maxLevel;
+
+    if (is386() || is486()) {
+        return 0;
+    }
+
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    maxLevel = eax;
+    if (maxLevel == 0) {
+        /* has no SSE2 extensions */
+        return 0;
+    }
+
+    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+    return ((edx & SSE2_FLAG) != 0) ? 1 : 0;
+}
+#endif
+
+/*
+ * Returns the processor's cache line size in bytes, as reported by cpuid.
+ *
+ * On non-AMD_64 builds a 386 yields 0 (no cache) and a 486 yields 32
+ * without using cpuid. Otherwise the manufacturer ID string from cpuid
+ * command '0' selects the lookup: getIntelCacheLineSize for
+ * "GenuineIntel", getOtherCacheLineSize for everything else. If the
+ * selected lookup reports 0, 32 is returned.
+ */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    unsigned long eax, ebx, ecx, edx;
+    PRUint32 cpuid[3];
+    unsigned long cpuidLevel;
+    unsigned long cacheLineSize = 0;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[13];
+
+#if !defined(AMD_64)
+    if (is386()) {
+        return 0; /* 386 had no cache */
+    }
+    if (is486()) {
+        return 32; /* really? need more info */
+    }
+#endif
+
+    /* Pentium, cpuid command is available */
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx (in that order;
+     * note that edx comes before ecx).
+     */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+    memcpy(string, cpuid, sizeof(cpuid));
+    string[12] = 0;
+
+    /* the table entries are distinct, so the first match is the only one */
+    for (i = 0; i < n_manufacturers; i++) {
+        if (strcmp(manMap[i], string) == 0) {
+            manufacturer = i;
+            break;
+        }
+    }
+
+    if (manufacturer == INTEL) {
+        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which have cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id */
+    if (cacheLineSize == 0) {
+        cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ * Sigh, The PPC has some really nice features to help us determine cache
+ * size, since it had lots of direct control functions to do so. The POWER
+ * processor even has an instruction to do this, but it was dropped in
+ * PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ * The dcbz function would be a great way to determine cache line size except
+ * 1) it only works on write-back memory (it throws an exception otherwise),
+ * and 2) because so many mac programs 'knew' the processor cache size was
+ * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ * these programs happy. dcbzl works if 64 bit instructions are supported.
+ * If you know 64 bit instructions are supported, and that stack is
+ * write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array'.
+ * Emits one 'dcbzl' instruction whose operands are 'array' and the
+ * constant 0. There is no output operand; the "memory" clobber tells the
+ * compiler that memory contents changed behind its back. */
+static inline void
+dcbzl(char *array)
+{
+ __asm__("dcbzl %0, %1"
+ : /*no result*/
+ : "b%"(array), "r"(0)
+ : "memory");
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+/*
+ * Measure the cache block size by filling an aligned PPC_MAX_LINE_SIZE
+ * byte window with 0xff, clearing one block with dcbzl, and returning the
+ * largest power of two 'n' for which byte n-1 of the window reads back as
+ * zero. Returns 0 when not even the first byte was cleared.
+ */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    /* twice the maximum, so an aligned window always fits inside */
+    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
+    char *window;
+    int size;
+
+    /* align on a maximum line size boundary, so we know the clear
+     * starts at the first address of the window */
+    window = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+    /* set all the values to 1's */
+    memset(window, 0xff, PPC_MAX_LINE_SIZE);
+    /* clear one cache block starting at 'window' */
+    dcbzl(window);
+
+    /* find the size of the cleared area, that's our block size */
+    size = PPC_MAX_LINE_SIZE;
+    while (size != 0) {
+        if (window[size - 1] == 0) {
+            return size;
+        }
+        size = size / 2;
+    }
+    return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a great place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+/* Generic fallback: when none of the platform specific sections above has
+ * defined MPI_GET_PROCESSOR_LINE_SIZE_DEFINED, assume a 32 byte line
+ * (most common value, does not significantly impact performance)
+ */
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    const unsigned long assumedLineSize = 32;
+
+    return assumedLineSize;
+}
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
new file mode 100644
index 0000000000..d493b4762f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
@@ -0,0 +1,861 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 80
+manMap:
+ .quad .LC0
+ .quad .LC1
+ .quad .LC2
+ .quad .LC3
+ .quad .LC4
+ .quad .LC5
+ .quad .LC6
+ .quad .LC7
+ .quad .LC8
+ .quad .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 16
+/ freebl_cpuid: run the cpuid instruction with %rdi as the value of %rax,
+/ then store the resulting %rax, %rbx, %rcx and %rdx through the pointers
+/ passed in %rsi, %rdx, %rcx and %r8 respectively.
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+.LFB2:
+/ %rdx and %rcx are overwritten by cpuid, so the pointers they carry are
+/ moved to %r10 and %r11 first; %rbx is saved on the stack.
+ movq %rdx, %r10
+ pushq %rbx
+.LCFI0:
+ movq %rcx, %r11
+ movq %rdi, %rax
+/APP
+ cpuid
+
+/NO_APP
+ movq %rax, (%rsi)
+ movq %rbx, (%r10)
+ popq %rbx
+ movq %rcx, (%r11)
+ movq %rdx, (%r8)
+ ret
+.LFE2:
+ .size freebl_cpuid, .-freebl_cpuid
+/ getIntelCacheEntryLineSize: %rdi = CacheMap index, %rsi = pointer to the
+/ 32-bit level, %rdx = pointer to the 64-bit line size.
+/ Each CacheMap entry is two bytes: byte 0 is the type, byte 1 the line size.
+ .align 16
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+.LFB3:
+ leaq CacheMap(%rip), %r9
+ movq %rdx, %r10
+ movzbl 1(%r9,%rdi,2), %ecx
+ movzbl (%r9,%rdi,2), %r8d
+/ a zero line size means there is nothing to record
+ testb %cl, %cl
+ je .L2
+/ type 6 or 8: store level 1 and the line size unconditionally
+ cmpl $6, %r8d
+ sete %dl
+ cmpl $8, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L4
+ movl $1, (%rsi)
+.L9:
+ movzbl %cl, %eax
+ movq %rax, (%r10)
+ ret
+ .align 16
+.L4:
+/ load the current level; the type 9/11 test at .L11 runs only if it is > 1
+ movl (%rsi), %r11d
+ cmpl $1, %r11d
+ jg .L11
+.L6:
+/ type 12 or 14 is accepted only while the current level is > 2
+ cmpl $2, %r11d
+ jle .L2
+ cmpl $12, %r8d
+ sete %dl
+ cmpl $14, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L2
+ movzbq 1(%r9,%rdi,2), %rax
+ movl $3, (%rsi)
+ movq %rax, (%r10)
+ .align 16
+.L2:
+ rep ; ret
+ .align 16
+.L11:
+/ type 9 or 11: store level 2 and the line size, otherwise fall back to .L6
+ cmpl $9, %r8d
+ sete %dl
+ cmpl $11, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L6
+ movl $2, (%rsi)
+ jmp .L9
+.LFE3:
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+/ getIntelRegisterCacheLineSize: %rdi = register value, %rsi = level
+/ pointer, %rdx = line size pointer. Calls getIntelCacheEntryLineSize with
+/ bits 31-24, 23-16, 15-8 and 7-0 of %rdi, in that order; the last call is
+/ a tail jump. %rbx, %r12 and %r13 hold the three arguments across calls.
+ .align 16
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+.LFB4:
+ pushq %rbp
+.LCFI1:
+ movq %rsp, %rbp
+.LCFI2:
+ movq %rbx, -24(%rbp)
+.LCFI3:
+ movq %rdi, %rbx
+ shrq $24, %rdi
+ movq %r12, -16(%rbp)
+.LCFI4:
+ movq %r13, -8(%rbp)
+.LCFI5:
+ andl $255, %edi
+ subq $24, %rsp
+.LCFI6:
+ movq %rsi, %r13
+ movq %rdx, %r12
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $16, %rdi
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $8, %rdi
+ andl $255, %ebx
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+/ restore the saved registers, drop the frame and tail-jump for the low byte
+ movq %r12, %rdx
+ movq %r13, %rsi
+ movq %rbx, %rdi
+ movq 8(%rsp), %r12
+ movq (%rsp), %rbx
+ movq 16(%rsp), %r13
+ leave
+ jmp getIntelCacheEntryLineSize
+.LFE4:
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+/ s_mpi_getProcessorLineSize: returns the cache line size in %rax.
+/ Outline: cpuid 0 -> build the 12 byte vendor string -> compare it with
+/ every manMap entry -> index 0 takes the cpuid 2 descriptor path, any
+/ other index takes the cpuid 0x80000000 / 0x80000005 path at .L19.
+/ A result of 0 on either path is replaced by 32.
+ .align 16
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+.LFB7:
+ pushq %rbp
+.LCFI7:
+ xorl %edi, %edi
+ movq %rsp, %rbp
+.LCFI8:
+ pushq %r15
+.LCFI9:
+ leaq -136(%rbp), %r8
+ leaq -144(%rbp), %rcx
+ leaq -152(%rbp), %rdx
+ pushq %r14
+.LCFI10:
+ leaq -160(%rbp), %rsi
+ leaq -128(%rbp), %r14
+ pushq %r13
+.LCFI11:
+ leaq manMap(%rip), %r13
+ pushq %r12
+.LCFI12:
+/ %r12d = index of the matching manMap entry, preset to 9
+ movl $9, %r12d
+ pushq %rbx
+.LCFI13:
+ xorl %ebx, %ebx
+ subq $200, %rsp
+.LCFI14:
+ call freebl_cpuid
+/ cpuid 0 results: -160 = eax, -152 = ebx, -144 = ecx, -136 = edx.
+/ The string at -128(%rbp) is assembled as ebx, edx, ecx plus a NUL byte;
+/ eax (the cpuid level) is kept in %r15.
+ movq -152(%rbp), %rax
+ movq -160(%rbp), %r15
+ movb $0, -116(%rbp)
+ movl %eax, -128(%rbp)
+ movq -136(%rbp), %rax
+ movl %eax, -124(%rbp)
+ movq -144(%rbp), %rax
+ movl %eax, -120(%rbp)
+ .align 16
+.L18:
+/ strcmp(manMap[%ebx], string) for %ebx = 0..9; remember a matching index
+ movslq %ebx,%rax
+ movq %r14, %rsi
+ movq (%r13,%rax,8), %rdi
+ call strcmp@PLT
+ testl %eax, %eax
+ cmove %ebx, %r12d
+ incl %ebx
+ cmpl $9, %ebx
+ jle .L18
+/ a non-zero index (not the first manMap entry) goes to the extended path
+ testl %r12d, %r12d
+ jne .L19
+/ descriptor path: level = 4 at -204(%rbp), lineSize = 0 at -200(%rbp);
+/ skipped (result 0) when the cpuid level minus one is <= 0
+ xorl %eax, %eax
+ decl %r15d
+ movl $4, -204(%rbp)
+ movq $0, -200(%rbp)
+ jle .L21
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+/ %r12d = repeat count (eax & 15), %ebx = loop counter
+ movq -192(%rbp), %rdi
+ movl %edi, %r12d
+ andl $15, %r12d
+ cmpl %r12d, %ebx
+ jl .L30
+ jmp .L38
+ .align 16
+.L25:
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L40
+.L26:
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L41
+.L27:
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L42
+.L28:
+/ end of one pass: call cpuid 2 again unless this was the last pass
+ incl %ebx
+ cmpl %r12d, %ebx
+ je .L24
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ call freebl_cpuid
+.L24:
+ cmpl %r12d, %ebx
+ jge .L38
+ movq -192(%rbp), %rdi
+.L30:
+/ each register is parsed only when its bit 31 is clear; eax is passed
+/ with its low byte masked off
+ testl $2147483648, %edi
+ jne .L25
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ andl $4294967040, %edi
+ call getIntelRegisterCacheLineSize
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L26
+.L40:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L27
+.L41:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L28
+.L42:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ jmp .L28
+.L38:
+ movq -200(%rbp), %rax
+.L21:
+/ return the line size, or 32 when it is 0
+ movq %rax, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L19:
+/ extended path: cpuid 0x80000000; if the returned eax is above
+/ 0x80000004, .L43 runs cpuid 0x80000005 and takes the low byte of ecx
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483648, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+ movl $2147483652, %eax
+ cmpq %rax, -240(%rbp)
+ ja .L43
+.L32:
+ movq %rbx, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L43:
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483653, %edi
+ call freebl_cpuid
+ movzbq -224(%rbp), %rbx
+ jmp .L32
+.LFE7:
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
new file mode 100644
index 0000000000..af17ebcb42
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
@@ -0,0 +1,902 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 40
+manMap:
+ .long .LC0
+ .long .LC1
+ .long .LC2
+ .long .LC3
+ .long .LC4
+ .long .LC5
+ .long .LC6
+ .long .LC7
+ .long .LC8
+ .long .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 4
+/ freebl_cpuid: %eax = cpuid function number, %edx = pointer that receives
+/ eax; the pointers that receive ebx, ecx and edx are read from the stack
+/ (24, 28 and 32(%esp) after the prologue).
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ subl $8, %esp
+ movl %edx, %ebp
+/APP
+/ %ebx is preserved around cpuid; its cpuid value is handed out in %esi.
+/ %ecx is zeroed before cpuid executes.
+ pushl %ebx
+ xorl %ecx, %ecx
+ cpuid
+ mov %ebx,%esi
+ popl %ebx
+
+/NO_APP
+ movl %eax, (%ebp)
+ movl 24(%esp), %eax
+ movl %esi, (%eax)
+ movl 28(%esp), %eax
+ movl %ecx, (%eax)
+ movl 32(%esp), %eax
+ movl %edx, (%eax)
+ addl $8, %esp
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+ .size freebl_cpuid, .-freebl_cpuid
+/ changeFlag: %eax = mask of flag-register bits to toggle.
+/ Saves the current flags in %ecx, writes flags ^ mask, reads the flags
+/ back into %edx, restores the saved flags, and returns (read-back ^ saved)
+/ in %eax, i.e. the bits that actually changed.
+ .align 4
+ .type changeFlag, @function
+changeFlag:
+/APP
+ pushfl
+ popl %edx
+ movl %edx,%ecx
+ xorl %eax,%edx
+ pushl %edx
+ popfl
+ pushfl
+ popl %edx
+ pushl %ecx
+ popfl
+
+/NO_APP
+ xorl %ecx, %edx
+ movl %edx, %eax
+ ret
+ .size changeFlag, .-changeFlag
+/ getIntelCacheEntryLineSize: %eax = CacheMap index, %edx = pointer to the
+/ level, lineSize pointer on the stack (16(%esp) after the three pushes).
+/ Each CacheMap entry is two bytes: byte 0 is the type, byte 1 the line size.
+ .align 4
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+/ load the GOT base into %ebx for the position independent CacheMap access
+ call .L17
+.L17:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx
+ movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx
+ movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al
+/ a zero line size means there is nothing to record
+ testb %al, %al
+ movl 16(%esp), %edi
+ je .L3
+/ type 6 or 8 -> .L6 (level 1, unconditional)
+ cmpl $6, %ecx
+ je .L6
+ cmpl $8, %ecx
+ je .L6
+ movl (%edx), %esi
+ cmpl $1, %esi
+ jg .L15
+.L8:
+/ type 12 or 14 -> .L12 (level 3), only while the current level is > 2
+ cmpl $2, %esi
+ jle .L3
+ cmpl $12, %ecx
+ je .L12
+ cmpl $14, %ecx
+ je .L12
+ .align 4
+.L3:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L6:
+ movzbl %al, %eax
+ movl $1, (%edx)
+ movl %eax, (%edi)
+.L16:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L15:
+/ reached with current level > 1: type 9 or 11 -> level 2, else back to .L8
+ cmpl $9, %ecx
+ je .L9
+ cmpl $11, %ecx
+ jne .L8
+.L9:
+ movzbl %al, %eax
+ movl $2, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+.L12:
+ movzbl %al, %eax
+ movl $3, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+/ getIntelRegisterCacheLineSize: %eax = register value, %edx = level
+/ pointer, lineSize pointer at 8(%ebp). Calls getIntelCacheEntryLineSize
+/ with bits 31-24, 23-16, 15-8 and 7-0 of the value, in that order; the
+/ last call is a tail jump that reuses the caller's stack argument slot.
+ .align 4
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ movl 8(%ebp), %edi
+ movl %eax, %esi
+/ the level pointer is parked at -12(%ebp) and reloaded before every call
+ movl %edx, -12(%ebp)
+ shrl $24, %eax
+ pushl %edi
+ call getIntelCacheEntryLineSize
+ movl %esi, %eax
+ pushl %edi
+ shrl $16, %eax
+ movl -12(%ebp), %edx
+ andl $255, %eax
+ call getIntelCacheEntryLineSize
+ pushl %edi
+ movl %esi, %edx
+ movzbl %dh, %eax
+ movl -12(%ebp), %edx
+ call getIntelCacheEntryLineSize
+ andl $255, %esi
+ movl %edi, 8(%ebp)
+ movl -12(%ebp), %edx
+ addl $12, %esp
+ leal -8(%ebp), %esp
+ movl %esi, %eax
+ popl %esi
+ popl %edi
+ leave
+ jmp getIntelCacheEntryLineSize
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+/ s_mpi_getProcessorLineSize: returns the cache line size in %eax.
+/ Outline: two changeFlag probes (early return 0 or 32) -> cpuid 0 ->
+/ build the 12 byte vendor string -> compare it with every manMap entry ->
+/ index 0 takes the cpuid 2 descriptor path, any other index takes the
+/ cpuid 0x80000000 / 0x80000005 path at .L29. A result of 0 becomes 32.
+ .align 4
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ subl $188, %esp
+ call .L52
+.L52:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx
+/ -168(%ebp) = index of the matching manMap entry, preset to 9
+ movl $9, -168(%ebp)
+/ first probe: changeFlag(0x40000); if no bit changed, return 0
+/ (this appears to be the is386() test of the C source)
+ movl $262144, %eax
+ call changeFlag
+ xorl %edx, %edx
+ testl %eax, %eax
+ jne .L50
+.L19:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+ .align 4
+.L50:
+/ second probe: changeFlag(0x200000); if no bit changed, return 32
+/ (this appears to be the is486() test of the C source)
+ movl $2097152, %eax
+ call changeFlag
+ testl %eax, %eax
+ movl $32, %edx
+ je .L19
+ leal -108(%ebp), %eax
+ pushl %eax
+ leal -112(%ebp), %eax
+ pushl %eax
+ leal -116(%ebp), %eax
+ pushl %eax
+ leal -120(%ebp), %edx
+ xorl %eax, %eax
+ call freebl_cpuid
+/ cpuid 0 results: -120 = eax, -116 = ebx, -112 = ecx, -108 = edx.
+/ The string at -104(%ebp) is assembled as ebx, edx, ecx plus a NUL byte;
+/ eax (the cpuid level) is kept at -164(%ebp).
+ movl -120(%ebp), %eax
+ movl %eax, -164(%ebp)
+ movl -116(%ebp), %eax
+ movl %eax, -104(%ebp)
+ movl -108(%ebp), %eax
+ movl %eax, -100(%ebp)
+ movl -112(%ebp), %eax
+ movl %eax, -96(%ebp)
+ movb $0, -92(%ebp)
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -104(%ebp), %edi
+ .align 4
+.L28:
+/ strcmp(manMap[%esi], string) for %esi = 0..9; remember a matching index
+ subl $8, %esp
+ pushl %edi
+ pushl manMap@GOTOFF(%ebx,%esi,4)
+ call strcmp@PLT
+ addl $16, %esp
+ testl %eax, %eax
+ jne .L26
+ movl %esi, -168(%ebp)
+.L26:
+ incl %esi
+ cmpl $9, %esi
+ jle .L28
+/ a non-zero index (not the first manMap entry) goes to the extended path
+ movl -168(%ebp), %eax
+ testl %eax, %eax
+ jne .L29
+/ descriptor path: level = 4 at -144(%ebp), lineSize = 0 at -140(%ebp);
+/ skipped (result 0) when the cpuid level is <= 1
+ xorl %eax, %eax
+ cmpl $1, -164(%ebp)
+ movl $4, -144(%ebp)
+ movl $0, -140(%ebp)
+ jle .L41
+ leal -124(%ebp), %edx
+ movl %edx, -188(%ebp)
+ leal -128(%ebp), %eax
+ pushl %edx
+ movl %eax, -184(%ebp)
+ leal -132(%ebp), %edx
+ pushl %eax
+ movl %edx, -180(%ebp)
+ movl $2, %eax
+ pushl %edx
+ leal -136(%ebp), %edx
+ call freebl_cpuid
+/ %edi = repeat count (eax & 15), %esi = loop counter
+ movl -136(%ebp), %eax
+ movl %eax, %edi
+ andl $15, %edi
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -140(%ebp), %edx
+ cmpl %edi, %esi
+ movl %edx, -176(%ebp)
+ jl .L40
+ jmp .L48
+ .align 4
+.L49:
+ movl -136(%ebp), %eax
+.L40:
+/ each register is parsed only when its sign bit (bit 31) is clear; eax is
+/ passed with its low byte cleared
+ testl %eax, %eax
+ js .L35
+ xorb %al, %al
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L35:
+ movl -132(%ebp), %eax
+ testl %eax, %eax
+ js .L36
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L36:
+ movl -128(%ebp), %eax
+ testl %eax, %eax
+ js .L37
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L37:
+ movl -124(%ebp), %eax
+ testl %eax, %eax
+ js .L38
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L38:
+/ end of one pass: call cpuid 2 again unless this was the last pass
+ incl %esi
+ cmpl %edi, %esi
+ je .L34
+ pushl -188(%ebp)
+ pushl -184(%ebp)
+ pushl -180(%ebp)
+ leal -136(%ebp), %edx
+ movl $2, %eax
+ call freebl_cpuid
+ addl $12, %esp
+.L34:
+ cmpl %edi, %esi
+ jl .L49
+.L48:
+ movl -140(%ebp), %eax
+.L41:
+/ return the line size, or 32 when it is 0
+ testl %eax, %eax
+ jne .L44
+ movb $32, %al
+.L44:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %eax, %edx
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+.L29:
+/ extended path: cpuid 0x80000000; if the returned eax is above
+/ 0x80000004 (unsigned), .L51 runs cpuid 0x80000005 and takes the low
+/ byte of ecx. The result is kept at -172(%ebp), initially 0.
+ leal -148(%ebp), %eax
+ movl %eax, -192(%ebp)
+ movl $0, -172(%ebp)
+ leal -152(%ebp), %edi
+ pushl %eax
+ pushl %edi
+ leal -156(%ebp), %esi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483648, %eax
+ call freebl_cpuid
+ addl $12, %esp
+ cmpl $-2147483644, -160(%ebp)
+ ja .L51
+.L42:
+ movl -172(%ebp), %eax
+ jmp .L41
+.L51:
+ pushl -192(%ebp)
+ pushl %edi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483643, %eax
+ call freebl_cpuid
+ movzbl -152(%ebp), %edx
+ addl $12, %esp
+ movl %edx, -172(%ebp)
+ jmp .L42
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h
new file mode 100644
index 0000000000..0cc868a14b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-config.h
@@ -0,0 +1,56 @@
+/* Default configuration for MPI library
+ *
+ * Every option below is wrapped in #ifndef, so a value supplied by the
+ * build (or defined before this header is included) takes precedence
+ * over the default given here.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_CONFIG_H_
+#define MPI_CONFIG_H_
+
+/*
+ For boolean options,
+ 0 = no
+ 1 = yes
+
+ Other options are documented individually.
+
+ */
+
+#ifndef MP_IOFUNC
+#define MP_IOFUNC 0 /* include mp_print() ? */
+#endif
+
+#ifndef MP_MODARITH
+#define MP_MODARITH 1 /* include modular arithmetic ? */
+#endif
+
+#ifndef MP_LOGTAB
+#define MP_LOGTAB 1 /* use table of logs instead of log()? */
+#endif
+
+#ifndef MP_ARGCHK
+/*
+ 0 = no parameter checks
+ 1 = runtime checks, continue execution and return an error to caller
+ 2 = assertions; dump core on parameter errors
+
+ When not set by the build, DEBUG builds default to 2 and all other
+ builds default to 1.
+ */
+#ifdef DEBUG
+#define MP_ARGCHK 2 /* how to check input arguments */
+#else
+#define MP_ARGCHK 1 /* how to check input arguments */
+#endif
+#endif
+
+#ifndef MP_DEBUG
+#define MP_DEBUG 0 /* print diagnostic output? (enables DIAG() in mpi-priv.h) */
+#endif
+
+#ifndef MP_DEFPREC
+#define MP_DEFPREC 64 /* default precision, in digits, for new mp_int's */
+#endif
+
+#ifndef MP_SQUARE
+#define MP_SQUARE 1 /* use separate squaring code? (0 maps s_mp_sqr to s_mp_mul) */
+#endif
+
+#endif /* ifndef MPI_CONFIG_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
new file mode 100644
index 0000000000..b4333fb6b4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -0,0 +1,246 @@
+/*
+ * mpi-priv.h - Private header file for MPI
+ * Arbitrary precision integer arithmetic library
+ *
+ * NOTE WELL: the content of this header file is NOT part of the "public"
+ * API for the MPI library, and may change at any time.
+ * Application programs that use libmpi should NOT include this header file.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+/*
+ * DIAG(T, V) - debug-only trace helper. Writes the label string T, then
+ * the value V via mp_print(), then a newline, all to stderr. When
+ * MP_DEBUG is 0 the macro expands to nothing.
+ *
+ * T is written with fputs() rather than being used as a printf format,
+ * so a '%' in the label can never be taken as a conversion specification.
+ */
+#define DIAG(T, V)           \
+    {                        \
+        fputs(T, stderr);    \
+        mp_print(V, stderr); \
+        fputc('\n', stderr); \
+    }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+ the math library to get the log() function. Either way, LOG_V_2(R)
+ yields log_R(2).
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+
+ LOG_V_2(R) indexes the table directly with no bounds check, so the
+ caller must keep R within the range above.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+ If MP_LOGTAB is zero (or undefined), use the math library to compute
+ the logarithms on the fly. Otherwise, use the table.
+ Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
+
+/* {{{ Digit arithmetic macros */
+
+/*
+ When adding and multiplying digits, the results can be larger than
+ can be contained in an mp_digit. Thus, an mp_word is used. These
+ macros mask off the upper and lower digits of the mp_word (the
+ mp_word may be more than 2 mp_digits wide, but we only concern
+ ourselves with the low-order 2 mp_digits)
+
+ CARRYOUT(W)      - W shifted right by DIGIT_BIT, truncated to an mp_digit
+ ACCUM(W)         - W truncated to an mp_digit
+ MP_MIN / MP_MAX  - smaller / larger of two values; each argument may be
+                    evaluated twice, so avoid side effects
+ MP_HOWMANY(a, b) - (a + b - 1) / b, i.e. how many b-sized pieces cover a
+ MP_ROUNDUP(a, b) - MP_HOWMANY(a, b) * b, i.e. a rounded up to a multiple of b
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b))
+#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b))
+
+/* }}} */
+
+/* {{{ Comparison constants (compared against the results of s_mp_cmp() and s_mp_cmp_d()) */
+
+#define MP_LT -1 /* comparison result: less than */
+#define MP_EQ 0 /* comparison result: equal */
+#define MP_GT 1 /* comparison result: greater than */
+
+/* }}} */
+
+/* {{{ private function declarations */
+
+void s_mp_setz(mp_digit *dp, mp_size count); /* zero count digits at dp */
+void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy count digits from sp to dp */
+void *s_mp_alloc(size_t nb, size_t ni); /* general allocator: nb items of ni bytes */
+void s_mp_free(void *ptr); /* general free function */
+
+mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
+mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */
+
+void s_mp_clamp(mp_int *mp); /* clip leading zeroes */
+
+void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */
+
+mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */
+void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */
+mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
+void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */
+void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */
+void s_mp_div_2(mp_int *mp); /* divide by 2 in place */
+mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */
+mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
+/* normalize for division */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
+mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
+mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
+/* unsigned digit divide */
+mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
+/* Barrett reduction */
+mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
+mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c); /* 3-operand form of s_mp_add: result goes to c */
+mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
+mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c); /* 3-operand form of s_mp_sub: result goes to c */
+mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
+/* a += b * RADIX^offset */
+mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
+#if MP_SQUARE
+mp_err s_mp_sqr(mp_int *a); /* magnitude square */
+#else
+#define s_mp_sqr(a) s_mp_mul(a, a)
+#endif
+mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
+mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */
+int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
+int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */
+int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */
+int s_mp_ispow2d(mp_digit d); /* is d a power of 2? (callers treat a result >= 0 as the exponent) */
+
+int s_mp_tovalue(char ch, int r); /* convert ch to value */
+char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
+int s_mp_outlen(int bits, int r); /* output length in bytes */
+mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */
+mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c);
+mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
+
+#ifdef NSS_USE_COMBA
+PR_STATIC_ASSERT(sizeof(mp_digit) == 8); /* comba code is only built with 8-byte digits */
+#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1))) /* nonzero with exactly one bit set */
+
+void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C); /* fixed-size multiplies; s_mp_mulg() picks one when both operands use exactly 4/8/16/32 digits */
+void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); /* fixed-size squarings; mp_sqr() picks one when the operand uses exactly 4/8/16/32 digits */
+void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+#endif /* end NSS_USE_COMBA */
+
+/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
+
+/* Calling-convention decoration for routines that may be implemented in
+ * assembler: __cdecl for IBM C on OS/2, empty everywhere else. */
+#if defined(__OS2__) && defined(__IBMC__)
+#define MPI_ASM_DECL __cdecl
+#else
+#define MPI_ASM_DECL
+#endif
+
+#ifdef MPI_AMD64
+
+mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit);
+mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit);
+
+/*
+ * On AMD64 the two multiply primitives are macros over the vec64 routines:
+ * the digit returned by the routine is stored at c[a_len]. Every macro
+ * argument is parenthesized so that expression arguments (for example
+ * "digits + offset") bind as the caller intended. Note that c and a_len
+ * are each evaluated twice, so arguments must not have side effects.
+ */
+
+/* c = a * b */
+#define s_mpv_mul_d(a, a_len, b, c) \
+    ((mp_digit *)(c))[(a_len)] = s_mpv_mul_set_vec64((c), (a), (a_len), (b))
+
+/* c += a * b */
+#define s_mpv_mul_d_add(a, a_len, b, c) \
+    ((mp_digit *)(c))[(a_len)] = s_mpv_mul_add_vec64((c), (a), (a_len), (b))
+
+#else
+
+/* c = a * b */
+void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+                              mp_digit b, mp_digit *c);
+/* c += a * b */
+void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                                  mp_digit b, mp_digit *c);
+
+#endif
+
+/* NOTE(review): from the names, the _prop variants appear to propagate the
+ * final carry into higher digits of c, and the CT variant to do so over a
+ * fixed c_len; confirm against the implementations. */
+void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
+                                       mp_size a_len, mp_digit b,
+                                       mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a,
+                                         mp_size a_len, mp_digit b,
+                                         mp_digit *c, mp_size c_len);
+void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
+                                     mp_size a_len,
+                                     mp_digit *sqrs);
+
+mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
+                                    mp_digit divisor, mp_digit *quot, mp_digit *rem);
+
+/* c += a * b * (MP_RADIX ** offset); */
+/* Callers of this macro should be aware that the return type might vary;
+ * it should be treated as a void function. */
+#define s_mp_mul_d_add_offset(a, b, c, off) \
+    s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), (b), MP_DIGITS(c) + (off))
+
+typedef struct {
+ mp_int N; /* modulus N */
+ mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX (n0 is presumably the low digit of N - confirm) */
+} mp_mont_modulus;
+
+mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm); /* NOTE(review): presumably the Montgomery product of a and b into c - confirm */
+mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm); /* NOTE(review): presumably Montgomery reduction of T - confirm */
+
+/*
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache exists, it should return the smallest line size (which is
+ * usually that of the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * see mpcpucache.c for the implementation.
+ *
+ * Declared with (void) so that this is a real prototype: an empty
+ * parameter list would leave the arguments unchecked.
+ */
+unsigned long s_mpi_getProcessorLineSize(void);
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 0000000000..7749dc710f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,5241 @@
+/*
+ * mpi.c
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+#include <assert.h>
+
+#if defined(__arm__) && \
+ ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* The inlined assembler versions do not work on 16-bit Thumb or ARM v3,
+ so turn off the assembly multiply and square selections there. */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror(); the trailing comment on each
+ entry names the result code(s) it describes. */
+static const char *mp_err_string[] = {
+ "unknown result code", /* say what? (entry 0: no named code) */
+ "boolean true", /* MP_OKAY, MP_YES */
+ "boolean false", /* MP_NO */
+ "out of memory", /* MP_MEM */
+ "argument out of range", /* MP_RANGE */
+ "invalid input parameter", /* MP_BADARG */
+ "result is undefined" /* MP_UNDEF */
+};
+
+/* Value to digit maps for radix conversion */
+
+/* s_dmap_1 - standard digits and letters: 64 characters in the order
+ 0-9, A-Z, a-z, '+', '/' */
+static const char *s_dmap_1 =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Precision, in digits, handed to mp_init_size() by mp_init() */
+static mp_size s_mp_defprec = MP_DEFPREC;
+
+/* Report the default precision currently in effect. */
+mp_size
+mp_get_prec(void)
+{
+    mp_size current = s_mp_defprec;
+
+    return current;
+} /* end mp_get_prec() */
+
+/*
+ * Change the default precision. Passing 0 restores the compile-time
+ * default, MP_DEFPREC; any other value is stored as given.
+ */
+void
+mp_set_prec(mp_size prec)
+{
+    s_mp_defprec = (prec != 0) ? prec : MP_DEFPREC;
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF
+/*
+ * Mark each in-use digit of mp as undefined memory for Valgrind's
+ * memcheck (only built under CT_VERIF). Digits are marked one at a time.
+ */
+void
+mp_taint(mp_int *mp)
+{
+    size_t ix = 0;
+
+    while (ix < mp->used) {
+        VALGRIND_MAKE_MEM_UNDEFINED(mp->dp + ix, sizeof(mp_digit));
+        ++ix;
+    }
+}
+
+/*
+ * Counterpart of mp_taint(): mark each in-use digit of mp as defined
+ * memory for Valgrind's memcheck (only built under CT_VERIF).
+ */
+void
+mp_untaint(mp_int *mp)
+{
+    size_t ix = 0;
+
+    while (ix < mp->used) {
+        VALGRIND_MAKE_MEM_DEFINED(mp->dp + ix, sizeof(mp_digit));
+        ++ix;
+    }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+  mp_init(mp)
+
+  Create a zero-valued mp_int using the current default precision.
+  This simply forwards to mp_init_size(), so the result is whatever
+  that call reports (MP_OKAY on success, MP_MEM if the digit buffer
+  could not be allocated).
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+    mp_err status = mp_init_size(mp, s_mp_defprec);
+
+    return status;
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+  mp_init_size(mp, prec)
+
+  Set up mp as a non-negative value occupying one digit, backed by a
+  buffer of at least prec digits. The requested precision is rounded up
+  to a multiple of the current default precision before allocating.
+
+  Returns MP_OKAY on success, or MP_MEM when the buffer cannot be
+  allocated (mp's digit pointer is left NULL in that case). A NULL mp
+  or a zero prec is rejected through ARGCHK with MP_BADARG.
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+    mp_size rounded;
+
+    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+    rounded = MP_ROUNDUP(prec, s_mp_defprec);
+
+    DIGITS(mp) = s_mp_alloc(rounded, sizeof(mp_digit));
+    if (DIGITS(mp) == NULL)
+        return MP_MEM;
+
+    ALLOC(mp) = rounded;
+    USED(mp) = 1;
+    SIGN(mp) = ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+  mp_init_copy(mp, from)
+
+  Initialize mp as a duplicate of from: a new buffer with the same
+  allocation size is obtained, the used digits are copied into it, and
+  the used count and sign are taken over. When mp and from are the same
+  object nothing is done.
+
+  Returns MP_OKAY on success, or MP_MEM when the buffer cannot be
+  allocated (mp's digit pointer is left NULL in that case).
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+    mp_digit *buf;
+
+    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+    if (mp == from)
+        return MP_OKAY;
+
+    buf = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+    DIGITS(mp) = buf;
+    if (buf == NULL)
+        return MP_MEM;
+
+    s_mp_copy(DIGITS(from), buf, USED(from));
+    SIGN(mp) = SIGN(from);
+    ALLOC(mp) = ALLOC(from);
+    USED(mp) = USED(from);
+
+    return MP_OKAY;
+
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+  mp_copy(from, to)
+
+  Make the already-initialized mp_int 'to' hold the same value as
+  'from' (use mp_init_copy() for an uninitialized destination).
+  Copying an mp_int onto itself is a no-op.
+
+  Returns MP_OKAY, or MP_MEM if a larger buffer was needed and could
+  not be allocated; 'to' is left untouched in that case.
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+    mp_digit *fresh;
+
+    ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+    if (from == to)
+        return MP_OKAY;
+
+    if (ALLOC(to) < USED(from)) {
+        /* Destination is too small: build the copy in a new buffer
+           first, then wipe and release the old one. */
+        fresh = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+        if (fresh == NULL)
+            return MP_MEM;
+
+        s_mp_copy(DIGITS(from), fresh, USED(from));
+
+        if (DIGITS(to) != NULL) {
+            s_mp_setz(DIGITS(to), ALLOC(to));
+            s_mp_free(DIGITS(to));
+        }
+
+        DIGITS(to) = fresh;
+        ALLOC(to) = ALLOC(from);
+    } else {
+        /* Destination already has room: re-use its buffer to avoid a
+           trip through the allocator. Clear the digits above the copy,
+           then copy the used digits in. */
+        s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+        s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+    }
+
+    /* Take over the sign and the used-digit count from the source */
+    SIGN(to) = SIGN(from);
+    USED(to) = USED(from);
+
+    return MP_OKAY;
+
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+  mp_exch(mp1, mp2)
+
+  Swap the contents of mp1 and mp2 by handing them to s_mp_exch().
+  A NULL argument trips an assertion when MP_ARGCHK is 2; otherwise
+  the call silently does nothing.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+    assert(mp1 != NULL && mp2 != NULL);
+#else
+    if (!mp1 || !mp2)
+        return;
+#endif
+
+    s_mp_exch(mp1, mp2);
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+  mp_clear(mp)
+
+  Give back the digit buffer owned by mp. The buffer is zeroed over its
+  full allocated length before it is freed, and the pointer, used count
+  and allocation size are reset, so that calling mp_clear() a second
+  time on the same mp_int is harmless. A NULL mp is ignored.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+    mp_digit *digits;
+
+    if (mp == NULL)
+        return;
+
+    digits = DIGITS(mp);
+    if (digits != NULL) {
+        s_mp_setz(digits, ALLOC(mp));
+        s_mp_free(digits);
+        DIGITS(mp) = NULL;
+    }
+
+    ALLOC(mp) = 0;
+    USED(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+  mp_zero(mp)
+
+  Reset mp to the value zero: every allocated digit is cleared, one
+  digit is marked as used, and the sign becomes non-negative. The
+  allocation itself is left alone, so there is nothing that can fail;
+  a NULL mp is ignored.
+ */
+void
+mp_zero(mp_int *mp)
+{
+    if (mp != NULL) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        SIGN(mp) = ZPOS;
+        USED(mp) = 1;
+    }
+
+} /* end mp_zero() */
+
+/* }}} */
+
+/* {{{ mp_set(mp, d) */
+
+/*
+  mp_set(mp, d)
+
+  Set mp to the single-digit value d: mp is zeroed first, then d is
+  stored in digit 0. A NULL mp is ignored.
+ */
+void
+mp_set(mp_int *mp, mp_digit d)
+{
+    if (mp != NULL) {
+        mp_zero(mp);
+        DIGIT(mp, 0) = d;
+    }
+
+} /* end mp_set() */
+
+/* }}} */
+
+/* {{{ mp_set_int(mp, z) */
+
+/*
+  mp_set_int(mp, z)
+
+  Set mp to the signed value z. The magnitude is stored through
+  mp_set_ulong() and the sign is made negative afterwards when z < 0.
+
+  Returns MP_OKAY on success, or the error reported by mp_set_ulong().
+  A NULL mp is rejected through ARGCHK with MP_BADARG.
+ */
+mp_err
+mp_set_int(mp_int *mp, long z)
+{
+    unsigned long v;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /* Take the magnitude in unsigned arithmetic. labs() must not be used
+       here: labs(LONG_MIN) cannot be represented in a long and is
+       undefined behaviour, whereas 0UL - (unsigned long)z is well
+       defined for every z, including LONG_MIN. */
+    v = (z < 0) ? (0UL - (unsigned long)z) : (unsigned long)z;
+
+    /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */
+    if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */
+        return res;
+    }
+
+    if (z < 0) {
+        SIGN(mp) = NEG;
+    }
+
+    return MP_OKAY;
+} /* end mp_set_int() */
+
+/* }}} */
+
+/* {{{ mp_set_ulong(mp, z) */
+
+/*
+  mp_set_ulong(mp, z)
+
+  Set mp to the unsigned value z. When an unsigned long fits in one
+  mp_digit the value is stored directly; otherwise it is fed in one
+  byte at a time, most significant byte first, multiplying the running
+  value by UCHAR_MAX + 1 before each byte is added.
+
+  Returns MP_OKAY, or the error from s_mp_mul_d() / s_mp_add_d().
+ */
+mp_err
+mp_set_ulong(mp_int *mp, unsigned long z)
+{
+    int byteIx;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* nothing more to do for zero */
+
+    if (sizeof z <= sizeof(mp_digit)) {
+        DIGIT(mp, 0) = z;
+        return MP_OKAY;
+    }
+
+    byteIx = sizeof(long);
+    while (--byteIx >= 0) {
+        res = s_mp_mul_d(mp, (UCHAR_MAX + 1));
+        if (res != MP_OKAY)
+            return res;
+
+        res = s_mp_add_d(mp, (mp_digit)((z >> (byteIx * CHAR_BIT)) & UCHAR_MAX));
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    return MP_OKAY;
+} /* end mp_set_ulong() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Digit arithmetic */
+
+/* {{{ mp_add_d(a, d, b) */
+
+/*
+  mp_add_d(a, d, b)
+
+  Compute b = a + d for a single (unsigned) digit d, honouring the
+  sign of a. The work is done on a private copy of a which is swapped
+  into b at the end, so b is only modified on success.
+
+  Returns MP_OKAY or the error from the copy / digit arithmetic.
+ */
+
+mp_err
+mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int sum;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_init_copy(&sum, a);
+    if (res != MP_OKAY)
+        return res;
+
+    if (SIGN(&sum) != ZPOS && s_mp_cmp_d(&sum, d) < 0) {
+        /* a is negative with |a| < d: flip the sign and store d - |a|
+           in digit 0 */
+        mp_neg(&sum, &sum);
+
+        DIGIT(&sum, 0) = d - DIGIT(&sum, 0);
+    } else {
+        /* non-negative a: add d; negative a with |a| >= d: shrink the
+           magnitude by d */
+        if (SIGN(&sum) == ZPOS)
+            res = s_mp_add_d(&sum, d);
+        else
+            res = s_mp_sub_d(&sum, d);
+
+        if (res != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* never leave a negative zero behind */
+    if (s_mp_cmp_d(&sum, 0) == 0)
+        SIGN(&sum) = ZPOS;
+
+    s_mp_exch(&sum, b);
+
+CLEANUP:
+    mp_clear(&sum);
+    return res;
+
+} /* end mp_add_d() */
+
+/* }}} */
+
+/* {{{ mp_sub_d(a, d, b) */
+
+/*
+  mp_sub_d(a, d, b)
+
+  Compute b = a - d for a single (unsigned) digit d, honouring the
+  sign of a. The work is done on a private copy of a which is swapped
+  into b at the end, so b is only modified on success.
+
+  Returns MP_OKAY or the error from the copy / digit arithmetic.
+ */
+
+mp_err
+mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int diff;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_init_copy(&diff, a);
+    if (res != MP_OKAY)
+        return res;
+
+    if (SIGN(&diff) != NEG && s_mp_cmp_d(&diff, d) < 0) {
+        /* non-negative a with a < d: the result is -(d - a) */
+        mp_neg(&diff, &diff);
+
+        DIGIT(&diff, 0) = d - DIGIT(&diff, 0);
+        SIGN(&diff) = NEG;
+    } else {
+        /* negative a: grow the magnitude by d; otherwise a >= d and
+           the magnitude shrinks by d */
+        if (SIGN(&diff) == NEG)
+            res = s_mp_add_d(&diff, d);
+        else
+            res = s_mp_sub_d(&diff, d);
+
+        if (res != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* never leave a negative zero behind */
+    if (s_mp_cmp_d(&diff, 0) == 0)
+        SIGN(&diff) = ZPOS;
+
+    s_mp_exch(&diff, b);
+
+CLEANUP:
+    mp_clear(&diff);
+    return res;
+
+} /* end mp_sub_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_d(a, d, b) */
+
+/*
+  mp_mul_d(a, d, b)
+
+  Compute b = a * d for a single (unsigned) digit d. A zero d short-cuts
+  to mp_zero(b); otherwise a is copied into b and b is multiplied in
+  place by s_mp_mul_d().
+
+  Returns MP_OKAY, or the error from mp_copy() / s_mp_mul_d().
+ */
+
+mp_err
+mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if (d == 0) {
+        mp_zero(b);
+        return MP_OKAY;
+    }
+
+    res = mp_copy(a, b);
+    if (res == MP_OKAY)
+        res = s_mp_mul_d(b, d);
+
+    return res;
+
+} /* end mp_mul_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_2(a, c) */
+
+/*
+  mp_mul_2(a, c)
+
+  Compute c = a * 2: a is copied into c and c is doubled in place.
+  Returns MP_OKAY, or the error from mp_copy() / s_mp_mul_2().
+ */
+mp_err
+mp_mul_2(const mp_int *a, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_copy(a, c);
+    if (res != MP_OKAY)
+        return res;
+
+    res = s_mp_mul_2(c);
+    return res;
+
+} /* end mp_mul_2() */
+
+/* }}} */
+
+/* {{{ mp_div_d(a, d, q, r) */
+
+/*
+  mp_div_d(a, d, q, r)
+
+  Compute the quotient q = a / d and remainder r = a mod d, for a
+  single (unsigned) digit d. Either output may be NULL when the caller
+  does not need it.
+
+  Returns MP_RANGE when d is zero, MP_OKAY on success, or the error
+  from the copy / division helpers.
+ */
+
+mp_err
+mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
+{
+    mp_err res;
+    mp_int qp;
+    mp_digit rem = 0;
+    int pow;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    if (d == 0)
+        return MP_RANGE;
+
+    /* Shortcut for powers of two: the remainder is the low 'pow' bits
+       of digit 0, and the quotient is a shifted right by 'pow' bits */
+    if ((pow = s_mp_ispow2d(d)) >= 0) {
+        mp_digit mask;
+
+        mask = ((mp_digit)1 << pow) - 1;
+        rem = DIGIT(a, 0) & mask;
+
+        if (q) {
+            if ((res = mp_copy(a, q)) != MP_OKAY) {
+                return res;
+            }
+            s_mp_div_2d(q, pow);
+        }
+
+        if (r)
+            *r = rem;
+
+        return MP_OKAY;
+    }
+
+    /* General case: divide a private copy of a, then hand it to q */
+    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
+        return res;
+
+    res = s_mp_div_d(&qp, d, &rem);
+
+    /* Normalize a zero quotient to non-negative. This must be done on
+       the temporary: q may be NULL, and when q is present it receives
+       qp (sign included) through the exchange below. */
+    if (s_mp_cmp_d(&qp, 0) == 0)
+        SIGN(&qp) = ZPOS;
+
+    if (r) {
+        *r = rem;
+    }
+
+    if (q)
+        s_mp_exch(&qp, q);
+
+    mp_clear(&qp);
+    return res;
+
+} /* end mp_div_d() */
+
+/* }}} */
+
+/* {{{ mp_div_2(a, c) */
+
+/*
+  mp_div_2(a, c)
+
+  Compute c = a / 2; the remainder is dropped. a is copied into c and
+  c is halved in place. Returns MP_OKAY, or the error from mp_copy().
+ */
+
+mp_err
+mp_div_2(const mp_int *a, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_copy(a, c);
+    if (res == MP_OKAY)
+        s_mp_div_2(c);
+
+    return res;
+
+} /* end mp_div_2() */
+
+/* }}} */
+
+/* {{{ mp_expt_d(a, d, b) */
+
+/*
+  mp_expt_d(a, d, c)
+
+  Compute c = a ** d for a single digit exponent d, by binary
+  square-and-multiply: an accumulator starts at 1 and is multiplied by
+  the running square of a for every set bit of d, low bit first.
+
+  Returns MP_OKAY, or the error from initialization / multiplication.
+  c is only written on success.
+ */
+mp_err
+mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
+{
+    mp_int acc, base;
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&acc);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init_copy(&base, a);
+    if (res != MP_OKAY)
+        goto FREE_ACC;
+
+    DIGIT(&acc, 0) = 1;
+
+    for (; d != 0; d /= 2) {
+        if (d & 1) {
+            res = s_mp_mul(&acc, &base);
+            if (res != MP_OKAY)
+                goto FREE_BASE;
+        }
+
+        res = s_mp_sqr(&base);
+        if (res != MP_OKAY)
+            goto FREE_BASE;
+    }
+
+    s_mp_exch(&acc, c);
+
+FREE_BASE:
+    mp_clear(&base);
+FREE_ACC:
+    mp_clear(&acc);
+
+    return res;
+
+} /* end mp_expt_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Full arithmetic */
+
+/* {{{ mp_abs(a, b) */
+
+/*
+  mp_abs(a, b)
+
+  Compute b = |a|: a is copied into b and b's sign is forced to
+  non-negative. 'a' and 'b' may be the same mp_int.
+  Returns MP_OKAY, or the error from mp_copy().
+ */
+
+mp_err
+mp_abs(const mp_int *a, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_copy(a, b);
+    if (res == MP_OKAY)
+        SIGN(b) = ZPOS;
+
+    return res;
+
+} /* end mp_abs() */
+
+/* }}} */
+
+/* {{{ mp_neg(a, b) */
+
+/*
+  mp_neg(a, b)
+
+  Compute b = -a: a is copied into b and b's sign is flipped, except
+  that zero always ends up non-negative. 'a' and 'b' may be the same
+  mp_int. Returns MP_OKAY, or the error from mp_copy().
+ */
+
+mp_err
+mp_neg(const mp_int *a, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_copy(a, b);
+    if (res != MP_OKAY)
+        return res;
+
+    /* zero and negative values both become non-negative; everything
+       else becomes negative */
+    if (s_mp_cmp_d(b, 0) == MP_EQ || SIGN(b) == NEG) {
+        SIGN(b) = ZPOS;
+    } else {
+        SIGN(b) = NEG;
+    }
+
+    return MP_OKAY;
+
+} /* end mp_neg() */
+
+/* }}} */
+
+/* {{{ mp_add(a, b, c) */
+
+/*
+  mp_add(a, b, c)
+
+  Compute c = a + b. All parameters may be identical.
+
+  Operands of equal sign have their magnitudes added; operands of
+  opposite sign have the smaller magnitude subtracted from the larger.
+  A zero result is always made non-negative.
+ */
+
+mp_err
+mp_add(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(a) != SIGN(b)) {
+        if (s_mp_cmp(a, b) >= 0) {
+            /* |a| >= |b| */
+            MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+        } else {
+            /* |a| < |b| */
+            MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+        }
+    } else {
+        /* same sign: add values, keep sign */
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    }
+
+    if (s_mp_cmp_d(c, 0) == MP_EQ) {
+        SIGN(c) = ZPOS;
+    }
+
+CLEANUP:
+    return res;
+
+} /* end mp_add() */
+
+/* }}} */
+
+/* {{{ mp_sub(a, b, c) */
+
+/*
+  mp_sub(a, b, c)
+
+  Compute c = a - b. All parameters may be identical.
+
+  Operands of opposite sign have their magnitudes added; operands of
+  equal sign have the smaller magnitude subtracted from the larger,
+  with the sign inverted when |a| < |b|. A zero result is always made
+  non-negative.
+ */
+
+mp_err
+mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+    int magDiff;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == b) {
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    if (MP_SIGN(a) != MP_SIGN(b)) {
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    } else {
+        magDiff = s_mp_cmp(a, b);
+
+        if (magDiff == 0) {
+            /* equal magnitude and sign: the difference is zero */
+            mp_zero(c);
+            res = MP_OKAY;
+        } else if (magDiff > 0) {
+            MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+        } else {
+            MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+            MP_SIGN(c) = !MP_SIGN(a);
+        }
+    }
+
+    if (s_mp_cmp_d(c, 0) == MP_EQ) {
+        MP_SIGN(c) = MP_ZPOS;
+    }
+
+CLEANUP:
+    return res;
+
+} /* end mp_sub() */
+
+/* }}} */
+
+/* {{{ s_mp_mulg(a, b, c) */
+
+/*
+ s_mp_mulg(a, b, c)
+
+ Compute c = a * b. All parameters may be identical. if constantTime is set,
+ then the operations are done in constant time. The original is mostly
+ constant time as long as s_mpv_mul_d_add() is constant time. This is true
+ of the x86 assembler, as well as the current c code.
+
+ When c aliases a or b, the aliased operand is first copied into a
+ temporary so that writing c cannot disturb the inputs. With constantTime
+ set, the comba fast paths, the skip of zero digits of b and the final
+ clamp of c are all bypassed.
+
+ Returns MP_OKAY, or the error from mp_init_copy() / s_mp_pad().
+ */
+mp_err
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
+{
+ mp_digit *pb;
+ mp_int tmp;
+ mp_err res;
+ mp_size ib;
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == c) {
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+ if (a == b)
+ b = &tmp;
+ a = &tmp;
+ } else if (b == c) {
+ if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
+ return res;
+ b = &tmp;
+ } else {
+ MP_DIGITS(&tmp) = 0; /* no temporary in use: mp_clear() at CLEANUP then has no buffer to free */
+ }
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
+ goto CLEANUP;
+
+#ifdef NSS_USE_COMBA
+ /* comba isn't constant time because it clamps! If we cared
+ * (i.e. we needed a constant time version of multiply that was 'faster')
+ * we could easily pass constantTime down to the comba code and
+ * get it to skip the clamp... but there are assembler versions
+ * which add comba to platforms that can't compile the normal
+ * comba's embedded assembler which would also need to change, so
+ * for now we just skip comba when we are running constant time. */
+ if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+ if (MP_USED(a) == 4) {
+ s_mp_mul_comba_4(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 8) {
+ s_mp_mul_comba_8(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 16) {
+ s_mp_mul_comba_16(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 32) {
+ s_mp_mul_comba_32(a, b, c);
+ goto CLEANUP;
+ }
+ }
+#endif
+
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
+
+ /* Outer loop: Digits of b (digit 0 was handled by the call above) */
+ useda = MP_USED(a);
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (constantTime || b_i)
+ s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ else
+ MP_DIGIT(c, ib + useda) = b_i; /* b_i is zero here: store it as the next top digit instead of multiplying */
+ }
+
+ if (!constantTime) {
+ s_mp_clamp(c);
+ }
+
+ if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
+ SIGN(c) = ZPOS;
+ else
+ SIGN(c) = NEG;
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+} /* end s_mp_mulg() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+  mp_mul(a, b, c)
+
+  Compute c = a * b. All parameters may be identical.
+  This is s_mp_mulg() with its constant-time mode switched off.
+ */
+
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    const int constantTime = 0;
+
+    return s_mp_mulg(a, b, c, constantTime);
+} /* end mp_mul() */
+
+/* }}} */
+
+/* {{{ mp_mulCT(a, b, c) */
+
+/*
+  mp_mulCT(a, b, c, setSize)
+
+  Compute c = a * b through the constant-time mode of s_mp_mulg().
+  Parameters may not be identical.
+  NOTE: a and b may be modified - both are padded to setSize digits,
+  and c is padded to twice that, before the multiply.
+ */
+
+mp_err
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
+{
+    mp_err res;
+    const mp_size productSize = 2 * setSize;
+
+    /* Bring every operand to a fixed length first so that the multiply
+     * does not leak the lengths. At this point all the values are
+     * blinded, but once we finish we want the output size to stay
+     * hidden as well (so the output is not clamped). */
+    MP_CHECKOK(s_mp_pad(a, setSize));
+    MP_CHECKOK(s_mp_pad(b, setSize));
+    MP_CHECKOK(s_mp_pad(c, productSize));
+    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
+
+CLEANUP:
+    return res;
+} /* end mp_mulCT() */
+
+/* }}} */
+
+/* {{{ mp_sqr(a, sqr) */
+
+#if MP_SQUARE
+/*
+ Computes the square of a. This can be done more
+ efficiently than a general multiplication, because many of the
+ computation steps are redundant when squaring. The inner product
+ step is a bit more complicated, but we save a fair number of
+ iterations of the multiplication loop.
+ */
+
+/*
+ * sqr = a^2. 'a' and 'sqr' may be the same mp_int: in that case the
+ * input is first copied into a local temporary.
+ *
+ * Outline: the cross products a[i]*a[j] (i < j) are accumulated into
+ * sqr, that partial sum is doubled, and finally the squares of the
+ * individual digits are added in. With NSS_USE_COMBA, operands of
+ * exactly 4, 8, 16 or 32 digits are handed to the fixed-size routines.
+ *
+ * Returns MP_OKAY, or the error from mp_init_copy(), s_mp_grow() or
+ * s_mp_mul_2().
+ */
+mp_err
+mp_sqr(const mp_int *a, mp_int *sqr)
+{
+    mp_digit *pa;
+    mp_digit d;
+    mp_err res;
+    mp_size ix;
+    mp_int tmp;
+    int count;
+
+    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
+
+    if (a == sqr) {
+        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+            return res;
+        a = &tmp;
+    } else {
+        /* no temporary in use: mp_clear() at CLEANUP has nothing to free */
+        DIGITS(&tmp) = 0;
+        res = MP_OKAY;
+    }
+
+    /* the result needs twice as many digits as the input */
+    ix = 2 * MP_USED(a);
+    if (ix > MP_ALLOC(sqr)) {
+        MP_USED(sqr) = 1;
+        MP_CHECKOK(s_mp_grow(sqr, ix));
+    }
+    MP_USED(sqr) = ix;
+    MP_DIGIT(sqr, 0) = 0;
+
+#ifdef NSS_USE_COMBA
+    if (IS_POWER_OF_2(MP_USED(a))) {
+        if (MP_USED(a) == 4) {
+            s_mp_sqr_comba_4(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 8) {
+            s_mp_sqr_comba_8(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 16) {
+            s_mp_sqr_comba_16(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 32) {
+            s_mp_sqr_comba_32(a, sqr);
+            goto CLEANUP;
+        }
+    }
+#endif
+
+    pa = MP_DIGITS(a);
+    count = MP_USED(a) - 1;
+    if (count > 0) {
+        /* cross products: each digit times all the digits above it */
+        d = *pa++;
+        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
+        for (ix = 3; --count > 0; ix += 2) {
+            d = *pa++;
+            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
+        } /* for(ix ...) */
+        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
+
+        /* now sqr *= 2; the status is checked rather than dropped */
+        MP_CHECKOK(s_mp_mul_2(sqr));
+    } else {
+        MP_DIGIT(sqr, 1) = 0;
+    }
+
+    /* now add the squares of the digits of a to sqr. */
+    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
+
+    SIGN(sqr) = ZPOS;
+    s_mp_clamp(sqr);
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+
+} /* end mp_sqr() */
+#endif
+
+/* }}} */
+
+/* {{{ mp_div(a, b, q, r) */
+
+/*
+ mp_div(a, b, q, r)
+
+ Compute q = a / b and r = a mod b. Input parameters may be re-used
+ as output parameters. If q or r is NULL, that portion of the
+ computation will be discarded (although it will still be computed)
+
+ The remainder takes the sign of a; the quotient is non-negative when
+ a and b have the same sign and negative otherwise; a zero quotient or
+ remainder is always made non-negative.
+
+ Returns MP_RANGE when b is zero, MP_OKAY on success, or the error
+ from one of the helpers called through MP_CHECKOK.
+ */
+mp_err
+mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
+{
+ mp_err res;
+ mp_int *pQ, *pR;
+ mp_int qtmp, rtmp, btmp;
+ int cmp;
+ mp_sign signA;
+ mp_sign signB;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ signA = MP_SIGN(a);
+ signB = MP_SIGN(b);
+
+ if (mp_cmp_z(b) == MP_EQ)
+ return MP_RANGE; /* division by zero */
+
+ DIGITS(&qtmp) = 0; /* mark the temporaries as unallocated so CLEANUP can always mp_clear() them */
+ DIGITS(&rtmp) = 0;
+ DIGITS(&btmp) = 0;
+
+ /* Set up some temporaries... a temporary stands in for an output
+ that is absent or that aliases one of the inputs */
+ if (!r || r == a || r == b) {
+ MP_CHECKOK(mp_init_copy(&rtmp, a));
+ pR = &rtmp;
+ } else {
+ MP_CHECKOK(mp_copy(a, r));
+ pR = r;
+ }
+
+ if (!q || q == a || q == b) {
+ MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
+ pQ = &qtmp;
+ } else {
+ MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
+ pQ = q;
+ mp_zero(pQ);
+ }
+
+ /*
+ If |a| <= |b|, we can compute the solution without division;
+ otherwise, we actually do the work required.
+ */
+ if ((cmp = s_mp_cmp(a, b)) <= 0) {
+ if (cmp) {
+ /* |a| < |b|: quotient is zero; r was set to a above. */
+ mp_zero(pQ);
+ } else {
+ /* |a| == |b|: quotient magnitude is 1, remainder is zero */
+ mp_set(pQ, 1);
+ mp_zero(pR);
+ }
+ } else {
+ MP_CHECKOK(mp_init_copy(&btmp, b)); /* s_mp_div() takes a non-const divisor, so give it a copy */
+ MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
+ }
+
+ /* Compute the signs for the output */
+ MP_SIGN(pR) = signA; /* Sr = Sa */
+ /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
+ MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
+
+ if (s_mp_cmp_d(pQ, 0) == MP_EQ)
+ SIGN(pQ) = ZPOS;
+ if (s_mp_cmp_d(pR, 0) == MP_EQ)
+ SIGN(pR) = ZPOS;
+
+ /* Copy output, if it is needed (only when a temporary stood in for it) */
+ if (q && q != pQ)
+ s_mp_exch(pQ, q);
+
+ if (r && r != pR)
+ s_mp_exch(pR, r);
+
+CLEANUP:
+ mp_clear(&btmp);
+ mp_clear(&rtmp);
+ mp_clear(&qtmp);
+
+ return res;
+
+} /* end mp_div() */
+
+/* }}} */
+
+/* {{{ mp_div_2d(a, d, q, r) */
+
+/*
+   mp_div_2d(a, d, q, r)
+
+   Divide a by 2^d: q (if non-NULL) receives a shifted right by d bits
+   via s_mp_div_2d(), and r (if non-NULL) receives a reduced by
+   s_mp_mod_2d().  Returns the first mp_copy() failure, else MP_OKAY.
+ */
+mp_err
+mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Take both copies of a before either output is modified. */
+    if (q != NULL) {
+        status = mp_copy(a, q);
+        if (status != MP_OKAY)
+            return status;
+    }
+    if (r != NULL) {
+        status = mp_copy(a, r);
+        if (status != MP_OKAY)
+            return status;
+    }
+
+    /* Now reduce each requested output in place. */
+    if (q != NULL)
+        s_mp_div_2d(q, d);
+    if (r != NULL)
+        s_mp_mod_2d(r, d);
+
+    return MP_OKAY;
+
+} /* end mp_div_2d() */
+
+/* }}} */
+
+/* {{{ mp_expt(a, b, c) */
+
+/*
+   mp_expt(a, b, c)
+
+   Compute c = a ** b, that is, raise a to the b power.  Uses a
+   standard iterative square-and-multiply technique, scanning the
+   bits of b from least to most significant.
+
+   Returns MP_RANGE when b is negative, otherwise the first error
+   reported by a helper, or the status of the final mp_copy().
+
+   The result is negative only when a is negative and b is odd.
+ */
+
+mp_err
+mp_expt(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int s, x; /* s: running product, x: a raised to successive powers of two */
+    mp_err res;
+    mp_digit d;
+    unsigned int dig, bit;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(b) < 0)
+        return MP_RANGE;
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+
+    mp_set(&s, 1);
+
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X;
+
+    /* Loop over low-order digits in ascending order */
+    for (dig = 0; dig < (USED(b) - 1); dig++) {
+        d = DIGIT(b, dig);
+
+        /* Loop over bits of each non-maximal digit */
+        for (bit = 0; bit < DIGIT_BIT; bit++) {
+            if (d & 1) {
+                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                    goto CLEANUP;
+            }
+
+            d >>= 1;
+
+            if ((res = s_mp_sqr(&x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+    }
+
+    /* Consider now the last digit; stop once its remaining bits are all zero */
+    d = DIGIT(b, dig);
+
+    while (d) {
+        if (d & 1) {
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d >>= 1;
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /*
+       Fix up the sign explicitly.  The previous code copied the sign of
+       a when b was even, which made e.g. (-2)**2 negative.  An even
+       power is never negative; an odd power has the sign of a.
+     */
+    SIGN(&s) = (SIGN(a) == NEG && mp_isodd(b)) ? NEG : ZPOS;
+
+    res = mp_copy(&s, c);
+
+CLEANUP:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end mp_expt() */
+
+/* }}} */
+
+/* {{{ mp_2expt(a, k) */
+
+/* Compute a = 2^k */
+
+mp_err
+mp_2expt(mp_int *a, mp_digit k)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Public entry point: all of the work is done by s_mp_2expt(). */
+    status = s_mp_2expt(a, k);
+    return status;
+
+} /* end mp_2expt() */
+
+/* }}} */
+
+/* {{{ mp_mod(a, m, c) */
+
+/*
+   mp_mod(a, m, c)
+
+   Compute c = a (mod m).  Result will always be 0 <= c < m.
+   A negative modulus is rejected with MP_RANGE.
+ */
+
+mp_err
+mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    int mag;
+    int negative;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(m) == NEG)
+        return MP_RANGE;
+
+    mag = s_mp_cmp(a, m);
+
+    /* |a| == m: the residue is zero, nothing else to do. */
+    if (mag == 0) {
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    if (mag > 0) {
+        /* |a| > m: a real division is needed; the remainder takes a's sign. */
+        res = mp_div(a, m, NULL, c);
+        if (res != MP_OKAY)
+            return res;
+        negative = (SIGN(c) == NEG);
+    } else {
+        /* |a| < m: a is already reduced in magnitude, just copy it. */
+        res = mp_copy(a, c);
+        if (res != MP_OKAY)
+            return res;
+        negative = (mp_cmp_z(a) < 0);
+    }
+
+    /* A negative residue is shifted into [0, m) by adding m once. */
+    if (negative)
+        return mp_add(c, m, c);
+
+    return MP_OKAY;
+
+} /* end mp_mod() */
+
+/* }}} */
+
+/* {{{ s_mp_subCT_d(a, b, borrow, c) */
+
+/*
+ s_mp_subCT_d(a, b, borrow, c)
+
+ Single-digit subtract with borrow: stores a - b - borrow (wrapping
+ digit arithmetic) through ret and returns the outgoing borrow.
+ Written without branches so it can be used in constant-time code;
+ the comparison is delegated to the MP_CT_LTU / MP_CT_EQ macros
+ (defined elsewhere; presumably constant-time "less than" and
+ "equal" on digits).
+ */
+mp_digit
+s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret)
+{
+ *ret = a - b - borrow;
+ return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow); /* a == *ret with a borrow-in is flagged as a borrow-out */
+} /* s_mp_subCT_d() */
+
+/* }}} */
+
+/* {{{ mp_subCT(a, b, ret, borrow) */
+
+/* return ret= a - b and borrow in borrow. done in constant time.
+ * b could be modified.
+ *
+ * Both b and ret are padded (s_mp_pad) to MP_USED(a) digits, which is
+ * why b is not const. The subtraction runs over exactly MP_USED(a)
+ * digits; *borrow ends up holding the borrow out of the top digit.
+ * Returns the s_mp_pad() error if padding fails, else MP_OKAY.
+ */
+mp_err
+mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow)
+{
+ mp_size used_a = MP_USED(a);
+ mp_size i;
+ mp_err res;
+
+ MP_CHECKOK(s_mp_pad(b, used_a));
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ *borrow = 0;
+ for (i = 0; i < used_a; i++) { /* ripple the borrow from the least significant digit upward */
+ *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow,
+ &MP_DIGIT(ret, i));
+ }
+
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_subCT() */
+
+/* }}} */
+
+/* {{{ mp_selectCT(cond, a, b, ret) */
+
+/*
+ * return ret= cond ? a : b; cond should be either 0 or 1
+ *
+ * Only the digits are selected, one MP_CT_SEL_DIGIT per digit over
+ * MP_USED(a) digits. Returns MP_BADARG when a and b do not have the
+ * same number of used digits, or the s_mp_pad() error for ret.
+ */
+mp_err
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
+{
+ mp_size used_a = MP_USED(a);
+ mp_err res;
+ mp_size i;
+
+ cond *= MP_DIGIT_MAX; /* turns 0/1 into a 0 / MP_DIGIT_MAX mask (assuming MP_DIGIT_MAX is the all-ones digit) */
+
+ /* we currently require these to be equal on input,
+ * we could use pad to extend one of them, but that might
+ * leak data as it wouldn't be constant time */
+ if (used_a != MP_USED(b)) {
+ return MP_BADARG;
+ }
+
+ MP_CHECKOK(s_mp_pad(ret, used_a));
+ for (i = 0; i < used_a; i++) {
+ MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
+ }
+ res = MP_OKAY;
+CLEANUP:
+ return res;
+} /* end mp_selectCT() */
+
+/* {{{ mp_reduceCT(a, m, c) */
+
+/*
+ mp_reduceCT(a, m, c)
+
+ Compute c = aR^-1 (mod m) in constant time.
+ Input should be in Montgomery form. If input is the
+ result of a Montgomery multiply then output will be
+ in Montgomery form.
+ Result will be reduced to MP_USED(m), but not be
+ clamped.
+
+ n0i is the per-digit multiplier used to cancel the low digit of c
+ on each pass (presumably -m^-1 mod the digit radix; confirm with
+ the caller). a may be the same object as c.
+ */
+
+mp_err
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
+{
+ mp_size used_m = MP_USED(m);
+ mp_size used_c = used_m * 2 + 1; /* working width: 2 * MP_USED(m) digits plus one for the carry */
+ mp_digit *m_digits, *c_digits;
+ mp_size i;
+ mp_digit borrow, carry;
+ mp_err res;
+ mp_int sub; /* receives c - m */
+
+ MP_DIGITS(&sub) = 0;
+ MP_CHECKOK(mp_init_size(&sub, used_m));
+
+ if (a != c) {
+ MP_CHECKOK(mp_copy(a, c));
+ }
+ MP_CHECKOK(s_mp_pad(c, used_c));
+ m_digits = MP_DIGITS(m);
+ c_digits = MP_DIGITS(c);
+ for (i = 0; i < used_m; i++) {
+ mp_digit m_i = MP_DIGIT(c, i) * n0i;
+ s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--); /* c += m * m_i at digit offset i; window shrinks by one each pass */
+ }
+ s_mp_rshd(c, used_m); /* drop the used_m low digits the loop worked on */
+ /* MP_USED(c) should be used_m+1 with the high word being any carry
+ * from the previous multiply, save that carry and drop the high
+ * word for the subtraction below */
+ carry = MP_DIGIT(c, used_m);
+ MP_DIGIT(c, used_m) = 0;
+ MP_USED(c) = used_m;
+ /* mp_subCT wants c and m to be the same size, we've already
+ * guaranteed that in the previous statement, so mp_subCT won't actually
+ * modify m, so it's safe to recast */
+ MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
+
+ /* we return c-m if c >= m no borrow or there was a borrow and a carry */
+ MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c)); /* borrow != carry keeps c, otherwise takes c - m */
+ res = MP_OKAY;
+CLEANUP:
+ mp_clear(&sub);
+ return res;
+} /* end mp_reduceCT() */
+
+/* }}} */
+
+/* {{{ mp_mod_d(a, d, c) */
+
+/*
+   mp_mod_d(a, d, c)
+
+   Compute c = a (mod d).  Result will always be 0 <= c < d
+
+   When |a| > d the remainder comes from mp_div_d() and its error, if
+   any, is returned.  When |a| == d the remainder is zero.  When
+   |a| < d the value is a itself, or d - |a| for negative a.
+ */
+mp_err
+mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
+{
+    mp_err res;
+    mp_digit rem;
+    int cmp;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    cmp = s_mp_cmp_d(a, d);
+
+    if (cmp > 0) {
+        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
+            return res;
+
+    } else if (cmp == MP_EQ) {
+        /*
+           |a| == d.  The old code fell through to the "smaller than d"
+           branch and returned d itself for positive a, violating
+           0 <= c < d.
+         */
+        rem = 0;
+
+    } else {
+        if (SIGN(a) == NEG)
+            rem = d - DIGIT(a, 0);
+        else
+            rem = DIGIT(a, 0);
+    }
+
+    /* c is re-tested here; ARGCHK is a macro defined elsewhere and may compile to nothing */
+    if (c)
+        *c = rem;
+
+    return MP_OKAY;
+
+} /* end mp_mod_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Modular arithmetic */
+
+#if MP_MODARITH
+/* {{{ mp_addmod(a, b, m, c) */
+
+/*
+   mp_addmod(a, b, m, c)
+
+   Compute c = (a + b) mod m: a plain mp_add() followed by mp_mod().
+   The first failing step's error code is returned.
+ */
+
+mp_err
+mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    status = mp_add(a, b, c);
+    if (status == MP_OKAY)
+        status = mp_mod(c, m, c); /* reduce the sum in place */
+
+    return status;
+}
+
+/* }}} */
+
+/* {{{ mp_submod(a, b, m, c) */
+
+/*
+   mp_submod(a, b, m, c)
+
+   Compute c = (a - b) mod m: a plain mp_sub() followed by mp_mod().
+   The first failing step's error code is returned.
+ */
+
+mp_err
+mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    status = mp_sub(a, b, c);
+    if (status == MP_OKAY)
+        status = mp_mod(c, m, c); /* reduce the difference in place */
+
+    return status;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmod(a, b, m, c) */
+
+/*
+   mp_mulmod(a, b, m, c)
+
+   Compute c = (a * b) mod m: a plain mp_mul() followed by mp_mod().
+   The first failing step's error code is returned.
+ */
+
+mp_err
+mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    status = mp_mul(a, b, c);
+    if (status == MP_OKAY)
+        status = mp_mod(c, m, c); /* reduce the product in place */
+
+    return status;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmontmodCT(a, b, m, c) */
+
+/*
+   mp_mulmontmodCT(a, b, m, c)
+
+   Compute c = (a * b) mod m in constant time wrt a and b.  Either a or
+   b should be in Montgomery form, in which case the output is native.
+   If both a and b are in Montgomery form, then the output will also be
+   in Montgomery form and can be recovered with an mp_reduceCT call.
+   Implemented as mp_mulCT() over MP_USED(m) digits followed by an
+   in-place mp_reduceCT() using n0i.
+   NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i,
+                mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    status = mp_mulCT(a, b, c, MP_USED(m));
+    if (status == MP_OKAY)
+        status = mp_reduceCT(c, m, n0i, c);
+
+    return status;
+}
+
+/* }}} */
+
+/* {{{ mp_sqrmod(a, m, c) */
+
+#if MP_SQUARE
+/* Compute c = (a * a) mod m: mp_sqr() followed by an in-place mp_mod(). */
+mp_err
+mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    status = mp_sqr(a, c);
+    if (status == MP_OKAY)
+        status = mp_mod(c, m, c);
+
+    return status;
+
+} /* end mp_sqrmod() */
+#endif
+
+/* }}} */
+
+/* {{{ s_mp_exptmod(a, b, m, c) */
+
+/*
+ s_mp_exptmod(a, b, m, c)
+
+ Compute c = (a ** b) mod m. Uses a standard square-and-multiply
+ method with modular reductions at each step. (This is basically the
+ same code as mp_expt(), except for the addition of the reductions)
+
+ The modular reductions are done using Barrett's algorithm (see
+ s_mp_reduce() below for details)
+
+ Returns MP_RANGE when b is negative or m is not positive; otherwise
+ the first helper error, or the status of the last helper call.
+ The three temporaries are released through the CLEANUP / MU / X
+ label chain, in reverse order of initialization.
+ */
+
+mp_err
+s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+ mp_int s, x, mu; /* s: running result, x: current power of a, mu: reduction constant */
+ mp_err res;
+ mp_digit d;
+ unsigned int dig, bit;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG);
+
+ if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
+ return MP_RANGE;
+
+ if ((res = mp_init(&s)) != MP_OKAY)
+ return res;
+ if ((res = mp_init_copy(&x, a)) != MP_OKAY ||
+ (res = mp_mod(&x, m, &x)) != MP_OKAY) /* start from a reduced mod m */
+ goto X; /* NOTE(review): if mp_init_copy succeeded but mp_mod failed, x is not cleared on this path */
+ if ((res = mp_init(&mu)) != MP_OKAY)
+ goto MU;
+
+ mp_set(&s, 1);
+
+ /* mu = b^2k / m, with b the digit radix and k = USED(m) */
+ if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
+ goto CLEANUP;
+
+ /* Loop over digits of b in ascending order, except highest order */
+ for (dig = 0; dig < (USED(b) - 1); dig++) {
+ d = DIGIT(b, dig);
+
+ /* Loop over the bits of the lower-order digits */
+ for (bit = 0; bit < DIGIT_BIT; bit++) {
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+ }
+
+ /* Now do the last digit... */
+ d = DIGIT(b, dig);
+
+ while (d) { /* stops as soon as the remaining high bits are all zero */
+ if (d & 1) {
+ if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ d >>= 1;
+
+ if ((res = s_mp_sqr(&x)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ s_mp_exch(&s, c); /* hand the result to c; c's old storage is freed with s below */
+
+CLEANUP:
+ mp_clear(&mu);
+MU:
+ mp_clear(&x);
+X:
+ mp_clear(&s);
+
+ return res;
+
+} /* end s_mp_exptmod() */
+
+/* }}} */
+
+/* {{{ mp_exptmod_d(a, d, m, c) */
+
+/*
+   mp_exptmod_d(a, d, m, c)
+
+   Compute c = (a ** d) mod m for a single-digit exponent d, using
+   square-and-multiply with an mp_mod() reduction after every step.
+ */
+mp_err
+mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
+{
+    mp_int acc, base; /* acc: running result, base: current power of a */
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG);
+
+    res = mp_init(&acc);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init_copy(&base, a);
+    if (res != MP_OKAY)
+        goto FREE_ACC;
+
+    mp_set(&acc, 1);
+
+    /* Consume the exponent one bit at a time, least significant first. */
+    for (; d != 0; d /= 2) {
+        if (d & 1) {
+            res = s_mp_mul(&acc, &base);
+            if (res == MP_OKAY)
+                res = mp_mod(&acc, m, &acc);
+            if (res != MP_OKAY)
+                goto FREE_BASE;
+        }
+
+        res = s_mp_sqr(&base);
+        if (res == MP_OKAY)
+            res = mp_mod(&base, m, &base);
+        if (res != MP_OKAY)
+            goto FREE_BASE;
+    }
+
+    s_mp_exch(&acc, c);
+
+FREE_BASE:
+    mp_clear(&base);
+FREE_ACC:
+    mp_clear(&acc);
+
+    return res;
+
+} /* end mp_exptmod_d() */
+
+/* }}} */
+#endif /* if MP_MODARITH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Comparison functions */
+
+/* {{{ mp_cmp_z(a) */
+
+/*
+   mp_cmp_z(a)
+
+   Three-way comparison of a against zero: MP_LT when the sign of a is
+   NEG, MP_EQ when a is the single digit 0, MP_GT otherwise.
+ */
+
+int
+mp_cmp_z(const mp_int *a)
+{
+    ARGMPCHK(a != NULL);
+
+    if (SIGN(a) == NEG)
+        return MP_LT;
+
+    /* Zero is represented as exactly one used digit whose value is 0. */
+    return (USED(a) == 1 && DIGIT(a, 0) == 0) ? MP_EQ : MP_GT;
+
+} /* end mp_cmp_z() */
+
+/* }}} */
+
+/* {{{ mp_cmp_d(a, d) */
+
+/*
+   mp_cmp_d(a, d)
+
+   Three-way comparison of a against the single digit d.  Any negative
+   a compares as MP_LT; otherwise the magnitude comparison from
+   s_mp_cmp_d() is returned.
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGCHK(a != NULL, MP_EQ);
+
+    return (SIGN(a) == NEG) ? MP_LT : s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+/*
+   mp_cmp(a, b)
+
+   Signed three-way comparison of a and b.  Differing signs decide the
+   result directly; equal signs fall back to the magnitude comparison,
+   negated when both values are negative.
+ */
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+    int mag;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    /* Opposite signs: the non-negative operand is the larger one. */
+    if (SIGN(a) != SIGN(b))
+        return (SIGN(a) == ZPOS) ? MP_GT : MP_LT;
+
+    mag = s_mp_cmp(a, b);
+    if (mag == MP_EQ)
+        return MP_EQ;
+
+    /* Same sign: for two negatives the larger magnitude is the smaller value. */
+    return (SIGN(a) == ZPOS) ? mag : -mag;
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+   mp_cmp_mag(a, b)
+
+   Three-way comparison of the magnitudes |a| and |b|, ignoring signs.
+   Thin argument-checked wrapper around s_mp_cmp().
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+    int order;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    order = s_mp_cmp(a, b);
+    return order;
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+   mp_isodd(a)
+
+   Returns 1 if the least significant bit of a is set (a is odd),
+   0 otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+    mp_digit low;
+
+    ARGMPCHK(a != NULL);
+
+    low = DIGIT(a, 0);
+    return (int)(low & 1);
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+/* Returns 1 if a is even, 0 otherwise; the logical negation of mp_isodd(). */
+int
+mp_iseven(const mp_int *a)
+{
+    return mp_isodd(a) == 0;
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+ Computes the GCD using the constant-time algorithm
+ by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+ "Fast constant-time gcd computation and modular inversion"
+
+ mp_gcd(a, b, c) stores the result in c with sign ZPOS. If either
+ input is zero, c receives a copy of the other input (also forced to
+ ZPOS) through an early exit. Returns the first helper error, or
+ the status of the final mp_copy().
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+ mp_err res;
+ mp_digit cond = 0, mask = 0;
+ mp_int g, temp, f;
+ int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
+ mp_size top, flen, glen;
+ mp_int *clear[3]; /* temporaries initialized so far; clear[0..last] are freed at CLEANUP */
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+ /*
+ Early exit if either of the inputs is zero.
+ Caller is responsible for the proper handling of inputs.
+ */
+ if (mp_cmp_z(a) == MP_EQ) {
+ res = mp_copy(b, c);
+ SIGN(c) = ZPOS;
+ return res;
+ } else if (mp_cmp_z(b) == MP_EQ) {
+ res = mp_copy(a, c);
+ SIGN(c) = ZPOS;
+ return res;
+ }
+
+ MP_CHECKOK(mp_init(&temp));
+ clear[++last] = &temp;
+ MP_CHECKOK(mp_init_copy(&g, a));
+ clear[++last] = &g;
+ MP_CHECKOK(mp_init_copy(&f, b));
+ clear[++last] = &f;
+
+ /*
+ For even case compute the number of
+ shared powers of 2 in f and g.
+ */
+ for (i = 0; i < USED(&f) && i < USED(&g); i++) {
+ mask = ~(DIGIT(&f, i) | DIGIT(&g, i)); /* 1 bits where both f and g have a 0 bit */
+ for (j = 0; j < MP_DIGIT_BIT; j++) {
+ bit &= mask; /* stays 1 only while every lower bit of f|g was 0 */
+ shifts += bit;
+ mask >>= 1;
+ }
+ }
+ /* Reduce to the odd case by removing the powers of 2. */
+ s_mp_div_2d(&f, shifts);
+ s_mp_div_2d(&g, shifts);
+
+ /* Allocate to the size of largest mp_int. */
+ top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+ MP_CHECKOK(s_mp_grow(&f, top));
+ MP_CHECKOK(s_mp_grow(&g, top));
+ MP_CHECKOK(s_mp_grow(&temp, top));
+
+ /* Make sure f contains the odd value. */
+ MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top)); /* swap exactly when f is even */
+
+ /* Upper bound for the total iterations. */
+ flen = mpl_significant_bits(&f);
+ glen = mpl_significant_bits(&g);
+ m = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
+#endif
+
+ for (i = 0; i < m; i++) {
+ /* Step 1: conditional swap. */
+ /* Set cond if delta > 0 and g is odd. */
+ cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+ /* If cond is set replace (delta,f) with (-delta,-f). */
+ delta = (-cond & -delta) | ((cond - 1) & delta); /* branch-free select between -delta and delta */
+ SIGN(&f) ^= cond;
+ /* If cond is set swap f with g. */
+ MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+
+ /* Step 2: elimination. */
+ /* Update delta. */
+ delta++;
+ /* If g is odd, right shift (g+f) else right shift g. */
+ MP_CHECKOK(mp_add(&g, &f, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+ s_mp_div_2(&g);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* GCD is in f, take the absolute value. */
+ SIGN(&f) = ZPOS;
+
+ /* Add back the removed powers of 2. */
+ MP_CHECKOK(s_mp_mul_2d(&f, shifts));
+
+ MP_CHECKOK(mp_copy(&f, c));
+
+CLEANUP:
+ while (last >= 0)
+ mp_clear(clear[last--]);
+ return res;
+} /* end mp_gcd() */
+
+/* }}} */
+
+/* {{{ mp_lcm(a, b, c) */
+
+/*
+   mp_lcm(a, b, c)
+
+   Least common multiple via the identity
+
+     ab = [a, b](a, b)
+
+   The product a*b is formed first, then divided by gcd(a, b); the
+   quotient is stored in c.  Returns the first helper error, else the
+   status of the final division.
+ */
+
+mp_err
+mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int divisor, product;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&divisor);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init(&product);
+    if (res == MP_OKAY) {
+        res = mp_mul(a, b, &product);
+        if (res == MP_OKAY)
+            res = mp_gcd(a, b, &divisor);
+        if (res == MP_OKAY)
+            res = mp_div(&product, &divisor, c, NULL);
+
+        mp_clear(&product);
+    }
+
+    mp_clear(&divisor);
+
+    return res;
+
+} /* end mp_lcm() */
+
+/* }}} */
+
+/* {{{ mp_xgcd(a, b, g, x, y) */
+
+/*
+   mp_xgcd(a, b, g, x, y)
+
+   Compute g = (a, b) and values x and y satisfying Bezout's identity
+   (that is, ax + by = g).  This uses the binary extended GCD algorithm
+   based on the Stein algorithm used for mp_gcd()
+   See algorithm 14.61 in Handbook of Applied Cryptography.
+
+   The computation runs on |a| and |b| (both inputs are copied and
+   passed through mp_abs()), so the identity produced is for the
+   absolute values.  Any of g, x, y may be NULL if that output is not
+   wanted.
+
+   Returns MP_RANGE when a or b is zero: the halving loops below never
+   terminate on a zero operand, so zero must be rejected up front.
+   Otherwise returns the first helper error, or the status of the last
+   helper call.
+ */
+
+mp_err
+mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
+{
+    mp_int gx, xc, yc, u, v, A, B, C, D;
+    mp_int *clean[9]; /* temporaries initialized so far; clean[0..last] are freed at CLEANUP */
+    mp_err res;
+    int last = -1;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    /*
+       A zero operand is always "even" and mp_trailing_zeros() reports 0
+       for it, so both the common-factor loop and the main loop would
+       spin forever.  Only b was checked before.
+     */
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(b) == 0)
+        return MP_RANGE;
+
+    /* Initialize all these variables we need */
+    MP_CHECKOK(mp_init(&u));
+    clean[++last] = &u;
+    MP_CHECKOK(mp_init(&v));
+    clean[++last] = &v;
+    MP_CHECKOK(mp_init(&gx));
+    clean[++last] = &gx;
+    MP_CHECKOK(mp_init(&A));
+    clean[++last] = &A;
+    MP_CHECKOK(mp_init(&B));
+    clean[++last] = &B;
+    MP_CHECKOK(mp_init(&C));
+    clean[++last] = &C;
+    MP_CHECKOK(mp_init(&D));
+    clean[++last] = &D;
+    MP_CHECKOK(mp_init_copy(&xc, a));
+    clean[++last] = &xc;
+    mp_abs(&xc, &xc);
+    MP_CHECKOK(mp_init_copy(&yc, b));
+    clean[++last] = &yc;
+    mp_abs(&yc, &yc);
+
+    mp_set(&gx, 1);
+
+    /* Divide by two until at least one of them is odd; gx collects the shared power of 2 */
+    while (mp_iseven(&xc) && mp_iseven(&yc)) {
+        mp_size nx = mp_trailing_zeros(&xc);
+        mp_size ny = mp_trailing_zeros(&yc);
+        mp_size n = MP_MIN(nx, ny);
+        s_mp_div_2d(&xc, n);
+        s_mp_div_2d(&yc, n);
+        MP_CHECKOK(s_mp_mul_2d(&gx, n));
+    }
+
+    MP_CHECKOK(mp_copy(&xc, &u));
+    MP_CHECKOK(mp_copy(&yc, &v));
+    mp_set(&A, 1);
+    mp_set(&D, 1);
+
+    /* Loop through binary GCD algorithm */
+    do {
+        while (mp_iseven(&u)) {
+            s_mp_div_2(&u);
+
+            if (mp_iseven(&A) && mp_iseven(&B)) {
+                s_mp_div_2(&A);
+                s_mp_div_2(&B);
+            } else {
+                MP_CHECKOK(mp_add(&A, &yc, &A));
+                s_mp_div_2(&A);
+                MP_CHECKOK(mp_sub(&B, &xc, &B));
+                s_mp_div_2(&B);
+            }
+        }
+
+        while (mp_iseven(&v)) {
+            s_mp_div_2(&v);
+
+            if (mp_iseven(&C) && mp_iseven(&D)) {
+                s_mp_div_2(&C);
+                s_mp_div_2(&D);
+            } else {
+                MP_CHECKOK(mp_add(&C, &yc, &C));
+                s_mp_div_2(&C);
+                MP_CHECKOK(mp_sub(&D, &xc, &D));
+                s_mp_div_2(&D);
+            }
+        }
+
+        if (mp_cmp(&u, &v) >= 0) {
+            MP_CHECKOK(mp_sub(&u, &v, &u));
+            MP_CHECKOK(mp_sub(&A, &C, &A));
+            MP_CHECKOK(mp_sub(&B, &D, &B));
+        } else {
+            MP_CHECKOK(mp_sub(&v, &u, &v));
+            MP_CHECKOK(mp_sub(&C, &A, &C));
+            MP_CHECKOK(mp_sub(&D, &B, &D));
+        }
+    } while (mp_cmp_z(&u) != 0);
+
+    /* copy results to output */
+    if (x)
+        MP_CHECKOK(mp_copy(&C, x));
+
+    if (y)
+        MP_CHECKOK(mp_copy(&D, y));
+
+    if (g)
+        MP_CHECKOK(mp_mul(&gx, &v, g)); /* restore the shared power of 2 */
+
+CLEANUP:
+    while (last >= 0)
+        mp_clear(clean[last--]);
+
+    return res;
+
+} /* end mp_xgcd() */
+
+/* }}} */
+
+/*
+   mp_trailing_zeros(mp)
+
+   Returns the number of consecutive zero bits at the least significant
+   end of mp.  Returns 0 when mp is NULL, has no digit storage, is
+   zero, or (defensively) when every used digit turns out to be zero.
+ */
+mp_size
+mp_trailing_zeros(const mp_int *mp)
+{
+    mp_digit d = 0;
+    mp_size n = 0;
+    unsigned int ix;
+
+    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
+        return n;
+
+    /*
+       Skip whole zero digits.  The bounds test comes first so that
+       digit MP_USED(mp), one past the last used digit, is never read;
+       the previous ordering fetched the digit before checking the index.
+     */
+    for (ix = 0; (ix < MP_USED(mp)) && !(d = MP_DIGIT(mp, ix)); ++ix)
+        n += MP_DIGIT_BIT;
+    if (!d)
+        return 0; /* shouldn't happen, but ... */
+
+    /* Binary search for the lowest set bit inside the first non-zero digit. */
+#if !defined(MP_USE_UINT_DIGIT)
+    if (!(d & 0xffffffffU)) {
+        d >>= 32;
+        n += 32;
+    }
+#endif
+    if (!(d & 0xffffU)) {
+        d >>= 16;
+        n += 16;
+    }
+    if (!(d & 0xffU)) {
+        d >>= 8;
+        n += 8;
+    }
+    if (!(d & 0xfU)) {
+        d >>= 4;
+        n += 4;
+    }
+    if (!(d & 0x3U)) {
+        d >>= 2;
+        n += 2;
+    }
+    if (!(d & 0x1U)) {
+        d >>= 1;
+        n += 1;
+    }
+#if MP_ARGCHK == 2
+    assert(0 != (d & 1));
+#endif
+    return n;
+}
+
+/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
+** Returns k (positive) or error (negative).
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** Returns MP_UNDEF when a is zero or when the loop reaches f == g
+** (a and p not relatively prime). On success c is brought into the
+** range [0, p) before k is returned.
+*/
+mp_err
+s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
+{
+ mp_err res;
+ mp_err k = 0; /* running count of factors of 2 removed from f */
+ mp_int d, f, g;
+
+ ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG);
+
+ MP_DIGITS(&d) = 0; /* presumably lets CLEANUP mp_clear() temporaries that were never initialized */
+ MP_DIGITS(&f) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_CHECKOK(mp_init(&d));
+ MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
+ MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
+
+ mp_set(c, 1);
+ mp_zero(&d);
+
+ if (mp_cmp_z(&f) == 0) {
+ res = MP_UNDEF;
+ } else
+ for (;;) {
+ int diff_sign;
+ while (mp_iseven(&f)) { /* strip factors of 2 from f, doubling d to match */
+ mp_size n = mp_trailing_zeros(&f);
+ if (!n) { /* f is even but reports no trailing zeros, i.e. f == 0 */
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ s_mp_div_2d(&f, n);
+ MP_CHECKOK(s_mp_mul_2d(&d, n));
+ k += n;
+ }
+ if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
+ res = k;
+ break;
+ }
+ diff_sign = mp_cmp(&f, &g);
+ if (diff_sign < 0) { /* f < g */
+ s_mp_exch(&f, &g);
+ s_mp_exch(c, &d);
+ } else if (diff_sign == 0) { /* f == g */
+ res = MP_UNDEF; /* a and p are not relatively prime */
+ break;
+ }
+ if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) { /* equal low two bits: the difference is a multiple of 4 */
+ MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
+ MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */
+ } else {
+ MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
+ MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */
+ }
+ }
+ if (res >= 0) { /* success: normalize c into [0, p) */
+ if (mp_cmp_mag(c, p) >= 0) {
+ MP_CHECKOK(mp_div(c, p, NULL, c));
+ }
+ if (MP_SIGN(c) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(c, p, c));
+ }
+ res = k; /* the MP_CHECKOK calls above overwrote res */
+ }
+
+CLEANUP:
+ mp_clear(&d);
+ mp_clear(&f);
+ mp_clear(&g);
+ return res;
+}
+
+/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** Starting from T = P, the refinement T *= 2 - P*T is applied four
+** times, or six times when MP_USE_UINT_DIGIT is not defined.
+*/
+mp_digit
+s_mp_invmod_radix(mp_digit P)
+{
+#if !defined(MP_USE_UINT_DIGIT)
+    int rounds = 6;
+#else
+    int rounds = 4;
+#endif
+    mp_digit inv = P;
+
+    while (rounds-- > 0)
+        inv *= 2 - (P * inv);
+
+    return inv;
+}
+
+/* Given c, k, and prime p, where a*c == 2**k (mod p),
+** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** Multiples of p are added to x, up to MP_DIGIT_BIT bits of k at a
+** time, and x is then shifted right by k bits. Returns the first
+** helper error, else MP_OKAY.
+*/
+mp_err
+s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
+{
+ int k_orig = k; /* k is consumed by the loop; the final shift needs the original */
+ mp_digit r;
+ mp_size ix;
+ mp_err res;
+
+ if (mp_cmp_z(c) < 0) { /* c < 0 */
+ MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
+ } else {
+ MP_CHECKOK(mp_copy(c, x)); /* x = c */
+ }
+
+ /* make sure x is large enough */
+ ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
+ ix = MP_MAX(ix, MP_USED(x));
+ MP_CHECKOK(s_mp_pad(x, ix));
+
+ r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0)); /* r = -(p ** -1) mod MP_RADIX */
+
+ for (ix = 0; k > 0; ix++) {
+ int j = MP_MIN(k, MP_DIGIT_BIT); /* bits handled this pass */
+ mp_digit v = r * MP_DIGIT(x, ix);
+ if (j < MP_DIGIT_BIT) {
+ v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
+ }
+ s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
+ k -= j;
+ }
+ s_mp_clamp(x);
+ s_mp_div_2d(x, k_orig);
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+
+/*
+ Computes the modular inverse using the constant-time algorithm
+ by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+ "Fast constant-time gcd computation and modular inversion"
+
+ s_mp_invmod_odd_m(a, m, c) stores a^-1 mod m in c for odd m.
+ Returns MP_RANGE when a is zero or m < 2, MP_UNDEF when a and m
+ are the same object, m is even, or the computed gcd is not 1;
+ otherwise the first helper error or the status of the final
+ mp_mod().
+ */
+mp_err
+s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+ mp_digit cond = 0;
+ mp_int g, f, v, r, temp;
+ int i, its, delta = 1, last = -1;
+ mp_size top, flen, glen;
+ mp_int *clear[6]; /* temporaries initialized so far; clear[0..last] are freed at CLEANUP */
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+ /* Check for invalid inputs. */
+ if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
+ return MP_RANGE;
+
+ if (a == m || mp_iseven(m))
+ return MP_UNDEF;
+
+ MP_CHECKOK(mp_init(&temp));
+ clear[++last] = &temp;
+ MP_CHECKOK(mp_init(&v));
+ clear[++last] = &v;
+ MP_CHECKOK(mp_init(&r));
+ clear[++last] = &r;
+ MP_CHECKOK(mp_init_copy(&g, a));
+ clear[++last] = &g;
+ MP_CHECKOK(mp_init_copy(&f, m));
+ clear[++last] = &f;
+
+ mp_set(&v, 0);
+ mp_set(&r, 1);
+
+ /* Allocate to the size of largest mp_int. */
+ top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+ MP_CHECKOK(s_mp_grow(&f, top));
+ MP_CHECKOK(s_mp_grow(&g, top));
+ MP_CHECKOK(s_mp_grow(&temp, top));
+ MP_CHECKOK(s_mp_grow(&v, top));
+ MP_CHECKOK(s_mp_grow(&r, top));
+
+ /* Upper bound for the total iterations. */
+ flen = mpl_significant_bits(&f);
+ glen = mpl_significant_bits(&g);
+ its = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
+#endif
+
+ for (i = 0; i < its; i++) {
+ /* Step 1: conditional swap. */
+ /* Set cond if delta > 0 and g is odd. */
+ cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+ /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
+ delta = (-cond & -delta) | ((cond - 1) & delta); /* branch-free select between -delta and delta */
+ SIGN(&f) ^= cond;
+ SIGN(&v) ^= cond;
+ /* If cond is set swap (f,v) with (g,r). */
+ MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+ MP_CHECKOK(mp_cswap(cond, &v, &r, top));
+
+ /* Step 2: elimination. */
+ /* Update delta */
+ delta++;
+ /* If g is odd replace r with (r+v). */
+ MP_CHECKOK(mp_add(&r, &v, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
+ /* If g is odd, right shift (g+f) else right shift g. */
+ MP_CHECKOK(mp_add(&g, &f, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+ s_mp_div_2(&g);
+ /*
+ If r is even, right shift it.
+ If r is odd, right shift (r+m) which is even because m is odd.
+ We want the result modulo m so adding in multiples of m here vanish.
+ */
+ MP_CHECKOK(mp_add(&r, m, &temp));
+ MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
+ s_mp_div_2(&r);
+ }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ /* We have the inverse in v, propagate sign from f. */
+ SIGN(&v) ^= SIGN(&f);
+ /* GCD is in f, take the absolute value. */
+ SIGN(&f) = ZPOS;
+
+ /* If gcd != 1, not invertible. */
+ if (mp_cmp_d(&f, 1) != MP_EQ) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+ /* Return inverse modulo m. */
+ MP_CHECKOK(mp_mod(&v, m, c));
+
+CLEANUP:
+ while (last >= 0)
+ mp_clear(clear[last--]);
+ return res;
+}
+
+/*
+   Known good algorithm for computing modular inverse.  But slow.
+
+   Runs mp_xgcd() on (a, m) and, when the gcd is 1, reduces the x
+   coefficient modulo m into c and gives c the sign of a.  Returns
+   MP_RANGE when a or m is zero and MP_UNDEF when the gcd is not 1.
+ */
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int gcd, coef;
+    mp_err res;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    /* Null the digit pointers before the first fallible call. */
+    MP_DIGITS(&gcd) = 0;
+    MP_DIGITS(&coef) = 0;
+    MP_CHECKOK(mp_init(&coef));
+    MP_CHECKOK(mp_init(&gcd));
+
+    MP_CHECKOK(mp_xgcd(a, m, &gcd, &coef, NULL));
+
+    if (mp_cmp_d(&gcd, 1) == MP_EQ) {
+        res = mp_mod(&coef, m, c);
+        SIGN(c) = SIGN(a);
+    } else {
+        /* a and m share a factor: no inverse exists. */
+        res = MP_UNDEF;
+    }
+
+CLEANUP:
+    mp_clear(&coef);
+    mp_clear(&gcd);
+
+    return res;
+}
+
+/* modular inverse where modulus is 2**k. */
+/* c = a**-1 mod 2**k
+ *
+ * Returns MP_UNDEF when a is even, or when the iteration below has not
+ * reached a fixed point after k + 4 rounds. For k <= MP_DIGIT_BIT the
+ * answer is computed directly on the low digit of a.
+ */
+mp_err
+s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
+{
+ mp_err res;
+ mp_size ix = k + 4; /* iteration budget for the loop below */
+ mp_int t0, t1, val, tmp, two2k;
+
+ static const mp_digit d2 = 2;
+ static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 }; /* constant mp_int with the value 2 */
+
+ if (mp_iseven(a))
+ return MP_UNDEF;
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned mp_digit
+#endif
+ if (k <= MP_DIGIT_BIT) { /* single-digit fast path */
+ mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
+ /* propagate the sign from mp_int */
+ i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a); /* negates i (mod radix) when SIGN(a) is 1, assuming NEG == 1 */
+ if (k < MP_DIGIT_BIT)
+ i &= ((mp_digit)1 << k) - (mp_digit)1; /* keep only the low k bits */
+ mp_set(c, i);
+ return MP_OKAY;
+ }
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+ MP_DIGITS(&t0) = 0; /* presumably lets CLEANUP mp_clear() temporaries that were never initialized */
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&val) = 0;
+ MP_DIGITS(&tmp) = 0;
+ MP_DIGITS(&two2k) = 0;
+ MP_CHECKOK(mp_init_copy(&val, a));
+ s_mp_mod_2d(&val, k);
+ MP_CHECKOK(mp_init_copy(&t0, &val));
+ MP_CHECKOK(mp_init_copy(&t1, &t0));
+ MP_CHECKOK(mp_init(&tmp));
+ MP_CHECKOK(mp_init(&two2k));
+ MP_CHECKOK(s_mp_2expt(&two2k, k));
+ do { /* each round computes t1 = t1 * (2 - val * t1) mod 2**k */
+ MP_CHECKOK(mp_mul(&val, &t1, &tmp));
+ MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
+ MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
+ s_mp_mod_2d(&t1, k);
+ while (MP_SIGN(&t1) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&t1, &two2k, &t1));
+ }
+ if (mp_cmp(&t1, &t0) == MP_EQ) /* fixed point reached */
+ break;
+ MP_CHECKOK(mp_copy(&t1, &t0));
+ } while (--ix > 0);
+ if (!ix) { /* budget exhausted without converging */
+ res = MP_UNDEF;
+ } else {
+ mp_exch(c, &t1);
+ }
+
+CLEANUP:
+ mp_clear(&t0);
+ mp_clear(&t1);
+ mp_clear(&val);
+ mp_clear(&tmp);
+ mp_clear(&two2k);
+ return res;
+}
+/*
+ s_mp_invmod_even_m(a, m, c)
+
+ Compute c = a^-1 mod m for an even modulus m. If m is a power of
+ two the work is handed to s_mp_invmod_2d(). Otherwise m is split
+ into an odd factor and a power-of-two factor, a is inverted modulo
+ each, and the two results are combined with the Chinese Remainder
+ Theorem. Returns the first helper error, or the status of the
+ final mp_mod().
+ */
+mp_err
+s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+ mp_err res;
+ mp_size k;
+ mp_int oddFactor, evenFactor; /* factors of the modulus */
+ mp_int oddPart, evenPart; /* parts to combine via CRT. */
+ mp_int C2, tmp1, tmp2;
+
+ ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+ /*static const mp_digit d1 = 1; */
+ /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
+
+ if ((res = s_mp_ispow2(m)) >= 0) { /* a non-negative result is used as the exponent k */
+ k = res;
+ return s_mp_invmod_2d(a, k, c);
+ }
+ MP_DIGITS(&oddFactor) = 0; /* presumably lets CLEANUP mp_clear() temporaries that were never initialized */
+ MP_DIGITS(&evenFactor) = 0;
+ MP_DIGITS(&oddPart) = 0;
+ MP_DIGITS(&evenPart) = 0;
+ MP_DIGITS(&C2) = 0;
+ MP_DIGITS(&tmp1) = 0;
+ MP_DIGITS(&tmp2) = 0;
+
+ MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
+ MP_CHECKOK(mp_init(&evenFactor));
+ MP_CHECKOK(mp_init(&oddPart));
+ MP_CHECKOK(mp_init(&evenPart));
+ MP_CHECKOK(mp_init(&C2));
+ MP_CHECKOK(mp_init(&tmp1));
+ MP_CHECKOK(mp_init(&tmp2));
+
+ k = mp_trailing_zeros(m);
+ s_mp_div_2d(&oddFactor, k); /* oddFactor = m / 2**k */
+ MP_CHECKOK(s_mp_2expt(&evenFactor, k)); /* evenFactor = 2**k */
+
+ /* compute a**-1 mod oddFactor. */
+ MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
+ /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
+ MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
+
+ /* Use Chinese Remainder theorem to compute a**-1 mod m. */
+ /* let m1 = oddFactor, v1 = oddPart,
+ * let m2 = evenFactor, v2 = evenPart.
+ */
+
+ /* Compute C2 = m1**-1 mod m2. */
+ MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
+
+ /* compute u = (v2 - v1)*C2 mod m2 */
+ MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
+ MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
+ s_mp_mod_2d(&tmp2, k);
+ while (MP_SIGN(&tmp2) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
+ }
+
+ /* compute answer = v1 + u*m1 */
+ MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
+ MP_CHECKOK(mp_add(&oddPart, c, c));
+ /* not sure this is necessary, but it's low cost if not. */
+ MP_CHECKOK(mp_mod(c, m, c));
+
+CLEANUP:
+ mp_clear(&oddFactor);
+ mp_clear(&evenFactor);
+ mp_clear(&oddPart);
+ mp_clear(&evenPart);
+ mp_clear(&C2);
+ mp_clear(&tmp1);
+ mp_clear(&tmp2);
+ return res;
+}
+
+/* {{{ mp_invmod(a, m, c) */
+
+/*
+  mp_invmod(a, m, c)
+
+  Modular inverse: set c = a^-1 (mod m) when such an inverse exists,
+  which is the case exactly when (a, m) = 1.  MP_RANGE is returned when
+  a or m is zero, and MP_UNDEF when a and m are both even; in every
+  other case the result of the odd-modulus or even-modulus helper is
+  passed back unchanged.
+ */
+
+mp_err
+mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) {
+        return MP_RANGE;
+    }
+
+    if (!mp_isodd(m)) {
+        /* a and m would share the factor 2: not invertible */
+        if (mp_iseven(a)) {
+            return MP_UNDEF;
+        }
+        return s_mp_invmod_even_m(a, m, c);
+    }
+
+    return s_mp_invmod_odd_m(a, m, c);
+
+} /* end mp_invmod() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_print(mp, ofp) */
+
+#if MP_IOFUNC
+/*
+  mp_print(mp, ofp)
+
+  Write the sign character ('+' or '-') followed by every digit of mp,
+  most significant digit first, to the stream 'ofp'.  Each digit is
+  formatted with DIGIT_FMT, i.e. output is in the internal radix.
+  Nothing is written if either argument is NULL.
+ */
+
+void
+mp_print(mp_int *mp, FILE *ofp)
+{
+    int pos;
+    int sign_ch = '+';
+
+    if (!mp || !ofp)
+        return;
+
+    if (SIGN(mp) == NEG)
+        sign_ch = '-';
+    fputc(sign_ch, ofp);
+
+    pos = USED(mp) - 1;
+    while (pos >= 0) {
+        fprintf(ofp, DIGIT_FMT, DIGIT(mp, pos));
+        --pos;
+    }
+
+} /* end mp_print() */
+
+#endif /* if MP_IOFUNC */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ More I/O Functions */
+
+/* {{{ mp_read_raw(mp, str, len) */
+
+/*
+  mp_read_raw(mp, str, len)
+
+  Load mp from 'len' raw bytes.  Byte 0 carries the sign (non-zero
+  means negative); bytes 1 .. len-1 hold the magnitude in base 256,
+  most significant byte first.
+ */
+
+mp_err
+mp_read_raw(mp_int *mp, char *str, int len)
+{
+    const unsigned char *bytes = (const unsigned char *)str;
+    mp_err res;
+    int pos;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Accumulate the magnitude: mp = mp * 256 + next byte */
+    for (pos = 1; pos < len; ++pos) {
+        res = mp_mul_d(mp, 256, mp);
+        if (res != MP_OKAY)
+            return res;
+
+        res = mp_add_d(mp, bytes[pos], mp);
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* The leading byte is the sign flag */
+    SIGN(mp) = bytes[0] ? NEG : ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_read_raw() */
+
+/* }}} */
+
+/* {{{ mp_raw_size(mp) */
+
+/* Number of bytes mp_toraw() writes for mp: one sign byte plus
+ * sizeof(mp_digit) bytes for each used digit.  Returns 0 for a NULL mp
+ * (subject to the ARGCHK configuration). */
+int
+mp_raw_size(mp_int *mp)
+{
+    size_t magnitude_bytes;
+
+    ARGCHK(mp != NULL, 0);
+
+    magnitude_bytes = USED(mp) * sizeof(mp_digit);
+    return (int)(magnitude_bytes + 1);
+
+} /* end mp_raw_size() */
+
+/* }}} */
+
+/* {{{ mp_toraw(mp, str) */
+
+/* Serialize mp into str: the sign value in byte 0, then every used
+ * digit from most to least significant, each digit written high-order
+ * byte first.  No length is passed in; the loops write one byte plus
+ * sizeof(mp_digit) bytes per used digit. */
+mp_err
+mp_toraw(mp_int *mp, char *str)
+{
+    char *out;
+    int dig, byte;
+
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+
+    out = str;
+    *out++ = (char)SIGN(mp);
+
+    for (dig = USED(mp) - 1; dig >= 0; --dig) {
+        const mp_digit cur = DIGIT(mp, dig);
+
+        /* Emit the bytes of this digit, most significant first */
+        for (byte = sizeof(mp_digit) - 1; byte >= 0; --byte) {
+            *out++ = (char)(cur >> (byte * CHAR_BIT));
+        }
+    }
+
+    return MP_OKAY;
+
+} /* end mp_toraw() */
+
+/* }}} */
+
+/* {{{ mp_read_radix(mp, str, radix) */
+
+/*
+  mp_read_radix(mp, str, radix)
+
+  Read an integer from the given string, and set mp to the resulting
+  value.  The input is interpreted in the given radix (2 .. MAX_RADIX).
+  Leading characters that are neither a digit in that radix nor a sign
+  are skipped, one optional '-' or '+' is consumed, and digits are then
+  read until the first character that is not a digit in that radix
+  (which includes the end of the string).  A zero result is always
+  given a non-negative sign.
+ */
+
+mp_err
+mp_read_radix(mp_int *mp, const char *str, int radix)
+{
+    const char *cur;
+    int digit;
+    mp_err res;
+    mp_sign sign = ZPOS;
+
+    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
+           MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Advance to the first digit or sign character */
+    cur = str;
+    while (*cur &&
+           (s_mp_tovalue(*cur, radix) < 0) &&
+           *cur != '-' &&
+           *cur != '+') {
+        ++cur;
+    }
+
+    /* Consume at most one sign; '+' only confirms the default */
+    if (*cur == '-') {
+        sign = NEG;
+        ++cur;
+    } else if (*cur == '+') {
+        ++cur;
+    }
+
+    /* mp = mp * radix + digit, for as long as valid digits keep coming */
+    for (;;) {
+        digit = s_mp_tovalue(*cur, radix);
+        if (digit < 0)
+            break;
+
+        res = s_mp_mul_d(mp, radix);
+        if (res != MP_OKAY)
+            return res;
+
+        res = s_mp_add_d(mp, digit);
+        if (res != MP_OKAY)
+            return res;
+
+        ++cur;
+    }
+
+    SIGN(mp) = (s_mp_cmp_d(mp, 0) == MP_EQ) ? ZPOS : sign;
+
+    return MP_OKAY;
+
+} /* end mp_read_radix() */
+
+/*
+ * mp_read_variable_radix(a, str, default_radix)
+ *
+ * Parse an integer whose radix may be selected by a C-style prefix.
+ * Leading characters that are neither digits in default_radix nor a
+ * sign are skipped, then one optional '-' or '+' is consumed.  After
+ * that, "0x" or "0X" selects radix 16, any other leading '0' selects
+ * radix 8, and otherwise default_radix is used.  The digits themselves
+ * are read by mp_read_radix(); the sign parsed here is then applied to
+ * a non-zero result.
+ *
+ * Returns MP_BADARG when a or str is NULL (subject to the ARGCHK
+ * configuration, like the other entry points in this file); otherwise
+ * the status of mp_read_radix().
+ */
+mp_err
+mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
+{
+    int radix = default_radix;
+    int cx;
+    mp_sign sig = ZPOS;
+    mp_err res;
+
+    /* str is dereferenced below before mp_read_radix() gets a chance to
+     * validate it, so the arguments have to be checked here as well. */
+    ARGCHK(a != NULL && str != NULL, MP_BADARG);
+
+    /* Skip leading non-digit characters until a digit or '-' or '+' */
+    while ((cx = *str) != 0 &&
+           (s_mp_tovalue(cx, radix) < 0) &&
+           cx != '-' &&
+           cx != '+') {
+        ++str;
+    }
+
+    if (cx == '-') {
+        sig = NEG;
+        ++str;
+    } else if (cx == '+') {
+        sig = ZPOS; /* this is the default anyway... */
+        ++str;
+    }
+
+    /* Radix prefix: "0x"/"0X" means hexadecimal, a bare "0" means octal.
+     * OR-ing with 0x20 folds 'X' onto 'x'. */
+    if (str[0] == '0') {
+        if ((str[1] | 0x20) == 'x') {
+            radix = 16;
+            str += 2;
+        } else {
+            radix = 8;
+            str++;
+        }
+    }
+    res = mp_read_radix(a, str, radix);
+    if (res == MP_OKAY) {
+        /* keep zero non-negative, otherwise apply the sign seen above */
+        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
+    }
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_radix_size(mp, radix) */
+
+/* Size estimate for the output of mp_toradix(mp, str, radix): the
+ * value of s_mp_outlen() for (USED(mp) * DIGIT_BIT - 1) bits, plus the
+ * numeric value of the sign field.  Returns 0 when mp is NULL or radix
+ * is outside 2 .. MAX_RADIX. */
+int
+mp_radix_size(mp_int *mp, int radix)
+{
+    int magnitude_bits;
+
+    if (mp == NULL)
+        return 0;
+    if (radix < 2 || radix > MAX_RADIX)
+        return 0;
+
+    magnitude_bits = USED(mp) * DIGIT_BIT - 1;
+
+    return SIGN(mp) + s_mp_outlen(magnitude_bits, radix);
+
+} /* end mp_radix_size() */
+
+/* }}} */
+
+/* {{{ mp_toradix(mp, str, radix) */
+
+/* Write mp into str as a NUL-terminated string of digits in the given
+ * radix, with a leading '-' for negative values.  Digits are produced
+ * least significant first by repeated division on a private copy of mp
+ * and the buffer is reversed at the end.  No buffer length is passed
+ * in; the caller has to provide enough room.
+ * Returns MP_BADARG / MP_RANGE for bad arguments, or the error from
+ * mp_init_copy() / mp_div_d(). */
+mp_err
+mp_toradix(mp_int *mp, char *str, int radix)
+{
+    mp_err res;
+    mp_int work;
+    mp_sign orig_sign;
+    mp_digit rem, base;
+    int len = 0;
+    int lo, hi;
+
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
+
+    /* Zero is special-cased: the division loop below would emit nothing */
+    if (mp_cmp_z(mp) == MP_EQ) {
+        str[0] = '0';
+        str[1] = '\0';
+        return MP_OKAY;
+    }
+
+    base = (mp_digit)radix;
+
+    res = mp_init_copy(&work, mp);
+    if (res != MP_OKAY)
+        return res;
+
+    /* Remember the sign, then work on the absolute value */
+    orig_sign = SIGN(&work);
+    SIGN(&work) = ZPOS;
+
+    /* Peel off digits, least significant first */
+    while (mp_cmp_z(&work) != 0) {
+        res = mp_div_d(&work, base, &work, &rem);
+        if (res != MP_OKAY) {
+            mp_clear(&work);
+            return res;
+        }
+
+        /* third argument 0 is the value the original comment associates
+         * with capital letters */
+        str[len++] = s_mp_todigit(rem, radix, 0);
+    }
+
+    if (orig_sign == NEG)
+        str[len++] = '-';
+
+    str[len] = '\0';
+
+    /* Reverse digits and sign in place to get most-significant-first */
+    for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
+        char swap = str[lo];
+
+        str[lo] = str[hi];
+        str[hi] = swap;
+    }
+
+    mp_clear(&work);
+
+    return MP_OKAY;
+
+} /* end mp_toradix() */
+
+/* }}} */
+
+/* {{{ mp_tovalue(ch, r) */
+
+/* Public wrapper: forwards to s_mp_tovalue() to obtain the numeric
+ * value of character ch in radix r and returns its result as is. */
+int
+mp_tovalue(char ch, int r)
+{
+    const int value = s_mp_tovalue(ch, r);
+
+    return value;
+
+} /* end mp_tovalue() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_strerror(ec) */
+
+/*
+  mp_strerror(ec)
+
+  Return a string describing the meaning of error code 'ec'.  The
+  string returned is allocated in static memory, so the caller should
+  not attempt to modify or free the memory associated with this
+  string.
+
+  Codes outside MP_LAST_CODE .. MP_OKAY map to entry 0 of
+  mp_err_string (the "unknown error" text); a valid code maps to entry
+  |ec| + 1.
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+    int aec;
+
+    /* Code values are negative, so the senses of these comparisons
+       are accurate */
+    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
+        return mp_err_string[0]; /* unknown error code */
+    }
+
+    /* Negate only after the range check: doing it up front would be a
+       signed overflow (undefined behavior) for ec == INT_MIN. */
+    aec = (ec < 0) ? -ec : ec;
+
+    return mp_err_string[aec + 1];
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only) */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Make sure at least 'min' digits are allocated to mp.  When more room
+ * is needed, the request is rounded up to a multiple of s_mp_defprec, a
+ * new buffer is obtained from s_mp_alloc(), the used digits are copied
+ * across, and the old buffer is zeroed before it is freed.
+ * Returns MP_MEM if the allocation fails. */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+    mp_digit *grown;
+    mp_size new_alloc;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /* Nothing to do when the current allocation already suffices */
+    if (min <= ALLOC(mp))
+        return MP_OKAY;
+
+    new_alloc = MP_ROUNDUP(min, s_mp_defprec);
+
+    grown = s_mp_alloc(new_alloc, sizeof(mp_digit));
+    if (grown == NULL)
+        return MP_MEM;
+
+    s_mp_copy(DIGITS(mp), grown, USED(mp));
+
+    /* wipe the old digits before releasing them */
+    s_mp_setz(DIGITS(mp), ALLOC(mp));
+    s_mp_free(DIGITS(mp));
+
+    DIGITS(mp) = grown;
+    ALLOC(mp) = new_alloc;
+
+    return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Raise the used size of mp to at least 'min' digits.  If the current
+ * allocation is too small the value is grown via s_mp_grow(); otherwise
+ * the digits that become visible are zeroed here.  A value that already
+ * uses 'min' or more digits is left untouched. */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    if (min <= USED(mp))
+        return MP_OKAY;
+
+    if (min <= ALLOC(mp)) {
+        /* enough room already: clear the digits about to be exposed */
+        s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+    } else {
+        res = s_mp_grow(mp, min);
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* Increase precision; the new digits are 0-filled at this point */
+    USED(mp) = min;
+
+    return MP_OKAY;
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Zero-fill 'count' digits starting at dp */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+    const size_t nbytes = count * sizeof(mp_digit);
+
+    memset(dp, 0, nbytes);
+} /* end s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */
+
+/* Copy 'count' digits from sp to dp using memcpy(), so the two ranges
+ * are expected not to overlap */
+void
+s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
+{
+    const size_t nbytes = count * sizeof(mp_digit);
+
+    memcpy(dp, sp, nbytes);
+} /* end s_mp_copy() */
+
+/* }}} */
+
+/* {{{ s_mp_alloc(nb, ni) */
+
+/* Allocate zero-filled storage for nb records of ni bytes each, and
+ * return a pointer to it (NULL if calloc() fails) */
+void *
+s_mp_alloc(size_t nb, size_t ni)
+{
+    void *block = calloc(nb, ni);
+
+    return block;
+
+} /* end s_mp_alloc() */
+
+/* }}} */
+
+/* {{{ s_mp_free(ptr) */
+
+/* Free the memory pointed to by ptr; a NULL ptr is accepted and
+ * ignored, which free() itself already guarantees */
+void
+s_mp_free(void *ptr)
+{
+    free(ptr);
+} /* end s_mp_free() */
+
+/* }}} */
+
+/* {{{ s_mp_clamp(mp) */
+
+/* Drop leading (most significant) zero digits from mp, always keeping
+ * at least one digit, and force the sign of a zero value to ZPOS */
+void
+s_mp_clamp(mp_int *mp)
+{
+    mp_size n;
+
+    for (n = MP_USED(mp); n > 1; --n) {
+        if (DIGIT(mp, n - 1) != 0)
+            break;
+    }
+    MP_USED(mp) = n;
+
+    if (n == 1 && DIGIT(mp, 0) == 0)
+        MP_SIGN(mp) = ZPOS;
+} /* end s_mp_clamp() */
+
+/* }}} */
+
+/* {{{ s_mp_exch(a, b) */
+
+/* Swap the complete contents of a and b by structure assignment;
+ * (b, a) = (a, b).  Does nothing if either pointer is NULL. */
+void
+s_mp_exch(mp_int *a, mp_int *b)
+{
+    mp_int saved;
+
+    if (a == NULL || b == NULL)
+        return;
+
+    saved = *b;
+    *b = *a;
+    *a = saved;
+
+} /* end s_mp_exch() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Arithmetic helpers */
+
+/* {{{ s_mp_lshd(mp, p) */
+
+/*
+  Shift mp leftward by p whole digits, growing it if needed and
+  zero-filling the digits shifted in at the low end.  This is a cheap
+  substitute for multiplying by a power of the radix.  A shift count of
+  zero and a value of zero are both returned unchanged.
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+    mp_err res;
+    mp_digit *digits;
+    mp_size idx;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    if (p == 0)
+        return MP_OKAY;
+
+    /* Shifting zero still gives zero; avoid growing for nothing */
+    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
+        return MP_OKAY;
+
+    res = s_mp_pad(mp, USED(mp) + p);
+    if (res != MP_OKAY)
+        return res;
+
+    /* Fetch the buffer only now: padding may have replaced it */
+    digits = DIGITS(mp);
+
+    /* Move the significant digits up, highest first so none is
+       overwritten before it has been copied */
+    idx = USED(mp) - p;
+    while (idx-- > 0) {
+        digits[idx + p] = digits[idx];
+    }
+
+    /* Clear the p digits vacated at the bottom */
+    for (idx = 0; idx < p; ++idx)
+        digits[idx] = 0;
+
+    return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+ Multiply the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise left shift of the value, done in place.
+
+ The shift is split in two: d / MP_DIGIT_BIT whole digits are moved by
+ s_mp_lshd(), and the remaining d % MP_DIGIT_BIT bits are shifted digit
+ by digit in the loop at the end. Returns MP_OKAY, or the error
+ reported by s_mp_pad() or s_mp_lshd().
+ */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+ mp_err res;
+ mp_digit dshift, rshift, mask, x, prev = 0;
+ mp_digit *pa = NULL;
+ int i;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ dshift = d / MP_DIGIT_BIT; /* whole digits to shift by */
+ d %= MP_DIGIT_BIT; /* leftover bit count, below MP_DIGIT_BIT */
+ /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */
+ /* mod and corresponding mask logic avoid that when d = 0 */
+ rshift = MP_DIGIT_BIT - d;
+ rshift %= MP_DIGIT_BIT;
+ /* mask = (2**d - 1) * 2**(w-d) mod 2**w, with w = MP_DIGIT_BIT: the
+ top d bits of a digit (this relies on DIGIT_MAX being the all-ones
+ digit, so that the +1 below wraps to 0 when d = 0) */
+ mask = (DIGIT_MAX << rshift) + 1;
+ mask &= DIGIT_MAX - 1;
+ /* bits to be shifted out of the top word */
+ x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask;
+
+ if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0)))) /* one extra digit only when the top digit spills over */
+ return res;
+
+ if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift))) /* whole-digit part of the shift */
+ return res;
+
+ pa = MP_DIGITS(mp) + dshift; /* the low dshift digits are zero already */
+
+ for (i = MP_USED(mp) - dshift; i > 0; i--) {
+ x = *pa;
+ *pa++ = (x << d) | prev; /* shifted digit plus bits carried from below */
+ prev = (x & mask) >> rshift; /* top d bits move into the next digit */
+ }
+
+ s_mp_clamp(mp); /* drop any high digits that ended up zero */
+ return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+  Shift mp rightward by p whole digits.  The digits shifted off the low
+  end are lost, and the vacated high digits are cleared so that digits
+  above the precision stay zero.  Shifting by at least the used size
+  leaves a non-negative zero.  Cannot fail.
+ */
+
+void
+s_mp_rshd(mp_int *mp, mp_size p)
+{
+    mp_digit *digits;
+    mp_size keep, idx;
+
+    if (p == 0)
+        return;
+
+    /* Everything is shifted out: wipe the buffer and reset to zero */
+    if (p >= USED(mp)) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        USED(mp) = 1;
+        SIGN(mp) = ZPOS;
+        return;
+    }
+
+    digits = MP_DIGITS(mp);
+    keep = USED(mp) - p;
+
+    /* Slide the surviving digits down, lowest first */
+    for (idx = 0; idx < keep; ++idx)
+        digits[idx] = digits[idx + p];
+
+    MP_USED(mp) = keep;
+
+    /* Zero the p digits that now lie above the used size */
+    for (idx = 0; idx < p; ++idx)
+        digits[keep + idx] = 0;
+
+} /* end s_mp_rshd() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2(mp) */
+
+/* Halve mp in place; implemented as a right shift by a single bit */
+void
+s_mp_div_2(mp_int *mp)
+{
+    const mp_digit one_bit = 1;
+
+    s_mp_div_2d(mp, one_bit);
+
+} /* end s_mp_div_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2(mp) */
+
+/* Double mp in place by shifting every digit left one bit and feeding
+ * the bit shifted out of each digit into the next.  If a bit falls out
+ * of the top digit, the value is extended by one digit, growing the
+ * allocation first when it is full. */
+mp_err
+s_mp_mul_2(mp_int *mp)
+{
+    unsigned int idx, ndigits;
+    mp_digit carry = 0;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    ndigits = MP_USED(mp);
+    for (idx = 0; idx < ndigits; ++idx) {
+        const mp_digit cur = DIGIT(mp, idx);
+
+        DIGIT(mp, idx) = (cur << 1) | carry;
+        carry = (cur >> (DIGIT_BIT - 1)); /* former top bit */
+    }
+
+    if (!carry)
+        return MP_OKAY;
+
+    /* A bit rolled out of the last digit: it needs a digit of its own */
+    if (idx >= ALLOC(mp)) {
+        mp_err res = s_mp_grow(mp, ALLOC(mp) + 1);
+
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    DIGIT(mp, idx) = carry;
+    USED(mp) += 1;
+
+    return MP_OKAY;
+
+} /* end s_mp_mul_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mod_2d(mp, d) */
+
+/*
+  Reduce the integer modulo 2^d in place, where d is a number of bits.
+  Only masking is needed: bits d and above are cleared, so the full
+  division code is not involved.  A value that has no digit containing
+  bit d is left unchanged.
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+    const mp_size top = (d / DIGIT_BIT);       /* digit holding bit d */
+    const mp_size keep_bits = (d % DIGIT_BIT); /* low bits kept in it */
+    mp_size idx;
+
+    if (top >= USED(mp))
+        return;
+
+    /* Clear bit d and everything above it inside its own digit */
+    DIGIT(mp, top) &= ((mp_digit)1 << keep_bits) - 1;
+
+    /* Clear every digit above that one */
+    for (idx = top + 1; idx < USED(mp); ++idx)
+        DIGIT(mp, idx) = 0;
+
+    s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+ Divide the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise right shift of the value, done in place, and does
+ not require the full division code (used in Barrett reduction, see
+ below). Whole digits are removed with s_mp_rshd(); the remaining
+ d % DIGIT_BIT bits are shifted digit by digit from the top down.
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+ int ix;
+ mp_digit save, next, mask, lshift;
+
+ s_mp_rshd(mp, d / DIGIT_BIT); /* whole-digit part of the shift */
+ d %= DIGIT_BIT; /* leftover bit count, below DIGIT_BIT */
+ /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */
+ /* mod and corresponding mask logic avoid that when d = 0 */
+ lshift = DIGIT_BIT - d;
+ lshift %= DIGIT_BIT;
+ mask = ((mp_digit)1 << d) - 1; /* low d bits; 0 when d = 0 */
+ save = 0; /* nothing is carried into the top digit */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ next = DIGIT(mp, ix) & mask; /* bits about to fall out of this digit */
+ DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d); /* bits from the digit above go on top */
+ save = next;
+ }
+ s_mp_clamp(mp); /* the top digit may have become zero */
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *d) */
+
+/*
+  s_mp_norm(a, b, *pd)
+
+  Normalize a and b for division, where b is the divisor.  Good
+  quotient-digit estimates need the leading digit of b to be at least
+  half the radix, i.e. to have its most significant bit set.  This is
+  achieved by multiplying both a and b by the same power of 2.  The
+  exponent (shift count) is stored in *pd so that the remainder can be
+  shifted back once the division is finished.
+ */
+
+mp_err
+s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
+{
+    mp_digit shift;
+    mp_digit msb;
+    mp_digit top;
+    mp_err res = MP_OKAY;
+
+    ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG);
+
+    shift = 0;
+    msb = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* only the top bit of a digit */
+
+    /* Count how far the leading digit of b must move to set its top bit */
+    for (top = DIGIT(b, USED(b) - 1); !(top & msb); top <<= 1) {
+        ++shift;
+    }
+
+    if (shift) {
+        MP_CHECKOK(s_mp_mul_2d(a, shift));
+        MP_CHECKOK(s_mp_mul_2d(b, shift));
+    }
+
+    *pd = shift;
+CLEANUP:
+    return res;
+
+} /* end s_mp_norm() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive digit arithmetic */
+
+/* {{{ s_mp_add_d(mp, d) */
+
+/* Add d to |mp| in place.
+ *
+ * The single digit d is added to the lowest digit of mp and the carry
+ * is rippled upward; if it runs off the top, mp is padded by one digit
+ * to hold it. The sign of mp is not looked at. Two implementations are
+ * selected at compile time: one using the double-width mp_word type,
+ * and one using only mp_digit arithmetic with explicit carry detection.
+ * Returns MP_OKAY or the error from s_mp_pad(). */
+mp_err
+s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+ mp_size ix = 1;
+
+ w = (mp_word)DIGIT(mp, 0) + d; /* widen first so the sum cannot wrap */
+ DIGIT(mp, 0) = ACCUM(w);
+ k = CARRYOUT(w);
+
+ while (ix < USED(mp) && k) { /* stop as soon as the carry dies out */
+ w = (mp_word)DIGIT(mp, ix) + k;
+ DIGIT(mp, ix) = ACCUM(w);
+ k = CARRYOUT(w);
+ ++ix;
+ }
+
+ if (k != 0) { /* carry out of the top digit; here ix equals the old USED(mp) */
+ mp_err res;
+
+ if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(mp, ix) = (mp_digit)k;
+ }
+
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit sum, mp_i, carry = 0;
+ mp_err res = MP_OKAY;
+ int used = (int)MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = sum = d + mp_i;
+ carry = (sum < d); /* unsigned wrap-around means a carry occurred */
+ while (carry && --used > 0) {
+ mp_i = *pmp;
+ *pmp++ = sum = carry + mp_i;
+ carry = !sum; /* adding 1 carries only when the digit wrapped to 0 */
+ }
+ if (carry && !used) {
+ /* mp is growing */
+ used = MP_USED(mp);
+ MP_CHECKOK(s_mp_pad(mp, used + 1));
+ MP_DIGIT(mp, used) = carry;
+ }
+CLEANUP:
+ return res;
+#endif
+} /* end s_mp_add_d() */
+
+/* }}} */
+
+/* {{{ s_mp_sub_d(mp, d) */
+
+/* Subtract d from |mp| in place, assumes |mp| > d.
+ *
+ * The single digit d is subtracted from the lowest digit of mp and the
+ * borrow is rippled upward, after which leading zero digits are
+ * clamped away. The sign of mp is not looked at. Returns MP_RANGE if a
+ * borrow is still pending after the top digit (the precondition was
+ * violated; mp has been modified anyway), otherwise MP_OKAY. Two
+ * implementations are selected at compile time, as in s_mp_add_d(). */
+mp_err
+s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_word w, b = 0;
+ mp_size ix = 1;
+
+ /* Compute initial subtraction */
+ w = (RADIX + (mp_word)DIGIT(mp, 0)) - d; /* RADIX is added up front so w cannot go negative */
+ b = CARRYOUT(w) ? 0 : 1; /* if that RADIX was used up, a borrow is owed */
+ DIGIT(mp, 0) = ACCUM(w);
+
+ /* Propagate borrows leftward */
+ while (b && ix < USED(mp)) {
+ w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
+ b = CARRYOUT(w) ? 0 : 1;
+ DIGIT(mp, ix) = ACCUM(w);
+ ++ix;
+ }
+
+ /* Remove leading zeroes */
+ s_mp_clamp(mp);
+
+ /* If we have a borrow out, it's a violation of the input invariant */
+ if (b)
+ return MP_RANGE;
+ else
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit mp_i, diff, borrow;
+ mp_size used = MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - d;
+ borrow = (diff > mp_i); /* unsigned wrap-around means a borrow occurred */
+ while (borrow && --used) {
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - borrow;
+ borrow = (diff > mp_i);
+ }
+ s_mp_clamp(mp);
+ return (borrow && !used) ? MP_RANGE : MP_OKAY; /* borrow left over after the top digit */
+#endif
+} /* end s_mp_sub_d() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_d(a, d) */
+
+/* Compute a = a * d in place, for a single digit d.  Multipliers 0 and
+ * 1 are answered directly, a power of two becomes a bit shift, and any
+ * other digit goes through the vector multiply s_mpv_mul_d() after
+ * padding a by one digit for the carry out of the top. */
+mp_err
+s_mp_mul_d(mp_int *a, mp_digit d)
+{
+    mp_err res;
+    mp_size old_used;
+    int log2d;
+
+    if (d == 0) {
+        mp_zero(a);
+        return MP_OKAY;
+    }
+    if (d == 1)
+        return MP_OKAY;
+
+    /* d == 2**log2d: shift instead of multiplying */
+    log2d = s_mp_ispow2d(d);
+    if (log2d >= 0) {
+        return s_mp_mul_2d(a, (mp_digit)log2d);
+    }
+
+    old_used = MP_USED(a);
+    MP_CHECKOK(s_mp_pad(a, old_used + 1));
+
+    /* the product is written back over a's own digits */
+    s_mpv_mul_d(MP_DIGITS(a), old_used, d, MP_DIGITS(a));
+
+    s_mp_clamp(a);
+
+CLEANUP:
+    return res;
+
+} /* end s_mp_mul_d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_d(mp, d, r) */
+
+/*
+  s_mp_div_d(mp, d, r)
+
+  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
+  single digit d.  If r is null, the remainder will be discarded.
+
+  Returns MP_RANGE when d is zero; otherwise MP_OKAY or the first error
+  reported by a helper.  For a multi-digit mp the quotient is built in a
+  temporary and only exchanged into mp at the very end, so mp is not
+  modified when an error is returned.
+ */
+
+mp_err
+s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word w = 0, q;
+#else
+    mp_digit w = 0, q;
+#endif
+    int ix;
+    mp_err res;
+    mp_int quot;
+    mp_int rem;
+
+    if (d == 0)
+        return MP_RANGE;
+    if (d == 1) {
+        /* mp / 1 == mp, remainder 0: nothing to compute */
+        if (r)
+            *r = 0;
+        return MP_OKAY;
+    }
+    /* could check for power of 2 here, but mp_div_d does that. */
+    if (MP_USED(mp) == 1) {
+        /* single-digit dividend: plain machine division is enough */
+        mp_digit n = MP_DIGIT(mp, 0);
+        mp_digit remdig;
+
+        q = n / d;
+        remdig = n % d;
+        MP_DIGIT(mp, 0) = q;
+        if (r) {
+            *r = remdig;
+        }
+        return MP_OKAY;
+    }
+
+    /* Mark both temporaries as unallocated so that CLEANUP may clear
+     * them regardless of how far initialization got. */
+    MP_DIGITS(&rem) = 0;
+    MP_DIGITS(&quot) = 0;
+    /* Make room for the quotient */
+    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    /* Schoolbook long division, one digit at a time from the top; w
+     * carries the running remainder into the next step. */
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
+
+        if (w >= d) {
+            q = w / d;
+            w = w % d;
+        } else {
+            q = 0;
+        }
+
+        /* Check the shift here as well, exactly as the mp_digit-only
+         * path below does, rather than ignoring its status. */
+        MP_CHECKOK(s_mp_lshd(&quot, 1));
+        DIGIT(&quot, 0) = (mp_digit)q;
+    }
+#else
+    {
+        mp_digit p;
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        mp_digit norm;
+#endif
+
+        MP_CHECKOK(mp_init_copy(&rem, mp));
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        /* Borrow quot as a one-digit holder for d so that s_mp_norm()
+         * can compute the normalizing shift and apply it to rem. */
+        MP_DIGIT(&quot, 0) = d;
+        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
+        if (norm)
+            d <<= norm;
+        MP_DIGIT(&quot, 0) = 0;
+#endif
+
+        p = 0;
+        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
+            w = DIGIT(&rem, ix);
+
+            if (p) {
+                /* two-digit by one-digit step: (p, w) / d */
+                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
+            } else if (w >= d) {
+                q = w / d;
+                w = w % d;
+            } else {
+                q = 0;
+            }
+
+            MP_CHECKOK(s_mp_lshd(&quot, 1));
+            DIGIT(&quot, 0) = q;
+            p = w;
+        }
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        /* undo the normalization on the remainder */
+        if (norm)
+            w >>= norm;
+#endif
+    }
+#endif
+
+    /* Deliver the remainder, if desired */
+    if (r) {
+        *r = (mp_digit)w;
+    }
+
+    s_mp_clamp(&quot);
+    mp_exch(&quot, mp);
+CLEANUP:
+    mp_clear(&quot);
+    mp_clear(&rem);
+
+    return res;
+} /* end s_mp_div_d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive full arithmetic */
+
+/* {{{ s_mp_add(a, b) */
+
+/* Compute a = |a| + |b|.
+ *
+ * Magnitude addition in place: only the digits are combined, the sign
+ * fields are not read or written. a is padded to the length of b when
+ * b is longer, and by one more digit if the sum carries out of the
+ * top. Returns MP_OKAY or the error from s_mp_pad(). Two
+ * implementations are selected at compile time: one using the
+ * double-width mp_word type, one using only mp_digit arithmetic. */
+mp_err
+s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_digit *pa, *pb;
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa + *pb++; /* w still holds the carry from the previous digit */
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pa++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+ used = MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ while (w && ix < used) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+ ++ix;
+ }
+#else
+ while (carry && ix < used) {
+ sum = carry + *pa;
+ *pa++ = sum;
+ carry = !sum; /* adding 1 carries only when the digit wrapped to 0 */
+ ++ix;
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, ix) = (mp_digit)w; /* ix equals used here: the loop above ran to the top */
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, used) = carry;
+ }
+#endif
+
+ return MP_OKAY;
+} /* end s_mp_add() */
+
+/* }}} */
+
+/* Compute c = |a| + |b| (magnitude addition).
+ *
+ * Three-operand form of s_mp_add(): the digits of a and b are added
+ * into c. c takes the sign of the a that was passed in, c is padded to
+ * the longer operand's length (plus one digit if the sum carries out
+ * of the top), and MP_USED(c) is set to the number of digits written.
+ * Returns MP_OKAY or the error from s_mp_pad(). */
+mp_err
+s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit sum, carry = 0, d;
+#endif
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a); /* taken before a and b may be swapped below */
+ if (MP_USED(a) < MP_USED(b)) { /* from here on, a names the longer operand */
+ const mp_int *xch = a;
+ a = b;
+ b = xch;
+ }
+
+ /* Make sure c has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ exchange step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++ + *pb++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa++;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pc++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward... Unlike s_mp_add(), this
+ loop always runs to the top of a, because the remaining digits of
+ a still have to be copied into c.
+ */
+ for (used = MP_USED(a); ix < used; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ *pc++ = sum = carry + *pa++;
+ carry = (sum < carry);
+#endif
+ }
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = (mp_digit)w;
+ ++used;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = carry;
+ ++used;
+ }
+#endif
+ MP_USED(c) = used; /* c may have been longer than the sum beforehand */
+ return MP_OKAY;
+}
+/* {{{ s_mp_add_offset(a, b, offset) */
+
+/* Compute a = |a| + ( |b| * (RADIX ** offset) ).
+ *
+ * b is added into a starting at digit position 'offset', i.e. as if b
+ * had first been shifted left by 'offset' whole digits. a is padded as
+ * needed, the carry is propagated through the remaining digits of a,
+ * and the result is clamped. Sign fields are not read. Returns MP_OKAY
+ * or the error from s_mp_pad(). */
+mp_err
+s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_size ib;
+ mp_size ia;
+ mp_size lim;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ lim = MP_USED(b) + offset;
+ if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ lim = USED(b);
+ for (ib = 0, ia = offset; ib < lim; ib++, ia++) { /* ia walks a, ib walks b, 'offset' digits apart */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+#else
+ d = MP_DIGIT(a, ia);
+ sum = d + MP_DIGIT(b, ib);
+ d = (sum < d); /* carry out of a + b */
+ MP_DIGIT(a, ia) = sum += carry;
+ carry = d + (sum < carry); /* plus carry out of adding the old carry */
+#endif
+ }
+
+/* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ for (lim = MP_USED(a); k && (ia < lim); ++ia) {
+ w = (mp_word)DIGIT(a, ia) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+ }
+#else
+ for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
+ d = MP_DIGIT(a, ia);
+ MP_DIGIT(a, ia) = sum = d + carry;
+ carry = (sum < d);
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (k) {
+ if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, ia) = (mp_digit)k; /* ia equals the old USED(a) here */
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, lim) = carry;
+ }
+#endif
+ s_mp_clamp(a);
+
+ return MP_OKAY;
+
+} /* end s_mp_add_offset() */
+
+/* }}} */
+
+/* {{{ s_mp_sub(a, b) */
+
+/* Compute a = |a| - |b|, assumes |a| >= |b|.
+ *
+ * Magnitude subtraction in place: the digits of b are subtracted from
+ * the low digits of a, the borrow is propagated through the rest of a,
+ * and leading zero digits are clamped away. Sign fields are not read.
+ * Returns MP_RANGE if a borrow is still pending at the end (the
+ * precondition was violated; a has been modified anyway), otherwise
+ * MP_OKAY. */
+mp_err
+s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
+{
+ mp_digit *pa, *pb, *limit;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ limit = pb + MP_USED(b);
+ while (pb < limit) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa - *pb++; /* signed accumulator: goes negative on a borrow */
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT; /* what remains is the borrow for the next digit
+ (presumably 0 or -1; relies on the shift of a negative mp_sword
+ keeping the sign -- TODO confirm) */
+#else
+ d = *pa;
+ diff = d - *pb++;
+ d = (diff > d); /* detect borrow */
+ if (borrow && --diff == MP_DIGIT_MAX) /* apply the incoming borrow; wrapping to all ones borrows again */
+ ++d;
+ *pa++ = diff;
+ borrow = d;
+#endif
+ }
+ limit = MP_DIGITS(a) + MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ while (w && pa < limit) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+ }
+#else
+ while (borrow && pa < limit) {
+ d = *pa;
+ *pa++ = diff = d - borrow;
+ borrow = (diff > d);
+ }
+#endif
+
+ /* Clobber any leading zeroes we created */
+ s_mp_clamp(a);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub() */
+
+/* }}} */
+
+/* Compute c = |a| - |b|, assumes |a| >= |b| (magnitude subtract).
+ *
+ * Three-operand form of s_mp_sub(): c takes the sign of a, is padded
+ * to the length of a, receives the digit-wise difference, and is then
+ * clamped. Returns the error from s_mp_pad(), MP_RANGE if a borrow is
+ * still pending at the end (the precondition was violated; c has been
+ * written anyway), otherwise MP_OKAY. */
+mp_err
+s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+ int ix, limit;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a);
+
+ /* Make sure c has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ limit = MP_USED(b);
+ for (ix = 0; ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++ - *pb++; /* signed accumulator: goes negative on a borrow */
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT; /* what remains is the borrow for the next digit */
+#else
+ d = *pa++;
+ diff = d - *pb++;
+ d = (diff > d); /* detect borrow */
+ if (borrow && --diff == MP_DIGIT_MAX) /* apply the incoming borrow; wrapping to all ones borrows again */
+ ++d;
+ *pc++ = diff;
+ borrow = d;
+#endif
+ }
+ for (limit = MP_USED(a); ix < limit; ++ix) { /* runs to the top of a: the rest of a must be copied into c */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa++;
+ *pc++ = diff = d - borrow;
+ borrow = (diff > d);
+#endif
+ }
+
+ /* Clobber any leading zeroes we created */
+ MP_USED(c) = ix;
+ s_mp_clamp(c);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+}
+/* {{{ s_mp_mul(a, b) */
+
+/* Compute a = a * b in place by handing the work to mp_mul() with a as
+ * both the first operand and the destination; its status is returned */
+mp_err
+s_mp_mul(mp_int *a, const mp_int *b)
+{
+    const mp_err status = mp_mul(a, b, a);
+
+    return status;
+} /* end s_mp_mul() */
+
+/* }}} */
+
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* MP_MUL_DxD(a, b, Phi, Plo): multiply the digits a and b, storing the
+ * low digit of the double-width product in Plo and the high digit in
+ * Phi. This variant forms the whole product in an unsigned long long
+ * and splits it. The arguments are pasted in unparenthesized, so pass
+ * plain variables only.
+ * This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+#else /* Portable variant of MP_MUL_DxD: a and b are split into half
+ * digits, the four partial products are formed, and the two cross
+ * terms are folded into Phi/Plo. Carries are picked up with MP_CT_LTU,
+ * presumably a branch-free unsigned "less than" (TODO confirm against
+ * its definition), rather than with conditional branches. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ mp_digit a0b1, a1b0; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
+ a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
+ a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
+ a1b0 += a0b1; \
+ Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
+ Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \
+ a1b0 <<= MP_HALF_DIGIT_BIT; \
+ Plo += a1b0; \
+ Phi += MP_CT_LTU(Plo, a1b0); \
+ }
+#endif
+
+/* Constant time version of s_mpv_mul_d_add_prop.
+ * Presently, this is only used by the Constant time Montgomery arithmetic code.
+ *
+ * a, a_len: digit vector to multiply and its length.
+ * b:        the single digit multiplier.
+ * c, c_len: accumulator vector and its total length; exactly c_len digits
+ *           of c are read and rewritten on every call.
+ *
+ * Unlike s_mpv_mul_d_add_prop, the carry loop below runs a fixed
+ * c_len - a_len times instead of stopping once the carry is zero.
+ * NOTE(review): c_len -= a_len wraps around if c_len < a_len (mp_size is
+ * unsigned) -- callers presumably guarantee c_len >= a_len; confirm.
+ */
+/* c += a * b */
+void
+s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c, mp_size c_len)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ /* Double-width path: each step fits in one mp_word. */
+ mp_digit d = 0;
+
+ /* c_len now counts only the digits of c beyond the first a_len */
+ c_len -= a_len;
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ /* Single-width path: product split into a1b1 (high) and a0b0 (low). */
+ mp_digit carry = 0;
+ /* c_len now counts only the digits of c beyond the first a_len */
+ c_len -= a_len;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ /* add the incoming carry, then the old digit of c; each wrap-around
+ * of the low digit is detected with MP_CT_LTU and added to the high one */
+ a0b0 += carry;
+ a1b1 += MP_CT_LTU(a0b0, carry);
+ a0b0 += a_i = *c; /* a_i is reused here to hold the old digit of c */
+ a1b1 += MP_CT_LTU(a0b0, a_i);
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ /* propagate the carry to the end, even if carry is zero */
+ while (c_len--) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = MP_CT_LTU(carry, c_i);
+ }
+#endif
+}
+
+#if !defined(MP_ASSEMBLY_MULTIPLY)
+/* c = a * b
+ *
+ * Multiplies the a_len digits of a by the single digit b. The product is
+ * written to c[0] .. c[a_len - 1] and the final carry digit to c[a_len]. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_size ix;
+    mp_digit carry = 0;
+
+    for (ix = 0; ix < a_len; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+        /* the double-width word holds digit product plus carry */
+        mp_word prod = ((mp_word)b * a[ix]) + carry;
+
+        c[ix] = ACCUM(prod);
+        carry = CARRYOUT(prod);
+#else
+        mp_digit cur = a[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(cur, b, hi, lo);
+
+        /* fold the incoming carry into the low digit, bumping the high
+         * digit when the addition wraps */
+        lo += carry;
+        hi += MP_CT_LTU(lo, carry);
+        c[ix] = lo;
+        carry = hi;
+#endif
+    }
+    c[a_len] = carry;
+}
+
+/* c += a * b
+ *
+ * Adds a[0 .. a_len-1] * b onto c[0 .. a_len-1]. The carry out of the top
+ * position is stored (not added) into c[a_len]. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+                mp_digit *c)
+{
+    mp_size ix;
+    mp_digit carry = 0;
+
+    for (ix = 0; ix < a_len; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+        /* product + old digit of c + carry all fit in the double word */
+        mp_word sum = ((mp_word)b * a[ix]) + c[ix] + carry;
+
+        c[ix] = ACCUM(sum);
+        carry = CARRYOUT(sum);
+#else
+        mp_digit cur = a[ix];
+        mp_digit old = c[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(cur, b, hi, lo);
+
+        /* two additions into the low digit; each wrap bumps the high one */
+        lo += carry;
+        hi += MP_CT_LTU(lo, carry);
+        lo += old;
+        hi += MP_CT_LTU(lo, old);
+        c[ix] = lo;
+        carry = hi;
+#endif
+    }
+    c[a_len] = carry;
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b, then keep pushing the carry into higher digits of c for as
+ * long as it is non-zero. The caller must make sure c extends far enough
+ * for that carry chain. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0;
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+    /* Multiply-accumulate over the digits of a */
+    for (; a_len > 0; --a_len) {
+        mp_word acc = ((mp_word)b * *a++) + *c + carry;
+
+        *c++ = ACCUM(acc);
+        carry = CARRYOUT(acc);
+    }
+
+    /* Ripple whatever carry is left */
+    while (carry != 0) {
+        mp_word acc = (mp_word)*c + carry;
+
+        *c++ = ACCUM(acc);
+        carry = CARRYOUT(acc);
+    }
+#else
+    /* Multiply-accumulate over the digits of a */
+    for (; a_len > 0; --a_len) {
+        mp_digit cur = *a++;
+        mp_digit old = *c;
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(cur, b, hi, lo);
+
+        lo += carry;
+        hi += (lo < carry);
+
+        lo += old;
+        hi += (lo < old);
+
+        *c++ = lo;
+        carry = hi;
+    }
+
+    /* Ripple whatever carry is left */
+    while (carry != 0) {
+        mp_digit old = *c;
+
+        carry += old;
+        *c++ = carry;
+        carry = (carry < old);
+    }
+#endif
+}
+#endif
+
+/*
+ * MP_SQR_D(a, Phi, Plo): square the digit a and store the double-width
+ * result in Phi (upper digit) and Plo (lower digit). As with MP_MUL_DxD the
+ * arguments are pasted in unparenthesized and more than once, so pass
+ * plain variables.
+ */
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ unsigned long long square = (unsigned long long)a * a; \
+ Plo = (mp_digit)square; \
+ Phi = (mp_digit)(square >> MP_DIGIT_BIT); \
+ }
+#else
+/*
+ * Half-digit variant. Pmid is the single cross product low(a)*high(a); it
+ * occurs twice in the square, which is why it is shifted by one extra bit
+ * in each direction: its upper part (>> HALF_BIT - 1) goes to Phi and its
+ * lower part (<< HALF_BIT + 1) to Plo. A wrap-around of Plo is carried
+ * into Phi.
+ */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ mp_digit Pmid; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
+ Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
+ Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
+ Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
+ Plo += Pmid; \
+ if (Plo < Pmid) \
+ ++Phi; \
+ }
+#endif
+
+#if !defined(MP_ASSEMBLY_SQUARE)
+/* Add the squares of the digits of pa to the digits of ps.
+ *
+ * pa, a_len: input digits and their count.
+ * ps:        accumulator; the square of pa[i] is added at ps[2*i] and
+ *            ps[2*i + 1]. Any carry left after the last input digit keeps
+ *            rippling into higher digits of ps until it is zero, so the
+ *            caller must make sure ps extends far enough for that.
+ */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_word w;
+ mp_digit d;
+ mp_size ix;
+
+ w = 0;
+/* ADD_SQUARE(n): add pa[n]^2 plus the running carry in w to ps[2n] and
+ * ps[2n+1], leaving the new carry in w. n is pasted in unparenthesized;
+ * it is only ever used with integer literals below. */
+#define ADD_SQUARE(n) \
+ d = pa[n]; \
+ w += (d * (mp_word)d) + ps[2 * n]; \
+ ps[2 * n] = ACCUM(w); \
+ w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
+ ps[2 * n + 1] = ACCUM(w); \
+ w = (w >> DIGIT_BIT)
+
+ /* main loop, unrolled four input digits at a time */
+ for (ix = a_len; ix >= 4; ix -= 4) {
+ ADD_SQUARE(0);
+ ADD_SQUARE(1);
+ ADD_SQUARE(2);
+ ADD_SQUARE(3);
+ pa += 4;
+ ps += 8;
+ }
+ /* 1..3 leftover digits: step both pointers past them first, then reach
+ * back with negative indices and fall through the cases in order */
+ if (ix) {
+ ps += 2 * ix;
+ pa += ix;
+ switch (ix) {
+ case 3:
+ ADD_SQUARE(-3); /* FALLTHRU */
+ case 2:
+ ADD_SQUARE(-2); /* FALLTHRU */
+ case 1:
+ ADD_SQUARE(-1); /* FALLTHRU */
+ case 0:
+ break;
+ }
+ }
+ /* ripple the remaining carry into the higher digits of ps */
+ while (w) {
+ w += *ps;
+ *ps++ = ACCUM(w);
+ w = (w >> DIGIT_BIT);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *pa++;
+ mp_digit a0a0, a1a1;
+
+ MP_SQR_D(a_i, a1a1, a0a0);
+
+ /* here a1a1 and a0a0 constitute a_i ** 2 */
+ a0a0 += carry;
+ if (a0a0 < carry)
+ ++a1a1;
+
+ /* now add to ps */
+ a0a0 += a_i = *ps; /* a_i is reused to hold the old digit of ps */
+ if (a0a0 < a_i)
+ ++a1a1;
+ *ps++ = a0a0;
+ a1a1 += a_i = *ps;
+ carry = (a1a1 < a_i);
+ *ps++ = a1a1;
+ }
+ /* ripple the remaining carry into the higher digits of ps */
+ while (carry) {
+ mp_digit s_i = *ps;
+ carry += s_i;
+ *ps++ = carry;
+ carry = carry < s_i;
+ }
+#endif
+}
+#endif
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+/*
+** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+** so its high bit is 1. This code is from NSPR.
+**
+** NOTE(review): the "64-bit"/"32-bit" wording presumably dates from the
+** NSPR original; the code below works on mp_digit values and splits them
+** at MP_HALF_DIGIT_BIT, whatever width mp_digit has in this build.
+**
+** Nhi, Nlo: upper and lower digit of the two-digit dividend.
+** divisor:  one-digit divisor.
+** qp, rp:   receive the quotient and remainder digit; either may be NULL.
+** Always returns MP_OKAY.
+*/
+mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ mp_digit d1, d0, q1, q0;
+ mp_digit r1, r0, m;
+
+ /* split the divisor into its upper (d1) and lower (d0) half digit */
+ d1 = divisor >> MP_HALF_DIGIT_BIT;
+ d0 = divisor & MP_HALF_DIGIT_MAX;
+ /* upper quotient half: estimate from Nhi / d1, then bring in the upper
+ * half of Nlo and correct the estimate at most twice */
+ r1 = Nhi % d1;
+ q1 = Nhi / d1;
+ m = q1 * d0;
+ r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
+ if (r1 < m) {
+ q1--, r1 += divisor;
+ /* r1 >= divisor here means the addition above did not wrap around */
+ if (r1 >= divisor && r1 < m) {
+ q1--, r1 += divisor;
+ }
+ }
+ r1 -= m;
+ /* lower quotient half: same procedure using the lower half of Nlo */
+ r0 = r1 % d1;
+ q0 = r1 / d1;
+ m = q0 * d0;
+ r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
+ if (r0 < m) {
+ q0--, r0 += divisor;
+ if (r0 >= divisor && r0 < m) {
+ q0--, r0 += divisor;
+ }
+ }
+ /* both outputs are optional */
+ if (qp)
+ *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
+ if (rp)
+ *rp = r0 - m;
+ return MP_OKAY;
+}
+#endif
+
+#if MP_SQUARE
+/* {{{ s_mp_sqr(a) */
+
+/* Square a in place: the square is built in a scratch mp_int sized for
+ * 2 * USED(a) digits and exchanged with a only when mp_sqr() succeeds.
+ * The scratch value is released on every path that allocated it. */
+mp_err
+s_mp_sqr(mp_int *a)
+{
+    mp_int square;
+    mp_err res = mp_init_size(&square, 2 * USED(a));
+
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_sqr(a, &square);
+    if (res == MP_OKAY)
+        s_mp_exch(&square, a);
+
+    mp_clear(&square);
+    return res;
+}
+
+/* }}} */
+#endif
+
+/* {{{ s_mp_div(a, b) */
+
+/*
+  s_mp_div(rem, div, quot)
+
+  Unsigned long division. On entry rem holds the dividend and quot is
+  expected to be zero; on success quot receives the quotient and rem is
+  reduced to the remainder.
+
+  Returns MP_RANGE when div is zero, or when a quotient-digit estimate
+  cannot be corrected within the allowed number of back-off steps; any
+  failure reported by a helper is passed through unchanged.
+
+  Side effects on the general (non power-of-two) path: the signs of rem
+  and div are forced to ZPOS, and s_mp_norm() is applied to both rem and
+  div while only rem is denormalized afterwards.
+  NOTE(review): that suggests div can be left modified on return --
+  callers presumably hand in a scratch copy; confirm.
+ */
+
+mp_err
+s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
+         mp_int *div,  /* i: divisor                */
+         mp_int *quot) /* i: 0;        o: quotient  */
+{
+    mp_int part, t;
+    mp_digit q_msd;
+    mp_err res;
+    mp_digit d;
+    mp_digit div_msd;
+    int ix;
+
+    if (mp_cmp_z(div) == 0)
+        return MP_RANGE;
+
+    /* t is not allocated yet; clear its digit pointer so the mp_clear()
+     * at CLEANUP can be reached before mp_init_size() has run */
+    DIGITS(&t) = 0;
+    /* Shortcut if divisor is power of two */
+    if ((ix = s_mp_ispow2(div)) >= 0) {
+        MP_CHECKOK(mp_copy(rem, quot));
+        s_mp_div_2d(quot, (mp_digit)ix);
+        s_mp_mod_2d(rem, (mp_digit)ix);
+
+        return MP_OKAY;
+    }
+
+    MP_SIGN(rem) = ZPOS;
+    MP_SIGN(div) = ZPOS;
+    MP_SIGN(&part) = ZPOS;
+
+    /* A working temporary for division */
+    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
+
+    /* Normalize to optimize guessing */
+    MP_CHECKOK(s_mp_norm(rem, div, &d));
+
+    /* Perform the division itself...woo! */
+    MP_USED(quot) = MP_ALLOC(quot);
+
+    /* Find a partial substring of rem which is at least div */
+    /* If we didn't find one, we're finished dividing */
+    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
+        int i;
+        int unusedRem;
+        int partExtended = 0; /* set to true if we need to extend part */
+
+        /* part is a window onto the top digits of rem; it shares rem's
+         * storage, so subtracting from part updates rem in place */
+        unusedRem = MP_USED(rem) - MP_USED(div);
+        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
+        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
+        MP_USED(&part) = MP_USED(div);
+
+        /* We have now truncated the part of the remainder to the same length as
+         * the divisor. If part is smaller than div, extend part by one digit. */
+        if (s_mp_cmp(&part, div) < 0) {
+            --unusedRem;
+#if MP_ARGCHK == 2
+            assert(unusedRem >= 0);
+#endif
+            --MP_DIGITS(&part);
+            ++MP_USED(&part);
+            ++MP_ALLOC(&part);
+            partExtended = 1;
+        }
+
+        /* Compute a guess for the next quotient digit */
+        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
+        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
+        if (!partExtended) {
+            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
+             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
+             * we didn't extend part, q_msd >= div_msd. Therefore we know that
+             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
+             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
+            q_msd = 1;
+        } else {
+            if (q_msd == div_msd) {
+                q_msd = MP_DIGIT_MAX;
+            } else {
+                mp_digit r;
+                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
+                                           div_msd, &q_msd, &r));
+            }
+        }
+#if MP_ARGCHK == 2
+        assert(q_msd > 0); /* This case should never occur any more. */
+#endif
+        if (q_msd <= 0)
+            break;
+
+        /* See what that multiplies out to. The copy can fail like any
+         * other helper here, so its status is checked as well. */
+        MP_CHECKOK(mp_copy(div, &t));
+        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
+
+        /*
+           If it's too big, back it off.  We should not have to do this
+           more than once, or, in rare cases, twice.  Knuth describes a
+           method by which this could be reduced to a maximum of once, but
+           I didn't implement that here.
+           When using s_mpv_div_2dx1d, we may have to do this 3 times.
+         */
+        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
+            --q_msd;
+            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
+        }
+        /* The loop stops at i == 0, never below, so running out of
+         * back-off steps shows up as i == 0 with t still above part. */
+        if (i == 0 && s_mp_cmp(&t, &part) > 0) {
+            res = MP_RANGE;
+            goto CLEANUP;
+        }
+
+        /* At this point, q_msd should be the right next digit */
+        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
+        s_mp_clamp(rem);
+
+        /*
+           Include the digit in the quotient.  We allocated enough memory
+           for any quotient we could ever possibly get, so we should not
+           have to check for failures here
+         */
+        MP_DIGIT(quot, unusedRem) = q_msd;
+    }
+
+    /* Denormalize remainder */
+    if (d) {
+        s_mp_div_2d(rem, d);
+    }
+
+    s_mp_clamp(quot);
+
+CLEANUP:
+    mp_clear(&t);
+
+    return res;
+
+} /* end s_mp_div() */
+
+/* }}} */
+
+/* {{{ s_mp_2expt(a, k) */
+
+/* Set a to 2^k: a is zeroed, padded so that digit k / DIGIT_BIT exists, and
+ * bit k % DIGIT_BIT of that digit is then switched on. Returns the status
+ * of s_mp_pad() when padding fails, MP_OKAY otherwise. */
+mp_err
+s_mp_2expt(mp_int *a, mp_digit k)
+{
+    const mp_size digit_ix = k / DIGIT_BIT;
+    const mp_size bit_ix = k % DIGIT_BIT;
+    mp_err res;
+
+    mp_zero(a);
+
+    res = s_mp_pad(a, digit_ix + 1);
+    if (res != MP_OKAY)
+        return res;
+
+    DIGIT(a, digit_ix) |= ((mp_digit)1 << bit_ix);
+
+    return MP_OKAY;
+
+} /* end s_mp_2expt() */
+
+/* }}} */
+
+/* {{{ s_mp_reduce(x, m, mu) */
+
+/*
+  Compute Barrett reduction, x (mod m), given a precomputed value for
+  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
+  faster than straight division, when many reductions by the same
+  value of m are required (such as in modular exponentiation).  This
+  can nearly halve the time required to do modular exponentiation,
+  as compared to using the full integer divide to reduce.
+
+  This algorithm was derived from the _Handbook of Applied
+  Cryptography_ by Menezes, van Oorschot and Vanstone, Ch. 14,
+  pp. 603-604.
+
+  x is reduced in place.  Returns MP_OKAY on success; otherwise the
+  status of the first helper that failed, including either of the two
+  internal multiplications.
+ */
+
+mp_err
+s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
+{
+    mp_int q;
+    mp_err res;
+
+    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
+        return res;
+
+    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1)  */
+    /* q2 = q1 * mu; a failed multiply must not be mistaken for a result */
+    if ((res = s_mp_mul(&q, mu)) != MP_OKAY)
+        goto CLEANUP;
+    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
+
+    /* x = x mod b^(k+1), quick (no division) */
+    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
+
+    /* q = q * m mod b^(k+1), quick (no division) */
+    if ((res = s_mp_mul(&q, m)) != MP_OKAY)
+        goto CLEANUP;
+    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
+
+    /* x = x - q */
+    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
+        goto CLEANUP;
+
+    /* If x < 0, add b^(k+1) to it */
+    if (mp_cmp_z(x) < 0) {
+        mp_set(&q, 1);
+        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
+            goto CLEANUP;
+        if ((res = mp_add(x, &q, x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* Back off if it's too big */
+    while (mp_cmp(x, m) >= 0) {
+        if ((res = s_mp_sub(x, m)) != MP_OKAY)
+            break;
+    }
+
+CLEANUP:
+    mp_clear(&q);
+
+    return res;
+
+} /* end s_mp_reduce() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive comparisons */
+
+/* {{{ s_mp_cmp(a, b) */
+
+/* Magnitude comparison of a and b.
+ * Returns MP_GT, MP_LT or MP_EQ. A value with more used digits is treated
+ * as the larger one; with equal digit counts the digits are compared from
+ * the most significant end and the first difference decides. */
+int
+s_mp_cmp(const mp_int *a, const mp_int *b)
+{
+    ARGMPCHK(a != NULL && b != NULL);
+
+    const mp_size len_a = MP_USED(a);
+    const mp_size len_b = MP_USED(b);
+    const mp_digit *digits_a;
+    const mp_digit *digits_b;
+    mp_size ix;
+
+    if (len_a != len_b)
+        return (len_a > len_b) ? MP_GT : MP_LT;
+
+    digits_a = MP_DIGITS(a);
+    digits_b = MP_DIGITS(b);
+
+    /* walk down from the top digit */
+    for (ix = len_a; ix > 0; --ix) {
+        const mp_digit da = digits_a[ix - 1];
+        const mp_digit db = digits_b[ix - 1];
+
+        if (da != db)
+            return (da > db) ? MP_GT : MP_LT;
+    }
+
+    return MP_EQ;
+} /* end s_mp_cmp() */
+
+/* }}} */
+
+/* {{{ s_mp_cmp_d(a, d) */
+
+/* Magnitude comparison of a against the single digit d.
+ * Returns MP_GT as soon as a uses more than one digit; otherwise digit 0 of
+ * a decides between MP_LT, MP_GT and MP_EQ. */
+int
+s_mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    mp_digit low;
+
+    ARGMPCHK(a != NULL);
+
+    if (USED(a) > 1)
+        return MP_GT;
+
+    low = DIGIT(a, 0);
+    if (low == d)
+        return MP_EQ;
+
+    return (low < d) ? MP_LT : MP_GT;
+
+} /* end s_mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2(v) */
+
+/*
+  Power-of-two test for a whole mp_int.
+
+  Yields k when v == 2^k and -1 otherwise.  The top digit must itself be
+  a power of two (checked with s_mp_ispow2d) and every digit below it
+  must be zero; each of those lower digits adds MP_DIGIT_BIT to k.
+ */
+int
+s_mp_ispow2(const mp_int *v)
+{
+    int top, exponent, ix;
+
+    ARGMPCHK(v != NULL);
+
+    top = MP_USED(v) - 1; /* index of the most significant digit */
+
+    exponent = s_mp_ispow2d(MP_DIGIT(v, top));
+    if (exponent < 0 || top == 0)
+        return exponent;
+
+    for (ix = top - 1; ix >= 0; --ix) {
+        if (DIGIT(v, ix) != 0)
+            return -1; /* a lower digit is set: not a power of two */
+        exponent += MP_DIGIT_BIT;
+    }
+
+    return exponent;
+
+} /* end s_mp_ispow2() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2d(d) */
+
+/* Power-of-two test for one digit: yields the bit position k when
+ * d == 2^k and -1 when d is zero or has more than one bit set. Once d is
+ * known to have a single bit, each mask below contributes one binary digit
+ * of that bit's position. */
+int
+s_mp_ispow2d(mp_digit d)
+{
+    int exponent = 0;
+
+    /* zero, or more than one bit set */
+    if (d == 0 || (d & (d - 1)) != 0)
+        return -1;
+
+#if defined(MP_USE_UINT_DIGIT)
+    exponent += (d & 0xffff0000U) ? 16 : 0;
+    exponent += (d & 0xff00ff00U) ? 8 : 0;
+    exponent += (d & 0xf0f0f0f0U) ? 4 : 0;
+    exponent += (d & 0xccccccccU) ? 2 : 0;
+    exponent += (d & 0xaaaaaaaaU) ? 1 : 0;
+#elif defined(MP_USE_LONG_LONG_DIGIT)
+    exponent += (d & 0xffffffff00000000ULL) ? 32 : 0;
+    exponent += (d & 0xffff0000ffff0000ULL) ? 16 : 0;
+    exponent += (d & 0xff00ff00ff00ff00ULL) ? 8 : 0;
+    exponent += (d & 0xf0f0f0f0f0f0f0f0ULL) ? 4 : 0;
+    exponent += (d & 0xccccccccccccccccULL) ? 2 : 0;
+    exponent += (d & 0xaaaaaaaaaaaaaaaaULL) ? 1 : 0;
+#elif defined(MP_USE_LONG_DIGIT)
+    exponent += (d & 0xffffffff00000000UL) ? 32 : 0;
+    exponent += (d & 0xffff0000ffff0000UL) ? 16 : 0;
+    exponent += (d & 0xff00ff00ff00ff00UL) ? 8 : 0;
+    exponent += (d & 0xf0f0f0f0f0f0f0f0UL) ? 4 : 0;
+    exponent += (d & 0xccccccccccccccccUL) ? 2 : 0;
+    exponent += (d & 0xaaaaaaaaaaaaaaaaUL) ? 1 : 0;
+#else
+#error "unknown type for mp_digit"
+#endif
+
+    return exponent;
+
+} /* end s_mp_ispow2d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive I/O helpers */
+
+/* {{{ s_mp_tovalue(ch, r) */
+
+/*
+  Convert the given character to its digit value, in the given radix.
+  If the given character is not understood in the given radix, -1 is
+  returned.  Otherwise the digit's numeric value is returned.
+
+  Digits '0'-'9' map to 0-9, upper-case letters to 10-35, lower-case
+  letters to 36-61, '+' to 62 and '/' to 63.  For radix <= 36 letters
+  are folded to upper case first, so both cases mean the same value.
+
+  The results will be odd if you use a radix < 2 or > 64, you are
+  expected to know what you're up to.
+ */
+int
+s_mp_tovalue(char ch, int r)
+{
+    int val;
+    /* The <ctype.h> functions are only defined for arguments representable
+     * as unsigned char (or EOF).  A plain char may be negative, so convert
+     * before classifying. */
+    int xch = (unsigned char)ch;
+
+    if (r <= 36)
+        xch = toupper(xch);
+
+    if (isdigit(xch))
+        val = xch - '0';
+    else if (isupper(xch))
+        val = xch - 'A' + 10;
+    else if (islower(xch))
+        val = xch - 'a' + 36;
+    else if (xch == '+')
+        val = 62;
+    else if (xch == '/')
+        val = 63;
+    else
+        return -1;
+
+    /* reject symbols that exist but lie outside the requested radix */
+    if (val < 0 || val >= r)
+        return -1;
+
+    return val;
+
+} /* end s_mp_tovalue() */
+
+/* }}} */
+
+/* {{{ s_mp_todigit(val, r, low) */
+
+/*
+  Map a digit value to the character that denotes it in radix r.
+
+  Yields 0 when val is not below r.  Otherwise the symbol is looked up in
+  s_dmap_1; for radix <= 36 a non-zero 'low' asks for the lower-case form
+  of that symbol.
+
+  The results may be odd if you use a radix < 2 or > 64, you are
+  expected to know what you're doing.
+ */
+
+char
+s_mp_todigit(mp_digit val, int r, int low)
+{
+    char symbol;
+
+    if (val >= r)
+        return 0;
+
+    symbol = s_dmap_1[val];
+
+    return (r <= 36 && low) ? (char)tolower(symbol) : symbol;
+
+} /* end s_mp_todigit() */
+
+/* }}} */
+
+/* {{{ s_mp_outlen(bits, radix) */
+
+/*
+  Return an estimate for how long a string is needed to hold a radix
+  r representation of a number with 'bits' significant bits, plus an
+  extra for a zero terminator (assuming C style strings here)
+
+  The estimate is bits * LOG_V_2(r), plus 1.5 before truncating to int,
+  plus one more character for the terminator.
+  NOTE(review): LOG_V_2(r) is presumably the number of radix-r digits per
+  bit (log base r of 2) -- it is defined outside this block; confirm.
+ */
+int
+s_mp_outlen(int bits, int r)
+{
+ return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
+
+} /* end s_mp_outlen() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* mp_read_unsigned_octets(mp, str, len)
+   Load a big-endian byte string (base 256) of len bytes into mp.
+   The value is taken as non-negative and leading zero bytes are ignored.
+   Returns MP_OKAY, or the status of s_mp_lshd() if making room for a
+   further digit fails.
+ */
+
+mp_err
+mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
+{
+    mp_size partial, i;
+    mp_err res;
+    mp_digit acc;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Leading bytes that do not fill a whole digit form the first digit */
+    partial = len % sizeof(mp_digit);
+    if (partial != 0) {
+        acc = 0;
+        for (i = 0; i < partial; ++i) {
+            acc = (acc << 8) | str[i];
+        }
+        str += partial;
+        len -= partial;
+        MP_DIGIT(mp, 0) = acc;
+    }
+
+    /* Everything that is left comes in whole digits */
+    while (len > 0) {
+        acc = 0;
+        for (i = 0; i < sizeof(mp_digit); ++i) {
+            acc = (acc << 8) | str[i];
+        }
+        str += sizeof(mp_digit);
+        len -= sizeof(mp_digit);
+
+        if (mp_cmp_z(mp) != MP_EQ) {
+            /* value already non-zero: shift up by one digit to make room */
+            res = s_mp_lshd(mp, 1);
+            if (res != MP_OKAY)
+                return res;
+        } else if (acc == 0) {
+            /* still zero and so is this digit: nothing to store */
+            continue;
+        }
+        MP_DIGIT(mp, 0) = acc;
+    }
+    return MP_OKAY;
+} /* end mp_read_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_unsigned_octet_size(mp) */
+/* Number of bytes needed to hold the magnitude of mp without leading zero
+ * bytes.  A value whose digits are all zero is reported as needing 1. */
+unsigned int
+mp_unsigned_octet_size(const mp_int *mp)
+{
+    unsigned int total;
+    int idx, shift;
+    mp_digit top = 0;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
+
+    total = (USED(mp) * sizeof(mp_digit));
+
+    /* Drop whole digits that are zero, most significant first; top ends up
+     * holding the first non-zero digit, if there is one. */
+    idx = USED(mp) - 1;
+    while (idx >= 0 && (top = DIGIT(mp, idx)) == 0) {
+        total -= sizeof(top);
+        --idx;
+    }
+    if (total == 0)
+        return 1;
+
+    /* Drop the zero bytes at the high end of that digit */
+    shift = (sizeof(mp_digit) - 1) * CHAR_BIT;
+    while (shift >= 0 && (unsigned char)(top >> shift) == 0) {
+        --total;
+        shift -= CHAR_BIT;
+    }
+    return total;
+} /* end mp_unsigned_octet_size() */
+/* }}} */
+
+/* {{{ mp_to_unsigned_octets(mp, str) */
+/* output a buffer of big endian octets no longer than specified.
+ *
+ * Writes the magnitude of mp to str without leading zero bytes (a zero
+ * value is written as a single 0 byte).  On success the number of bytes
+ * written is returned through the mp_err result as a positive count.
+ * MP_BADARG is returned when the value needs more than maxlen bytes. */
+mp_err
+mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+    int ix, pos = 0;
+    unsigned int bytes;
+
+    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+    bytes = mp_unsigned_octet_size(mp);
+    ARGCHK(bytes <= maxlen, MP_BADARG);
+    /* Do not rely on ARGCHK alone for the buffer bound: enforce it here
+     * so an undersized buffer is never written past. */
+    if (bytes > maxlen)
+        return MP_BADARG;
+
+    /* Iterate over each digit... */
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        mp_digit d = DIGIT(mp, ix);
+        int jx;
+
+        /* Unpack digit bytes, high order first */
+        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+            if (!pos && !x) /* suppress leading zeros */
+                continue;
+            str[pos++] = x;
+        }
+    }
+    if (!pos)
+        str[pos++] = 0;
+    return pos;
+} /* end mp_to_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_to_signed_octets(mp, str) */
+/* output a buffer of big endian octets no longer than specified.
+ *
+ * Like mp_to_unsigned_octets, but when the first significant byte has its
+ * top bit set an extra leading 0 byte is written so the output reads as a
+ * positive number.  On success the number of bytes written is returned
+ * through the mp_err result as a positive count.  MP_BADARG is returned
+ * when the output (including that extra byte) does not fit in maxlen. */
+mp_err
+mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+    int ix, pos = 0;
+    unsigned int bytes;
+
+    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+    bytes = mp_unsigned_octet_size(mp);
+    ARGCHK(bytes <= maxlen, MP_BADARG);
+    /* Same explicit bound as for the extra sign byte below, so the limit
+     * holds even where ARGCHK does not return. */
+    if (bytes > maxlen)
+        return MP_BADARG;
+
+    /* Iterate over each digit... */
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        mp_digit d = DIGIT(mp, ix);
+        int jx;
+
+        /* Unpack digit bytes, high order first */
+        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+            unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+            if (!pos) {
+                if (!x) /* suppress leading zeros */
+                    continue;
+                if (x & 0x80) { /* add one leading zero to make output positive.  */
+                    ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
+                    if (bytes + 1 > maxlen)
+                        return MP_BADARG;
+                    str[pos++] = 0;
+                }
+            }
+            str[pos++] = x;
+        }
+    }
+    if (!pos)
+        str[pos++] = 0;
+    return pos;
+} /* end mp_to_signed_octets() */
+/* }}} */
+
+/* {{{ mp_to_fixlen_octets(mp, str) */
+/* output a buffer of big endian octets exactly as long as requested.
+ constant time on the value of mp.
+
+ mp:     non-negative value to encode.
+ str:    output buffer; exactly 'length' bytes are written on success.
+ length: requested output size in bytes, must be > 0.
+
+ If mp occupies fewer bytes than 'length' the output is left-padded with
+ zero bytes. If it occupies more, the surplus high-order bytes must all be
+ zero, otherwise MP_BADARG is reported through ARGCHK. */
+mp_err
+mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
+{
+ int ix, jx;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG);
+
+ /* Constant time on the value of mp. Don't use mp_unsigned_octet_size. */
+ bytes = USED(mp) * MP_DIGIT_SIZE;
+
+ /* If the output is shorter than the native size of mp, then check that any
+ * bytes not written have zero values. This check isn't constant time on
+ * the assumption that timing-sensitive callers can guarantee that mp fits
+ * in the allocated space. */
+ ix = USED(mp) - 1;
+ if (bytes > length) {
+ /* number of high-order bytes of mp that will not be emitted */
+ unsigned int zeros = bytes - length;
+
+ /* whole digits that are skipped entirely must be zero */
+ while (zeros >= MP_DIGIT_SIZE) {
+ ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG);
+ zeros -= MP_DIGIT_SIZE;
+ ix--;
+ }
+
+ /* a digit that is only partly skipped: m selects its top 'zeros'
+ * bytes, which must be zero; the remaining low bytes are emitted */
+ if (zeros > 0) {
+ mp_digit d = DIGIT(mp, ix);
+ mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT);
+ ARGCHK((d & m) == 0, MP_BADARG);
+ for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) {
+ *str++ = d >> (jx * CHAR_BIT);
+ }
+ ix--;
+ }
+ } else if (bytes < length) {
+ /* Place any needed leading zeros. */
+ unsigned int zeros = length - bytes;
+ memset(str, 0, zeros);
+ str += zeros;
+ }
+
+ /* Iterate over each whole digit... */
+ for (; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+
+ /* Unpack digit bytes, high order first */
+ for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) {
+ *str++ = d >> (jx * CHAR_BIT);
+ }
+ }
+ return MP_OKAY;
+} /* end mp_to_fixlen_octets() */
+/* }}} */
+
+/* {{{ mp_cswap(condition, a, b, numdigits) */
+/* performs a conditional swap between mp_int.
+ *
+ * condition: zero leaves a and b as they are, any non-zero value swaps them.
+ * a, b:      the two values; both are grown to numdigits digits first if
+ *            either has fewer allocated.
+ * numdigits: how many digits of each value take part in the swap.
+ *
+ * The used count, the sign and the first numdigits digits are exchanged
+ * with XOR masking, so the same statements run whichever way condition
+ * goes. Returns 0 on success (also when a == b), or the error from
+ * s_mp_grow(). */
+mp_err
+mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits)
+{
+ mp_digit x;
+ unsigned int i;
+ mp_err res = 0;
+
+ /* if pointers are equal return */
+ if (a == b)
+ return res;
+
+ if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) {
+ MP_CHECKOK(s_mp_grow(a, numdigits));
+ MP_CHECKOK(s_mp_grow(b, numdigits));
+ }
+
+ /* turn condition into a mask: all zero bits when condition == 0,
+ * all one bits for any other value */
+ condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1;
+
+ /* x is a ^ b under the mask; XOR-ing it into both sides swaps them when
+ * the mask is set and changes nothing when it is clear */
+ x = (USED(a) ^ USED(b)) & condition;
+ USED(a) ^= x;
+ USED(b) ^= x;
+
+ x = (SIGN(a) ^ SIGN(b)) & condition;
+ SIGN(a) ^= x;
+ SIGN(b) ^= x;
+
+ for (i = 0; i < numdigits; i++) {
+ x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition;
+ DIGIT(a, i) ^= x;
+ DIGIT(b, i) ^= x;
+ }
+
+CLEANUP:
+ return res;
+} /* end mp_cswap() */
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
new file mode 100644
index 0000000000..dd129db0d6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -0,0 +1,363 @@
+/*
+ * mpi.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPI_
+#define _H_MPI_
+
+#include "mpi-config.h"
+
+#include "seccomon.h"
+SEC_BEGIN_PROTOS
+
+#if MP_DEBUG
+#undef MP_IOFUNC
+#define MP_IOFUNC 1
+#endif
+
+#if MP_IOFUNC
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <limits.h>
+
+#if defined(BSDI)
+#undef ULLONG_MAX
+#endif
+
+#include <sys/types.h>
+
+#define MP_NEG 1
+#define MP_ZPOS 0
+
+#define MP_OKAY 0 /* no error, all is well */
+#define MP_YES 0 /* yes (boolean result) */
+#define MP_NO -1 /* no (boolean result) */
+#define MP_MEM -2 /* out of memory */
+#define MP_RANGE -3 /* argument out of range */
+#define MP_BADARG -4 /* invalid parameter */
+#define MP_UNDEF -5 /* answer is undefined */
+#define MP_LAST_CODE MP_UNDEF
+
+typedef unsigned int mp_sign;
+typedef unsigned int mp_size;
+typedef int mp_err;
+
+#define MP_32BIT_MAX 4294967295U
+
+#if !defined(ULONG_MAX)
+#error "ULONG_MAX not defined"
+#elif !defined(UINT_MAX)
+#error "UINT_MAX not defined"
+#elif !defined(USHRT_MAX)
+#error "USHRT_MAX not defined"
+#endif
+
+#if defined(ULLONG_MAX) /* C99, Solaris */
+#define MP_ULONG_LONG_MAX ULLONG_MAX
+/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
+#elif defined(ULONG_LONG_MAX) /* HPUX */
+#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
+#elif defined(ULONGLONG_MAX) /* AIX */
+#define MP_ULONG_LONG_MAX ULONGLONG_MAX
+#endif
+
+/*
+ * Choose the type of one digit (mp_digit). Exactly one of
+ * MP_USE_LONG_DIGIT, MP_USE_LONG_LONG_DIGIT and MP_USE_UINT_DIGIT is left
+ * defined afterwards, together with MP_DIGIT_MAX, MP_DIGIT_FMT and
+ * MP_HALF_DIGIT_MAX for the chosen type.
+ */
+/* We only use unsigned long for mp_digit iff long is more than 32 bits. */
+#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
+typedef unsigned long mp_digit;
+#define MP_DIGIT_MAX ULONG_MAX
+#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+/* this branch always sets MP_NO_MP_WORD, so no double-width mp_word type
+ * is selected further down */
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_DIGIT
+#define MP_USE_LONG_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+
+/* otherwise fall back to unsigned long long when the platform has one */
+#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
+typedef unsigned long long mp_digit;
+#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
+#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_LONG_DIGIT
+#define MP_USE_LONG_LONG_DIGIT 1
+#undef MP_USE_LONG_DIGIT
+
+/* unsigned int digits: requested via MP_USE_UINT_DIGIT, or nothing wider
+ * was available above */
+#else
+typedef unsigned int mp_digit;
+#define MP_DIGIT_MAX UINT_MAX
+#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX USHRT_MAX
+#undef MP_USE_UINT_DIGIT
+#define MP_USE_UINT_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+#undef MP_USE_LONG_DIGIT
+#endif
+
+/*
+ * Choose mp_word / mp_sword, the type used for double-width intermediate
+ * results. One is only selected here for unsigned int digits, and only if
+ * unsigned long or unsigned long long is available as the wider type;
+ * in every other case MP_NO_MP_WORD ends up defined.
+ */
+#if !defined(MP_NO_MP_WORD)
+#if defined(MP_USE_UINT_DIGIT) && \
+ (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
+
+/* prefer unsigned long when it is wider than unsigned int */
+#if (ULONG_MAX > UINT_MAX)
+typedef unsigned long mp_word;
+typedef long mp_sword;
+#define MP_WORD_MAX ULONG_MAX
+
+#else
+typedef unsigned long long mp_word;
+typedef long long mp_sword;
+#define MP_WORD_MAX MP_ULONG_LONG_MAX
+#endif
+
+#else
+#define MP_NO_MP_WORD 1
+#endif
+#endif /* !defined(MP_NO_MP_WORD) */
+
+/* With MP_DEFINE_SMALL_WORD set and no mp_word chosen above, declare
+ * mp_word/mp_sword as plain int-sized types. */
+#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
+typedef unsigned int mp_word;
+typedef int mp_sword;
+#define MP_WORD_MAX UINT_MAX
+#endif
+
+#define MP_DIGIT_SIZE sizeof(mp_digit)
+#define MP_DIGIT_BIT (CHAR_BIT * MP_DIGIT_SIZE)
+#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word))
+#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX)
+
+#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
+#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
+/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
+** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
+** consistent with the other _HALF_ names.
+*/
+
+/* Macros for accessing the mp_int internals */
+#define MP_SIGN(MP) ((MP)->sign)
+#define MP_USED(MP) ((MP)->used)
+#define MP_ALLOC(MP) ((MP)->alloc)
+#define MP_DIGITS(MP) ((MP)->dp)
+#define MP_DIGIT(MP, N) (MP)->dp[(N)]
+
+/* This defines the maximum I/O base (minimum is 2) */
+#define MP_MAX_RADIX 64
+
+/* Constant Time Macros on mp_digits */
+/* MP_CT_HIGH_TO_LOW moves the top bit of x down to bit 0, giving 0 or 1.
+ * All the predicates below are built from it and evaluate to MP_CT_TRUE
+ * or MP_CT_FALSE using only arithmetic and bitwise operators. */
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
+#define MP_CT_TRUE ((mp_digit)1)
+#define MP_CT_FALSE ((mp_digit)0)
+
+/* basic zero and non zero tests */
+/* x | (0 - x) has its top bit set exactly when x is non-zero */
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+
+/* basic constant-time helper macro for equalities and inequalities.
+ * The inequalities look only at the top bit of the difference, so they
+ * will produce incorrect results if abs(a-b) > MP_DIGIT_MAX/2. This can be
+ * avoided if unsigned values stay within the range 0-MP_DIGIT_MAX/2. */
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
+#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
+#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
+
+/* use constant time result to select a boolean value
+ * or an mp digit depending on the args */
+/* yields l for the bits where m is set and r for the bits where it is clear */
+#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
+#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r) /* mask, l and r are booleans */
+#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r) /*mask, l, and r are mp_digit */
+
+/* full inequalities that work with full mp_digit values */
+/* MP_CT_OVERFLOW: when the top bits of a and b differ the answer is the
+ * top bit of d, otherwise it is c. MP_CT_LTU uses that to fall back on the
+ * top bit of b whenever the plain subtraction test cannot be trusted. */
+#define MP_CT_OVERFLOW(a, b, c, d) \
+ MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)), \
+ (MP_CT_HIGH_TO_LOW(d)), c)
+#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
+
+/* An arbitrary precision integer. The fields are normally reached through
+ * the MP_SIGN / MP_ALLOC / MP_USED / MP_DIGITS / MP_DIGIT accessor macros. */
+typedef struct {
+ mp_sign sign; /* sign of this quantity */
+ mp_size alloc; /* how many digits allocated */
+ mp_size used; /* how many digits used */
+ mp_digit *dp; /* the digits themselves */
+} mp_int;
+
+/* Default precision */
+mp_size mp_get_prec(void);
+void mp_set_prec(mp_size prec);
+
+/* Memory management */
+mp_err mp_init(mp_int *mp);
+mp_err mp_init_size(mp_int *mp, mp_size prec);
+mp_err mp_init_copy(mp_int *mp, const mp_int *from);
+mp_err mp_copy(const mp_int *from, mp_int *to);
+void mp_exch(mp_int *mp1, mp_int *mp2);
+void mp_clear(mp_int *mp);
+void mp_zero(mp_int *mp);
+void mp_set(mp_int *mp, mp_digit d);
+mp_err mp_set_int(mp_int *mp, long z);
+#define mp_set_long(mp, z) mp_set_int(mp, z)
+mp_err mp_set_ulong(mp_int *mp, unsigned long z);
+
+/* Single digit arithmetic */
+mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_2(const mp_int *a, mp_int *c);
+mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
+mp_err mp_div_2(const mp_int *a, mp_int *c);
+mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
+
+/* Sign manipulations */
+mp_err mp_abs(const mp_int *a, mp_int *b);
+mp_err mp_neg(const mp_int *a, mp_int *b);
+
+/* Full arithmetic */
+mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
+mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
+#if MP_SQUARE
+mp_err mp_sqr(const mp_int *a, mp_int *b);
+#else
+#define mp_sqr(a, b) mp_mul(a, a, b)
+#endif
+mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
+mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
+mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_2expt(mp_int *a, mp_digit k);
+
+/* Modular arithmetic */
+#if MP_MODARITH
+mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
+mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
+#else
+#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
+#endif
+mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
+#endif /* MP_MODARITH */
+
+/* montgomery math */
+mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont);
+mp_digit mp_calculate_mont_n0i(const mp_int *N);
+mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct);
+mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c);
+
+/* Comparisons */
+int mp_cmp_z(const mp_int *a);
+int mp_cmp_d(const mp_int *a, mp_digit d);
+int mp_cmp(const mp_int *a, const mp_int *b);
+int mp_cmp_mag(const mp_int *a, const mp_int *b);
+int mp_isodd(const mp_int *a);
+int mp_iseven(const mp_int *a);
+mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret);
+
+/* Number theoretic */
+mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
+mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
+
+/* Input and output */
+#if MP_IOFUNC
+void mp_print(mp_int *mp, FILE *ofp);
+#endif /* end MP_IOFUNC */
+
+/* Base conversion */
+mp_err mp_read_raw(mp_int *mp, char *str, int len);
+int mp_raw_size(mp_int *mp);
+mp_err mp_toraw(mp_int *mp, char *str);
+mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
+mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix);
+int mp_radix_size(mp_int *mp, int radix);
+mp_err mp_toradix(mp_int *mp, char *str, int radix);
+int mp_tovalue(char ch, int r);
+
+#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S) mp_toradix((M), (S), 16)
+
+/* Error strings */
+const char *mp_strerror(mp_err ec);
+
+/* Octet string conversion functions */
+mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
+unsigned int mp_unsigned_octet_size(const mp_int *mp);
+mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
+
+/* Miscellaneous */
+mp_size mp_trailing_zeros(const mp_int *mp);
+void freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx);
+mp_err mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits);
+
+#define MP_CHECKOK(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP
+#define MP_CHECKERR(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP
+
+#define NEG MP_NEG
+#define ZPOS MP_ZPOS
+#define DIGIT_MAX MP_DIGIT_MAX
+#define DIGIT_BIT MP_DIGIT_BIT
+#define DIGIT_FMT MP_DIGIT_FMT
+#define RADIX MP_RADIX
+#define MAX_RADIX MP_MAX_RADIX
+#define SIGN(MP) MP_SIGN(MP)
+#define USED(MP) MP_USED(MP)
+#define ALLOC(MP) MP_ALLOC(MP)
+#define DIGITS(MP) MP_DIGITS(MP)
+#define DIGIT(MP, N) MP_DIGIT(MP, N)
+
+/* Functions which return an mp_err value will NULL-check their arguments via
+ * ARGCHK(condition, return), where the caller is responsible for checking the
+ * mp_err return code. For functions that return an integer type, the caller
+ * has no way to tell if the value is an error code or a legitimate value.
+ * Therefore, ARGMPCHK(condition) will trigger an assertion failure on debug
+ * builds, but no-op in optimized builds.
+ *
+ * The expansion is selected by MP_ARGCHK:
+ *   1     - ARGCHK returns its second argument from the enclosing function
+ *           when the condition is false; ARGMPCHK expands to nothing.
+ *   2     - both macros expand to assert(condition); ARGCHK ignores its
+ *           second argument.
+ *   other - both macros expand to nothing.
+ *
+ * NOTE: in mode 1 ARGCHK expands to a brace-enclosed block rather than to a
+ * single statement, so "ARGCHK(...);" cannot be used as the unbraced body of
+ * an `if` that is followed by an `else`. */
+#if MP_ARGCHK == 1
+#define ARGMPCHK(X) /* */
+#define ARGCHK(X, Y) \
+ { \
+ if (!(X)) { \
+ return (Y); \
+ } \
+ }
+#elif MP_ARGCHK == 2
+#include <assert.h>
+#define ARGMPCHK(X) assert(X)
+#define ARGCHK(X, Y) assert(X)
+#else
+#define ARGMPCHK(X) /* */
+#define ARGCHK(X, Y) /* */
+#endif
+
+#ifdef CT_VERIF
+void mp_taint(mp_int *mp);
+void mp_untaint(mp_int *mp);
+#endif
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPI_ */
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c
new file mode 100644
index 0000000000..9e538bb6a1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64.c
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_AMD64
+#error This file only works on AMD64 platforms.
+#endif
+
+#include <mpi-priv.h>
+
+/*
+ * MPI glue
+ *
+ */
+
+/* c += a * b, with the resulting carry propagated into c[a_len] and above.
+ * Presently, this is only used by the Montgomery arithmetic code. */
+void MPI_ASM_DECL
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+                     mp_digit b, mp_digit *c)
+{
+    /* Multiply-accumulate the low a_len digits; the call yields the carry. */
+    mp_digit carry = s_mpv_mul_add_vec64(c, a, a_len, b);
+    mp_digit *dst = c + a_len;
+
+    /* Ripple the carry upwards until a digit absorbs it without wrapping. */
+    for (; carry != 0; ++dst) {
+        const mp_digit sum = *dst + carry;
+        carry = (sum < *dst) || (sum < carry);
+        *dst = sum;
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_common.S b/security/nss/lib/freebl/mpi/mpi_amd64_common.S
new file mode 100644
index 0000000000..4000f2066a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_common.S
@@ -0,0 +1,409 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_set_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r = a * digit, r and a are vectors of length len
+# returns the carry digit (0 when len == 0, in which case r is not written)
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+# Registers as used below: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+# Scratch: %rax and %rdx (mul operand and result), %r8 (digits left),
+# %r9 (carry), %r11 (next digit of a).
+#
+
+.text; .align 16; .globl s_mpv_mul_set_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64
+.private_extern s_mpv_mul_set_vec64
+s_mpv_mul_set_vec64:
+#else
+.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+#endif
+
+ xorq %r9, %r9 # cy = 0; .L17 returns cy, so clear it before the len == 0 exit
+ movl %edx, %r8d # r8 = len zero-extended (len is 32 bit); %rdx is used by mul
+ testq %r8, %r8 # if (len == 0) return (0)
+ jz .L17
+
+.L15:
+ cmpq $8, %r8 # if (len < 8) finish with the tail code
+ jb .L16
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi # a += 8
+ addq $64, %rdi # r += 8
+ subq $8, %r8 # len -= 8
+
+ jz .L17
+ jmp .L15
+
+.L16: # tail: between 1 and 7 digits remain
+ movq 0(%rsi), %rax
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax # return cy
+ ret
+
+#ifndef DARWIN
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+#endif
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_add_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r += a * digit, r and a are vectors of length len
+# returns the carry digit (0 when len == 0, in which case r is not touched)
+# r and a are 64 bit aligned.
+#
+# uint64_t
+# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+#
+# Registers as used below: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+# Scratch: %rax and %rdx (mul operand and result), %r8 (digits left),
+# %r9 (carry), %r10 (current digit of r), %r11 (next digit of a).
+#
+
+.text; .align 16; .globl s_mpv_mul_add_vec64;
+
+#ifdef DARWIN
+#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64
+.private_extern s_mpv_mul_add_vec64
+s_mpv_mul_add_vec64:
+#else
+.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+#endif
+
+ xorq %r9, %r9 # cy = 0; .L27 returns cy, so clear it before the len == 0 exit
+ movl %edx, %r8d # r8 = len zero-extended (len is 32 bit); %rdx is used by mul
+ testq %r8, %r8 # if (len == 0) return (0)
+ jz .L27
+
+.L25:
+ cmpq $8, %r8 # if (len < 8) finish with the tail code
+ jb .L26
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 0(%rdi), %r10 # r10 = r[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ movq 8(%rdi), %r10 # prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ movq 16(%rdi), %r10 # prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ movq 24(%rdi), %r10 # prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ movq 32(%rdi), %r10 # prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ movq 40(%rdi), %r10 # prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ movq 48(%rdi), %r10 # prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ movq 56(%rdi), %r10 # prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi # a += 8
+ addq $64, %rdi # r += 8
+ subq $8, %r8 # len -= 8
+
+ jz .L27
+ jmp .L25
+
+.L26: # tail: between 1 and 7 digits remain
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax # return cy
+ ret
+
+#ifndef DARWIN
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
new file mode 100644
index 0000000000..2120c18f9d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
@@ -0,0 +1,388 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;
+; This code is converted from mpi_amd64_gas.asm for MASM for x64.
+;
+
+; ------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_set_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+; ------------------------------------------------------------------------
+
+; r = a * digit, r and a are vectors of length len
+; returns the carry digit (0 when len == 0, in which case r is not written)
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+.CODE
+
+s_mpv_mul_set_vec64 PROC
+
+ ; Parameter register compatibility
+ ;
+ ; The arguments arrive here in rcx, rdx, r8 and r9, while the body
+ ; (converted from the GAS version) expects them in rdi, rsi, rdx and rcx.
+ ; rdi and rsi are saved on entry and restored before returning.
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx ; rdi = r
+ mov rsi, rdx ; rsi = a
+ mov edx, r8d ; rdx = len, zero-extended
+ mov rcx, r9 ; rcx = digit
+
+ xor r9, r9 ; cy = 0; L17 returns cy, so clear it before the len == 0 exit
+ test rdx, rdx ; if (len == 0) return (0)
+ jz L17
+ mov r8, rdx ; use r8 for len; rdx is used by mul
+
+L15:
+ cmp r8, 8 ; if (len < 8) finish with the tail code
+ jb L16
+ mov rax, [rsi] ; rax = a[0]
+ mov r11, [8+rsi] ; prefetch a[1]
+ mul rcx ; p = a[0] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [0+rdi], rax ; r[0] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [16+rsi] ; prefetch a[2]
+ mul rcx ; p = a[1] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [8+rdi], rax ; r[1] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [24+rsi] ; prefetch a[3]
+ mul rcx ; p = a[2] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [16+rdi], rax ; r[2] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [32+rsi] ; prefetch a[4]
+ mul rcx ; p = a[3] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [24+rdi], rax ; r[3] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [40+rsi] ; prefetch a[5]
+ mul rcx ; p = a[4] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [32+rdi], rax ; r[4] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [48+rsi] ; prefetch a[6]
+ mul rcx ; p = a[5] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [40+rdi], rax ; r[5] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [56+rsi] ; prefetch a[7]
+ mul rcx ; p = a[6] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [48+rdi], rax ; r[6] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mul rcx ; p = a[7] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [56+rdi], rax ; r[7] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ add rsi, 64 ; a += 8
+ add rdi, 64 ; r += 8
+ sub r8, 8 ; len -= 8
+ jz L17
+ jmp L15
+
+L16: ; tail: between 1 and 7 digits remain
+ mov rax, [0+rsi]
+ mul rcx ; p = a[0] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [0+rdi], rax ; r[0] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [8+rsi]
+ mul rcx ; p = a[1] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [8+rdi], rax ; r[1] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [16+rsi]
+ mul rcx ; p = a[2] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [16+rdi], rax ; r[2] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [24+rsi]
+ mul rcx ; p = a[3] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [24+rdi], rax ; r[3] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [32+rsi]
+ mul rcx ; p = a[4] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [32+rdi], rax ; r[4] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [40+rsi]
+ mul rcx ; p = a[5] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [40+rdi], rax ; r[5] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+ mov rax, [48+rsi]
+ mul rcx ; p = a[6] * digit
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [48+rdi], rax ; r[6] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L17
+
+L17:
+ mov rax, r9 ; return cy
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_set_vec64 ENDP
+
+
+;------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_add_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+;------------------------------------------------------------------------
+
+; r += a * digit, r and a are vectors of length len
+; returns the carry digit (0 when len == 0, in which case r is not touched)
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+s_mpv_mul_add_vec64 PROC
+
+ ; Parameter register compatibility
+ ;
+ ; The arguments arrive here in rcx, rdx, r8 and r9, while the body
+ ; (converted from the GAS version) expects them in rdi, rsi, rdx and rcx.
+ ; rdi and rsi are saved on entry and restored before returning.
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx ; rdi = r
+ mov rsi, rdx ; rsi = a
+ mov edx, r8d ; rdx = len, zero-extended
+ mov rcx, r9 ; rcx = digit
+
+ xor r9, r9 ; cy = 0; L27 returns cy, so clear it before the len == 0 exit
+ test rdx, rdx ; if (len == 0) return (0)
+ jz L27
+ mov r8, rdx ; use r8 for len; rdx is used by mul
+
+L25:
+ cmp r8, 8 ; if (len < 8) finish with the tail code
+ jb L26
+ mov rax, [0+rsi] ; rax = a[0]
+ mov r10, [0+rdi] ; r10 = r[0]
+ mov r11, [8+rsi] ; prefetch a[1]
+ mul rcx ; p = a[0] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[0]
+ mov r10, [8+rdi] ; prefetch r[1]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [0+rdi], rax ; r[0] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [16+rsi] ; prefetch a[2]
+ mul rcx ; p = a[1] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[1]
+ mov r10, [16+rdi] ; prefetch r[2]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [8+rdi], rax ; r[1] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [24+rsi] ; prefetch a[3]
+ mul rcx ; p = a[2] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[2]
+ mov r10, [24+rdi] ; prefetch r[3]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [16+rdi], rax ; r[2] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [32+rsi] ; prefetch a[4]
+ mul rcx ; p = a[3] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[3]
+ mov r10, [32+rdi] ; prefetch r[4]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [24+rdi], rax ; r[3] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [40+rsi] ; prefetch a[5]
+ mul rcx ; p = a[4] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[4]
+ mov r10, [40+rdi] ; prefetch r[5]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [32+rdi], rax ; r[4] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [48+rsi] ; prefetch a[6]
+ mul rcx ; p = a[5] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[5]
+ mov r10, [48+rdi] ; prefetch r[6]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [40+rdi], rax ; r[5] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mov r11, [56+rsi] ; prefetch a[7]
+ mul rcx ; p = a[6] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[6]
+ mov r10, [56+rdi] ; prefetch r[7]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [48+rdi], rax ; r[6] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ mov rax, r11
+ mul rcx ; p = a[7] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[7]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [56+rdi], rax ; r[7] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+
+ add rsi, 64 ; a += 8
+ add rdi, 64 ; r += 8
+ sub r8, 8 ; len -= 8
+ jz L27
+ jmp L25
+
+L26: ; tail: between 1 and 7 digits remain
+ mov rax, [0+rsi]
+ mov r10, [0+rdi]
+ mul rcx ; p = a[0] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[0]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [0+rdi], rax ; r[0] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [8+rsi]
+ mov r10, [8+rdi]
+ mul rcx ; p = a[1] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[1]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [8+rdi], rax ; r[1] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [16+rsi]
+ mov r10, [16+rdi]
+ mul rcx ; p = a[2] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[2]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [16+rdi], rax ; r[2] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [24+rsi]
+ mov r10, [24+rdi]
+ mul rcx ; p = a[3] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[3]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [24+rdi], rax ; r[3] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [32+rsi]
+ mov r10, [32+rdi]
+ mul rcx ; p = a[4] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[4]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [32+rdi], rax ; r[4] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [40+rsi]
+ mov r10, [40+rdi]
+ mul rcx ; p = a[5] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[5]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [40+rdi], rax ; r[5] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+ mov rax, [48+rsi]
+ mov r10, [48+rdi]
+ mul rcx ; p = a[6] * digit
+ add rax, r10
+ adc rdx, 0 ; p += r[6]
+ add rax, r9
+ adc rdx, 0 ; p += cy
+ mov [48+rdi], rax ; r[6] = lo(p)
+ mov r9, rdx ; cy = hi(p)
+ dec r8
+ jz L27
+
+L27:
+ mov rax, r9 ; return cy
+
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_add_vec64 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
new file mode 100644
index 0000000000..ddd5c40fda
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
@@ -0,0 +1,385 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_set_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r = a * digit, r and a are vectors of length len
+/ returns the carry digit (0 when len == 0, in which case r is not written)
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+/ Registers as used below: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+/ Scratch: %rax and %rdx (mul operand and result), %r8 (digits left),
+/ %r9 (carry), %r11 (next digit of a).
+/
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+
+ xorq %r9, %r9 / cy = 0; .L17 returns cy, so clear it before the len == 0 exit
+ movl %edx, %r8d / r8 = len zero-extended (len is 32 bit); %rdx is used by mul
+ testq %r8, %r8 / if (len == 0) return (0)
+ jz .L17
+
+.L15:
+ cmpq $8, %r8 / if (len < 8) finish with the tail code
+ jb .L16
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi / a += 8
+ addq $64, %rdi / r += 8
+ subq $8, %r8 / len -= 8
+
+ jz .L17
+ jmp .L15
+
+.L16: / tail: between 1 and 7 digits remain
+ movq 0(%rsi), %rax
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax / return cy
+ ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_add_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r += a * digit, r and a are vectors of length len
+/ returns the carry digit (0 when len == 0, in which case r is not touched)
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+/ Registers as used below: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+/ Scratch: %rax and %rdx (mul operand and result), %r8 (digits left),
+/ %r9 (carry), %r10 (current digit of r), %r11 (next digit of a).
+/
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+
+ xorq %r9, %r9 / cy = 0; .L27 returns cy, so clear it before the len == 0 exit
+ movl %edx, %r8d / r8 = len zero-extended (len is 32 bit); %rdx is used by mul
+ testq %r8, %r8 / if (len == 0) return (0)
+ jz .L27
+
+.L25:
+ cmpq $8, %r8 / if (len < 8) finish with the tail code
+ jb .L26
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 0(%rdi), %r10 / r10 = r[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ movq 8(%rdi), %r10 / prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ movq 16(%rdi), %r10 / prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ movq 24(%rdi), %r10 / prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ movq 32(%rdi), %r10 / prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ movq 40(%rdi), %r10 / prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ movq 48(%rdi), %r10 / prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ movq 56(%rdi), %r10 / prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi / a += 8
+ addq $64, %rdi / r += 8
+ subq $8, %r8 / len -= 8
+
+ jz .L27
+ jmp .L25
+
+.L26: / tail: between 1 and 7 digits remain
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax / return cy
+ ret
+
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c
new file mode 100644
index 0000000000..27e4efdad1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_arm.c
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This inlined version is for 32-bit ARM platform only */
+
+#if !defined(__arm__)
+#error "This is for ARM only"
+#endif
+
+/* The inlined assembler version does not work with 16-bit Thumb */
+#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__)
+
+#include "mpi-priv.h"
+
+#ifdef MP_ASSEMBLY_MULTIPLY
+/*
+ * c = a * b, where a is a_len digits long and b is a single digit.
+ *
+ * Stores the a_len low digits of the product at c[0..a_len-1] and the final
+ * carry digit at c[a_len]. When a_len is 0 only a zero carry is stored at
+ * c[0]. Digits are accessed as 32-bit words (ldr/str with a 4-byte
+ * post-increment). r4, r5 and r6 are used as scratch registers.
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm__ __volatile__(
+ "mov r5, #0\n" /* r5 = carry = 0 */
+#ifdef __thumb2__
+ "cbz %1, 2f\n" /* skip the loop if a_len == 0 */
+#else
+ "cmp %1, r5\n" /* r5 is 0 now */
+ "beq 2f\n"
+#endif
+
+ "1:\n"
+ "mov r4, #0\n" /* r4 = high word of the accumulator */
+ "ldr r6, [%0], #4\n" /* r6 = *a++ */
+ "umlal r5, r4, r6, %3\n" /* r4:r5 = carry + r6 * b */
+ "str r5, [%2], #4\n" /* *c++ = low word */
+ "mov r5, r4\n" /* carry = high word */
+
+ "subs %1, #1\n" /* --a_len */
+ "bne 1b\n"
+
+ "2:\n"
+ "str r5, [%2]\n" /* *c = final carry */
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+/*
+ * c += a * b, where a is a_len digits long and b is a single digit.
+ *
+ * Adds the product into c[0..a_len-1] and then stores (does not add) the
+ * final carry digit at c[a_len]. When a_len is 0 only a zero carry is
+ * stored at c[0]. r4, r5 and r6 are used as scratch registers.
+ */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm__ __volatile__(
+ "mov r5, #0\n" /* r5 = carry = 0 */
+#ifdef __thumb2__
+ "cbz %1, 2f\n" /* skip the loop if a_len == 0 */
+#else
+ "cmp %1, r5\n" /* r5 is 0 now */
+ "beq 2f\n"
+#endif
+
+ "1:\n"
+ "mov r4, #0\n" /* r4 = high word of the accumulator */
+ "ldr r6, [%2]\n" /* r6 = *c */
+ "adds r5, r6\n" /* r5 = carry + *c, sets the carry flag */
+ "adc r4, r4, #0\n" /* fold the carry flag into the high word */
+
+ "ldr r6, [%0], #4\n" /* r6 = *a++ */
+ "umlal r5, r4, r6, %3\n" /* r4:r5 += r6 * b */
+ "str r5, [%2], #4\n" /* *c++ = low word */
+ "mov r5, r4\n" /* carry = high word */
+
+ "subs %1, #1\n" /* --a_len */
+ "bne 1b\n"
+
+ "2:\n"
+ "str r5, [%2]\n" /* *c = final carry (overwrites the digit) */
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+/*
+ * c += a * b, with the final carry propagated into the digits that follow.
+ *
+ * Returns immediately when a_len is 0. Otherwise adds the product into
+ * c[0..a_len-1]; if a carry remains it is added to c[a_len], c[a_len+1], ...
+ * until no carry is left, so the loop relies on c having enough digits to
+ * absorb the carry. r4, r5 and r6 are used as scratch registers.
+ */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ if (!a_len)
+ return;
+
+ __asm__ __volatile__(
+ "mov r5, #0\n" /* r5 = carry = 0 */
+
+ "1:\n"
+ "mov r4, #0\n" /* r4 = high word of the accumulator */
+ "ldr r6, [%2]\n" /* r6 = *c */
+ "adds r5, r6\n" /* r5 = carry + *c, sets the carry flag */
+ "adc r4, r4, #0\n" /* fold the carry flag into the high word */
+ "ldr r6, [%0], #4\n" /* r6 = *a++ */
+ "umlal r5, r4, r6, %3\n" /* r4:r5 += r6 * b */
+ "str r5, [%2], #4\n" /* *c++ = low word */
+ "mov r5, r4\n" /* carry = high word */
+
+ "subs %1, #1\n" /* --a_len */
+ "bne 1b\n"
+
+#ifdef __thumb2__
+ "cbz r4, 3f\n" /* done if no carry is left (r4 == r5 here) */
+#else
+ "cmp r4, #0\n"
+ "beq 3f\n"
+#endif
+
+ "2:\n" /* propagate the remaining carry */
+ "mov r4, #0\n"
+ "ldr r6, [%2]\n" /* r6 = *c */
+ "adds r5, r6\n" /* r5 = carry + *c */
+ "adc r4, r4, #0\n" /* r4 = carry out of that addition */
+ "str r5, [%2], #4\n" /* *c++ = sum */
+ "movs r5, r4\n" /* carry = r4; sets Z when it is zero */
+ "bne 2b\n"
+
+ "3:\n"
+ : "+r"(a), "+l"(a_len), "+r"(c)
+ : "r"(b)
+ : "memory", "cc", "%r4", "%r5", "%r6");
+}
+#endif
+
+#ifdef MP_ASSEMBLY_SQUARE
+/*
+ * Adds the square of every digit of pa into ps, two result digits per
+ * input digit: pa[i] * pa[i] is added to ps[2*i] and ps[2*i+1], with the
+ * carry carried forward to the next pair.
+ *
+ * Returns immediately when a_len is 0. A carry left after the last pair is
+ * propagated into the following digits of ps until it is absorbed.
+ * r3, r4, r5 and r6 are used as scratch registers.
+ */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+ if (!a_len)
+ return;
+
+ __asm__ __volatile__(
+ "mov r3, #0\n" /* r3 = carry = 0 */
+
+ "1:\n"
+ "mov r4, #0\n" /* r4 = high word of the accumulator */
+ "ldr r6, [%0], #4\n" /* r6 = *pa++ */
+ "ldr r5, [%2]\n" /* r5 = low result digit */
+ "adds r3, r5\n" /* r3 = carry + low digit */
+ "adc r4, r4, #0\n" /* fold the carry flag into the high word */
+ "umlal r3, r4, r6, r6\n" /* w = r3:r4 */
+ "str r3, [%2], #4\n" /* store the low digit, advance ps */
+
+ "ldr r5, [%2]\n" /* r5 = high result digit */
+ "adds r3, r4, r5\n" /* r3 = high word + high digit */
+ "mov r4, #0\n"
+ "adc r4, r4, #0\n" /* r4 = carry out of that addition */
+ "str r3, [%2], #4\n" /* store the high digit, advance ps */
+ "mov r3, r4\n" /* carry for the next pair */
+
+ "subs %1, #1\n" /* --a_len */
+ "bne 1b\n"
+
+#ifdef __thumb2__
+ "cbz r3, 3f\n" /* done if no carry is left */
+#else
+ "cmp r3, #0\n"
+ "beq 3f\n"
+#endif
+
+ "2:\n" /* propagate the remaining carry */
+ "mov r4, #0\n"
+ "ldr r5, [%2]\n" /* r5 = *ps */
+ "adds r3, r5\n" /* r3 = carry + *ps */
+ "adc r4, r4, #0\n" /* r4 = carry out of that addition */
+ "str r3, [%2], #4\n" /* *ps++ = sum */
+ "movs r3, r4\n" /* carry = r4; sets Z when it is zero */
+ "bne 2b\n"
+
+ "3:"
+ : "+r"(pa), "+r"(a_len), "+r"(ps)
+ :
+ : "memory", "cc", "%r3", "%r4", "%r5", "%r6");
+}
+#endif
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c
new file mode 100644
index 0000000000..0cea7685d6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_hp.c
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file contains routines that perform vector multiplication. */
+
+#include "mpi-priv.h"
+#include <unistd.h>
+
+#include <stddef.h>
+/* #include <sys/systeminfo.h> */
+#include <strings.h>
+
+extern void multacc512(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void maxpy_little(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void add_diag_little(
+ int length, /* doublewords in input vector. */
+ const mp_digit *root, /* The vector to square. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+/*
+ * Thin wrapper: forwards to the external routine add_diag_little(), passing
+ * a_len as the length, pa as the vector to square and ps as the place where
+ * the result is accumulated (see the declaration of add_diag_little above).
+ * Note that a_len (mp_size) is converted to the int length parameter.
+ */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+ add_diag_little(a_len, pa, ps);
+}
+
+/* Number of digits in the on-stack scratch buffer used by s_mpv_mul_d. */
+#define MAX_STACK_DIGITS 258
+/* Number of digits in a 512-bit vector. */
+#define MULTACC512_LEN (512 / MP_DIGIT_BIT)
+/*
+ * Selects the multiply-accumulate routine: multacc512 when the vector is
+ * exactly MULTACC512_LEN digits long, maxpy_little otherwise. The macro
+ * reads a variable named a_len from the scope in which it is expanded.
+ */
+#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little)
+
+/*
+ * c = a * b, where a is a_len digits long and b is a single digit.
+ *
+ * The destination is zeroed (a_len + 1 digits) and the product is then
+ * accumulated into it. When a and c are the same vector, a is first copied
+ * to a scratch buffer: the on-stack one when it is large enough, a heap
+ * allocation otherwise. If that allocation fails the function returns
+ * without touching c.
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit stack_copy[MAX_STACK_DIGITS];
+    mp_digit *copy = stack_copy;
+    size_t heap_bytes = 0;
+
+    if (a == c) {
+        /* In-place multiply: work from a private copy of the input. */
+        if (a_len > MAX_STACK_DIGITS) {
+            heap_bytes = sizeof(mp_digit) * (a_len + 2);
+            copy = malloc(heap_bytes);
+            if (!copy) {
+                return;
+            }
+        }
+        memcpy(copy, a, a_len * sizeof(*a));
+        a = copy;
+    }
+
+    s_mp_setz(c, a_len + 1);
+    HP_MPY_ADD_FN(a_len, &b, a, c);
+
+    /* Only a heap copy needs releasing; it is cleared first. */
+    if (copy != stack_copy && copy) {
+        memset(copy, 0, heap_bytes);
+        free(copy);
+    }
+}
+
+/*
+ * c += a * b, where a is a_len words long.
+ *
+ * Clears c[a_len] first and then hands the work to the routine selected by
+ * HP_MPY_ADD_FN, passing b by address.
+ */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ c[a_len] = 0; /* so carry propagation stops here. */
+ HP_MPY_ADD_FN(a_len, &b, a, c);
+}
+
+/*
+ * c += a * b, where a is a_len words long.
+ *
+ * Unlike s_mpv_mul_d_add, c[a_len] is not cleared beforehand; the call goes
+ * straight to the routine selected by HP_MPY_ADD_FN, passing b by address.
+ */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c)
+{
+ HP_MPY_ADD_FN(a_len, &b, a, c);
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s
new file mode 100644
index 0000000000..f800396596
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s
@@ -0,0 +1,313 @@
+/
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.text
+
+ / s_mpv_mul_d(a, a_len, b, c): c = a * b
+ / Stores a_len product digits at c and the final carry at c[a_len].
+ / With a_len == 0 only a zero carry is stored at c[0].
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28: (reserved by "sub $28", not referenced)
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax: current digit of a, then low half of product
+ / ebx: carry
+ / ecx: a_len
+ / edx: b, then high half of product
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L2 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld / string instructions move forward
+L1:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L1 / jmp if a_len != 0
+L2:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_mul_d_add(a, a_len, b, c): c += a * b
+ / Adds the product into c[0..a_len-1] and stores (does not add) the
+ / final carry at c[a_len]. With a_len == 0 only a zero carry is stored.
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28: (reserved by "sub $28", not referenced)
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax: current digit of a, then low half of product
+ / ebx: carry (also used to hold the current word of c)
+ / ecx: a_len
+ / edx: b, then high half of product
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L4 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld / string instructions move forward
+L3:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L3 / jmp if a_len != 0
+L4:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_mul_d_add_prop(a, a_len, b, c): c += a * b
+ / Adds the product into c[0..a_len-1], then propagates any remaining
+ / carry into the following words of c until no carry is left.
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28: (reserved by "sub $28", not referenced)
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax: current digit of a, then low half of product
+ / ebx: carry (also used to hold the current word of c)
+ / ecx: a_len
+ / edx: b, then high half of product
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L6 / jmp if a_len == 0
+ cld / string instructions move forward
+ mov 8(%ebp),%esi / esi = a
+L5:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L5 / jmp if a_len != 0
+L6:
+ cmp $0,%ebx / is carry zero?
+ jz L8
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = eax; edi += 4;
+ jnc L8 / stosl leaves the carry flag from the add intact
+L7:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax
+ stosl / [es:edi] = eax; edi += 4;
+ jc L7 / keep going while the addition carries out
+L8:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_sqr_add_prop(pa, a_len, ps)
+ / For each digit of pa, adds its square to the next two words of ps,
+ / then propagates any remaining carry into the following words of ps.
+ /
+ / ebp - 24: caller's ebx
+ / ebp - 20: caller's esi
+ / ebp - 16: caller's edi
+ / ebp - 12:
+ / ebp - 8: carry (slot not referenced by the code below)
+ / ebp - 4: a_len local (slot not referenced by the code below)
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: pa argument
+ / ebp + 12: a_len argument
+ / ebp + 16: ps argument
+ / ebp + 20:
+ / registers:
+ / eax: current digit of pa, then low half of its square
+ / ebx: carry (also used to hold words of ps)
+ / ecx: a_len
+ / edx: high half of the square
+ / esi: pa ptr
+ / edi: ps ptr
+
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / a_len
+ mov 16(%ebp),%edi / edi = ps
+ cmp $0,%ecx
+ je L11 / jump if a_len == 0
+ cld / string instructions move forward
+ mov 8(%ebp),%esi / esi = pa
+L10:
+ lodsl / %eax = [ds:esi]; esi += 4;
+ mull %eax / edx:eax = eax * eax
+
+ add %ebx,%eax / add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax / add low word from result
+ mov 4(%edi),%ebx / (mov and stosl leave the carry flag intact)
+ stosl / [es:edi] = %eax; edi += 4;
+ adc %ebx,%edx / add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx / ebx = carry out of the high word
+ stosl / [es:edi] = %eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L10 / jmp if a_len != 0
+L11:
+ cmp $0,%ebx / is carry zero?
+ jz L14
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = eax; edi += 4;
+ jnc L14
+L12:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax
+ stosl / [es:edi] = eax; edi += 4;
+ jc L12 / keep going while the addition carries out
+L14:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ /
+ / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ / so its high bit is 1. This code is from NSPR.
+ /
+ / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ / mp_digit *qp, mp_digit *rp)
+ /
+ / Stores the quotient at *qp and the remainder at *rp, and always
+ / returns zero in eax.
+ / NOTE(review): the "div" instruction traps when the quotient does not
+ / fit in 32 bits; presumably callers guarantee Nhi < divisor - confirm.
+
+ / esp + 0: Caller's ebx
+ / esp + 4: return address
+ / esp + 8: Nhi argument
+ / esp + 12: Nlo argument
+ / esp + 16: divisor argument
+ / esp + 20: qp argument
+ / esp + 24: rp argument
+ / registers:
+ / eax: Nlo, then quotient, then the zero return value
+ / ebx: divisor, then qp, then rp
+ / ecx: (not used)
+ / edx: Nhi, then remainder
+ / esi: (not used)
+ / edi: (not used)
+ /
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx / edx = Nhi
+ mov 12(%esp),%eax / eax = Nlo
+ mov 16(%esp),%ebx / ebx = divisor
+ div %ebx / eax = quotient, edx = remainder
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx) / *qp = quotient
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx) / *rp = remainder
+ xor %eax,%eax / return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 0000000000..455792bbba
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,472 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <regdef.h>
+ .set noreorder
+ .set noat
+
+ .section .text, 1, 0x00000006, 4, 4
+.text:
+ .section .text
+
+ # s_mpv_mul_d_add: c += a * b, final carry stored after the last digit.
+ # The loop is unrolled two digits per iteration and the multiply of the
+ # next digit is started before the previous result is stored. Assembled
+ # with ".set noreorder", so the instruction after each branch is its
+ # delay slot and always executes.
+ # NOTE(review): for odd a_len >= 3 the delay-slot load of a[1] at the
+ # bottom of the loop appears to read one digit past the end of a (the
+ # value is not used) - confirm this is acceptable.
+ .ent s_mpv_mul_d_add
+ .globl s_mpv_mul_d_add
+
+s_mpv_mul_d_add:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.L.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.L.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.L.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.L.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.L.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.L.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.L.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .L.6
+ addiu a3,a3,4
+ # } else {
+.L.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .L.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.L.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.L.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.L.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add
+
+ # s_mpv_mul_d_add_prop: c += a * b, then the remaining carry is
+ # propagated into the following digits of c until it is absorbed.
+ # Same unrolled multiply/accumulate structure as s_mpv_mul_d_add, but
+ # every path leaves a3 pointing at the digit after the last one written
+ # before reaching .M.6. Assembled with ".set noreorder": the instruction
+ # after each branch is its delay slot and always executes.
+ .ent s_mpv_mul_d_add_prop
+ .globl s_mpv_mul_d_add_prop
+
+s_mpv_mul_d_add_prop:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.M.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.M.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.M.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.M.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.M.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.M.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.M.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 2; (two digits were written above)
+ b .M.6
+ addiu a3,a3,8
+ # } else {
+.M.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;
+ b .M.6
+ addiu a3,a3,4
+ # }
+ # } else {
+.M.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;
+ addiu a3,a3,4
+ # }
+.M.6:
+
+ # while (cy) {
+ beq t2,zero,.M.1
+ nop
+.M.7:
+ # mp_word w = (mp_word)*c + cy;
+ lwu a6,0(a3)
+ daddu t2,t2,a6
+ # *c++ = ACCUM(w);
+ sw t2,0(a3)
+ # cy = CARRYOUT(w);
+ dsrl32 t2,t2,0
+ bne t2,zero,.M.7
+ addiu a3,a3,4
+
+ # }
+.M.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add_prop
+
+ # s_mpv_mul_d: c = a * b, final carry stored after the last digit.
+ # Nothing is written when a_len is 0. The loop is unrolled two digits
+ # per iteration. Assembled with ".set noreorder": the instruction after
+ # each branch is its delay slot and always executes.
+ # NOTE(review): for odd a_len >= 3 the delay-slot load of a[1] at the
+ # bottom of the loop appears to read one digit past the end of a (the
+ # value is not used) - confirm this is acceptable.
+ .ent s_mpv_mul_d
+ .globl s_mpv_mul_d
+
+s_mpv_mul_d:
+ #/* c = a * b */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.N.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.N.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.N.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.N.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.N.4
+ # a1 = a[1];
+ lwu a5,4(a0)
+ # }
+.N.3:
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.N.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .N.6
+ addiu a3,a3,4
+ # } else {
+.N.5:
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .N.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.N.2:
+ mflo t0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.N.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.N.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d
+
+
+ # s_mpv_sqr_add_prop: for each digit of a, adds its square to the next
+ # two 32-bit digits of sqrs (handled together as one 64-bit value),
+ # carrying into the following pair.
+ # Assembled with ".set noreorder": the instruction after each branch is
+ # its delay slot and always executes.
+ # NOTE(review): there is no a_len == 0 check; a[0] is loaded and a_len
+ # is decremented before it is first tested, so a_len >= 1 is assumed.
+ .ent s_mpv_sqr_add_prop
+ .globl s_mpv_sqr_add_prop
+ #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
+ # registers
+ # a0 *a
+ # a1 a_len
+ # a2 *sqr
+ # a3 digit from *a, a_i
+ # a4 square of digit from a
+ # a5,a6 next 2 digits in sqr
+ # a7,t0 carry
+s_mpv_sqr_add_prop:
+ move a7,zero
+ move t0,zero
+ lwu a3,0(a0)
+ addiu a1,a1,-1 # --a_len
+ dmultu a3,a3
+ beq a1,zero,.P.3 # jump if we've already done the only sqr
+ addiu a0,a0,4 # ++a
+.P.2:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6 # a5 = 64-bit value of the two sqrs digits
+ lwu a3,0(a0)
+ addiu a0,a0,4 # ++a
+ mflo a4 # a4 = square of the previous digit
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ dmultu a3,a3 # start squaring the next digit
+ daddu a4,a6,t0 # add the carry from the previous pair
+ sltu t0,a4,a6
+ add t0,t0,a7 # t0 = carry out of this pair
+ sw a4,-8(a2) # store low digit
+ addiu a1,a1,-1 # --a_len
+ dsrl32 a4,a4,0
+ bne a1,zero,.P.2 # loop if a_len > 0
+ sw a4,-4(a2) # store high digit (delay slot)
+.P.3:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6
+ mflo a4
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ daddu a4,a6,t0
+ sltu t0,a4,a6
+ add t0,t0,a7
+ sw a4,-8(a2)
+ beq t0,zero,.P.9 # jump if no carry
+ dsrl32 a4,a4,0
+.P.8:
+ sw a4,-4(a2)
+ /* propagate final carry */
+ /* NOTE(review): a6 (digit + carry) is computed below but never stored;
+ * the stores here and at .P.9 write a4, and sltu compares 64-bit values
+ * although a5 was loaded as a 32-bit digit. This path looks incorrect if
+ * a carry is ever left after the last pair - verify. */
+ lwu a5,0(a2)
+ daddu a6,a5,t0
+ sltu t0,a6,a5
+ bne t0,zero,.P.8 # loop if carry persists
+ addiu a2,a2,4 # sqrs++
+.P.9:
+ jr ra
+ sw a4,-4(a2)
+
+ .end s_mpv_sqr_add_prop
diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c
new file mode 100644
index 0000000000..1e88357af8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sparc.c
@@ -0,0 +1,226 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Multiplication performance enhancements for sparc v8+vis CPUs. */
+
+#include "mpi-priv.h"
+#include <stddef.h>
+#include <sys/systeminfo.h>
+#include <strings.h>
+
+/* In the functions below, */
+/* vector y must be 8-byte aligned, and n must be even */
+/* returns carry out of high order word of result */
+/* maximum n is 256 */
+
+/* vector x += vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
+
+/* vector z = vector x + vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
+ int n, mp_digit a);
+
+/* v8 versions of these functions run on any Sparc v8 CPU. */
+
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+
+/*
+ * c = a * b
+ *
+ * Portable C version: multiplies each of the a_len digits of a by the
+ * digit b, storing the low digit of every partial result in c and carrying
+ * the high digit forward. The final carry is stored in the digit after the
+ * last product digit.
+ */
+static void
+v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit cy = 0;
+
+    for (; a_len != 0; --a_len, ++a, ++c) {
+#if !defined(MP_NO_MP_WORD)
+        /* Double-width arithmetic is available. */
+        const mp_word acc = ((mp_word)b * *a) + cy;
+
+        *c = ACCUM(acc);
+        cy = CARRYOUT(acc);
+#else
+        /* Build the double-width result from two single digits. */
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(*a, b, hi, lo);
+
+        lo += cy;
+        if (lo < cy)
+            ++hi;
+        *c = lo;
+        cy = hi;
+#endif
+    }
+    *c = cy;
+}
+
+/*
+ * c += a * b
+ *
+ * Portable C version: adds a * b into the first a_len digits of c. The
+ * final carry is stored in (not added to) the digit after them.
+ */
+static void
+v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit cy = 0;
+
+    for (; a_len != 0; --a_len, ++a, ++c) {
+#if !defined(MP_NO_MP_WORD)
+        /* Double-width arithmetic is available. */
+        const mp_word acc = ((mp_word)b * *a) + *c + cy;
+
+        *c = ACCUM(acc);
+        cy = CARRYOUT(acc);
+#else
+        /* Build the double-width result from two single digits. */
+        const mp_digit c_i = *c;
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(*a, b, hi, lo);
+
+        lo += cy;
+        if (lo < cy)
+            ++hi;
+        lo += c_i;
+        if (lo < c_i)
+            ++hi;
+        *c = lo;
+        cy = hi;
+#endif
+    }
+    *c = cy;
+}
+
+/*
+ * Presently, this is only used by the Montgomery arithmetic code.
+ *
+ * c += a * b
+ *
+ * Portable C version: adds a * b into the first a_len digits of c, then
+ * keeps adding the remaining carry into the following digits of c until
+ * the carry is zero.
+ */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit cy = 0;
+
+    /* Multiply-accumulate over the digits of a. */
+    for (; a_len != 0; --a_len, ++a, ++c) {
+#if !defined(MP_NO_MP_WORD)
+        const mp_word acc = ((mp_word)b * *a) + *c + cy;
+
+        *c = ACCUM(acc);
+        cy = CARRYOUT(acc);
+#else
+        const mp_digit c_i = *c;
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(*a, b, hi, lo);
+
+        lo += cy;
+        if (lo < cy)
+            ++hi;
+
+        lo += c_i;
+        if (lo < c_i)
+            ++hi;
+
+        *c = lo;
+        cy = hi;
+#endif
+    }
+
+    /* Carry propagation into the higher digits of c. */
+    for (; cy != 0; ++c) {
+#if !defined(MP_NO_MP_WORD)
+        const mp_word acc = (mp_word)*c + cy;
+
+        *c = ACCUM(acc);
+        cy = CARRYOUT(acc);
+#else
+        const mp_digit c_i = *c;
+        const mp_digit sum = cy + c_i;
+
+        *c = sum;
+        cy = sum < c_i;
+#endif
+    }
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
+ if (a_len <= 256) {
+ if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ s_mp_setz(c, a_len + 1);
+ d = mul_add_inp(c, a, a_len, b);
+ c[a_len] = d;
+ } else {
+ v8_mpv_mul_d(a, a_len, b, c);
+ }
+}
+
+/* c += a * b, where a is a_len words long. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
+ if (a_len <= 256) {
+ if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ d = mul_add_inp(c, a, a_len, b);
+ c[a_len] = d;
+ } else {
+ v8_mpv_mul_d_add(a, a_len, b, c);
+ }
+}
+
+/* c += a * b, where a is y words long. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ mp_digit d;
+ mp_digit x[258];
+ if (a_len <= 256) {
+ if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+ mp_digit *px;
+ px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x;
+ memcpy(px, a, a_len * sizeof(*a));
+ a = px;
+ if (a_len & 1) {
+ px[a_len] = 0;
+ }
+ }
+ d = mul_add_inp(c, a, a_len, b);
+ if (d) {
+ c += a_len;
+ do {
+ mp_digit sum = d + *c;
+ *c++ = sum;
+ d = sum < d;
+ } while (d);
+ }
+ } else {
+ v8_mpv_mul_d_add_prop(a, a_len, b, c);
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s
new file mode 100644
index 0000000000..16a47019c3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sse2.s
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef DARWIN
+#define s_mpv_mul_d _s_mpv_mul_d
+#define s_mpv_mul_d_add _s_mpv_mul_d_add
+#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
+#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
+#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
+#define TYPE_FUNCTION(x)
+#else
+#define TYPE_FUNCTION(x) .type x, @function
+#endif
+
+.text
+
+ # s_mpv_mul_d(a, a_len, b, c): c = a * b
+ # Stores a_len product digits at c and the final carry at c[a_len].
+ # With a_len == 0 only a zero carry is stored at c[0]. Uses the MMX
+ # registers mm0-mm2 and executes emms before returning.
+ #
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx: (not used)
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: current digit of a, then the 64-bit product
+ # mm1: b
+ # mm2: 64-bit accumulator / carry
+.globl s_mpv_mul_d
+.private_extern s_mpv_mul_d
+TYPE_FUNCTION(s_mpv_mul_d)
+s_mpv_mul_d:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # s_mpv_mul_d_add(a, a_len, b, c): c += a * b
+ # Adds the product into c[0..a_len-1] and stores (does not add) the
+ # final carry at c[a_len]. With a_len == 0 only a zero carry is stored.
+ # Uses the MMX registers mm0-mm2 and executes emms before returning.
+ #
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx: (not used)
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: current digit of a / product, then current word of c
+ # mm1: b
+ # mm2: 64-bit accumulator / carry
+.globl s_mpv_mul_d_add
+.private_extern s_mpv_mul_d_add
+TYPE_FUNCTION(s_mpv_mul_d_add)
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd 0(%edi), %mm0 # mm0 = *c
+ paddq %mm0, %mm2 # add *c
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # s_mpv_mul_d_add_prop(a, a_len, b, c): c += a * b
+ # Adds the product into c[0..a_len-1], then propagates any remaining
+ # carry into the following words of c until no carry is left.
+ # Uses the MMX registers mm0-mm3 and executes emms before returning.
+ #
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of c during carry propagation
+ # ebx: carry
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.private_extern s_mpv_mul_d_add_prop
+TYPE_FUNCTION(s_mpv_mul_d_add_prop)
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # stosl below must move forward
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add *c++
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx # ebx = remaining carry
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax # add the carry to the current word of *c
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 4f
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # s_mpv_sqr_add_prop(pa, a_len, ps)
+ # For each digit of pa, adds its square to the next two words of ps,
+ # then propagates any remaining carry into the following words of ps.
+ # Uses the MMX registers mm0, mm2 and mm3 and executes emms before
+ # returning.
+ #
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # registers:
+ # eax: current word of ps during carry propagation
+ # ebx: carry
+ # ecx: a_len
+ # esi: pa ptr
+ # edi: ps ptr
+.globl s_mpv_sqr_add_prop
+.private_extern s_mpv_sqr_add_prop
+TYPE_FUNCTION(s_mpv_sqr_add_prop)
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ mov 16(%ebp), %edi # edi = ps
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # stosl below must move forward
+1:
+ movd 0(%esi), %mm0 # mm0 = *a
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm0, %mm0 # mm0 = sqr(a)
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add the low word
+ movd 4(%edi), %mm3
+ movd %mm2, 0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3, %mm2 # add the high word
+ movd %mm2, 4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8, %edi
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx # ebx = remaining carry
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax # add the carry to the current word of *ps
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 4f
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+ #
+ # Stores the quotient at *qp and the remainder at *rp, and always
+ # returns zero in eax.
+ # NOTE(review): the "div" instruction traps when the quotient does not
+ # fit in 32 bits; presumably callers guarantee Nhi < divisor - confirm.
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then quotient, then the zero return value
+ # ebx: divisor, then qp, then rp
+ # ecx: (not used)
+ # edx: Nhi, then remainder
+ # esi: (not used)
+ # edi: (not used)
+ #
+.globl s_mpv_div_2dx1d
+.private_extern s_mpv_div_2dx1d
+TYPE_FUNCTION(s_mpv_div_2dx1d)
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp), %edx # edx = Nhi
+ mov 12(%esp), %eax # eax = Nlo
+ mov 16(%esp), %ebx # ebx = divisor
+ div %ebx # eax = quotient, edx = remainder
+ mov 20(%esp), %ebx
+ mov %eax, 0(%ebx) # *qp = quotient
+ mov 24(%esp), %ebx
+ mov %edx, 0(%ebx) # *rp = remainder
+ xor %eax, %eax # return zero
+ pop %ebx
+ ret
+ nop
+
+#ifndef DARWIN
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s
new file mode 100644
index 0000000000..8f7e2130c3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86.s
@@ -0,0 +1,541 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # -1 means to call s_mpi_is_sse2 to determine if we support sse2
+ # instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
+#
+# GET var,reg loads the 32-bit variable var into reg; PUT reg,var stores
+# reg into var.
+#
+# Handle the difference between -fPIC and non-PIC builds.  Default to
+# PIC, since this file seems to be exclusively Linux right now (Solaris
+# uses mpi_i86pc.s and Windows uses mpi_x86_asm.c).  Define NO_PIC to
+# get plain absolute addressing instead.
+#
+# NOTE(review): the PIC forms address var as var@GOTOFF(%ebx), and no
+# code in this file loads %ebx before GET/PUT is used -- presumably
+# every caller enters with %ebx holding the GOT address; confirm.
+#
+.ifndef NO_PIC
+.macro GET var,reg
+ movl \var@GOTOFF(%ebx),\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var@GOTOFF(%ebx)
+.endm
+.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+.endif
+
+.text
+
+
+ # s_mpv_mul_d(a, a_len, b, c)
+ #
+ # Multiplies the a_len 32-bit words at a by the 32-bit value b and
+ # stores the a_len + 1 word product at c (the last word stored is the
+ # final carry).
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the product
+ # ebx: carry (x86 path)
+ # ecx: a_len
+ # edx: b, then high half of the product
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: current product, mm1: b, mm2: carry (SSE2 path)
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je s_mpv_mul_d_x86 # 0: use x86 instructions
+ jg s_mpv_mul_d_sse2 # > 0: use sse2 instructions
+ call s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # lodsl/stosl must step upward
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms # clear the MMX state before returning
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # s_mpv_mul_d_add(a, a_len, b, c)
+ #
+ # Adds the product of the a_len 32-bit words at a and the 32-bit value
+ # b into the first a_len words at c, then stores (does not add) the
+ # final carry in c[a_len].
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the running sum
+ # ebx: carry, and scratch for the current word of c (x86 path)
+ # ecx: a_len
+ # edx: b, then high half of the running sum
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: product, then current word of c; mm1: b; mm2: carry (SSE2 path)
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je s_mpv_mul_d_add_x86 # 0: use x86 instructions
+ jg s_mpv_mul_d_add_sse2 # > 0: use sse2 instructions
+ call s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # lodsl/stosl must step upward
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0 # mm0 = current word from *c
+ paddq %mm0,%mm2 # add the current word from *c
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms # clear the MMX state before returning
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # s_mpv_mul_d_add_prop(a, a_len, b, c)
+ #
+ # Adds the product of the a_len 32-bit words at a and the 32-bit value
+ # b into the words at c.  A carry left over after a_len words is added
+ # into c[a_len] and propagated into the following words of c until it
+ # dies out.
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the running sum
+ # ebx: carry
+ # ecx: a_len
+ # edx: b, then high half of the running sum
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: product, mm1: b, mm2: carry, mm3: current word of c (SSE2 path)
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je s_mpv_mul_d_add_prop_x86 # 0: use x86 instructions
+ jg s_mpv_mul_d_add_prop_sse2 # > 0: use sse2 instructions
+ call s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld # lodsl/stosl must step upward
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 23f # stosl leaves CF from the add untouched
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # the stosl in the carry tail must step upward
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx # ebx = final carry
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 27b
+28:
+ emms # clear the MMX state before returning
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # s_mpv_sqr_add_prop(pa, a_len, ps)
+ #
+ # For each of the a_len 32-bit words at pa, adds the 64-bit square of
+ # that word plus the running carry into the next two words of ps
+ # (ps advances by two words per input word).  A carry left over after
+ # the last word is added into the following words of ps until it dies
+ # out.
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 24: caller's ebx
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12 .. ebp - 4: reserved by "sub $12,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # registers:
+ # eax: current word of pa, then the word being stored
+ # ebx: carry, and scratch for the current words of ps
+ # ecx: a_len
+ # edx: high half of the square
+ # esi: pa ptr
+ # edi: ps ptr
+ # mm0: square, mm2: carry, mm3: current word of ps (SSE2 path)
+
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je s_mpv_sqr_add_prop_x86 # 0: use x86 instructions
+ jg s_mpv_sqr_add_prop_sse2 # > 0: use sse2 instructions
+ call s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld # lodsl/stosl must step upward
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:esi]; esi += 4;
+ mull %eax # edx:eax = a_i * a_i
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx # mov and stosl leave CF from the add intact
+ stosl # [es:edi] = %eax; edi += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx # movl (not xor) so that CF survives
+ mov %edx,%eax
+ adc $0,%ebx # carry = CF out of the high word
+ stosl # [es:edi] = %eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # the stosl in the carry tail must step upward
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx # ebx = final carry
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 37b
+38:
+ emms # clear the MMX state before returning
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # Stores the quotient in *qp and the remainder in *rp and returns zero.
+ # The quotient must fit in 32 bits (Nhi < divisor); otherwise the div
+ # instruction raises a divide error.
+ #
+ # Stack layout after "push %ebx":
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then the quotient, then the zero return value
+ # ebx: divisor, then qp, then rp
+ # ecx: not used
+ # edx: Nhi, then the remainder
+ # esi: not used
+ # edi: not used
+ #
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx # edx = Nhi
+ mov 12(%esp),%eax # eax = Nlo
+ mov 16(%esp),%ebx # ebx = divisor
+ div %ebx # eax = edx:eax / ebx, edx = edx:eax % ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx) # *qp = quotient
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx) # *rp = remainder
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 0000000000..4faeef30ca
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+
+/*
+ * Cached capability flag read by the s_mpv_* dispatchers in this file:
+ * a negative value (initially -1) means s_mpi_is_sse2() must be called
+ * and its result stored here, 0 selects the plain x86 code paths, and a
+ * positive value selects the SSE2 code paths.
+ */
+static int is_sse = -1;
+/* Defined outside this file; its return value is stored in is_sse. */
+extern unsigned long s_mpi_is_sse2();
+
+/*
+ * s_mpv_mul_d: multiply the a_len 32-bit digits at a by the digit b and
+ * store the a_len + 1 digit product at c (the last digit stored is the
+ * final carry).
+ *
+ * The entry code dispatches on is_sse: 0 runs the plain x86 loop, a
+ * positive value runs the SSE2 loop, and a negative value first calls
+ * s_mpi_is_sse2 and caches its return value in is_sse.
+ *
+ * Frame of the x86 path:
+ * ebp - 40: caller's ebx
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28 .. ebp - 4: reserved by "sub esp,28", never accessed
+ * Frame of the SSE2 path:
+ * ebp - 8: caller's esi
+ * ebp - 4: caller's edi
+ * Common to both paths:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: current digit of a, then low half of the product
+ * ebx: carry (x86 path)
+ * ecx: a_len
+ * edx: b, then high half of the product
+ * esi: a ptr
+ * edi: c ptr
+ * mm0: current product, mm1: b, mm2: carry (SSE2 path)
+ */
+__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_x86
+ jg s_mpv_mul_d_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_2 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_1:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_1 ; jmp if a_len != 0
+L_2:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_6 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_5:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_5 ; jmp if a_len != 0
+L_6:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_mul_d_add: add the product of the a_len 32-bit digits at a and
+ * the digit b into the first a_len digits at c, then store (not add) the
+ * final carry in c[a_len].  In the SSE2 loop the second "paddq mm2, mm0"
+ * adds the current digit of c that was just loaded into mm0.
+ *
+ * The entry code dispatches on is_sse: 0 runs the plain x86 loop, a
+ * positive value runs the SSE2 loop, and a negative value first calls
+ * s_mpi_is_sse2 and caches its return value in is_sse.
+ *
+ * Frame of the x86 path:
+ * ebp - 40: caller's ebx
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28 .. ebp - 4: reserved by "sub esp,28", never accessed
+ * Frame of the SSE2 path:
+ * ebp - 8: caller's esi
+ * ebp - 4: caller's edi
+ * Common to both paths:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: current digit of a, then low half of the running sum
+ * ebx: carry, and scratch for the current digit of c (x86 path)
+ * ecx: a_len
+ * edx: b, then high half of the running sum
+ * esi: a ptr
+ * edi: c ptr
+ * mm0: product, then current digit of c; mm1: b; mm2: carry (SSE2 path)
+ */
+__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_x86
+ jg s_mpv_mul_d_add_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_11 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_10:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_10 ; jmp if a_len != 0
+L_11:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_16 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_15:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd mm0, [edi]
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_15 ; jmp if a_len != 0
+L_16:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_mul_d_add_prop: add the product of the a_len 32-bit digits at a
+ * and the digit b into the digits at c.  A carry left over after a_len
+ * digits is added into c[a_len] and propagated into the following
+ * digits of c until it dies out.
+ *
+ * The entry code dispatches on is_sse: 0 runs the plain x86 loop, a
+ * positive value runs the SSE2 loop, and a negative value first calls
+ * s_mpi_is_sse2 and caches its return value in is_sse.
+ *
+ * Frame of the x86 path:
+ * ebp - 40: caller's ebx
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28 .. ebp - 4: reserved by "sub esp,28", never accessed
+ * Frame of the SSE2 path:
+ * ebp - 12: caller's ebx
+ * ebp - 8: caller's esi
+ * ebp - 4: caller's edi
+ * Common to both paths:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: current digit of a, then low half of the running sum
+ * ebx: carry
+ * ecx: a_len
+ * edx: b, then high half of the running sum
+ * esi: a ptr
+ * edi: c ptr
+ * mm0: product, mm1: b, mm2: carry, mm3: current digit of c (SSE2 path)
+ */
+__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_prop_x86
+ jg s_mpv_mul_d_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_21 ; jmp if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = a
+L_20:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_20 ; jmp if a_len != 0
+L_21:
+ cmp ebx,0 ; is carry zero?
+ jz L_23
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_23
+L_22:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_22
+L_23:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_26 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_25:
+ movd mm0, [esi] ; mm0 = *a++
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add *c++
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_25 ; jmp if a_len != 0
+L_26:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_28
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_28
+L_27:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_27
+L_28:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_sqr_add_prop: for each of the a_len 32-bit digits at a, add the
+ * 64-bit square of that digit plus the running carry into the next two
+ * digits of sqrs (sqrs advances by two digits per input digit).  A carry
+ * left over after the last digit is added into the following digits of
+ * sqrs until it dies out.
+ *
+ * The entry code dispatches on is_sse: 0 runs the plain x86 loop, a
+ * positive value runs the SSE2 loop, and a negative value first calls
+ * s_mpi_is_sse2 and caches its return value in is_sse.
+ *
+ * Frame of the x86 path:
+ * ebp - 24: caller's ebx
+ * ebp - 20: caller's esi
+ * ebp - 16: caller's edi
+ * ebp - 12 .. ebp - 4: reserved by "sub esp,12", never accessed
+ * Frame of the SSE2 path:
+ * ebp - 12: caller's ebx
+ * ebp - 8: caller's esi
+ * ebp - 4: caller's edi
+ * Common to both paths:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: sqrs argument
+ * registers:
+ * eax: current digit of a, then the digit being stored
+ * ebx: carry, and scratch for the current digits of sqrs
+ * ecx: a_len
+ * edx: high half of the square
+ * esi: a ptr
+ * edi: sqrs ptr
+ * mm0: square, mm2: carry, mm3: current digit of sqrs (SSE2 path)
+ */
+__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_sqr_add_prop_x86
+ jg s_mpv_sqr_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,12
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; a_len
+ mov edi,[ebp+16] ; edi = ps
+ cmp ecx,0
+ je L_31 ; jump if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = pa
+L_30:
+ lodsd ; eax = [ds:si]; si += 4;
+ mul eax
+
+ add eax,ebx ; add "carry"
+ adc edx,0
+ mov ebx,[edi]
+ add eax,ebx ; add low word from result
+ mov ebx,[edi+4]
+ stosd ; [es:di] = eax; di += 4;
+ adc edx,ebx ; add high word from result
+ mov ebx,0
+ mov eax,edx
+ adc ebx,0
+ stosd ; [es:di] = eax; di += 4;
+ dec ecx ; --a_len
+ jnz L_30 ; jmp if a_len != 0
+L_31:
+ cmp ebx,0 ; is carry zero?
+ jz L_34
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_34
+L_32:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_32
+L_34:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ mov edi, [ebp+16]
+ cmp ecx, 0
+ je L_36 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_35:
+ movd mm0, [esi] ; mm0 = *a
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm0 ; mm0 = sqr(a)
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add the low word
+ movd mm3, [edi+4]
+ movd [edi], mm2 ; store the 32bit result
+ psrlq mm2, 32
+ paddq mm2, mm3 ; add the high word
+ movd [edi+4], mm2 ; store the 32bit result
+ psrlq mm2, 32 ; save the carry.
+ add edi, 8
+ dec ecx ; --a_len
+ jnz L_35 ; jmp if a_len != 0
+L_36:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_38
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_38
+L_37:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_37
+L_38:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ * so its high bit is 1. This code is from NSPR.
+ *
+ * Stores the quotient in *qp and the remainder in *rp and returns zero.
+ * The quotient must fit in 32 bits (Nhi < divisor); otherwise the div
+ * instruction raises a divide error.
+ *
+ * Stack layout after "push ebx":
+ * esp + 0: Caller's ebx
+ * esp + 4: return address
+ * esp + 8: Nhi argument
+ * esp + 12: Nlo argument
+ * esp + 16: divisor argument
+ * esp + 20: qp argument
+ * esp + 24: rp argument
+ * registers:
+ * eax: Nlo, then the quotient, then the zero return value
+ * ebx: divisor, then qp, then rp
+ * ecx: not used
+ * edx: Nhi, then the remainder
+ * esi: not used
+ * edi: not used
+ */
+__declspec(naked) mp_err
+ s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ __asm {
+ push ebx
+ mov edx,[esp+8]
+ mov eax,[esp+12]
+ mov ebx,[esp+16]
+ div ebx
+ mov ebx,[esp+20]
+ mov [ebx],eax
+ mov ebx,[esp+24]
+ mov [ebx],edx
+ xor eax,eax ; return zero
+ pop ebx
+ ret
+ nop
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 0000000000..b903e2564a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
+ # instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
+#
+# GET var,reg loads the 32-bit variable var into reg; PUT reg,var stores
+# reg into var.
+#
+# Only the non-PIC forms (plain absolute addressing) are assembled in
+# this file.  The commented-out lines below are the @GOTOFF(%ebx) PIC
+# variants and the NO_PIC switch that would select between the two.
+#
+#.ifndef NO_PIC
+#.macro GET var,reg
+# movl \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT reg,var
+# movl \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ # _s_mpv_mul_d(a, a_len, b, c)
+ #
+ # Multiplies the a_len 32-bit words at a by the 32-bit value b and
+ # stores the a_len + 1 word product at c (the last word stored is the
+ # final carry).
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # _s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the product
+ # ebx: carry (x86 path)
+ # ecx: a_len
+ # edx: b, then high half of the product
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: current product, mm1: b, mm2: carry (SSE2 path)
+.globl _s_mpv_mul_d
+.type _s_mpv_mul_d,@function
+_s_mpv_mul_d:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je _s_mpv_mul_d_x86 # 0: use x86 instructions
+ jg _s_mpv_mul_d_sse2 # > 0: use sse2 instructions
+ call _s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_sse2
+_s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # lodsl/stosl must step upward
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms # clear the MMX state before returning
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # _s_mpv_mul_d_add(a, a_len, b, c)
+ #
+ # Adds the product of the a_len 32-bit words at a and the 32-bit value
+ # b into the first a_len words at c, then stores (does not add) the
+ # final carry in c[a_len].
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # _s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the running sum
+ # ebx: carry, and scratch for the current word of c (x86 path)
+ # ecx: a_len
+ # edx: b, then high half of the running sum
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: product, then current word of c; mm1: b; mm2: carry (SSE2 path)
+.globl _s_mpv_mul_d_add
+.type _s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_x86 # 0: use x86 instructions
+ jg _s_mpv_mul_d_add_sse2 # > 0: use sse2 instructions
+ call _s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_sse2
+_s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # lodsl/stosl must step upward
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0 # mm0 = current word from *c
+ paddq %mm0,%mm2 # add the current word from *c
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms # clear the MMX state before returning
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # _s_mpv_mul_d_add_prop(a, a_len, b, c)
+ #
+ # Adds the product of the a_len 32-bit words at a and the 32-bit value
+ # b into the words at c.  A carry left over after a_len words is added
+ # into c[a_len] and propagated into the following words of c until it
+ # dies out.
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # _s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: current word of a, then low half of the running sum
+ # ebx: carry
+ # ecx: a_len
+ # edx: b, then high half of the running sum
+ # esi: a ptr
+ # edi: c ptr
+ # mm0: product, mm1: b, mm2: carry, mm3: current word of c (SSE2 path)
+.globl _s_mpv_mul_d_add_prop
+.type _s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_prop_x86 # 0: use x86 instructions
+ jg _s_mpv_mul_d_add_prop_sse2 # > 0: use sse2 instructions
+ call _s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_prop_sse2
+_s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld # lodsl/stosl must step upward
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 23f # stosl leaves CF from the add untouched
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # the stosl in the carry tail must step upward
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++ (64-bit product)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the low 32 bits of the result
+ add $4,%edi
+ psrlq $32, %mm2 # the high 32 bits become the next carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx # ebx = final carry
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 27b
+28:
+ emms # clear the MMX state before returning
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # _s_mpv_sqr_add_prop(pa, a_len, ps)
+ #
+ # For each of the a_len 32-bit words at pa, adds the 64-bit square of
+ # that word plus the running carry into the next two words of ps
+ # (ps advances by two words per input word).  A carry left over after
+ # the last word is added into the following words of ps until it dies
+ # out.
+ #
+ # The entry sequence dispatches on is_sse: 0 runs the plain x86 loop,
+ # a positive value runs the SSE2 loop, and a negative value first calls
+ # _s_mpi_is_sse2 and caches its return value in is_sse.
+ #
+ # Frame of the x86 path:
+ # ebp - 24: caller's ebx
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12 .. ebp - 4: reserved by "sub $12,%esp", never accessed
+ # Frame of the SSE2 path:
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # Common to both paths:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # registers:
+ # eax: current word of pa, then the word being stored
+ # ebx: carry, and scratch for the current words of ps
+ # ecx: a_len
+ # edx: high half of the square
+ # esi: pa ptr
+ # edi: ps ptr
+ # mm0: square, mm2: carry, mm3: current word of ps (SSE2 path)
+
+.globl _s_mpv_sqr_add_prop
+.type _s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:
+ GET is_sse,%eax # eax = cached capability flag
+ cmp $0,%eax
+ je _s_mpv_sqr_add_prop_x86 # 0: use x86 instructions
+ jg _s_mpv_sqr_add_prop_sse2 # > 0: use sse2 instructions
+ call _s_mpi_is_sse2 # negative: probe once and cache the answer
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_sqr_add_prop_sse2
+_s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld # lodsl/stosl must step upward
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:esi]; esi += 4;
+ mull %eax # edx:eax = a_i * a_i
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx # mov and stosl leave CF from the add intact
+ stosl # [es:edi] = %eax; edi += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx # movl (not xor) so that CF survives
+ mov %edx,%eax
+ adc $0,%ebx # carry = CF out of the high word
+ stosl # [es:edi] = %eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # the stosl in the carry tail must step upward
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx # ebx = final carry
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax # CF is set on every entry to this loop
+ stosl # [es:edi] = eax; edi += 4;
+ jc 37b
+38:
+ emms # clear the MMX state before returning
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # Stores the quotient in *qp and the remainder in *rp and returns zero.
+ # The quotient must fit in 32 bits (Nhi < divisor); otherwise the div
+ # instruction raises a divide error.
+ #
+ # Stack layout after "push %ebx":
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then the quotient, then the zero return value
+ # ebx: divisor, then qp, then rp
+ # ecx: not used
+ # edx: Nhi, then the remainder
+ # esi: not used
+ # edi: not used
+ #
+
+.globl _s_mpv_div_2dx1d
+.type _s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx # edx = Nhi
+ mov 12(%esp),%eax # eax = Nlo
+ mov 16(%esp),%ebx # ebx = divisor
+ div %ebx # eax = edx:eax / ebx, edx = edx:eax % ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx) # *qp = quotient
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx) # *rp = remainder
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c
new file mode 100644
index 0000000000..db19cff138
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.c
@@ -0,0 +1,460 @@
+/*
+ * mplogic.c
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/* {{{ Lookup table for population count */
+
+/* bitc[v] is the number of 1 bits in the byte value v (0 .. 255).
+ * The table is only ever read, so it is declared const. */
+static const unsigned char bitc[] = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_not(a, b) - compute b = ~a
+ mpl_and(a, b, c) - compute c = a & b
+ mpl_or(a, b, c) - compute c = a | b
+ mpl_xor(a, b, c) - compute c = a ^ b
+ */
+
+/* {{{ mpl_not(a, b) */
+
+/*
+ * Copies a into b and then replaces every in-use digit of b with its
+ * one's complement.  Returns MP_OKAY, or whatever error mp_copy() reports.
+ */
+mp_err
+mpl_not(mp_int *a, mp_int *b)
+{
+    mp_err status;
+    unsigned int pos = 0;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    /* The digit type is unsigned, so ~ is a plain bit flip */
+    while (pos < USED(b)) {
+        DIGIT(b, pos) = ~DIGIT(b, pos);
+        pos++;
+    }
+
+    /* all-ones digits have just become zero, possibly at the top */
+    s_mp_clamp(b);
+
+    return MP_OKAY;
+
+} /* end mpl_not() */
+
+/* }}} */
+
+/* {{{ mpl_and(a, b, c) */
+
+/*
+ * Computes c = a & b.  The operand with fewer digits in use is copied into
+ * c and each of its digits is then masked with the matching digit of the
+ * other operand.  Returns MP_OKAY, or whatever error mp_copy() reports.
+ */
+mp_err
+mpl_and(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *shorter = b;
+    mp_int *longer = a;
+    mp_err status;
+    unsigned int pos;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) <= USED(b)) {
+        shorter = a;
+        longer = b;
+    }
+
+    status = mp_copy(shorter, c);
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    pos = 0;
+    while (pos < USED(shorter)) {
+        DIGIT(c, pos) &= DIGIT(longer, pos);
+        pos++;
+    }
+
+    /* masking may have cleared the top digits */
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_and() */
+
+/* }}} */
+
+/* {{{ mpl_or(a, b, c) */
+
+/*
+ * Computes c = a | b.
+ *
+ * The operand with more digits in use is copied into c, and the digits of
+ * the shorter operand are then ORed in.  Only the digits the shorter
+ * operand actually has in use are read: everything above them is
+ * implicitly zero and leaves c unchanged, and indexing beyond USED(other)
+ * would read digits that operand does not own.
+ *
+ * Returns MP_OKAY, or whatever error mp_copy() reports.
+ *
+ * NOTE(review): c is overwritten by the copy before the shorter operand is
+ * read, so c should not alias the shorter operand.
+ */
+mp_err
+mpl_or(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* which = longer operand (copied), other = shorter operand (ORed in) */
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+
+    /* stop at the shorter operand's length; never read past its digits */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) |= DIGIT(other, ix);
+
+    return MP_OKAY;
+
+} /* end mpl_or() */
+
+/* }}} */
+
+/* {{{ mpl_xor(a, b, c) */
+
+/*
+ * Computes c = a ^ b.
+ *
+ * The operand with more digits in use is copied into c, and the digits of
+ * the shorter operand are then XORed in.  Only the digits the shorter
+ * operand actually has in use are read: everything above them is
+ * implicitly zero and leaves c unchanged, and indexing beyond USED(other)
+ * would read digits that operand does not own.
+ *
+ * Returns MP_OKAY, or whatever error mp_copy() reports.
+ *
+ * NOTE(review): c is overwritten by the copy before the shorter operand is
+ * read, so c should not alias the shorter operand.
+ */
+mp_err
+mpl_xor(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* which = longer operand (copied), other = shorter operand (XORed in) */
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+
+    /* stop at the shorter operand's length; never read past its digits */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) ^= DIGIT(other, ix);
+
+    /* equal-length operands can cancel their top digits */
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_xor() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_rsh(a, b, d) - b = a >> d
+ mpl_lsh(a, b, d) - b = a << d
+ */
+
+/* {{{ mpl_rsh(a, b, d) */
+
+/*
+ * Computes b = a >> d: a is copied into b, which is then shifted right by
+ * d bits through s_mp_div_2d().  Returns MP_OKAY, or whatever error
+ * mp_copy() reports (in which case no shift is attempted).
+ */
+mp_err
+mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status == MP_OKAY) {
+        s_mp_div_2d(b, d);
+    }
+
+    return status;
+
+} /* end mpl_rsh() */
+
+/* }}} */
+
+/* {{{ mpl_lsh(a, b, d) */
+
+/*
+ * Computes b = a << d: a is copied into b, which is then shifted left by
+ * d bits through s_mp_mul_2d().  Returns the first error reported by
+ * mp_copy() or, failing that, the result of s_mp_mul_2d().
+ */
+mp_err
+mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status == MP_OKAY) {
+        status = s_mp_mul_2d(b, d);
+    }
+
+    return status;
+
+} /* end mpl_lsh() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_num_set(a, num)
+
+ Count the number of set bits in the binary representation of a.
+ Returns MP_OKAY and sets 'num' to be the number of such bits, if
+ possible. If num is NULL, the result is thrown away, but it is
+ not considered an error.
+
+ mpl_num_clear() does basically the same thing for clear bits.
+ */
+
+/* {{{ mpl_num_set(a, num) */
+
+/*
+ * Counts the 1 bits in the in-use digits of a, one byte at a time through
+ * the bitc[] table.  The count is stored in *num when num is not NULL.
+ * Always returns MP_OKAY once the argument check has passed.
+ */
+mp_err
+mpl_num_set(mp_int *a, unsigned int *num)
+{
+    unsigned int total = 0;
+    unsigned int pos, byteIx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (pos = 0; pos < USED(a); pos++) {
+        mp_digit word = DIGIT(a, pos);
+
+        /* peel off the low byte, then move the next byte into place */
+        for (byteIx = 0; byteIx < sizeof(mp_digit); byteIx++) {
+            unsigned char low = (unsigned char)word;
+
+            total += bitc[low];
+            word >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL) {
+        *num = total;
+    }
+
+    return MP_OKAY;
+
+} /* end mpl_num_set() */
+
+/* }}} */
+
+/* {{{ mpl_num_clear(a, num) */
+
+/*
+ * Counts the 0 bits in the in-use digits of a.  Each byte is looked up in
+ * bitc[] through its complement (UCHAR_MAX - byte), so the table's count
+ * of 1 bits becomes a count of clear bits.  The count is stored in *num
+ * when num is not NULL.  Always returns MP_OKAY once the argument check
+ * has passed.
+ */
+mp_err
+mpl_num_clear(mp_int *a, unsigned int *num)
+{
+    unsigned int nclear = 0;
+    unsigned int pos, byteIx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (pos = 0; pos < USED(a); pos++) {
+        mp_digit word = DIGIT(a, pos);
+
+        /* peel off the low byte, then move the next byte into place */
+        for (byteIx = 0; byteIx < sizeof(mp_digit); byteIx++) {
+            unsigned char low = (unsigned char)word;
+
+            nclear += bitc[UCHAR_MAX - low];
+            word >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL) {
+        *num = nclear;
+    }
+
+    return MP_OKAY;
+
+} /* end mpl_num_clear() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_parity(a)
+
+ Determines the bitwise parity of the value given. Returns MP_EVEN
+ if an even number of bits are set, MP_ODD if an odd number are
+ set.
+ */
+
+/* {{{ mpl_parity(a) */
+
+/*
+ * Returns MP_ODD when the in-use digits of a contain an odd number of
+ * 1 bits and MP_EVEN otherwise.
+ */
+mp_err
+mpl_parity(mp_int *a)
+{
+    unsigned int pos;
+    int odd = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (pos = 0; pos < USED(a); pos++) {
+        mp_digit folded = DIGIT(a, pos);
+        int width;
+
+        /* XOR the upper half onto the lower half, halving the width each
+         * time, until bit 0 carries the parity of the whole digit */
+        for (width = (sizeof(mp_digit) * CHAR_BIT) / 2; width != 0; width >>= 1) {
+            folded ^= (folded >> width);
+        }
+
+        /* accumulate this digit's parity into the running parity */
+        odd ^= (int)(folded & 1);
+    }
+
+    return odd ? MP_ODD : MP_EVEN;
+
+} /* end mpl_parity() */
+
+/* }}} */
+
+/*
+ mpl_set_bit
+
+ Returns MP_OKAY or some error code.
+ Grows a if needed to set a bit to 1.
+ */
+/*
+ * Sets bit number bitNum of a to 1 when value is non-zero and to 0 when
+ * value is zero.  If the bit lies above the digits currently in use, a is
+ * first padded out to the digit that holds it (for either value), and a
+ * is clamped again afterwards.  Returns MP_OKAY, or the error reported by
+ * s_mp_pad().
+ */
+mp_err
+mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
+{
+    mp_size digitIx;
+    mp_digit bitMask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    digitIx = bitNum / MP_DIGIT_BIT;
+    if (digitIx + 1 > MP_USED(a)) {
+        mp_err rv = s_mp_pad(a, digitIx + 1);
+
+        if (rv != MP_OKAY) {
+            return rv;
+        }
+    }
+
+    bitMask = (mp_digit)1 << (bitNum % MP_DIGIT_BIT);
+    if (value) {
+        MP_DIGIT(a, digitIx) |= bitMask;
+    } else {
+        MP_DIGIT(a, digitIx) &= ~bitMask;
+    }
+
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+/*
+ mpl_get_bit
+
+ returns 0 or 1 or some (negative) error code.
+ */
+/*
+ * Returns the value (0 or 1) of bit number bitNum of a.  ARGCHK reports
+ * MP_BADARG for a NULL a and MP_RANGE when the bit lies above the digits
+ * currently in use.
+ */
+mp_err
+mpl_get_bit(const mp_int *a, mp_size bitNum)
+{
+    mp_size digitIx;
+    mp_size bitIx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    digitIx = bitNum / MP_DIGIT_BIT;
+    ARGCHK(digitIx <= MP_USED(a) - 1, MP_RANGE);
+
+    bitIx = bitNum % MP_DIGIT_BIT;
+    return (mp_err)(MP_DIGIT(a, digitIx) >> bitIx) & 1;
+}
+
+/*
+ mpl_get_bits
+ - Extracts numBits bits from a, where the least significant extracted bit
+ is bit lsbNum. Returns a negative value if error occurs.
+ - Because sign bit is used to indicate error, maximum number of bits to
+ be returned is the lesser of (a) the number of bits in an mp_digit, or
+ (b) one less than the number of bits in an mp_err.
+ - lsbNum + numbits can be greater than the number of significant bits in
+ integer a, as long as bit lsbNum is in the high order digit of a.
+ */
+/*
+ * Returns numBits bits of a, starting at bit lsbNum (the least significant
+ * extracted bit), as a non-negative mp_err.
+ *
+ * ARGCHK reports MP_BADARG when a is NULL or numBits is not smaller than
+ * the number of bits in an mp_digit, and MP_RANGE when lsbNum lies beyond
+ * the digits of a.
+ *
+ * The mask is built only after numBits has been validated, and in
+ * mp_digit width: the previous "1 << numBits" shifted a plain int, which
+ * is undefined for numBits >= 31 even though the range check admits
+ * larger values when mp_digit is 64 bits wide.
+ *
+ * NOTE(review): the range check uses MP_HOWMANY, so an lsbNum that is an
+ * exact multiple of MP_DIGIT_BIT equal to MP_USED(a) * MP_DIGIT_BIT still
+ * passes and reads digit[MP_USED(a)]; left unchanged here, confirm that no
+ * caller relies on it before tightening.
+ */
+mp_err
+mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
+{
+    mp_size rshift = (lsbNum % MP_DIGIT_BIT);
+    mp_size lsWndx = (lsbNum / MP_DIGIT_BIT);
+    mp_digit *digit;
+    mp_digit mask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+    ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
+    ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE);
+
+    digit = MP_DIGITS(a) + lsWndx;
+    mask = ((mp_digit)1 << numBits) - 1;
+
+    if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) ||
+        (lsWndx + 1 >= MP_USED(a))) {
+        /* the window fits in one digit, or there is no higher digit */
+        mask &= (digit[0] >> rshift);
+    } else {
+        /* the window straddles two digits; rshift is non-zero here */
+        mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
+    }
+    return (mp_err)mask;
+}
+
+#define LZCNTLOOP(i) \
+ do { \
+ x = d >> (i); \
+ mask = (0 - x); \
+ mask = (0 - (mask >> (MP_DIGIT_BIT - 1))); \
+ bits += (i)&mask; \
+ d ^= (x ^ d) & mask; \
+ } while (0)
+
+/*
+ mpl_significant_bits
+ returns number of significant bits in abs(a).
+ In other words: floor(lg(abs(a))) + 1.
+ returns 1 if value is zero.
+
+ LZCNTLOOP(i) is one branch-free step of the search and uses the locals
+ d, x, mask and bits of this function. x is d shifted right by i bits;
+ mask becomes all ones when x is non-zero and zero otherwise. When x is
+ non-zero, i is added to bits and d is replaced by x; when x is zero,
+ bits and d are left as they were.
+ */
+mp_size
+mpl_significant_bits(const mp_int *a)
+{
+ /*
+ start bits at 1.
+ lg(0) = 0 => bits = 1 by function semantics.
+ below does a binary search for the _position_ of the top bit set,
+ which is floor(lg(abs(a))) for a != 0.
+ */
+ mp_size bits = 1;
+ int ix;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = MP_USED(a); ix > 0;) { /* walk down from the top digit in use */
+ mp_digit d, x, mask;
+ if ((d = MP_DIGIT(a, --ix)) == 0)
+ continue; /* skip zero digits; ix ends at 0 if every digit is zero */
+#if !defined(MP_USE_UINT_DIGIT)
+ LZCNTLOOP(32); /* presumably only meaningful when mp_digit is wider than 32 bits */
+#endif
+ LZCNTLOOP(16);
+ LZCNTLOOP(8);
+ LZCNTLOOP(4);
+ LZCNTLOOP(2);
+ LZCNTLOOP(1);
+ break; /* only the highest non-zero digit is searched */
+ }
+ bits += ix * MP_DIGIT_BIT; /* every digit below that one counts in full */
+ return bits;
+}
+
+#undef LZCNTLOOP
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h
new file mode 100644
index 0000000000..71b7551392
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.h
@@ -0,0 +1,55 @@
+/*
+ * mplogic.h
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPLOGIC_
+#define _H_MPLOGIC_
+
+#include "mpi.h"
+SEC_BEGIN_PROTOS
+
+/*
+ The logical operations treat an mp_int as if it were a bit vector,
+ without regard to its sign (an mp_int is represented in a signed
+ magnitude format). Values are treated as if they had an infinite
+ string of zeros left of the most-significant bit.
+ */
+
+/* Parity results */
+
+#define MP_EVEN MP_YES
+#define MP_ODD MP_NO
+
+/* Bitwise functions */
+
+mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */
+mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */
+mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */
+mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */
+
+/* Shift functions */
+
+mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */
+mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */
+
+/* Bit count and parity */
+
+mp_err mpl_num_set(mp_int *a, unsigned int *num); /* count set bits */
+mp_err mpl_num_clear(mp_int *a, unsigned int *num); /* count clear bits */
+mp_err mpl_parity(mp_int *a); /* determine parity */
+
+/* Get & Set the value of a bit */
+
+mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value); /* set the bit if value != 0, else clear it */
+mp_err mpl_get_bit(const mp_int *a, mp_size bitNum); /* 0 or 1, or a negative error code */
+mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits); /* numBits bits starting at lsbNum, or a negative error code */
+mp_size mpl_significant_bits(const mp_int *a); /* significant bits in abs(a); 1 for zero */
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPLOGIC_ */
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
new file mode 100644
index 0000000000..63842c6314
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -0,0 +1,1160 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file implements modular exponentiation using Montgomery's
+ * method for modular reduction. This file implements the method
+ * described as "Improvement 2" in the paper "A Cryptographic Library for
+ * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.
+ * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"
+ * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,
+ * published by Springer Verlag.
+ */
+
+#define MP_USING_CACHE_SAFE_MOD_EXP 1
+#include <string.h>
+#include "mpi-priv.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#ifdef MP_USING_MONT_MULF
+#include "montmulf.h"
+#endif
+#include <stddef.h> /* ptrdiff_t */
+#include <assert.h>
+
+#define STATIC
+
+#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
+
+/*! computes T = REDC(T), 2^b == R
+    \param T < RN; reduced in place
+    \param mmm supplies the modulus N and its n0prime value
+    \return MP_OKAY, or the error from s_mp_pad() / s_mp_sub(); DEBUG builds
+            return MP_UNDEF if T is still >= N after the one subtraction
+*/
+mp_err
+s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
+{
+    mp_err res;
+    mp_size ix;
+
+    /* T must be able to hold 2 * used(N) + 1 digits */
+    MP_CHECKOK(s_mp_pad(T, (MP_USED(&mmm->N) << 1) + 1));
+
+    ix = 0;
+    while (ix < MP_USED(&mmm->N)) {
+        mp_digit m = MP_DIGIT(T, ix) * mmm->n0prime;
+
+        /* T += N * m * (MP_RADIX ** ix); */
+        s_mp_mul_d_add_offset(&mmm->N, m, T, ix);
+        ++ix;
+    }
+    s_mp_clamp(T);
+
+    /* T /= R */
+    s_mp_rshd(T, MP_USED(&mmm->N));
+
+    if (s_mp_cmp(T, &mmm->N) >= 0) {
+        /* T = T - N */
+        MP_CHECKOK(s_mp_sub(T, &mmm->N));
+#ifdef DEBUG
+        res = mp_cmp(T, &mmm->N);
+        if (res >= 0) {
+            res = MP_UNDEF;
+            goto CLEANUP;
+        }
+#endif
+    }
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+}
+
+#if !defined(MP_MONT_USE_MP_MUL)
+
+/*! c <- REDC( a * b ) mod N
+ \param a < N i.e. "reduced"
+ \param b < N i.e. "reduced"
+ \param c receives the result; it is reset and padded to
+ 2 * used(N) + 1 digits before the product is accumulated
+ \param mmm modulus N and n0' of N
+ \return MP_OKAY, MP_BADARG from the argument check, or the error
+ reported by s_mp_pad() / s_mp_sub()
+
+ The multiplication and the reduction are interleaved: for every digit
+ of b, a * b[i] is added into c at offset i, and a multiple of N chosen
+ from digit i of c and n0prime is added at the same offset. The final
+ right shift by used(N) digits is the division by R.
+
+ NOTE(review): c is overwritten before a and b are read, so c presumably
+ must not alias either input - confirm against the callers.
+*/
+mp_err
+s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm)
+{
+ mp_digit *pb;
+ mp_digit m_i;
+ mp_err res;
+ mp_size ib; /* "index b": index of current digit of B */
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1; /* reset c to a single zero digit before padding */
+ MP_DIGIT(c, 0) = 0;
+ ib = (MP_USED(&mmm->N) << 1) + 1; /* ib temporarily holds the padded size of c */
+ if ((res = s_mp_pad(c, ib)) != MP_OKAY)
+ goto CLEANUP;
+
+ useda = MP_USED(a);
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c)); /* first partial product, a * b[0], written to c */
+ s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1)); /* remaining digits of c, above the useda + 1 just written */
+ m_i = MP_DIGIT(c, 0) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
+
+ /* Outer loop: Digits of b */
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (b_i)
+ s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ if (usedb < MP_USED(&mmm->N)) { /* b is shorter than N: finish the reduction steps alone */
+ for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ }
+ s_mp_clamp(c);
+ s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
+ if (s_mp_cmp(c, &mmm->N) >= 0) {
+ MP_CHECKOK(s_mp_sub(c, &mmm->N));
+ }
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+#endif
+
+/*
+ * Converts x to Montgomery form: xMont = x * R mod N, where multiplying
+ * by R is a left shift by MP_USED(N) digits.  x and xMont may be the same
+ * object, in which case the copy is skipped.  Returns the status of the
+ * last operation attempted.
+ */
+mp_err
+mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont)
+{
+    mp_err res;
+
+    if (xMont != x) {
+        MP_CHECKOK(mp_copy(x, xMont));
+    }
+
+    /* xMont = x << b */
+    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N)));
+
+    /* xMont = xMont mod N; the quotient is not wanted */
+    MP_CHECKOK(mp_div(xMont, N, 0, xMont));
+
+CLEANUP:
+    return res;
+}
+
+/*
+ * Returns the negation (in wrap-around mp_digit arithmetic) of
+ * s_mp_invmod_radix() applied to the least significant digit of N.
+ */
+mp_digit
+mp_calculate_mont_n0i(const mp_int *N)
+{
+    const mp_digit lowDigit = MP_DIGIT(N, 0);
+
+    return 0 - s_mp_invmod_radix(lowDigit);
+}
+
+#ifdef MP_USING_MONT_MULF
+
+/* the floating point multiply is already cache safe,
+ * don't turn on cache safe unless we specifically
+ * force it */
+#ifndef MP_FORCE_CACHE_SAFE
+#undef MP_USING_CACHE_SAFE_MOD_EXP
+#endif
+
+unsigned int mp_using_mont_mulf = 1;
+
+/* computes montgomery square of the integer in mResult */
+#define SQR \
+ conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, d16Tmp, \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* computes montgomery product of x and the integer in mResult */
+#define MUL(x) \
+ conv_i32_to_d32(dm1, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, oddPowers[x], \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* Do modular exponentiation using floating point multiply code.
+ *
+ * montBase is used as the first table entry and every further entry is
+ * the previous one times its square, so oddPowers[i] holds
+ * montBase ** (2*i + 1). The exponent is consumed from the top in
+ * windows of window_bits bits; only window sizes 1, 4, 5 and 6 are
+ * handled and anything else calls abort(). A non-zero window value is
+ * split into an odd part and a power of two: the squarings before the
+ * MUL, the MUL by the matching odd power and the squarings after it
+ * always add up to window_bits squarings and one multiply.
+ *
+ * All floating point scratch arrays and mResult live in one malloc'ed
+ * buffer, which is zeroed before it is freed. The final value is passed
+ * through s_mp_redc() and exchanged into result.
+ *
+ * Returns the status of the last failing call, or of the final
+ * s_mp_redc(); MP_MEM if the scratch buffer cannot be allocated.
+ *
+ * NOTE(review): the window loop starts at bits_in_exponent - window_bits
+ * and steps down by window_bits, so bits_in_exponent is presumably
+ * expected to be a multiple of window_bits - confirm with the caller.
+ * NOTE(review): mp_exch() into result runs even when the final
+ * s_mp_redc() reports an error.
+ */
+mp_err
+mp_exptmod_f(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size odd_ints)
+{
+ mp_digit *mResult;
+ double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
+ double dn0;
+ mp_size i;
+ mp_err res;
+ int expOff;
+ int dSize = 0, oddPowSize, dTmpSize;
+ mp_int accum1;
+ double *oddPowers[MAX_ODD_INTS];
+
+ /* function for computing n0prime only works if n0 is odd */
+
+ MP_DIGITS(&accum1) = 0;
+
+ for (i = 0; i < MAX_ODD_INTS; ++i)
+ oddPowers[i] = 0;
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+
+ mp_set(&accum1, 1);
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+ MP_CHECKOK(s_mp_pad(&accum1, nLen));
+
+ oddPowSize = 2 * nLen + 1;
+ dTmpSize = 2 * oddPowSize;
+ dSize = sizeof(double) * (nLen * 4 + 1 +
+ ((odd_ints + 1) * oddPowSize) + dTmpSize);
+ dBuf = malloc(dSize);
+ if (!dBuf) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ dm1 = dBuf; /* array of d32 */
+ dn = dBuf + nLen; /* array of d32 */
+ dSqr = dn + nLen; /* array of d32 */
+ d16Tmp = dSqr + nLen; /* array of d16 */
+ dTmp = d16Tmp + oddPowSize;
+
+ for (i = 0; i < odd_ints; ++i) {
+ oddPowers[i] = dTmp;
+ dTmp += oddPowSize;
+ }
+ mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
+
+ /* Make dn and dn0 */
+ conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
+ dn0 = (double)(mmm->n0prime & 0xffff);
+
+ /* Make dSqr */
+ conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
+ mont_mulf_noconv(mResult, dm1, oddPowers[0],
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+ conv_i32_to_d32(dSqr, mResult, nLen);
+
+ for (i = 1; i < odd_ints; ++i) {
+ mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+ conv_i32_to_d16(oddPowers[i], mResult, nLen);
+ }
+
+ s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
+
+ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ if (window_bits == 1) {
+ if (!smallExp) {
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ MUL(0);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 4) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 5) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x10) {
+ SQR;
+ MUL(smallExp / 32);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 6) {
+ if (!smallExp) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 1) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 2);
+ } else if (smallExp & 2) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 4);
+ SQR;
+ } else if (smallExp & 4) {
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 8);
+ SQR;
+ SQR;
+ } else if (smallExp & 8) {
+ SQR;
+ SQR;
+ SQR;
+ MUL(smallExp / 16);
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x10) {
+ SQR;
+ SQR;
+ MUL(smallExp / 32);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else if (smallExp & 0x20) {
+ SQR;
+ MUL(smallExp / 64);
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ SQR;
+ } else {
+ abort();
+ }
+ } else {
+ abort();
+ }
+ }
+
+ s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
+
+ res = s_mp_redc(&accum1, mmm);
+ mp_exch(&accum1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ if (dBuf) {
+ if (dSize)
+ memset(dBuf, 0, dSize); /* wipe the intermediate values before releasing the buffer */
+ free(dBuf);
+ }
+
+ return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm)) /* b = REDC(a * a); expands to two statements and uses mmm from the caller */
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL(x, a, b) \
+ MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm)) /* b = REDC(a * oddPowers[x]) via a plain multiply followed by a reduction */
+#else
+#define MUL(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm)) /* b = REDC(a * oddPowers[x]) in one interleaved pass */
+#endif
+
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp /* exchange the caller's pa1 and pa2 pointers through ptmp */
+
+/* Do modular exponentiation using integer multiply code.
+ *
+ * A table oddPowers[i] = montBase ** (2*i + 1) is built by repeatedly
+ * multiplying by power2 = montBase ** 2 and reducing with s_mp_redc().
+ * The exponent is consumed from the top in windows of window_bits bits;
+ * only window sizes 1, 4, 5 and 6 are handled and anything else calls
+ * abort(). A non-zero window value is split into an odd part and a
+ * power of two: the squarings before the MUL, the MUL by the matching
+ * odd power and the squarings after it always add up to window_bits
+ * squarings and one multiply.
+ *
+ * SQR and MUL read one accumulator and write the other, so the running
+ * value ping-pongs between accum1 and accum2. pa1 always designates the
+ * accumulator holding the current value at the end of a window; SWAPPA
+ * is used after sequences with an odd number of operations to restore
+ * that.
+ *
+ * The final value is passed through s_mp_redc() and exchanged into
+ * result. Returns the status of the last failing call, or of the final
+ * s_mp_redc().
+ *
+ * NOTE(review): the window loop starts at bits_in_exponent - window_bits
+ * and steps down by window_bits, so bits_in_exponent is presumably
+ * expected to be a multiple of window_bits - confirm with the caller.
+ * NOTE(review): odd_ints indexes oddPowers[MAX_ODD_INTS] and is not
+ * range-checked here; mp_exch() into result runs even when the final
+ * s_mp_redc() reports an error.
+ */
+mp_err
+mp_exptmod_i(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size odd_ints)
+{
+ mp_int *pa1, *pa2, *ptmp;
+ mp_size i;
+ mp_err res;
+ int expOff;
+ mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
+
+ /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
+ /* oddPowers[i] = base ** (2*i + 1); */
+
+ MP_DIGITS(&accum1) = 0;
+ MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&power2) = 0;
+ for (i = 0; i < MAX_ODD_INTS; ++i) {
+ MP_DIGITS(oddPowers + i) = 0;
+ }
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+ MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase));
+
+ MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2));
+ MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */
+ MP_CHECKOK(s_mp_redc(&power2, mmm));
+
+ for (i = 1; i < odd_ints; ++i) {
+ MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
+ MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
+ MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
+ }
+
+ /* set accumulator to montgomery residue of 1 */
+ mp_set(&accum1, 1);
+ MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+ pa1 = &accum1;
+ pa2 = &accum2;
+
+ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ if (window_bits == 1) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ MUL(0, pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 4) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 5) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 2, pa2, pa1);
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 4, pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 8, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 16, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 32, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 6) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 32, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x20) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 64, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else {
+ abort();
+ }
+ }
+
+ res = s_mp_redc(pa1, mmm);
+ mp_exch(pa1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ mp_clear(&accum2);
+ mp_clear(&power2);
+ for (i = 0; i < odd_ints; ++i) {
+ mp_clear(oddPowers + i);
+ }
+ return res;
+}
+#undef SQR
+#undef MUL
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+unsigned int mp_using_cache_safe_exp = 1;
+#endif
+
+/*
+ * Selects whether the cache-safe modular exponentiation is used.  When
+ * MP_USING_CACHE_SAFE_MOD_EXP is defined the flag is stored in
+ * mp_using_cache_safe_exp and MP_OKAY is returned.  Otherwise only a
+ * value of 0 is accepted; any other value yields MP_BADARG.
+ */
+mp_err
+mp_set_safe_modexp(int value)
+{
+#ifndef MP_USING_CACHE_SAFE_MOD_EXP
+    return (value == 0) ? MP_OKAY : MP_BADARG;
+#else
+    mp_using_cache_safe_exp = value;
+    return MP_OKAY;
+#endif
+}
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+#define WEAVE_WORD_SIZE 4
+
+/*
+ * mpi_to_weave takes an array of bignums, a matrix in which each bignum
+ * occupies all the columns of a row, and transposes it into a matrix in
+ * which each bignum occupies a column of every row. The first row of the
+ * input matrix becomes the first column of the output matrix. The n'th
+ * row of input becomes the n'th column of output. The input data is said
+ * to be "interleaved" or "woven" into the output matrix.
+ *
+ * The array of bignums is left in this woven form. Each time a single
+ * bignum value is needed, it is recreated by fetching the n'th column,
+ * forming a single row which is the new bignum.
+ *
+ * The purpose of this interleaving is make it impossible to determine which
+ * of the bignums is being used in any one operation by examining the pattern
+ * of cache misses.
+ *
+ * The weaving function does not transpose the entire input matrix in one call.
+ * It transposes 4 rows of mp_ints into their respective columns of output.
+ *
+ * This implementation treats each mp_int bignum as an array of mp_digits,
+ * It stores those bytes as a column of mp_digits in the output matrix. It
+ * doesn't care if the machine uses big-endian or little-endian byte ordering
+ * within mp_digits.
+ *
+ * "bignums" is an array of mp_ints.
+ * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
+ *
+ * "weaved" is the weaved output matrix.
+ * The first byte of bignums[0] is stored in weaved[0].
+ *
+ * "nBignums" is the total number of bignums in the array of which "bignums"
+ * is a part.
+ *
+ * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
+ * mp_ints that use less than nDigits digits are logically padded with zeros
+ * while being stored in the weaved array.
+ */
+/*
+ * Weaves WEAVE_WORD_SIZE consecutive mp_ints, bignums[0] onwards, into the
+ * columns starting at weaved[0] .. weaved[WEAVE_WORD_SIZE - 1].  Digit d
+ * of bignums[i] is stored at weaved[d * nBignums + i]; rows above the
+ * digits an mp_int has in use are filled with zero, up to nDigits rows.
+ *
+ * ARGCHK reports MP_BADARG if an input is not MP_ZPOS or uses more than
+ * nDigits digits; otherwise MP_OKAY is returned.
+ */
+mp_err
+mpi_to_weave(const mp_int *bignums,
+             mp_digit *weaved,
+             mp_size nDigits,  /* in each mp_int of input */
+             mp_size nBignums) /* in the entire source array */
+{
+    mp_digit *limit = weaved + (nDigits * nBignums);
+    mp_size col;
+
+    for (col = 0; col < WEAVE_WORD_SIZE; col++) {
+        const mp_int *src = &bignums[col];
+        mp_size srcUsed = MP_USED(src);
+        mp_digit *in = MP_DIGITS(src);
+        mp_digit *out = weaved + col;
+        mp_size row;
+
+        ARGCHK(MP_SIGN(src) == MP_ZPOS, MP_BADARG);
+        ARGCHK(srcUsed <= nDigits, MP_BADARG);
+
+        /* one matrix row per digit that is in use */
+        for (row = 0; row < srcUsed; row++) {
+            *out = in[row];
+            out += nBignums;
+        }
+
+        /* zero-fill the rest of this column */
+        for (; out < limit; out += nBignums) {
+            *out = 0;
+        }
+    }
+
+    return MP_OKAY;
+}
+
+/*
+ * These macros evaluate to an all-ones mask (e.g. 0xffffffff for a 32-bit
+ * operand) if the condition is true, and 0 otherwise.
+ */
+#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
+#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
+#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
+
+/* Reverse the operation above for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * Every read accesses every element of the weaved array, in order to
+ * avoid timing attacks based on patterns of memory accesses.
+ *
+ * For each of the nDigits rows, all nBignums entries of the row are read
+ * and ANDed with a mask that is all ones only for column "index"; the
+ * results are ORed together, so exactly the wanted digit survives. The
+ * result is made non-negative, given nDigits digits and then clamped.
+ * Always returns MP_OKAY.
+ *
+ * NOTE(review): nDigits digits are written through MP_DIGITS(a) without
+ * checking how many digits a has allocated - the caller presumably
+ * guarantees enough room.
+ */
+mp_err
+weave_to_mpi(mp_int *a, /* out, result */
+ const mp_digit *weaved, /* in, matrix of mp_digits */
+ mp_size index, /* which column to read */
+ mp_size nDigits, /* number of mp_digits in each bignum */
+ mp_size nBignums) /* width of the matrix */
+{
+ /* these are indices, but need to be the same size as mp_digit
+ * because of the CONST_TIME operations */
+ mp_digit i, j;
+ mp_digit d;
+ mp_digit *pDest = MP_DIGITS(a);
+
+ MP_SIGN(a) = MP_ZPOS;
+ MP_USED(a) = nDigits;
+
+ assert(weaved != NULL);
+
+ /* Fetch the proper column in constant time, indexing over the whole array */
+ for (i = 0; i < nDigits; ++i) {
+ d = 0;
+ for (j = 0; j < nBignums; ++j) {
+ d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index); /* mask is non-zero only when j == index */
+ }
+ pDest[i] = d;
+ }
+
+ s_mp_clamp(a);
+ return MP_OKAY;
+}
+
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm)) /* b = REDC(a * a); expands to two statements and uses mmm from the caller */
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(mp_mul(a, x, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm)) /* b = REDC(a * x), with x an mp_int pointer rather than a table index */
+#else
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm)) /* b = REDC(a * x), with x an mp_int pointer rather than a table index */
+#endif
+
+#define MUL(x, a, b) \
+ MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
+ MUL_NOWEAVE(&tmp, a, b) /* fetch column x of the woven powers table into tmp, then b = REDC(a * tmp) */
+
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp /* exchange the caller's pa1 and pa2 pointers through ptmp */
+#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y))) /* round x up to a multiple of y; the mask form only works when y is a power of two */
+
+/*
+ * Do modular exponentiation using integer multiply code.
+ *
+ * Fixed-window exponentiation in which the precomputed powers of the base
+ * are stored through mpi_to_weave() into a single 'powers' buffer and read
+ * back with weave_to_mpi(), rather than being kept as an array of mp_ints.
+ *
+ *   montBase          base; mp_exptmod() passes it already in Montgomery form
+ *   exponent          exponent, read window_bits bits at a time, top first
+ *   result            on success receives the value after s_mp_redc()
+ *   mmm               Montgomery modulus context (mmm->N is the modulus)
+ *   nLen              digit count used to size the temporaries and the table
+ *   bits_in_exponent  exponent bit length; mp_exptmod() rounds it up to a
+ *                     multiple of window_bits before calling
+ *   window_bits       window width; only 1, 4, 5 and 6 are handled, any
+ *                     other value reaches abort()
+ *   num_powers        number of table entries; mp_exptmod() passes
+ *                     1 << window_bits
+ *
+ * Returns the status of the final s_mp_redc(), MP_MEM if the powers buffer
+ * cannot be allocated, or the error code left in res by the first
+ * MP_CHECKOK step that fails.
+ */
+mp_err
+mp_exptmod_safe_i(const mp_int *montBase,
+                  const mp_int *exponent,
+                  const mp_int *modulus,
+                  mp_int *result,
+                  mp_mont_modulus *mmm,
+                  int nLen,
+                  mp_size bits_in_exponent,
+                  mp_size window_bits,
+                  mp_size num_powers)
+{
+    mp_int *pa1, *pa2, *ptmp;
+    mp_size i;
+    mp_size first_window;
+    mp_err res;
+    int expOff;
+    mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
+    mp_int tmp;
+    mp_digit *powersArray = NULL;
+    mp_digit *powers = NULL;
+
+    /* Null the digit pointers first so CLEANUP can mp_clear() every
+     * temporary no matter how far initialization got. */
+    MP_DIGITS(&accum1) = 0;
+    MP_DIGITS(&accum2) = 0;
+    MP_DIGITS(&accum[0]) = 0;
+    MP_DIGITS(&accum[1]) = 0;
+    MP_DIGITS(&accum[2]) = 0;
+    MP_DIGITS(&accum[3]) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    /* grab the first window value. This allows us to preload accumulator1
+     * and save a conversion, some squares and a multiply */
+    MP_CHECKOK(mpl_get_bits(exponent,
+                            bits_in_exponent - window_bits, window_bits));
+    first_window = (mp_size)res;
+
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+    /* build the first WEAVE_WORD powers inline */
+    /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
+    if (num_powers > 2) {
+        MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
+        mp_set(&accum[0], 1);
+        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
+        MP_CHECKOK(mp_copy(montBase, &accum[1]));
+        SQR(montBase, &accum[2]);
+        MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+        /* num_powers entries of nLen digits, plus num_powers spare bytes so
+         * the table start can be aligned with MP_ALIGN below */
+        powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
+        if (!powersArray) {
+            res = MP_MEM;
+            goto CLEANUP;
+        }
+        /* powers[i] = base ** (i); */
+        powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
+        MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
+        if (first_window < 4) {
+            MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
+            /* mark the first window as consumed */
+            first_window = num_powers;
+        }
+    } else {
+        if (first_window == 0) {
+            mp_set(&accum1, 1);
+            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+        } else {
+            /* assert first_window == 1? */
+            MP_CHECKOK(mp_copy(montBase, &accum1));
+        }
+    }
+
+    /*
+     * calculate all the powers in the powers array.
+     * this adds 2**(k-1)-2 square operations over just calculating the
+     * odd powers where k is the window size in the two other mp_modexpt
+     * implementations in this file. We will get some of that
+     * back by not needing the first 'k' squares and one multiply for the
+     * first window.
+     * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
+     * num_powers > 2, in which case powers will have been allocated.
+     */
+    for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
+        int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
+        if (i & 1) {
+            MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
+            /* we've filled the array do our 'per array' processing */
+            if (acc_index == (WEAVE_WORD_SIZE - 1)) {
+                MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
+                                        nLen, num_powers));
+
+                if (first_window <= i) {
+                    MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
+                                       &accum1));
+                    first_window = num_powers;
+                }
+            }
+        } else {
+            /* up to 8 we can find 2^i-1 in the accum array, but at 8 our source
+             * and target are the same so we need to copy. After that, the
+             * value is overwritten, so we need to fetch it from the stored
+             * weave array */
+            if (i > 2 * WEAVE_WORD_SIZE) {
+                MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
+                SQR(&accum2, &accum[acc_index]);
+            } else {
+                int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
+                if (half_power_index == acc_index) {
+                    /* copy is cheaper than weave_to_mpi */
+                    MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+                    SQR(&accum2, &accum[acc_index]);
+                } else {
+                    SQR(&accum[half_power_index], &accum[acc_index]);
+                }
+            }
+        }
+    }
+/* if the accum1 isn't set, Then there is something wrong with our logic
+ * above and is an internal programming error.
+ */
+#if MP_ARGCHK == 2
+    assert(MP_USED(&accum1) != 0);
+#endif
+
+    /* accum1 already holds the power selected by the first window */
+    pa1 = &accum1;
+    pa2 = &accum2;
+
+    /* tmp is not used if window_bits == 1. */
+    if (window_bits != 1) {
+        MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
+    }
+
+    /* the top window was consumed above, so start one window below it */
+    for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        /* handle unroll the loops */
+        switch (window_bits) {
+            case 1:
+                if (!smallExp) {
+                    SQR(pa1, pa2);
+                    SWAPPA;
+                } else if (smallExp & 1) {
+                    SQR(pa1, pa2);
+                    MUL_NOWEAVE(montBase, pa2, pa1);
+                } else {
+                    abort();
+                }
+                break;
+            case 6:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            /* fall through */
+            case 4:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp, pa1, pa2);
+                SWAPPA;
+                break;
+            case 5:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp, pa2, pa1);
+                break;
+            default:
+                abort(); /* could do a loop? */
+        }
+    }
+
+    res = s_mp_redc(pa1, mmm);
+    mp_exch(pa1, result);
+
+CLEANUP:
+    mp_clear(&accum1);
+    mp_clear(&accum2);
+    mp_clear(&accum[0]);
+    mp_clear(&accum[1]);
+    mp_clear(&accum[2]);
+    mp_clear(&accum[3]);
+    mp_clear(&tmp);
+    /* zero required by FIPS here, can't use PORT_ZFree
+     * because mpi doesn't link with util */
+    if (powers) {
+        /* The table was allocated for num_powers entries of nLen digits
+         * each, so wipe all of it, not just the first num_powers digits.
+         * The aligned start plus this length stays inside the allocation
+         * because the allocation carries num_powers spare bytes. */
+        PORT_Memset(powers, 0,
+                    (size_t)num_powers * (size_t)nLen * sizeof(mp_digit));
+    }
+    free(powersArray);
+    return res;
+}
+#undef SQR
+#undef MUL
+#endif
+/*
+ * mp_exptmod(inBase, exponent, modulus, result)
+ *
+ * Modular exponentiation entry point: result = inBase ** exponent mod
+ * modulus, computed with Montgomery multiplication when the modulus is odd.
+ *
+ * - An even modulus is handed to s_mp_exptmod() instead.
+ * - A negative inBase returns MP_RANGE.
+ * - A base that is not smaller than the modulus is first reduced into a
+ *   local temporary (goodBase).
+ * - The window width is chosen from the exponent bit length, optionally
+ *   clamped by the processor cache line size, and the work is dispatched to
+ *   mp_exptmod_f, mp_exptmod_safe_i or mp_exptmod_i depending on the build
+ *   flags and runtime switches.
+ *
+ * Returns the status of the selected helper, or the error code of the first
+ * MP_CHECKOK step that fails.
+ */
+mp_err
+mp_exptmod(const mp_int *inBase, const mp_int *exponent,
+ const mp_int *modulus, mp_int *result)
+{
+ const mp_int *base;
+ mp_size bits_in_exponent, i, window_bits, odd_ints;
+ mp_err res;
+ int nLen;
+ mp_int montBase, goodBase;
+ mp_mont_modulus mmm;
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ static unsigned int max_window_bits; /* 0 until computed below; kept across calls */
+#endif
+
+ /* function for computing n0prime only works if n0 is odd */
+ if (!mp_isodd(modulus))
+ return s_mp_exptmod(inBase, exponent, modulus, result);
+
+ if (mp_cmp_z(inBase) == MP_LT)
+ return MP_RANGE; /* negative bases are rejected */
+ MP_DIGITS(&montBase) = 0; /* null digits so CLEANUP may mp_clear() before any init ran */
+ MP_DIGITS(&goodBase) = 0;
+
+ if (mp_cmp(inBase, modulus) < 0) {
+ base = inBase; /* already below the modulus: use the caller's value directly */
+ } else {
+ MP_CHECKOK(mp_init(&goodBase));
+ base = &goodBase;
+ MP_CHECKOK(mp_mod(inBase, modulus, &goodBase)); /* reduce into our own temporary */
+ }
+
+ nLen = MP_USED(modulus);
+ MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
+
+ mmm.N = *modulus; /* a copy of the mp_int struct */
+
+ /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
+ ** where n0 = least significant mp_digit of N, the modulus.
+ */
+ mmm.n0prime = mp_calculate_mont_n0i(modulus);
+
+ MP_CHECKOK(mp_to_mont(base, modulus, &montBase));
+
+ bits_in_exponent = mpl_significant_bits(exponent);
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ if (bits_in_exponent > 780)
+ window_bits = 6;
+ else if (bits_in_exponent > 256)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+ } else
+#endif
+ if (bits_in_exponent > 480)
+ window_bits = 6;
+ else if (bits_in_exponent > 160)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ /*
+ * clamp the window size based on
+ * the cache line size.
+ * max_window_bits is computed only while it is still zero and is then
+ * reused by later calls.
+ */
+ if (!max_window_bits) {
+ unsigned long cache_size = s_mpi_getProcessorLineSize();
+ /* processor has no cache, use 'fast' code always */
+ if (cache_size == 0) {
+ mp_using_cache_safe_exp = 0;
+ }
+ if ((cache_size == 0) || (cache_size >= 64)) {
+ max_window_bits = 6;
+ } else if (cache_size >= 32) {
+ max_window_bits = 5;
+ } else if (cache_size >= 16) {
+ max_window_bits = 4;
+ } else
+ max_window_bits = 1; /* should this be an assert? */
+ }
+
+ /* clamp the window size down before we round bits_in_exponent up to a
+ * multiple of it below */
+ if (mp_using_cache_safe_exp) {
+ if (window_bits > max_window_bits) {
+ window_bits = max_window_bits;
+ }
+ }
+#endif
+
+ odd_ints = 1 << (window_bits - 1); /* count passed to mp_exptmod_f / mp_exptmod_i */
+ i = bits_in_exponent % window_bits;
+ if (i != 0) {
+ bits_in_exponent += window_bits - i; /* round up to a whole number of windows */
+ }
+
+#ifdef MP_USING_MONT_MULF
+ if (mp_using_mont_mulf) {
+ MP_CHECKOK(s_mp_pad(&montBase, nLen));
+ res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+ } else
+#endif
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, 1 << window_bits); /* this variant gets 2**window_bits, not odd_ints */
+ } else
+#endif
+ res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+
+CLEANUP:
+ mp_clear(&montBase);
+ mp_clear(&goodBase);
+ /* Don't mp_clear mmm.N because it is merely a copy of modulus.
+ ** Just zap it.
+ */
+ memset(&mmm, 0, sizeof mmm);
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c
new file mode 100644
index 0000000000..b757150e79
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.c
@@ -0,0 +1,610 @@
+/*
+ * mpprime.c
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
+
+#define RANDOM() rand()
+
+#include "primes.c" /* pull in the prime digit table */
+
+/*
+ Test if any of a given vector of digits divides a. If not, MP_NO
+ is returned; otherwise, MP_YES is returned and 'which' is set to
+ the index of the integer in the vector which divided a.
+ */
+mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
+
+/* {{{ mpp_divis(a, b) */
+
+/*
+  mpp_divis(a, b)
+
+  Divisibility test for two mp_ints: computes a mod b into a temporary
+  and reports MP_YES when the remainder is zero, MP_NO when it is not.
+  An error from mp_init() or mp_mod() is returned unchanged.
+ */
+
+mp_err
+mpp_divis(mp_int *a, mp_int *b)
+{
+    mp_int remainder;
+    mp_err status = mp_init(&remainder);
+
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    status = mp_mod(a, b, &remainder);
+    if (status == MP_OKAY) {
+        status = (mp_cmp_z(&remainder) == 0) ? MP_YES : MP_NO;
+    }
+
+    /* the temporary is released on both the success and the error path */
+    mp_clear(&remainder);
+    return status;
+
+} /* end mpp_divis() */
+
+/* }}} */
+
+/* {{{ mpp_divis_d(a, d) */
+
+/*
+  mpp_divis_d(a, d)
+
+  Single-digit divisibility test: MP_YES if a mod d is zero, MP_NO
+  otherwise.  A divisor of zero is answered with MP_NO without
+  attempting the division; an error from mp_mod_d() is passed through.
+ */
+
+mp_err
+mpp_divis_d(mp_int *a, mp_digit d)
+{
+    mp_digit remainder;
+    mp_err status;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* never divide by zero; report "not divisible" instead */
+    if (d == 0) {
+        return MP_NO;
+    }
+
+    status = mp_mod_d(a, d, &remainder);
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    return (remainder == 0) ? MP_YES : MP_NO;
+
+} /* end mpp_divis_d() */
+
+/* }}} */
+
+/* {{{ mpp_random(a) */
+
+/*
+  mpp_random(a)
+
+  Overwrites every digit a currently uses with bytes drawn from
+  RANDOM(), which is the C library's rand().  That source is fine for
+  the statistical needs of primality testing but must not be used for
+  cryptographic purposes.
+
+  The number of used digits is left as it is; only their contents
+  change.  Always returns MP_OKAY once the argument check has passed.
+ */
+
+mp_err
+mpp_random(mp_int *a)
+
+{
+    mp_digit acc = 0;
+    unsigned int digit_idx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (digit_idx = 0; digit_idx < USED(a); ++digit_idx) {
+        unsigned int bytes_left = sizeof(mp_digit);
+
+        /* shift in one random byte per byte of the digit; the bytes of
+         * the previous digit are pushed out in the process */
+        while (bytes_left-- > 0) {
+            acc = (acc << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+        }
+        DIGIT(a, digit_idx) = acc;
+    }
+
+    return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+static mpp_random_fn mpp_random_insecure = &mpp_random;
+
+/* {{{ mpp_random_size(a, prec) */
+
+/*
+  mpp_random_size(a, prec)
+
+  Pads a to prec digits with s_mp_pad(), then fills it through the
+  file-level mpp_random_insecure function pointer.  Returns the padding
+  error if padding fails, otherwise the status of the fill.
+ */
+mp_err
+mpp_random_size(mp_int *a, mp_size prec)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+    status = s_mp_pad(a, prec);
+    if (status == MP_OKAY) {
+        status = (*mpp_random_insecure)(a);
+    }
+
+    return status;
+
+} /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+  mpp_divis_vector(a, vec, size, which)
+
+  Argument-checked front end for s_mpp_divp(): tests a against the
+  'size' digits in vec.  Returns MP_YES with the index of the first
+  dividing digit stored through 'which', or MP_NO when none divides a.
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+
+    status = s_mpp_divp(a, vec, size, which);
+    return status;
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+  mpp_divis_primes(a, np)
+
+  Test whether a is divisible by any of the first '*np' primes of
+  prime_tab.  A count larger than prime_tab_size is clamped to the
+  table size.  If a divisor is found, returns MP_YES and sets *np to
+  the value of the prime that divided a.  If not, returns MP_NO and
+  leaves *np untouched.  Errors from the underlying division are
+  returned unchanged.
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+    int size, which;
+    mp_err res;
+
+    ARGCHK(a != NULL && np != NULL, MP_BADARG);
+
+    /* Clamp while still in the mp_digit domain.  Narrowing *np to int
+     * first would turn a count above INT_MAX into an
+     * implementation-defined (possibly negative) value instead of
+     * "use the whole table". */
+    if (*np > (mp_digit)prime_tab_size)
+        size = prime_tab_size;
+    else
+        size = (int)*np;
+
+    res = mpp_divis_vector(a, prime_tab, size, &which);
+    if (res == MP_YES)
+        *np = prime_tab[which];
+
+    return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+  mpp_fermat(a, w)
+
+  Fermat pseudo-primality check with witness w.  If a is prime and
+  (w, a) = 1 then w^a == w (mod a), so the function computes
+  w^a (mod a) and compares it with w: MP_YES when they are equal,
+  MP_NO when they differ.  Errors from mp_init() or mp_exptmod() are
+  returned unchanged.
+ */
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+    mp_int witness, power;
+    mp_err status;
+
+    status = mp_init(&witness);
+    if (status != MP_OKAY) {
+        return status;
+    }
+    mp_set(&witness, w);
+
+    status = mp_init(&power);
+    if (status == MP_OKAY) {
+        /* power = witness^a (mod a) */
+        status = mp_exptmod(&witness, a, a, &power);
+        if (status == MP_OKAY) {
+            status = (mp_cmp(&witness, &power) == 0) ? MP_YES : MP_NO;
+        }
+        mp_clear(&power);
+    }
+
+    mp_clear(&witness);
+    return status;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+  Run mpp_fermat() on a with each of the nPrimes witnesses in 'primes',
+  in order, stopping at the first result that is not MP_YES.
+  Returns: MP_YES if every witness passes (also when the list is empty).
+           MP_NO if a Fermat test reveals a is composite.
+           Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+    mp_err status = MP_YES;
+    mp_size idx;
+
+    for (idx = 0; idx < nPrimes && status == MP_YES; ++idx) {
+        status = mpp_fermat(a, primes[idx]);
+    }
+
+    return status;
+}
+
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+  mpp_pprime(a, nt)
+
+  Runs nt iterations of the Miller-Rabin probabilistic primality test
+  on a by delegating to mpp_pprime_ext_random() with the file-level
+  mpp_random_insecure generator.  MP_NO means a is definitely
+  composite; MP_YES means a is probably prime, which is not a guarantee.
+ */
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+    mp_err status = mpp_pprime_ext_random(a, nt, mpp_random_insecure);
+
+    return status;
+}
+/*
+ * mpp_pprime_ext_random(a, nt, random)
+ *
+ * Miller-Rabin test body behind mpp_pprime(): runs up to nt rounds on a,
+ * drawing each witness from the caller-supplied 'random' function.
+ * a - 1 is written as m * 2**b; for each witness x the function computes
+ * z = x**m mod a.  A round passes if z is 1 or a - 1, or if one of up to
+ * b - 1 further squarings of z reaches a - 1 before reaching 1.
+ *
+ * Returns MP_NO as soon as a round fails, or immediately when a - 1 has no
+ * trailing zero bits; MP_YES if every round passes; or the error code of
+ * the first MP_CHECKOK step that fails.
+ * NOTE(review): with nt <= 0 no round runs and res keeps the status of the
+ * last MP_CHECKOK call -- presumably MP_OKAY; confirm this is intended.
+ */
+mp_err
+mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random)
+{
+ mp_err res;
+ mp_int x, amo, m, z; /* "amo" = "a minus one" */
+ int iter;
+ unsigned int jx;
+ mp_size b;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ MP_DIGITS(&x) = 0; /* null digits so CLEANUP may mp_clear() temporaries that were never initialized */
+ MP_DIGITS(&amo) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&z) = 0;
+
+ /* Initialize temporaries... */
+ MP_CHECKOK(mp_init(&amo));
+ /* Compute amo = a - 1 for what follows... */
+ MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+ b = mp_trailing_zeros(&amo); /* a - 1 == m * 2**b */
+ if (!b) { /* a was even ? */
+ res = MP_NO;
+ goto CLEANUP;
+ }
+
+ MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+ MP_CHECKOK(mp_init(&z));
+ MP_CHECKOK(mp_init(&m));
+ MP_CHECKOK(mp_div_2d(&amo, b, &m, 0)); /* m = (a - 1) / 2**b */
+
+ /* Do the test nt times... */
+ for (iter = 0; iter < nt; iter++) {
+
+ /* Choose a random value for 1 < x < a */
+ MP_CHECKOK(s_mp_pad(&x, USED(a))); /* give x as many digits as a before it is filled */
+ MP_CHECKOK((*random)(&x));
+ MP_CHECKOK(mp_mod(&x, a, &x));
+ if (mp_cmp_d(&x, 1) <= 0) {
+ iter--; /* don't count this iteration */
+ continue; /* choose a new x */
+ }
+
+ /* Compute z = (x ** m) mod a */
+ MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+ if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ continue;
+ }
+
+ res = MP_NO; /* just in case the following for loop never executes. */
+ for (jx = 1; jx < b; jx++) {
+ /* z = z^2 (mod a) */
+ MP_CHECKOK(mp_sqrmod(&z, a, &z));
+ res = MP_NO; /* previous line set res to MP_YES */
+
+ if (mp_cmp_d(&z, 1) == 0) {
+ break; /* reached 1 without passing through a - 1: this round fails */
+ }
+ if (mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ break;
+ }
+ } /* end testing loop */
+
+ /* If the test passes, we will continue iterating, but a failed
+ test means the candidate is definitely NOT prime, so we will
+ immediately break out of this loop
+ */
+ if (res == MP_NO)
+ break;
+
+ } /* end iterations loop */
+
+CLEANUP:
+ mp_clear(&m);
+ mp_clear(&z);
+ mp_clear(&x);
+ mp_clear(&amo);
+ return res;
+
+} /* end mpp_pprime_ext_random() */
+
+/* }}} */
+
+/* Mark composites in a window of odd candidates starting at trial.
+** trial must be odd and the list of primes must not include 2.
+** sieve is cleared first and must have room for nSieve entries; it should
+** have dimension >= MAXPRIME/2, where MAXPRIME is the largest prime in the
+** list.  On return, a non-zero sieve[i] means (trial + 2*i) is composite.
+** Every prime costs one single-digit division of trial and removes one or
+** more candidates (3 alone removes a third of them), while every surviving
+** candidate costs at least one modular exponentiation later on, so the
+** divisions are cheap by comparison.
+** Returns MP_OKAY, or the error from mp_mod_d() if a division fails.
+*/
+mp_err
+mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+          unsigned char *sieve, mp_size nSieve)
+{
+    mp_size p_idx;
+
+    memset(sieve, 0, nSieve);
+
+    for (p_idx = 0; p_idx < nPrimes; ++p_idx) {
+        const mp_digit p = primes[p_idx];
+        mp_digit residue;
+        unsigned long first_hit;
+        mp_size pos;
+        mp_err status = mp_mod_d(trial, p, &residue);
+
+        if (status != MP_OKAY) {
+            return status;
+        }
+
+        /* distance from trial to the next multiple of p */
+        first_hit = (residue == 0) ? 0 : (p - residue);
+
+        /* walk the multiples of p; only even distances from the odd
+         * trial value correspond to sieve slots */
+        pos = first_hit;
+        while (pos < nSieve * 2) {
+            if ((pos & 1) == 0) {
+                sieve[pos >> 1] = 1;
+            }
+            pos += p;
+        }
+    }
+
+    return MP_OKAY;
+}
+
+/* Number of sieve entries (one byte each) used by the prime search.
+ * Parenthesized so the macro stays a single value in any expression. */
+#define SIEVE_SIZE (32 * 1024)
+
+/*
+  mpp_make_prime(start, nBits, strong)
+
+  Convenience wrapper around mpp_make_prime_ext_random() that supplies
+  the file-level mpp_random_insecure generator for the Miller-Rabin
+  witnesses.  Returns whatever mpp_make_prime_ext_random() returns.
+ */
+mp_err
+mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong)
+{
+    return mpp_make_prime_ext_random(start, nBits, strong, mpp_random_insecure);
+}
+
+/*
+  mpp_make_prime_ext_random(start, nBits, strong, random)
+
+  Searches for a probable prime of nBits bits at or above 'start'.
+
+  start is modified in place before the search: bit nBits-1 and bit 0
+  are set and every bit at or above nBits is cleared (for a strong
+  prime nBits is first reduced by one).  The candidates start + 2*i,
+  0 <= i < SIEVE_SIZE, are sieved against prime_tab (skipping 2), then
+  each survivor gets a base-2 Fermat test followed by num_tests rounds
+  of Miller-Rabin using 'random' for the witnesses.  When 'strong' is
+  non-zero, q = 2p + 1 must pass the same tests (plus trial division by
+  the prime table), and q is the value returned.
+
+  Returns MP_YES with the prime exchanged into 'start', MP_NO if no
+  candidate in the sieve window qualifies, MP_MEM if the sieve cannot
+  be allocated, or another MP error code from a failing step.
+  nBits must be greater than 16.
+ */
+mp_err
+mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random)
+{
+    mp_digit np;
+    mp_err res;
+    unsigned int i = 0;
+    mp_int trial;
+    mp_int q;
+    mp_size num_tests;
+    unsigned char *sieve;
+
+    ARGCHK(start != 0, MP_BADARG);
+    ARGCHK(nBits > 16, MP_RANGE);
+
+    sieve = malloc(SIEVE_SIZE);
+    /* Check the allocation explicitly.  ARGCHK is an argument-validation
+     * macro whose behaviour is tied to the MP_ARGCHK build setting, so it
+     * must not be the only guard between a failed malloc and the memset
+     * performed by mpp_sieve().  Nothing else has been acquired yet, so
+     * returning directly is safe. */
+    if (sieve == NULL)
+        return MP_MEM;
+
+    MP_DIGITS(&trial) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_CHECKOK(mp_init(&trial));
+    MP_CHECKOK(mp_init(&q));
+    /* values originally taken from table 4.4,
+     * HandBook of Applied Cryptography, augmented by FIPS-186
+     * requirements, Table C.2 and C.3 */
+    if (nBits >= 2000) {
+        num_tests = 3;
+    } else if (nBits >= 1536) {
+        num_tests = 4;
+    } else if (nBits >= 1024) {
+        num_tests = 5;
+    } else if (nBits >= 550) {
+        num_tests = 6;
+    } else if (nBits >= 450) {
+        num_tests = 7;
+    } else if (nBits >= 400) {
+        num_tests = 8;
+    } else if (nBits >= 350) {
+        num_tests = 9;
+    } else if (nBits >= 300) {
+        num_tests = 10;
+    } else if (nBits >= 250) {
+        num_tests = 20;
+    } else if (nBits >= 200) {
+        num_tests = 41;
+    } else if (nBits >= 100) {
+        num_tests = 38; /* funny anomaly in the FIPS tables, for aux primes, the
+                         * required more iterations for larger aux primes */
+    } else
+        num_tests = 50;
+
+    /* a strong prime q = 2p + 1 is one bit longer than p */
+    if (strong)
+        --nBits;
+    /* force the top bit (exact length) and the low bit (odd), and clear
+     * anything above the requested length */
+    MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+    MP_CHECKOK(mpl_set_bit(start, 0, 1));
+    for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+        MP_CHECKOK(mpl_set_bit(start, i, 0));
+    }
+    /* start sieveing with prime value of 3. */
+    MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+                         sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+    res = 0;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (!sieve[i])
+            ++res;
+    }
+    fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+    res = MP_NO;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (sieve[i]) /* this number is composite */
+            continue;
+        MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+        FPUTC('.', stderr);
+        /* run a Fermat test */
+        res = mpp_fermat(&trial, 2);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        FPUTC('+', stderr);
+        /* If that passed, run some Miller-Rabin tests */
+        res = mpp_pprime_ext_random(&trial, num_tests, random);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+        FPUTC('!', stderr);
+
+        if (!strong)
+            break; /* success !! */
+
+        /* At this point, we have strong evidence that our candidate
+           is itself prime.  If we want a strong prime, we need now
+           to test q = 2p + 1 for primality...
+         */
+        MP_CHECKOK(mp_mul_2(&trial, &q));
+        MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+        /* Test q for small prime divisors ... */
+        np = prime_tab_size;
+        res = mpp_divis_primes(&q, &np);
+        if (res == MP_YES) { /* is composite */
+            mp_clear(&q);
+            continue;
+        }
+        if (res != MP_NO)
+            goto CLEANUP;
+
+        /* And test with Fermat, as with its parent ... */
+        res = mpp_fermat(&q, 2);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* And test with Miller-Rabin, as with its parent ... */
+        res = mpp_pprime_ext_random(&q, num_tests, random);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* If it passed, we've got a winner */
+        mp_exch(&q, &trial);
+        mp_clear(&q);
+        break;
+
+    } /* end of loop through sieved values */
+    if (res == MP_YES)
+        mp_exch(&trial, start);
+CLEANUP:
+    mp_clear(&trial);
+    mp_clear(&q);
+    if (sieve != NULL) {
+        memset(sieve, 0, SIEVE_SIZE);
+        free(sieve);
+    }
+    return res;
+}
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+  Trial division of a by each of the 'size' digits in vec, in order.
+  Stops at the first digit that divides a: returns MP_YES and, when
+  'which' is not NULL, stores that digit's index through it.  Returns
+  MP_NO if no digit divides a, or the error from mp_mod_d() if a
+  division fails.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    int idx;
+
+    for (idx = 0; idx < size; ++idx) {
+        mp_digit remainder;
+        mp_err status = mp_mod_d(a, vec[idx], &remainder);
+
+        if (status != MP_OKAY) {
+            return status;
+        }
+        if (remainder != 0) {
+            continue;
+        }
+
+        /* found a divisor; the index is optional for the caller */
+        if (which != NULL) {
+            *which = idx;
+        }
+        return MP_YES;
+    }
+
+    return MP_NO;
+
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 0000000000..0bdc6598ce
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,48 @@
+/*
+ * mpprime.h
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MP_PRIME_
+#define _H_MP_PRIME_
+
+#include "mpi.h"
+
+SEC_BEGIN_PROTOS
+
+extern const int prime_tab_size; /* number of primes available */
+extern const mp_digit prime_tab[];
+
+/* Tests for divisibility: return MP_YES if a is divisible by b (or d),
+ * MP_NO if it is not, or an error code from the underlying division. */
+mp_err mpp_divis(mp_int *a, mp_int *b);
+mp_err mpp_divis_d(mp_int *a, mp_digit d);
+
+/* Random selection. mpp_random() fills the digits a already has using the C
+ * library rand(), so it must not be used for cryptographic purposes;
+ * mpp_random_size() first pads a to prec digits and then fills it. */
+mp_err mpp_random(mp_int *a);
+mp_err mpp_random_size(mp_int *a, mp_size prec);
+
+/* Type for a pointer to a user-provided mpp_random implementation: it is
+ * handed the mp_int to fill and returns an mp_err status. */
+typedef mp_err (*mpp_random_fn)(mp_int *);
+
+/* Pseudo-primality testing. mpp_pprime() and mpp_make_prime() take their
+ * random values from the rand()-based mpp_random(); use the *_ext_random
+ * variants below to supply a different generator. */
+mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
+mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
+mp_err mpp_fermat(mp_int *a, mp_digit w);
+mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
+mp_err mpp_pprime(mp_int *a, int nt);
+mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+ unsigned char *sieve, mp_size nSieve);
+mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong);
+
+/* Pseudo-primality tests using a user-provided mpp_random implementation */
+mp_err mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random);
+mp_err mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random);
+
+SEC_END_PROTOS
+
+#endif /* end _H_MP_PRIME_ */
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c
new file mode 100644
index 0000000000..423311b65b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparc.c
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "vis_proto.h"
+
+/***************************************************************/
+
+typedef int t_s32;
+typedef unsigned int t_u32;
+#if defined(__sparcv9)
+typedef long t_s64;
+typedef unsigned long t_u64;
+#else
+typedef long long t_s64;
+typedef unsigned long long t_u64;
+#endif
+typedef double t_d64;
+
+/***************************************************************
+ * d64_2_i32 lets one 64-bit double (d64) be read back as two
+ * 32-bit signed integers (i32s.i0, i32s.i1).
+ ***************************************************************/
+
+typedef union {
+ t_d64 d64;
+ struct {
+ t_s32 i0;
+ t_s32 i1;
+ } i32s;
+} d64_2_i32;
+
+/***************************************************************
+ * BUFF_SIZE sizes the product buffer in the general (n != 8 and
+ * n != 16) paths of mul_add().
+ * A_BITS is the split point for the multiplier a: values below
+ * 1 << A_BITS use one product per word, larger values are split
+ * into the low A_BITS bits (a & A_MASK) and the remaining high
+ * bits (a >> A_BITS).
+ ***************************************************************/
+
+#define BUFF_SIZE 256
+
+#define A_BITS 19
+#define A_MASK ((1 << A_BITS) - 1)
+
+/***************************************************************
+ * mask_cnst has 0x80000000 in both 32-bit halves; it is loaded
+ * as a double by DEF_VARS and passed to vis_fxnor() by the
+ * MUL_* macros.
+ ***************************************************************/
+
+static t_u64 mask_cnst[] = {
+ 0x8000000080000000ull
+};
+
+/***************************************************************
+ * DEF_VARS(N) declares the locals shared by every mul_add()
+ * path: py (y viewed as an array of doubles), mask, ca
+ * (2**31 - 1 as a double), da (a as a double), the product
+ * buffer buff[N], the running sum s and the scratch union dy.
+ * It relies on y and a being in scope at the point of use.
+ ***************************************************************/
+
+#define DEF_VARS(N) \
+ t_d64 *py = (t_d64 *)y; \
+ t_d64 mask = *((t_d64 *)mask_cnst); \
+ t_d64 ca = (1u << 31) - 1; \
+ t_d64 da = (t_d64)a; \
+ t_s64 buff[N], s; \
+ d64_2_i32 dy
+
+/***************************************************************
+ * MUL_U32_S64_2(i) handles the two 32-bit words held in py[i]:
+ * each half of vis_fxnor(mask, py[i]) is subtracted from ca and
+ * multiplied by da, giving buff[2i] and buff[2i + 1].
+ * NOTE(review): presumably the fxnor/ca step recovers each
+ * unsigned word of y as a double (assuming vis_fxnor is a
+ * bitwise XNOR) -- confirm against the VIS documentation.
+ * MUL_U32_S64_2_D(i) is the split-multiplier form: each word is
+ * multiplied by both da and db, filling buff[4i] .. buff[4i + 3].
+ * It needs d0, d1 and db declared by the caller.
+ ***************************************************************/
+
+#define MUL_U32_S64_2(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \
+ buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da
+
+#define MUL_U32_S64_2_D(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ d0 = ca - (t_d64)dy.i32s.i0; \
+ d1 = ca - (t_d64)dy.i32s.i1; \
+ buff[4 * (i)] = (t_s64)(d0 * da); \
+ buff[4 * (i) + 1] = (t_s64)(d0 * db); \
+ buff[4 * (i) + 2] = (t_s64)(d1 * da); \
+ buff[4 * (i) + 3] = (t_s64)(d1 * db)
+
+/***************************************************************
+ * ADD_S64_U32(i) adds product i, x[i] and the carry c, stores
+ * the sum into z[i] and keeps s >> 32 as the next carry.
+ * ADD_S64_U32_D(i) first recombines the two partial products of
+ * word i (the second one shifted left by A_BITS), adds x[i] and
+ * the carry uc, stores the sum into z[i] and keeps the upper 32
+ * bits of the unsigned sum as the next carry.
+ ***************************************************************/
+
+#define ADD_S64_U32(i) \
+ s = buff[i] + x[i] + c; \
+ z[i] = s; \
+ c = (s >> 32)
+
+#define ADD_S64_U32_D(i) \
+ s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \
+ z[i] = s; \
+ uc = ((t_u64)s >> 32)
+
+/***************************************************************
+ * Unrolled multiply helpers: four consecutive py[] entries,
+ * i.e. eight 32-bit words of y, per expansion.
+ ***************************************************************/
+
+#define MUL_U32_S64_8(i) \
+ MUL_U32_S64_2(i); \
+ MUL_U32_S64_2(i + 1); \
+ MUL_U32_S64_2(i + 2); \
+ MUL_U32_S64_2(i + 3)
+
+#define MUL_U32_S64_D_8(i) \
+ MUL_U32_S64_2_D(i); \
+ MUL_U32_S64_2_D(i + 1); \
+ MUL_U32_S64_2_D(i + 2); \
+ MUL_U32_S64_2_D(i + 3)
+
+/***************************************************************
+ * Unrolled accumulate helpers: eight consecutive words of z per
+ * expansion.
+ ***************************************************************/
+
+#define ADD_S64_U32_8(i) \
+ ADD_S64_U32(i); \
+ ADD_S64_U32(i + 1); \
+ ADD_S64_U32(i + 2); \
+ ADD_S64_U32(i + 3); \
+ ADD_S64_U32(i + 4); \
+ ADD_S64_U32(i + 5); \
+ ADD_S64_U32(i + 6); \
+ ADD_S64_U32(i + 7)
+
+#define ADD_S64_U32_D_8(i) \
+ ADD_S64_U32_D(i); \
+ ADD_S64_U32_D(i + 1); \
+ ADD_S64_U32_D(i + 2); \
+ ADD_S64_U32_D(i + 3); \
+ ADD_S64_U32_D(i + 4); \
+ ADD_S64_U32_D(i + 5); \
+ ADD_S64_U32_D(i + 6); \
+ ADD_S64_U32_D(i + 7)
+
+/***************************************************************
+ * mul_add(z, x, y, n, a)
+ *
+ * For each i < n, adds the buffered product for word i (built
+ * from y and a by the MUL_* macros) to x[i] and the running
+ * carry, stores the low 32 bits in z[i], and returns the carry
+ * left after the last word.
+ *
+ * Two strategies are selected by the size of a: below
+ * 1 << A_BITS one product per word is used; otherwise a is
+ * split into low and high parts and two products per word are
+ * recombined.  n == 8 and n == 16 are fully unrolled; every
+ * other n goes through the loops.
+ * NOTE(review): the general paths use fixed-size buffers
+ * (BUFF_SIZE / 2 * BUFF_SIZE entries) with no check on n --
+ * presumably callers keep n within BUFF_SIZE; confirm.
+ ***************************************************************/
+
+t_u32
+mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+ if (a < (1 << A_BITS)) { /* small multiplier: one product per word */
+
+ if (n == 8) {
+ DEF_VARS(8);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ ADD_S64_U32_8(0);
+
+ return c;
+
+ } else if (n == 16) {
+ DEF_VARS(16);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ MUL_U32_S64_8(4);
+ ADD_S64_U32_8(0);
+ ADD_S64_U32_8(8);
+
+ return c;
+
+ } else {
+ DEF_VARS(BUFF_SIZE);
+ t_s32 i, c = 0;
+
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) { /* two words of y per iteration; an odd n rounds up, so one word past y[n-1] is read */
+ MUL_U32_S64_2(i);
+ }
+
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32(i);
+ }
+
+ return c;
+ }
+ } else { /* large multiplier: split a and use two products per word */
+
+ if (n == 8) {
+ DEF_VARS(2 * 8);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK); /* low A_BITS bits of a */
+ db = (t_d64)(a >> A_BITS); /* remaining high bits of a */
+
+ MUL_U32_S64_D_8(0);
+ ADD_S64_U32_D_8(0);
+
+ return uc;
+
+ } else if (n == 16) {
+ DEF_VARS(2 * 16);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ MUL_U32_S64_D_8(0);
+ MUL_U32_S64_D_8(4);
+ ADD_S64_U32_D_8(0);
+ ADD_S64_U32_D_8(8);
+
+ return uc;
+
+ } else {
+ DEF_VARS(2 * BUFF_SIZE);
+ t_d64 d0, d1, db;
+ t_u32 i, uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) { /* four buffer entries per iteration (two words, two products each) */
+ MUL_U32_S64_2_D(i);
+ }
+
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32_D(i);
+ }
+
+ return uc;
+ }
+ }
+}
+
+/***************************************************************/
+
+/* In-place form of mul_add(): x is passed as both the destination and the
+ * addend, and the carry returned by mul_add() is handed back unchanged. */
+t_u32
+mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+    t_u32 *const dst = x;
+    const t_u32 carry_out = mul_add(dst, x, y, n, a);
+
+    return carry_out;
+}
+
+/***************************************************************/
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
new file mode 100644
index 0000000000..66122a1d9d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
@@ -0,0 +1,1607 @@
+! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs.
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 14 ( 0 0) */ .align 8
+!
+! SUBROUTINE .L_const_seg_900000106
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */ /* two 8-byte constants emitted in .text ahead of mul_add, which loads them with ldd at offsets 0 and 8 */
+/* 000000 19 ( 0 0) */ .word 1127219200,0 /* words 0x43300000, 0x00000000; read high word first as an IEEE-754 double this is 2^52 */
+/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304 /* words 0x41DFFFFF, 0xFFC00000; read high word first as an IEEE-754 double this is 2147483647.0 (2^31 - 1) */
+/* 0x0010 21 ( 0 0) */ .align 16
+/* 0x0010 27 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1
+/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3
+/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1
+/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84]
+/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3
+/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3
+/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2
+/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2
+/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4
+/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0
+/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5
+/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3
+/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0
+/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0
+/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0068 ( 7 8) */ subcc %o2,8,%g0
+/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! tprob=0.50
+/* 0x0070 ( 8 12) */ ldd [%o0],%f8
+/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4
+/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6
+/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10
+/* 0x0080 198 (11 14) */ fmovs %f8,%f12
+/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4
+/* 0x0088 200 (12 14) */ ld [%fp+84],%f13
+/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14
+/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6
+/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16
+/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10
+/* 0x009c 208 (15 17) */ ld [%i1],%g2
+/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8
+/* 0x00a4 210 (16 21) */ fitod %f4,%f18
+/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3
+/* 0x00ac 212 (17 22) */ fitod %f5,%f4
+/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4
+/* 0x00b4 214 (18 23) */ fitod %f6,%f20
+/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5
+/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0
+/* 0x00c0 217 (19 24) */ fitod %f7,%f6
+/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1
+/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16
+/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12
+/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2
+/* 0x00d4 222 (22 27) */ fsubd %f14,%f4,%f4
+/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3
+/* 0x00dc 224 (23 28) */ fitod %f10,%f18
+/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20
+/* 0x00e4 226 (25 30) */ fitod %f11,%f10
+/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6
+/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12
+/* 0x00f0 229 (27 32) */ fitod %f16,%f22
+/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4
+/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18
+/* 0x00fc 232 (29 34) */ fitod %f17,%f16
+/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20
+/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10
+/* 0x0108 235 (31 36) */ fdtox %f12,%f12
+/* 0x010c 236 (31 32) */ std %f12,[%sp+152]
+/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6
+/* 0x0114 238 (32 37) */ fdtox %f4,%f4
+/* 0x0118 239 (32 33) */ std %f4,[%sp+144]
+/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4
+/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12
+/* 0x0124 242 (34 39) */ fdtox %f20,%f18
+/* 0x0128 243 (34 35) */ std %f18,[%sp+136]
+/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4
+/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14
+/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10
+/* 0x0138 247 (36 41) */ fdtox %f6,%f6
+/* 0x013c 248 (36 37) */ std %f6,[%sp+128]
+/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5
+/* 0x0144 250 (37 38) */ add %o4,%g2,%o4
+/* 0x0148 251 (38 39) */ st %o4,[%i0]
+/* 0x014c 252 (38 39) */ srax %o4,32,%g2
+/* 0x0150 253 (38 43) */ fdtox %f12,%f6
+/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4
+/* 0x0158 255 (39 40) */ std %f6,[%sp+120]
+/* 0x015c 256 (39 40) */ add %o5,%g3,%g3
+/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7
+/* 0x0164 258 (40 41) */ add %g3,%g2,%g2
+/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6
+/* 0x016c 260 (40 45) */ fdtox %f10,%f8
+/* 0x0170 261 (41 42) */ std %f8,[%sp+112]
+/* 0x0174 262 (41 42) */ srax %g2,32,%o5
+/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3
+/* 0x017c 264 (42 43) */ add %o7,%g4,%g4
+/* 0x0180 265 (43 44) */ st %g2,[%i0+4]
+/* 0x0184 266 (43 44) */ add %g4,%o5,%g4
+/* 0x0188 267 (43 48) */ fdtox %f4,%f4
+/* 0x018c 268 (44 46) */ ldx [%sp+120],%o5
+/* 0x0190 269 (44 45) */ add %g3,%g5,%g3
+/* 0x0194 270 (44 45) */ srax %g4,32,%g5
+/* 0x0198 271 (45 46) */ std %f4,[%sp+104]
+/* 0x019c 272 (45 46) */ add %g3,%g5,%g3
+/* 0x01a0 273 (45 50) */ fdtox %f6,%f4
+/* 0x01a4 274 (46 47) */ std %f4,[%sp+96]
+/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0
+/* 0x01ac 276 (46 47) */ srax %g3,32,%o5
+/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5
+/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0
+/* 0x01b8 279 (48 49) */ st %g4,[%i0+8]
+/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5
+/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1
+/* 0x01c4 282 (49 50) */ srax %o0,32,%g5
+/* 0x01c8 283 (50 51) */ st %o0,[%i0+16]
+/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1
+/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5
+/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2
+/* 0x01d8 287 (51 52) */ srax %o1,32,%o5
+/* 0x01dc 288 (52 53) */ st %o1,[%i0+20]
+/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2
+/* 0x01e4 290 (53 54) */ st %o2,[%i0+24]
+/* 0x01e8 291 (53 54) */ srax %o2,32,%g4
+/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2
+/* 0x01f0 293 (54 55) */ st %g3,[%i0+12]
+/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2
+/* 0x01f8 295 (55 56) */ st %g2,[%i0+28]
+/* 0x01fc 299 (55 56) */ srax %g2,32,%o7
+/* 0x0200 300 (56 57) */ or %g0,%o7,%i0
+/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (59 61) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000037
+!
+
+ .L77000037: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! tprob=0.50
+/* 0x0214 ( 1 5) */ ldd [%o0],%f8
+/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4
+/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6
+/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14
+/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12
+/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13
+/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4
+/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6
+/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16
+/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20
+/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8
+/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7
+/* 0x0248 332 ( 9 14) */ fitod %f4,%f18
+/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22
+/* 0x0250 334 (10 15) */ fitod %f5,%f4
+/* 0x0254 335 (11 12) */ stx %o7,[%sp+96]
+/* 0x0258 336 (11 16) */ fitod %f6,%f24
+/* 0x025c 337 (12 14) */ ld [%i1+44],%o7
+/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10
+/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26
+/* 0x0268 340 (13 18) */ fitod %f7,%f6
+/* 0x026c 341 (14 15) */ stx %o7,[%sp+104]
+/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18
+/* 0x0274 343 (15 17) */ ld [%i1+48],%o7
+/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4
+/* 0x027c 345 (16 18) */ ld [%i1+36],%o5
+/* 0x0280 346 (16 21) */ fitod %f10,%f28
+/* 0x0284 347 (17 18) */ stx %o7,[%sp+112]
+/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16
+/* 0x028c 349 (18 20) */ ld [%i1],%g2
+/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24
+/* 0x0294 351 (19 20) */ stx %o5,[%sp+120]
+/* 0x0298 352 (19 24) */ fitod %f11,%f10
+/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18
+/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5
+/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6
+/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4
+/* 0x02ac 357 (21 26) */ fitod %f16,%f30
+/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20
+/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3
+/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2
+/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28
+/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24
+/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128]
+/* 0x02c8 364 (24 29) */ fdtox %f18,%f18
+/* 0x02cc 365 (25 26) */ std %f18,[%sp+272]
+/* 0x02d0 366 (25 30) */ fitod %f17,%f16
+/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6
+/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10
+/* 0x02dc 369 (27 32) */ fitod %f20,%f18
+/* 0x02e0 370 (28 33) */ fdtox %f4,%f4
+/* 0x02e4 371 (28 29) */ std %f4,[%sp+264]
+/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28
+/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4
+/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4
+/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22
+/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5
+/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0
+/* 0x0300 378 (31 36) */ fitod %f21,%f20
+/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10
+/* 0x0308 380 (32 34) */ ld [%i1+20],%o1
+/* 0x030c 381 (32 37) */ fdtox %f24,%f24
+/* 0x0310 382 (33 34) */ std %f24,[%sp+256]
+/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16
+/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7
+/* 0x031c 385 (34 39) */ fdtox %f6,%f6
+/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4
+/* 0x0324 387 (35 36) */ std %f6,[%sp+248]
+/* 0x0328 388 (35 40) */ fitod %f22,%f24
+/* 0x032c 389 (36 38) */ ld [%i1+32],%o4
+/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6
+/* 0x0334 391 (36 37) */ add %o7,%g2,%g2
+/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7
+/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26
+/* 0x0340 394 (37 38) */ srax %g2,32,%o5
+/* 0x0344 395 (38 39) */ st %g2,[%i0]
+/* 0x0348 396 (38 43) */ fitod %f23,%f18
+/* 0x034c 397 (38 43) */ fmuld %f16,%f8,%f16
+/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2
+/* 0x0354 399 (39 44) */ fdtox %f28,%f22
+/* 0x0358 400 (39 40) */ add %o7,%g3,%g3
+/* 0x035c 401 (40 42) */ ldx [%sp+256],%o7
+/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20
+/* 0x0364 403 (40 41) */ add %g3,%o5,%g3
+/* 0x0368 404 (41 42) */ std %f22,[%sp+240]
+/* 0x036c 405 (41 46) */ fitod %f26,%f22
+/* 0x0370 406 (41 42) */ srax %g3,32,%o5
+/* 0x0374 407 (41 42) */ add %g2,%g5,%g2
+/* 0x0378 408 (42 43) */ st %g3,[%i0+4]
+/* 0x037c 409 (42 47) */ fdtox %f10,%f10
+/* 0x0380 410 (42 43) */ add %o7,%g4,%g4
+/* 0x0384 411 (42 47) */ fmuld %f6,%f8,%f6
+/* 0x0388 412 (43 44) */ std %f10,[%sp+232]
+/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12
+/* 0x0390 414 (43 44) */ add %g4,%o5,%g4
+/* 0x0394 415 (44 45) */ st %g4,[%i0+8]
+/* 0x0398 416 (44 45) */ srax %g4,32,%o5
+/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10
+/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7
+/* 0x03a4 419 (45 50) */ fdtox %f4,%f4
+/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2
+/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20
+/* 0x03b0 422 (46 47) */ std %f4,[%sp+224]
+/* 0x03b4 423 (46 47) */ srax %g2,32,%g5
+/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4
+/* 0x03bc 425 (47 48) */ st %g2,[%i0+12]
+/* 0x03c0 426 (47 52) */ fitod %f27,%f24
+/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3
+/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5
+/* 0x03cc 429 (48 53) */ fdtox %f16,%f16
+/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2
+/* 0x03d4 431 (49 50) */ std %f16,[%sp+216]
+/* 0x03d8 432 (49 50) */ srax %g2,32,%g4
+/* 0x03dc 433 (49 54) */ fitod %f12,%f18
+/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10
+/* 0x03e4 435 (50 51) */ st %g2,[%i0+16]
+/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16
+/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2
+/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2
+/* 0x03f4 439 (51 56) */ fitod %f13,%f12
+/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2
+/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22
+/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3
+/* 0x0404 443 (52 53) */ srax %g2,32,%g4
+/* 0x0408 444 (52 57) */ fdtox %f6,%f6
+/* 0x040c 445 (53 54) */ std %f6,[%sp+208]
+/* 0x0410 446 (53 58) */ fdtox %f20,%f6
+/* 0x0414 447 (54 55) */ stx %o4,[%sp+136]
+/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4
+/* 0x041c 449 (55 56) */ std %f6,[%sp+200]
+/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6
+/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16
+/* 0x0428 452 (56 57) */ st %g2,[%i0+20]
+/* 0x042c 453 (56 57) */ add %g3,%o2,%g2
+/* 0x0430 454 (56 61) */ fdtox %f10,%f10
+/* 0x0434 455 (57 59) */ ld [%i1+28],%o3
+/* 0x0438 456 (57 58) */ add %g2,%g4,%g2
+/* 0x043c 457 (58 60) */ ldx [%sp+216],%g5
+/* 0x0440 458 (58 59) */ srax %g2,32,%g4
+/* 0x0444 459 (59 60) */ std %f10,[%sp+192]
+/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10
+/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4
+/* 0x0450 462 (60 61) */ st %g2,[%i0+24]
+/* 0x0454 463 (60 61) */ add %g5,%o3,%g2
+/* 0x0458 464 (60 65) */ fdtox %f22,%f12
+/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6
+/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0
+/* 0x0464 467 (61 62) */ add %g2,%g4,%g2
+/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3
+/* 0x046c 469 (62 63) */ srax %g2,32,%g4
+/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1
+/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5
+/* 0x0478 472 (64 65) */ add %g3,%o0,%g3
+/* 0x047c 473 (64 69) */ fdtox %f4,%f4
+/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8
+/* 0x0484 475 (65 66) */ std %f12,[%sp+184]
+/* 0x0488 476 (65 66) */ add %g3,%g4,%g3
+/* 0x048c 477 (65 70) */ fdtox %f16,%f12
+/* 0x0490 478 (66 67) */ std %f12,[%sp+176]
+/* 0x0494 479 (66 67) */ srax %g3,32,%o0
+/* 0x0498 480 (66 67) */ add %g5,%o1,%g5
+/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2
+/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5
+/* 0x04a4 483 (68 70) */ ldx [%sp+96],%g4
+/* 0x04a8 484 (68 69) */ srax %g5,32,%o1
+/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4
+/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0
+/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4
+/* 0x04b8 488 (71 72) */ std %f4,[%sp+168]
+/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4
+/* 0x04c0 490 (71 76) */ fdtox %f6,%f4
+/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3
+/* 0x04c8 492 (72 73) */ srax %g4,32,%o2
+/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1
+/* 0x04d0 494 (74 75) */ std %f4,[%sp+160]
+/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0
+/* 0x04d8 496 (74 79) */ fdtox %f8,%f4
+/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5
+/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0
+/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144]
+/* 0x04e8 500 (77 78) */ st %g2,[%i0+28]
+/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2
+/* 0x04f0 502 (77 78) */ srax %o0,32,%o1
+/* 0x04f4 503 (78 79) */ std %f4,[%sp+152]
+/* 0x04f8 504 (78 79) */ add %g2,%o1,%o1
+/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7
+/* 0x0500 506 (79 80) */ srax %o1,32,%o3
+/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2
+/* 0x0508 508 (81 83) */ ld [%i1+60],%o4
+/* 0x050c 509 (82 83) */ add %o7,%o2,%o2
+/* 0x0510 510 (83 84) */ add %o2,%o3,%o2
+/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5
+/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2
+/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3
+/* 0x0520 514 (86 87) */ st %g3,[%i0+32]
+/* 0x0524 515 (86 87) */ add %g2,%o5,%g2
+/* 0x0528 516 (86 87) */ srax %o2,32,%o5
+/* 0x052c 517 (87 88) */ st %g5,[%i0+36]
+/* 0x0530 518 (87 88) */ add %g2,%o5,%g2
+/* 0x0534 519 (87 88) */ add %o3,%o4,%g3
+/* 0x0538 520 (88 89) */ st %o0,[%i0+44]
+/* 0x053c 521 (88 89) */ srax %g2,32,%g5
+/* 0x0540 522 (89 90) */ st %o1,[%i0+48]
+/* 0x0544 523 (89 90) */ add %g3,%g5,%g3
+/* 0x0548 524 (90 91) */ st %o2,[%i0+52]
+/* 0x054c 528 (90 91) */ srax %g3,32,%o7
+/* 0x0550 529 (91 92) */ st %g4,[%i0+40]
+/* 0x0554 530 (92 93) */ st %g2,[%i0+56]
+/* 0x0558 531 (93 94) */ st %g3,[%i0+60]
+/* 0x055c 532 (93 94) */ or %g0,%o7,%i0
+/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0564 (96 98) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000076
+!
+
+ .L77000076: /* frequency 1.0 confidence 0.0 */
+/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6
+/* 0x056c 546 ( 0 1) */ add %o2,1,%g2
+/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14
+/* 0x0574 548 ( 0 1) */ or %g0,0,%o7
+/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9
+/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3
+/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5
+/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18
+/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4
+/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8
+/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1
+/* 0x0598 557 ( 3 4) */ or %g0,0,%g2
+/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0
+/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2
+/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0
+/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16
+/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50
+/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0
+/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0
+/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x05bc ( 7 8) */ sub %o1,2,%o1
+/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2
+/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4
+/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2
+/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0
+/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2
+/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12
+/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6
+/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10
+/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8
+/* 0x05e4 579 (15 20) */ fitod %f3,%f0
+/* 0x05e8 580 (16 21) */ fitod %f2,%f4
+/* 0x05ec 581 (17 22) */ fitod %f7,%f2
+/* 0x05f0 582 (18 23) */ fitod %f6,%f6
+/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0
+/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4
+
+!
+! ENTRY .L900000149
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22
+/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4
+/* 0x0604 588 ( 0 1) */ add %g2,2,%g2
+/* 0x0608 589 ( 0 1) */ add %o4,32,%o4
+/* 0x060c 590 ( 1 6) */ fitod %f9,%f24
+/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20
+/* 0x0614 592 ( 1 2) */ add %o0,8,%o0
+/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0
+/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12
+/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0
+/* 0x0624 596 ( 2 3) */ add %o5,32,%o5
+/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2
+/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4
+/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6
+/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32]
+/* 0x0638 601 ( 7 12) */ fitod %f8,%f4
+/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32]
+/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8
+/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6
+/* 0x0648 605 ( 9 14) */ fitod %f23,%f2
+/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20
+/* 0x0650 607 ( 9 10) */ add %o0,8,%o0
+/* 0x0654 608 (10 14) */ ldd [%o0],%f10
+/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0
+/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4
+/* 0x0660 611 (13 18) */ fdtox %f6,%f6
+/* 0x0664 612 (14 19) */ fdtox %f20,%f20
+/* 0x0668 613 (14 15) */ std %f6,[%o5-16]
+/* 0x066c 614 (15 20) */ fitod %f22,%f6
+/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50
+/* 0x0674 (15 16) */ std %f20,[%o4-16]
+
+!
+! ENTRY .L900000152
+!
+
+ .L900000152: /* frequency 1.0 confidence 0.0 */
+/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12
+/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22
+/* 0x0680 620 ( 0 1) */ add %o5,80,%o5
+/* 0x0684 621 ( 0 1) */ add %o4,80,%o4
+/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0
+/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24
+/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0694 625 ( 1 2) */ add %o0,8,%g5
+/* 0x0698 626 ( 2 7) */ fitod %f8,%f20
+/* 0x069c 627 ( 3 8) */ fitod %f9,%f8
+/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6
+/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26
+/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4
+/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12
+/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2
+/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0
+/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10
+/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20
+/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8
+/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6
+/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12
+/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4
+/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2
+/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0
+/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10
+/* 0x06dc 643 (16 21) */ fdtox %f24,%f24
+/* 0x06e0 644 (16 17) */ std %f24,[%o5-80]
+/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8
+/* 0x06e8 646 (17 22) */ fdtox %f22,%f22
+/* 0x06ec 647 (17 18) */ std %f22,[%o4-80]
+/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6
+/* 0x06f4 649 (18 23) */ fdtox %f20,%f20
+/* 0x06f8 650 (18 19) */ std %f20,[%o5-64]
+/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4
+/* 0x0700 652 (19 24) */ fdtox %f12,%f12
+/* 0x0704 653 (19 20) */ std %f12,[%o4-64]
+/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2
+/* 0x070c 655 (20 25) */ fdtox %f10,%f10
+/* 0x0710 656 (20 21) */ std %f10,[%o5-48]
+/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0
+/* 0x0718 658 (21 26) */ fdtox %f8,%f8
+/* 0x071c 659 (21 22) */ std %f8,[%o4-48]
+/* 0x0720 660 (22 27) */ fdtox %f6,%f6
+/* 0x0724 661 (22 23) */ std %f6,[%o5-32]
+/* 0x0728 662 (23 28) */ fdtox %f4,%f4
+/* 0x072c 663 (23 24) */ std %f4,[%o4-32]
+/* 0x0730 664 (24 29) */ fdtox %f2,%f2
+/* 0x0734 665 (24 25) */ std %f2,[%o5-16]
+/* 0x0738 666 (25 30) */ fdtox %f0,%f0
+/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0740 (25 26) */ std %f0,[%o4-16]
+
+!
+! ENTRY .L77000077
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0
+
+!
+! ENTRY .L900000160
+!
+
+ .L900000160: /* frequency 1.0 confidence 0.0 */
+/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0
+/* 0x074c 673 ( 0 1) */ add %g2,1,%g2
+/* 0x0750 674 ( 0 1) */ add %g5,8,%g5
+/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0758 676 ( 4 9) */ fitod %f0,%f2
+/* 0x075c 677 ( 5 10) */ fitod %f1,%f0
+/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2
+/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0
+/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2
+/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0
+/* 0x0770 682 (19 24) */ fdtox %f2,%f2
+/* 0x0774 683 (19 20) */ std %f2,[%o5]
+/* 0x0778 684 (19 20) */ add %o5,16,%o5
+/* 0x077c 685 (20 25) */ fdtox %f0,%f0
+/* 0x0780 686 (20 21) */ std %f0,[%o4]
+/* 0x0784 687 (20 21) */ add %o4,16,%o4
+/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50
+/* 0x078c (23 27) */ ldd [%g5],%f0
+
+!
+! ENTRY .L77000043
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0
+
+!
+! ENTRY .L900000161
+!
+
+ .L900000161: /* frequency 1.0 confidence 0.0 */
+/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! tprob=0.50
+/* 0x0798 ( 0 1) */ or %g0,%o7,%i0
+/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2
+/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3
+/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5
+/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4
+/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5
+/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2
+/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0
+/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4
+/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0
+/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1
+/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2
+/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4
+/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3
+/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3
+/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5
+/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0
+/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1
+/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4]
+/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000145
+!
+
+ .L900000145: /* frequency 1.0 confidence 0.0 */
+/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7
+/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2
+/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3
+/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0
+/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2
+/* 0x0800 729 ( 1 2) */ add %g4,3,%g4
+/* 0x0804 730 ( 2 3) */ st %o2,[%g2]
+/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3
+/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0
+/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2
+/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96]
+/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1
+/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2
+/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1
+/* 0x0824 738 ( 5 6) */ add %g2,12,%g2
+/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8]
+/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7
+/* 0x0830 741 ( 6 7) */ add %g3,12,%g3
+/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3
+/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1
+/* 0x083c 744 (10 11) */ add %o0,%o1,%o0
+/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1
+/* 0x0844 746 (11 12) */ add %o0,%o7,%o0
+/* 0x0848 747 (11 12) */ add %g5,24,%g5
+/* 0x084c 748 (11 12) */ st %o0,[%g2-4]
+/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50
+/* 0x0854 (12 13) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000148
+!
+
+ .L900000148: /* frequency 1.0 confidence 0.0 */
+/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2
+/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3
+/* 0x0860 754 ( 0 2) */ ld [%g3],%o0
+/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3
+/* 0x0868 756 ( 1 2) */ add %g2,8,%g2
+/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2
+/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8]
+/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0
+/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0
+/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4]
+/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0
+/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0888 ( 4 5) */ srax %o0,32,%o7
+/* 0x088c 765 ( 4 5) */ add %g3,4,%g3
+
+!
+! ENTRY .L77000078
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0890 767 ( 0 2) */ ld [%g3],%o2
+
+!
+! ENTRY .L900000158
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0
+/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1
+/* 0x089c 771 ( 0 1) */ add %g4,1,%g4
+/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3
+/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5
+/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0
+/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0
+/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0
+/* 0x08b4 777 ( 3 4) */ st %o0,[%g2]
+/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2
+/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7
+/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50
+/* 0x08c4 ( 4 6) */ ld [%g3],%o2
+
+!
+! ENTRY .L77000047
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0
+/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4
+/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3
+/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240]
+/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2
+/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8
+/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244]
+/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4
+/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6
+/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18
+/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12
+/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16
+/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20
+/* 0x0910 819 ( 7 12) */ fitod %f4,%f14
+/* 0x0914 823 ( 8 10) */ ld [%i1],%g2
+/* 0x0918 824 ( 8 13) */ fitod %f5,%f4
+/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13
+/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6
+/* 0x0924 827 (10 12) */ ld [%sp+244],%f17
+/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10
+/* 0x092c 829 (11 13) */ ld [%i1+28],%o3
+/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20
+/* 0x0934 831 (12 14) */ ld [%i1+4],%g3
+/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12
+/* 0x093c 833 (13 14) */ stx %o3,[%sp+96]
+/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14
+/* 0x0944 835 (14 16) */ ld [%i1+8],%g4
+/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8
+/* 0x094c 837 (15 17) */ ld [%i1+12],%g5
+/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4
+/* 0x0954 839 (16 18) */ ld [%i1+16],%o0
+/* 0x0958 840 (16 21) */ fitod %f6,%f22
+/* 0x095c 841 (17 19) */ ld [%i1+20],%o1
+/* 0x0960 842 (17 22) */ fitod %f7,%f6
+/* 0x0964 843 (18 20) */ ld [%i1+24],%o2
+/* 0x0968 844 (18 23) */ fitod %f10,%f16
+/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24
+/* 0x0970 846 (19 24) */ fitod %f20,%f28
+/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14
+/* 0x0978 848 (20 25) */ fitod %f11,%f10
+/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26
+/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22
+/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4
+/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6
+/* 0x098c 853 (23 28) */ fdtox %f24,%f24
+/* 0x0990 854 (23 24) */ std %f24,[%sp+224]
+/* 0x0994 855 (24 29) */ fdtox %f14,%f14
+/* 0x0998 856 (24 25) */ std %f14,[%sp+232]
+/* 0x099c 857 (25 30) */ fdtox %f26,%f14
+/* 0x09a0 858 (25 26) */ std %f14,[%sp+208]
+/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4
+/* 0x09a8 860 (26 31) */ fitod %f21,%f20
+/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30
+/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5
+/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16
+/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22
+/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4
+/* 0x09c0 866 (28 33) */ fdtox %f4,%f4
+/* 0x09c4 867 (28 29) */ std %f4,[%sp+216]
+/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24
+/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26
+/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4
+/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6
+/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10
+/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2
+/* 0x09e0 874 (30 31) */ st %g2,[%i0]
+/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7
+/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5
+/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18
+/* 0x09f0 878 (32 37) */ fdtox %f30,%f28
+/* 0x09f4 879 (32 33) */ std %f28,[%sp+192]
+/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14
+/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4
+/* 0x0a00 882 (33 35) */ ldx [%sp+216],%o7
+/* 0x0a04 883 (33 38) */ fdtox %f22,%f20
+/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16
+/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200]
+/* 0x0a10 886 (34 39) */ fdtox %f24,%f20
+/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22
+/* 0x0a18 888 (35 36) */ std %f20,[%sp+176]
+/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4
+/* 0x0a20 890 (35 40) */ fdtox %f6,%f6
+/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4
+/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3
+/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3
+/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10
+/* 0x0a34 895 (37 38) */ std %f6,[%sp+184]
+/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3
+/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6
+/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20
+/* 0x0a44 899 (38 40) */ ldx [%sp+200],%o4
+/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3
+/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5
+/* 0x0a50 902 (38 43) */ fdtox %f16,%f14
+/* 0x0a54 903 (39 40) */ std %f6,[%sp+160]
+/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12
+/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7
+/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3
+/* 0x0a64 907 (40 45) */ fdtox %f4,%f16
+/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18
+/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168]
+/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4
+/* 0x0a74 911 (41 46) */ fdtox %f10,%f4
+/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3
+/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4
+/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4
+/* 0x0a84 915 (42 47) */ fdtox %f22,%f14
+/* 0x0a88 916 (43 44) */ std %f16,[%sp+144]
+/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5
+/* 0x0a90 918 (43 48) */ fdtox %f20,%f6
+/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7
+/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3
+/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16
+/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152]
+/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5
+/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8
+/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3
+/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4
+/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5
+/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128]
+/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5
+/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136]
+/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3
+/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112]
+/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0
+/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7
+/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0
+/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3
+/* 0x0adc 937 (52 53) */ std %f8,[%sp+120]
+/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4
+/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7
+/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104]
+/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3
+/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5
+/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1
+/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0
+/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1
+/* 0x0b00 946 (56 57) */ st %g3,[%i0+4]
+/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3
+/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4
+/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3
+/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0
+/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2
+/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3
+/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20]
+/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2
+/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2
+/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4
+/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3
+/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2
+/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0
+/* 0x0b38 960 (62 63) */ st %o2,[%i0+24]
+/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2
+/* 0x0b40 962 (63 64) */ st %o0,[%i0+16]
+/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2
+/* 0x0b48 964 (64 65) */ st %g4,[%i0+8]
+/* 0x0b4c 968 (64 65) */ srlx %g2,32,%o7
+/* 0x0b50 969 (65 66) */ st %g5,[%i0+12]
+/* 0x0b54 970 (66 67) */ st %g2,[%i0+28]
+/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0
+/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0b60 (69 71) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4
+/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6
+/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2
+/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3
+/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8
+/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4
+/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356]
+/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20
+/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6
+/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352]
+/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8
+/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30
+/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22
+/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10
+/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24
+/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12
+/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4
+/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14
+/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26
+/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16
+/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25
+/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28
+/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27
+/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32
+/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22
+/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18
+/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24
+/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20
+/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4
+/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26
+/* 0x0be8 1024 (19 24) */ fitod %f7,%f6
+/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28
+/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32
+/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22
+/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10
+/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44
+/* 0x0c00 1030 (22 27) */ fitod %f9,%f8
+/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4
+/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12
+/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6
+/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46
+/* 0x0c14 1035 (25 30) */ fitod %f10,%f34
+/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22
+/* 0x0c1c 1037 (26 27) */ std %f22,[%sp+336]
+/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22
+/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26
+/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328]
+/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4
+/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320]
+/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26
+/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8
+/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4
+/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312]
+/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4
+/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32
+/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344]
+/* 0x0c50 1050 (32 37) */ fitod %f11,%f10
+/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32
+/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34
+/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6
+/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304]
+/* 0x0c64 1058 (35 40) */ fitod %f12,%f36
+/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6
+/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22
+/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296]
+/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22
+/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4
+/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280]
+/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8
+/* 0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10
+/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4
+/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12
+/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36
+/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6
+/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272]
+/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14
+/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6
+/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22
+/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264]
+/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8
+/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256]
+/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22
+/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4
+/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248]
+/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8
+/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4
+/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10
+/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38
+/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6
+/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240]
+/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12
+/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232]
+/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6
+/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8
+/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224]
+/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22
+/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216]
+/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8
+/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14
+/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4
+/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22
+/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16
+/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6
+/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208]
+/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6
+/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200]
+/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10
+/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40
+/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6
+/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4
+/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192]
+/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16
+/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4
+/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18
+/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32
+/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288]
+/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8
+/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6
+/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184]
+/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22
+/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6
+/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4
+/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176]
+/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10
+/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4
+/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168]
+/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4
+/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160]
+/* 0x0d70 1125 (69 74) */ fitod %f18,%f42
+/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4
+/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22
+/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6
+/* 0x0d80 1129 (72 77) */ fmuld %f10,%f20,%f8
+/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4
+/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152]
+/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4
+/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6
+/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136]
+/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22
+/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144]
+/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22
+/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128]
+/* 0x0da8 1139 (79 84) */ fitod %f19,%f22
+/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6
+/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4
+/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22
+/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6
+/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120]
+/* 0x0dc0 1145 (86 91) */ fdtox %f4,%f4
+/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112]
+/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2
+/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3
+/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4
+/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2
+/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4
+/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5
+/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2
+/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6
+/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3
+/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4
+/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0
+/* 0x0df4 1161 (93 94) */ st %g4,[%i0]
+/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2
+/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4
+/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3
+/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2
+/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4
+/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5
+/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6
+/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1
+/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2
+/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3
+/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2
+/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2
+/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3
+/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4
+/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0
+/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5
+/* 0x0e38 1178 (100 101) */ st %g2,[%i0+4]
+/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2
+/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3
+/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2
+/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4
+/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3
+/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0
+/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1
+/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4
+/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5
+/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8]
+/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3
+/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4
+/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0
+/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2
+/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4
+/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5
+/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1
+/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2
+/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2
+/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0
+/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3
+/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2
+/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5
+/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1
+/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12]
+/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4
+/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3
+/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0
+/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4
+/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3
+/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2
+/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1
+/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5
+/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4
+/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16]
+/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2
+/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4
+/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0
+/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1
+/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2
+/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2
+/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5
+/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2
+/* 0x0ee8 1225 (118 119) */ st %g3,[%i0+20]
+/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3
+/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2
+/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0
+/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1
+/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3
+/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2
+/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5
+/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3
+/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24]
+/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4
+/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3
+/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0
+/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1
+/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4
+/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2
+/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5
+/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4
+/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28]
+/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2
+/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4
+/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0
+/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1
+/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2
+/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2
+/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5
+/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2
+/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32]
+/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3
+/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2
+/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0
+/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1
+/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3
+/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2
+/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5
+/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3
+/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36]
+/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4
+/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3
+/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0
+/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1
+/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4
+/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2
+/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5
+/* 0x0f98 1269 (137 138) */ add %g4,%o0,%g4
+/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96]
+/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4
+/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40]
+/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2
+/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1
+/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0
+/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2
+/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104]
+/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5
+/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2
+/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44]
+/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2
+/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2
+/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1
+/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0
+/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48]
+/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3
+/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2
+/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3
+/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5
+/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3
+/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0
+/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4
+/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3
+/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52]
+/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1
+/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4
+/* 0x1008 1297 (151 152) */ st %g3,[%i0+56]
+/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2
+/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2
+/* 0x1014 1300 (152 153) */ st %g2,[%i0+60]
+/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7
+
+!
+! ENTRY .L77000061
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0
+
+!
+! ENTRY .L900000159
+!
+
+ .L900000159: /* frequency 1.0 confidence 0.0 */
+/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+/* 0x11a8 1441 ( 0 0) */ .type mul_add,2
+/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x11a8 1445 ( 0 0) */ .align 16
+/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */ ! leaf wrapper: tail-calls mul_add with incoming arg0 passed as both arg0 and arg1
+/* 0x11b0 1453 ( 0 1) */ or %g0,%o2,%g1 ! g1 = incoming arg2 (temporary copy)
+/* 0x11b4 1454 ( 0 1) */ or %g0,%o3,%o4 ! outgoing arg4 = incoming arg3
+/* 0x11b8 1455 ( 1 2) */ or %g0,%o0,%g3 ! g3 = incoming arg0 (temporary copy)
+/* 0x11bc 1456 ( 1 2) */ or %g0,%o1,%g2 ! g2 = incoming arg1 (temporary copy)
+/* 0x11c0 1466 ( 2 3) */ or %g0,%g1,%o3 ! outgoing arg3 = incoming arg2
+/* 0x11c4 1467 ( 2 3) */ or %g0,%g3,%o1 ! outgoing arg1 = incoming arg0 (%o0 is left untouched, so arg0 == arg1)
+/* 0x11c8 1468 ( 3 4) */ or %g0,%g2,%o2 ! outgoing arg2 = incoming arg1
+/* 0x11cc 1469 ( 3 4) */ or %g0,%o7,%g1 ! save our caller's return address; the call below overwrites %o7
+/* 0x11d0 1470 ( 4 6) */ call mul_add ! params = ! Result =
+/* 0x11d4 ( 5 6) */ or %g0,%g1,%o7 ! delay slot: put the saved return address back so mul_add returns straight to our caller
+/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2 ! symbol type code 2 - presumably "function"; confirm against the assembler manual
+/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) ! size = bytes from the entry label to this point
+
+ .section ".data",#alloc,#write
+/* 0x11d8 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */ ! 8-byte constant: two 32-bit words with only the top bit set
+/* 0x11d8 8 ( 0 0) */ .word -2147483648 ! 0x80000000
+/* 0x11dc 9 ( 0 0) */ .word -2147483648 ! 0x80000000
+/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object ! marked as a data object
+/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8 ! two 4-byte words
+
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
new file mode 100644
index 0000000000..e2fbe0bd00
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
@@ -0,0 +1,1645 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .register %g2,#scratch ! declare %g2 as a scratch register for the assembler
+/* 000000 ( 0 0) */ .register %g3,#scratch ! declare %g3 as a scratch register for the assembler
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 15 ( 0 0) */ .align 8
+! 16-byte constant table (two 8-byte entries) placed in .text ahead of mul_add; it is data, not code, despite the generated tag below.
+! SUBROUTINE .L_const_seg_900000101
+! mul_add looks up this label's address and reads the two entries with ldd at offsets 0 and 8.
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 */
+/* 000000 20 ( 0 0) */ .word 1127219200,0 ! 0x43300000,0x00000000 -- as one big-endian IEEE-754 double: 2^52
+/* 0x0008 21 ( 0 0) */ .word 1105199103,-4194304 ! 0x41DFFFFF,0xFFC00000 -- as one big-endian IEEE-754 double: 2147483647.0 (2^31-1)
+/* 0x0010 22 ( 0 0) */ .align 8
+/* 0x0010 28 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1
+/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1
+/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000149
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3
+/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3
+/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1
+/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4
+/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2
+/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2
+/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0
+/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0
+/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7
+/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5
+/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0
+/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0060 ( 7 8) */ subcc %i3,8,%g0
+/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! tprob=0.50
+/* 0x0068 ( 8 9) */ subcc %i3,16,%g0
+/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4
+/* 0x0070 195 (10 11) */ st %i4,[%sp+2287]
+/* 0x0074 196 (11 14) */ ldd [%o0],%f8
+/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4
+/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10
+/* 0x0080 199 (13 16) */ fitod %f4,%f12
+/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14
+/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7
+/* 0x008c 202 (14 17) */ fitod %f5,%f4
+/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10
+/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16
+/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18
+/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4
+/* 0x00a0 210 (17 20) */ ld [%i1],%g2
+/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16
+/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3
+/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4
+/* 0x00b0 214 (20 23) */ fitod %f16,%f20
+/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0
+/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5
+/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1
+/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2
+/* 0x00c4 219 (24 25) */ fmovs %f8,%f6
+/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3
+/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6
+/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8
+/* 0x00d4 223 (28 31) */ fitod %f10,%f12
+/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4
+/* 0x00dc 225 (29 32) */ fitod %f11,%f10
+/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8
+/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12
+/* 0x00e8 228 (32 35) */ fdtox %f4,%f4
+/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271]
+/* 0x00f0 230 (33 36) */ fdtox %f8,%f8
+/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279]
+/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12
+/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10
+/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4
+/* 0x0104 235 (36 39) */ fitod %f17,%f8
+/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16
+/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4
+/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10
+/* 0x0114 239 (38 41) */ fdtox %f12,%f12
+/* 0x0118 240 (38 39) */ std %f12,[%sp+2263]
+/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4
+/* 0x0120 242 (39 42) */ fitod %f16,%f18
+/* 0x0124 243 (39 40) */ add %o4,%g2,%g2
+/* 0x0128 244 (39 40) */ st %g2,[%i0]
+/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4
+/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8
+/* 0x0134 247 (40 41) */ srax %g2,32,%o5
+/* 0x0138 248 (41 44) */ fdtox %f10,%f10
+/* 0x013c 249 (41 42) */ std %f10,[%sp+2255]
+/* 0x0140 250 (42 45) */ fdtox %f4,%f4
+/* 0x0144 251 (42 43) */ std %f4,[%sp+2247]
+/* 0x0148 252 (42 43) */ add %o4,%g3,%o4
+/* 0x014c 253 (43 46) */ fitod %f17,%f12
+/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2
+/* 0x0154 255 (43 44) */ add %o4,%o5,%g3
+/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8
+/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10
+/* 0x0160 258 (44 45) */ st %g3,[%i0+4]
+/* 0x0164 259 (44 45) */ srax %g3,32,%g3
+/* 0x0168 260 (45 46) */ add %g2,%g4,%g4
+/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2
+/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4
+/* 0x0174 263 (46 47) */ add %g4,%g3,%g3
+/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4
+/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10
+/* 0x0180 266 (47 50) */ fdtox %f8,%f8
+/* 0x0184 267 (47 48) */ std %f8,[%sp+2239]
+/* 0x0188 268 (48 49) */ add %g4,%o0,%g4
+/* 0x018c 269 (48 49) */ add %g2,%g5,%g2
+/* 0x0190 270 (48 49) */ st %g3,[%i0+8]
+/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4
+/* 0x0198 272 (49 50) */ srax %g3,32,%o0
+/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5
+/* 0x01a0 274 (50 53) */ fdtox %f10,%f6
+/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231]
+/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2
+/* 0x01ac 277 (51 52) */ srax %g2,32,%g3
+/* 0x01b0 278 (51 52) */ add %g5,%o1,%o1
+/* 0x01b4 279 (51 52) */ st %g2,[%i0+12]
+/* 0x01b8 280 (52 55) */ fdtox %f4,%f4
+/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223]
+/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3
+/* 0x01c4 283 (53 54) */ srax %g3,32,%g4
+/* 0x01c8 284 (53 54) */ st %g3,[%i0+16]
+/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0
+/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4
+/* 0x01d4 287 (55 56) */ srax %g4,32,%g2
+/* 0x01d8 288 (55 57) */ ldx [%sp+2223],%g5
+/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2
+/* 0x01e0 290 (56 57) */ st %g4,[%i0+20]
+/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2
+/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5
+/* 0x01ec 293 (57 58) */ st %g2,[%i0+24]
+/* 0x01f0 294 (58 59) */ srax %g2,32,%g3
+/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2
+/* 0x01f8 296 (59 60) */ st %g2,[%i0+28]
+/* 0x01fc 300 (60 61) */ srax %g2,32,%o3
+/* 0x0200 301 (61 62) */ srl %o3,0,%i0
+/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (64 65) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000158
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50
+/* 0x0210 ( 0 1) */ st %i4,[%sp+2223]
+/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4
+/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351]
+/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8
+/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10
+/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14
+/* 0x022c 321 ( 5 8) */ fitod %f4,%f12
+/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7
+/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10
+/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16
+/* 0x023c 325 ( 7 10) */ fitod %f5,%f4
+/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18
+/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20
+/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16
+/* 0x024c 335 (10 13) */ ld [%i1],%g2
+/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4
+/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22
+/* 0x0258 338 (11 14) */ fitod %f16,%f28
+/* 0x025c 339 (12 15) */ ld [%i1+4],%g3
+/* 0x0260 340 (13 16) */ ld [%i1+8],%g4
+/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22
+/* 0x0268 342 (14 17) */ ld [%i1+12],%g5
+/* 0x026c 343 (15 18) */ ld [%i1+16],%o0
+/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24
+/* 0x0274 345 (17 20) */ ld [%i1+20],%o1
+/* 0x0278 346 (17 18) */ fmovs %f8,%f6
+/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26
+/* 0x0280 348 (19 22) */ ld [%i1+24],%o2
+/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6
+/* 0x0288 350 (20 23) */ ld [%i1+28],%o3
+/* 0x028c 351 (20 23) */ fsubd %f14,%f12,%f8
+/* 0x0290 355 (21 24) */ ld [%i1+32],%o4
+/* 0x0294 356 (21 24) */ fitod %f10,%f12
+/* 0x0298 357 (22 25) */ ld [%i1+36],%o7
+/* 0x029c 358 (22 25) */ fitod %f11,%f10
+/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4
+/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1
+/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8
+/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5
+/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12
+/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10
+/* 0x02b8 365 (26 29) */ fdtox %f8,%f8
+/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343]
+/* 0x02c0 367 (27 30) */ fitod %f17,%f8
+/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12
+/* 0x02c8 369 (28 31) */ fdtox %f4,%f4
+/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335]
+/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10
+/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16
+/* 0x02d8 373 (30 33) */ fdtox %f12,%f12
+/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327]
+/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5
+/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8
+/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4
+/* 0x02ec 378 (33 36) */ fitod %f17,%f12
+/* 0x02f0 379 (33 34) */ add %o5,%g2,%g2
+/* 0x02f4 380 (33 34) */ st %g2,[%i0]
+/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5
+/* 0x02fc 382 (34 37) */ fitod %f16,%f18
+/* 0x0300 383 (34 35) */ srax %g2,32,%l0
+/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16
+/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20
+/* 0x030c 386 (36 39) */ fdtox %f10,%f10
+/* 0x0310 387 (36 37) */ std %f10,[%sp+2319]
+/* 0x0314 388 (36 37) */ add %o5,%g3,%g3
+/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4
+/* 0x031c 390 (37 40) */ fitod %f16,%f8
+/* 0x0320 391 (37 38) */ add %g3,%l0,%g3
+/* 0x0324 392 (37 38) */ st %g3,[%i0+4]
+/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5
+/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18
+/* 0x0330 395 (38 39) */ srax %g3,32,%l3
+/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2
+/* 0x0338 397 (39 42) */ fdtox %f4,%f4
+/* 0x033c 398 (40 41) */ std %f4,[%sp+2311]
+/* 0x0340 399 (40 43) */ fdtox %f20,%f20
+/* 0x0344 400 (40 41) */ add %o5,%g4,%g4
+/* 0x0348 401 (41 42) */ std %f20,[%sp+2303]
+/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4
+/* 0x0350 403 (41 42) */ add %g4,%l3,%g4
+/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18
+/* 0x0358 405 (42 43) */ st %g4,[%i0+8]
+/* 0x035c 406 (42 45) */ fitod %f17,%f16
+/* 0x0360 407 (42 43) */ srax %g4,32,%l4
+/* 0x0364 408 (43 46) */ ld [%i1+44],%l0
+/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20
+/* 0x036c 410 (43 44) */ add %l2,%g5,%l2
+/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5
+/* 0x0374 412 (44 47) */ fitod %f22,%f8
+/* 0x0378 413 (44 45) */ add %l2,%l4,%l2
+/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4
+/* 0x0380 415 (45 46) */ st %l2,[%i0+12]
+/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10
+/* 0x0388 417 (46 49) */ ld [%i1+52],%l3
+/* 0x038c 418 (46 49) */ fdtox %f18,%f18
+/* 0x0390 419 (46 47) */ add %g5,%o0,%l4
+/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12
+/* 0x0398 421 (47 48) */ std %f18,[%sp+2295]
+/* 0x039c 422 (47 48) */ srax %l2,32,%o0
+/* 0x03a0 423 (47 50) */ fitod %f23,%f16
+/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5
+/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8
+/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4
+/* 0x03b0 427 (49 50) */ st %l4,[%i0+16]
+/* 0x03b4 428 (49 50) */ srax %l4,32,%o0
+/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18
+/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5
+/* 0x03c0 431 (50 53) */ fdtox %f4,%f4
+/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287]
+/* 0x03c8 433 (51 54) */ fdtox %f12,%f12
+/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4
+/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279]
+/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12
+/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2
+/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8
+/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5
+/* 0x03e4 440 (53 56) */ fitod %f18,%f10
+/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2
+/* 0x03ec 442 (54 55) */ st %g2,[%i0+20]
+/* 0x03f0 443 (54 57) */ fitod %f19,%f16
+/* 0x03f4 444 (54 55) */ srax %g2,32,%o0
+/* 0x03f8 445 (55 58) */ fdtox %f8,%f8
+/* 0x03fc 446 (55 56) */ std %f8,[%sp+2263]
+/* 0x0400 447 (55 56) */ add %g5,%o2,%g3
+/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5
+/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10
+/* 0x040c 450 (56 57) */ add %g3,%o0,%g3
+/* 0x0410 451 (57 58) */ st %g3,[%i0+24]
+/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8
+/* 0x0418 453 (57 58) */ srax %g3,32,%o0
+/* 0x041c 454 (58 61) */ fdtox %f4,%f4
+/* 0x0420 455 (58 59) */ std %f4,[%sp+2271]
+/* 0x0424 456 (58 59) */ add %g5,%o3,%g4
+/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18
+/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4
+/* 0x0430 459 (59 60) */ add %g4,%o0,%g4
+/* 0x0434 460 (60 61) */ st %g4,[%i0+28]
+/* 0x0438 461 (60 63) */ fmuld %f10,%f6,%f10
+/* 0x043c 462 (60 61) */ srax %g4,32,%o0
+/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5
+/* 0x0444 464 (61 64) */ fitod %f18,%f12
+/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8
+/* 0x044c 466 (62 65) */ fdtox %f4,%f4
+/* 0x0450 467 (62 63) */ std %f4,[%sp+2255]
+/* 0x0454 468 (63 64) */ add %g5,%o4,%l2
+/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5
+/* 0x045c 470 (63 66) */ fdtox %f10,%f16
+/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4
+/* 0x0464 472 (64 65) */ std %f16,[%sp+2247]
+/* 0x0468 473 (64 65) */ add %l2,%o0,%l2
+/* 0x046c 474 (65 68) */ fdtox %f8,%f8
+/* 0x0470 475 (65 66) */ std %f8,[%sp+2239]
+/* 0x0474 476 (65 66) */ add %g5,%o7,%l4
+/* 0x0478 477 (66 69) */ fitod %f19,%f10
+/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5
+/* 0x0480 479 (66 67) */ srax %l2,32,%o0
+/* 0x0484 480 (67 68) */ add %l4,%o0,%l4
+/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4
+/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0
+/* 0x0490 483 (68 69) */ srax %l4,32,%o1
+/* 0x0494 484 (68 69) */ add %g5,%l1,%l1
+/* 0x0498 485 (68 69) */ st %l2,[%i0+32]
+/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8
+/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3
+/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1
+/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5
+/* 0x04ac 490 (70 71) */ srax %o1,32,%o2
+/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0
+/* 0x04b4 492 (71 74) */ fdtox %f4,%f4
+/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231]
+/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2
+/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3
+/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4
+/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5
+/* 0x04cc 498 (73 74) */ srax %o2,32,%o3
+/* 0x04d0 499 (73 74) */ st %l4,[%i0+36]
+/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2
+/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0
+/* 0x04dc 502 (75 76) */ srax %g2,32,%g3
+/* 0x04e0 503 (75 78) */ fdtox %f4,%f4
+/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223]
+/* 0x04e8 505 (76 77) */ st %o1,[%i0+40]
+/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3
+/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5
+/* 0x04f4 508 (77 78) */ st %o2,[%i0+44]
+/* 0x04f8 509 (77 78) */ srax %g3,32,%g4
+/* 0x04fc 510 (78 79) */ st %g2,[%i0+48]
+/* 0x0500 511 (78 79) */ add %g5,%g4,%g4
+/* 0x0504 512 (79 80) */ st %g3,[%i0+52]
+/* 0x0508 513 (79 80) */ srax %g4,32,%g5
+/* 0x050c 514 (80 83) */ ld [%i1+60],%g3
+/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2
+/* 0x0514 516 (82 83) */ st %g4,[%i0+56]
+/* 0x0518 517 (83 84) */ add %g2,%g3,%g2
+/* 0x051c 518 (84 85) */ add %g2,%g5,%g2
+/* 0x0520 519 (84 85) */ st %g2,[%i0+60]
+/* 0x0524 523 (85 86) */ srax %g2,32,%o3
+/* 0x0528 524 (86 87) */ srl %o3,0,%i0
+/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0530 (89 90) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000157
+!
+
+ .L900000157: /* frequency 1.0 confidence 0.0 */
+/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14
+/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8
+/* 0x053c 539 ( 0 1) */ add %i3,1,%g2
+/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7
+/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3
+/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4
+/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0550 547 ( 2 3) */ or %g0,0,%g5
+/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18
+/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6
+/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1
+/* 0x0560 551 ( 3 4) */ or %g0,0,%o0
+/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0
+/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3
+/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16
+/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! tprob=0.50
+/* 0x0574 ( 6 7) */ subcc %i3,0,%g0
+/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2
+/* 0x057c 558 ( 7 8) */ or %g0,0,%i0
+/* 0x0580 559 ( 7 8) */ or %g0,1,%g3
+/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0
+/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x058c ( 9 10) */ or %g0,0,%o1
+/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0
+/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3
+/* 0x0598 565 (10 13) */ ldd [%i2],%f2
+/* 0x059c 566 (10 11) */ or %g0,7,%o0
+/* 0x05a0 567 (10 11) */ or %g0,2,%i0
+/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8
+/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4
+/* 0x05ac 570 (11 12) */ or %g0,16,%o2
+/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2
+/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6
+/* 0x05b8 573 (12 13) */ or %g0,48,%o4
+/* 0x05bc 574 (13 16) */ fitod %f8,%f12
+/* 0x05c0 575 (13 14) */ or %g0,24,%o1
+/* 0x05c4 576 (13 14) */ or %g0,3,%g3
+/* 0x05c8 577 (14 17) */ fitod %f2,%f0
+/* 0x05cc 578 (15 18) */ fitod %f3,%f20
+/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2
+/* 0x05d4 580 (16 19) */ fitod %f9,%f10
+/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8
+/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0
+/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22
+/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20
+/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12
+/* 0x05ec 586 (20 23) */ fsubd %f18,%f10,%f10
+/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0
+/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4
+/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22
+/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6
+/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20
+/* 0x0604 592 (23 26) */ fdtox %f0,%f0
+/* 0x0608 593 (23 24) */ std %f0,[%fp-217]
+/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10
+/* 0x0610 595 (24 27) */ fdtox %f22,%f22
+/* 0x0614 596 (24 25) */ std %f22,[%fp-209]
+/* 0x0618 597 (25 28) */ fitod %f5,%f0
+/* 0x061c 598 (26 29) */ fdtox %f10,%f10
+/* 0x0620 599 (27 30) */ fdtox %f20,%f20
+/* 0x0624 600 (27 28) */ std %f20,[%fp-201]
+/* 0x0628 601 (28 31) */ fitod %f4,%f4
+/* 0x062c 602 (28 29) */ std %f10,[%fp-193]
+/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10
+/* 0x0634 604 (30 33) */ fitod %f7,%f2
+/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0
+/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4
+/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8
+
+!
+! ENTRY .L900000144
+!
+
+ .L900000144: /* frequency 1.0 confidence 0.0 */
+/* 0x0644 609 ( 0 3) */ fitod %f11,%f22
+/* 0x0648 610 ( 0 1) */ add %o0,3,%o0
+/* 0x064c 611 ( 0 1) */ add %g3,6,%g3
+/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0
+/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24
+/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0
+/* 0x065c 615 ( 1 2) */ add %i0,6,%i0
+/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x0664 617 ( 2 5) */ fitod %f6,%f4
+/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0
+/* 0x066c 619 ( 3 4) */ add %o4,8,%i1
+/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20
+/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6
+/* 0x0678 622 ( 4 5) */ add %o2,16,%o4
+/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4
+/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4]
+/* 0x0684 625 ( 5 6) */ add %o1,16,%o2
+/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6
+/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4]
+/* 0x0690 628 ( 7 10) */ fitod %f9,%f0
+/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2
+/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24
+/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12
+/* 0x06a0 632 ( 9 12) */ fitod %f10,%f4
+/* 0x06a4 633 (10 13) */ fdtox %f2,%f2
+/* 0x06a8 634 (10 11) */ add %i1,8,%o1
+/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22
+/* 0x06b0 636 (11 14) */ fdtox %f24,%f10
+/* 0x06b4 637 (11 12) */ add %o4,16,%i4
+/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4
+/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4]
+/* 0x06c0 640 (12 13) */ add %o2,16,%i1
+/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10
+/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4]
+/* 0x06cc 643 (14 17) */ fitod %f7,%f2
+/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12
+/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24
+/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0
+/* 0x06dc 647 (16 19) */ fitod %f8,%f4
+/* 0x06e0 648 (17 20) */ fdtox %f12,%f20
+/* 0x06e4 649 (17 18) */ add %o1,8,%o4
+/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12
+/* 0x06ec 651 (18 21) */ fdtox %f24,%f8
+/* 0x06f0 652 (18 19) */ add %i4,16,%o2
+/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4
+/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4]
+/* 0x06fc 655 (19 20) */ add %i1,16,%o1
+/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8
+/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50
+/* 0x0708 (20 21) */ std %f20,[%o1+%g4]
+
+!
+! ENTRY .L900000147
+!
+
+ .L900000147: /* frequency 1.0 confidence 0.0 */
+/* 0x070c 660 ( 0 3) */ fitod %f6,%f6
+/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24
+/* 0x0714 662 ( 0 1) */ add %i4,32,%l4
+/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22
+/* 0x0720 665 ( 1 2) */ add %i1,32,%l3
+/* 0x0724 666 ( 2 5) */ fitod %f10,%f28
+/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2
+/* 0x072c 668 ( 2 3) */ add %i4,48,%l2
+/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4
+/* 0x0734 670 ( 3 4) */ add %i1,48,%l1
+/* 0x0738 671 ( 3 4) */ add %i4,64,%l0
+/* 0x073c 672 ( 4 7) */ fitod %f11,%f26
+/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1
+/* 0x0744 674 ( 4 5) */ add %i1,64,%i5
+/* 0x0748 675 ( 5 8) */ fitod %f8,%f6
+/* 0x074c 676 ( 5 6) */ add %i4,80,%i4
+/* 0x0750 677 ( 5 6) */ add %i1,80,%i1
+/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0
+/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20
+/* 0x075c 680 ( 6 7) */ add %i4,16,%o4
+/* 0x0760 681 ( 7 10) */ fitod %f9,%f4
+/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12
+/* 0x0768 683 ( 7 8) */ add %i1,16,%o3
+/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10
+/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0
+/* 0x0774 686 ( 8 9) */ add %g3,12,%g3
+/* 0x0778 687 ( 9 12) */ fitod %f0,%f2
+/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8
+/* 0x0780 689 (11 14) */ fitod %f1,%f0
+/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10
+/* 0x0788 691 (12 15) */ fdtox %f24,%f24
+/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4]
+/* 0x0790 693 (12 13) */ add %i0,12,%i0
+/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6
+/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8
+/* 0x079c 696 (14 17) */ fdtox %f22,%f22
+/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4]
+/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4
+/* 0x07a8 699 (16 19) */ fdtox %f20,%f20
+/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4]
+/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6
+/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2
+/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0
+/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4
+/* 0x07c0 705 (19 22) */ fdtox %f12,%f12
+/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4]
+/* 0x07c8 707 (20 23) */ fdtox %f10,%f10
+/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4]
+/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2
+/* 0x07d4 710 (21 24) */ fdtox %f8,%f8
+/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4]
+/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0
+/* 0x07e0 713 (22 25) */ fdtox %f6,%f6
+/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4]
+/* 0x07e8 715 (23 26) */ fdtox %f4,%f4
+/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4]
+/* 0x07f0 717 (24 27) */ fdtox %f2,%f2
+/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4]
+/* 0x07f8 719 (25 28) */ fdtox %f0,%f0
+/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0800 (25 26) */ std %f0,[%o3+%g4]
+
+!
+! ENTRY .L77000077
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L900000155
+!
+
+ .L900000155: /* frequency 1.0 confidence 0.0 */
+/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1
+/* 0x0810 727 ( 0 1) */ add %o0,1,%o0
+/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4
+/* 0x0818 729 ( 1 2) */ add %i0,2,%i0
+/* 0x081c 730 ( 2 5) */ fitod %f0,%f2
+/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1
+/* 0x0824 732 ( 2 3) */ add %g3,2,%g3
+/* 0x0828 733 ( 3 6) */ fitod %f1,%f0
+/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1
+/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0
+/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2
+/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2
+/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1
+/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0
+/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0
+/* 0x084c 742 (11 14) */ fdtox %f2,%f2
+/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4]
+/* 0x0854 744 (12 15) */ fdtox %f0,%f0
+/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4]
+/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50
+/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L77000043
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0
+
+!
+! ENTRY .L900000156
+!
+
+ .L900000156: /* frequency 1.0 confidence 0.0 */
+/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50
+/* 0x086c ( 0 1) */ or %g0,%g5,%o3
+/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1
+/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3
+/* 0x0878 763 ( 1 2) */ or %g0,0,%i0
+/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0
+/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2
+/* 0x0888 767 ( 3 6) */ ld [%o5],%i3
+/* 0x088c 768 ( 3 4) */ or %g0,8,%g2
+/* 0x0890 769 ( 3 4) */ or %g0,16,%o4
+/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3
+/* 0x0898 771 ( 4 5) */ or %g0,3,%i0
+/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1
+/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2
+/* 0x08a4 774 ( 6 7) */ st %o1,[%o7]
+/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1
+/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2
+/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0
+/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1
+/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1
+/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0
+/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4]
+/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000140
+!
+
+ .L900000140: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1
+/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4
+/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2
+/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5
+/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1
+/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0
+/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1
+/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0
+/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2]
+/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5
+/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0
+/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2
+/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4
+/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2
+/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0
+/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1
+/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0
+/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1]
+/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50
+/* 0x0914 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000143
+!
+
+ .L900000143: /* frequency 1.0 confidence 0.0 */
+/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3
+/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0
+/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0
+/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2]
+/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5
+/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0934 ( 3 4) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000078
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2
+/* 0x0940 817 ( 0 1) */ add %i0,1,%i0
+/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4
+/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2
+/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0
+/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1
+/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0
+/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0
+/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2]
+/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5
+/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50
+/* 0x096c ( 7 8) */ sra %i0,0,%o0
+
+!
+! ENTRY .L77000047
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000061
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+
+/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0
+/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x097c ( 4 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4
+/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6
+/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3
+/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351]
+/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4
+/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355]
+/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12
+/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10
+/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4
+/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18
+/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12
+/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9
+/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10
+/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15
+/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22
+/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20
+/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12
+/* 0x09d4 876 (10 13) */ ld [%i1],%g2
+/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3
+/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22
+/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4
+/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18
+/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5
+/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12
+/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0
+/* 0x09f8 885 (14 17) */ fitod %f18,%f26
+/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1
+/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20
+/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2
+/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3
+/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8
+/* 0x0a10 891 (20 21) */ fmovs %f6,%f14
+/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8
+/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6
+/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14
+/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10
+/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24
+/* 0x0a28 897 (28 31) */ fdtox %f14,%f14
+/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335]
+/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28
+/* 0x0a34 900 (29 32) */ fitod %f19,%f14
+/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18
+/* 0x0a3c 902 (30 33) */ fdtox %f10,%f10
+/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343]
+/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4
+/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22
+/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18
+/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311]
+/* 0x0a54 908 (32 35) */ fmuld %f12,%f6,%f10
+/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4
+/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12
+/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319]
+/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12
+/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5
+/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4
+/* 0x0a70 915 (35 38) */ fdtox %f4,%f4
+/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327]
+/* 0x0a78 917 (36 39) */ fdtox %f28,%f24
+/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303]
+/* 0x0a80 919 (37 40) */ fitod %f20,%f4
+/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4
+/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24
+/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7
+/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14
+/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4
+/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12
+/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5
+/* 0x0aa0 927 (39 42) */ fitod %f21,%f18
+/* 0x0aa4 928 (40 41) */ st %o4,[%i0]
+/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7
+/* 0x0aac 930 (40 43) */ fdtox %f22,%f20
+/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287]
+/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10
+/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5
+/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20
+/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295]
+/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7
+/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4
+/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14
+/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2
+/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10
+/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3
+/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271]
+/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3
+/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12
+/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0
+/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10
+/* 0x0af0 947 (45 48) */ fmuld %f4,%f8,%f16
+/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279]
+/* 0x0af8 949 (46 49) */ fdtox %f20,%f12
+/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4
+/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255]
+/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0
+/* 0x0b08 953 (47 50) */ fdtox %f14,%f12
+/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5
+/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2
+/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8
+/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1
+/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0
+/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4
+/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6
+/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263]
+/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2
+/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4
+/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0
+/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5
+/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2
+/* 0x0b40 967 (52 53) */ st %g3,[%i0+4]
+/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2
+/* 0x0b48 969 (52 55) */ fdtox %f16,%f10
+/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7
+/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2
+/* 0x0b54 972 (53 56) */ fdtox %f4,%f4
+/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239]
+/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7
+/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247]
+/* 0x0b64 976 (55 58) */ fdtox %f8,%f4
+/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7
+/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5
+/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0
+/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223]
+/* 0x0b78 981 (57 60) */ fdtox %f6,%f4
+/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5
+/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7
+/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231]
+/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0
+/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5
+/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1
+/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5
+/* 0x0b98 989 (61 62) */ st %g2,[%i0+12]
+/* 0x0b9c 990 (62 64) */ ldx [%sp+2239],%l0
+/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4
+/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1
+/* 0x0ba8 993 (63 64) */ add %o1,%o4,%o1
+/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7
+/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3
+/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5
+/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4
+/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16]
+/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2
+/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20]
+/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4
+/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8]
+/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2
+/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4
+/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24]
+/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2
+/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3
+/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2
+/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2
+/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28]
+/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3
+/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0
+/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0bfc (76 77) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0
+/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4
+/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483]
+/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2
+/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479]
+/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8
+/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10
+/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14
+/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4
+/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13
+/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7
+/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10
+/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14
+/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12
+/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6
+/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12
+/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6
+/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8
+/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18
+/* 0x0c64 1056 (20 23) */ fitod %f15,%f14
+/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10
+/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8
+/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20
+/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4
+/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14
+/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10
+/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463]
+/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10
+/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18
+/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471]
+/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8
+/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4
+/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447]
+/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4
+/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18
+/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455]
+/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10
+/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439]
+/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14
+/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8
+/* 0x0cb8 1077 (29 30) */ std %f8,[%sp+2431]
+/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10
+/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4
+/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8
+/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14
+/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423]
+/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10
+/* 0x0cd4 1084 (33 35) */ fxnor %f0,%f8,%f4
+/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415]
+/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8
+/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10
+/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14
+/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8
+/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10
+/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14
+/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18
+/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4
+/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8
+/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20
+/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10
+/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4
+/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8
+/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399]
+/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8
+/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18
+/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407]
+/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10
+/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383]
+/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14
+/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10
+/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18
+/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391]
+/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8
+/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375]
+/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4
+/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14
+/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367]
+/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8
+/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10
+/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4
+/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351]
+/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8
+/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14
+/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359]
+/* 0x0d68 1124 (54 57) */ fitod %f8,%f4
+/* 0x0d6c 1125 (55 57) */ fxnor %f0,%f14,%f10
+/* 0x0d70 1126 (56 59) */ fitod %f9,%f8
+/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4
+/* 0x0d78 1128 (58 61) */ fitod %f10,%f14
+/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8
+/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18
+/* 0x0d84 1131 (60 63) */ fitod %f11,%f10
+/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4
+/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14
+/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20
+/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8
+/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10
+/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4
+/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335]
+/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4
+/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18
+/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343]
+/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8
+/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319]
+/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14
+/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8
+/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18
+/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327]
+/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4
+/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311]
+/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10
+/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14
+/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303]
+/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4
+/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8
+/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10
+/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287]
+/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4
+/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14
+/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295]
+/* 0x0df8 1160 (74 77) */ fitod %f4,%f10
+/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4
+/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8
+/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10
+/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4
+/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14
+/* 0x0e10 1166 (80 83) */ fmuld %f10,%f12,%f18
+/* 0x0e14 1167 (80 83) */ fitod %f9,%f8
+/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10
+/* 0x0e1c 1169 (82 85) */ fmuld %f4,%f12,%f20
+/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14
+/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18
+/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279]
+/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4
+/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10
+/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271]
+/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10
+/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263]
+/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4
+/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255]
+/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10
+/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4
+/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10
+/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247]
+/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4
+/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239]
+/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2
+/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4
+/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3
+/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4
+/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2
+/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5
+/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2
+/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6
+/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3
+/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4
+/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4
+/* 0x0e8c 1200 (96 97) */ st %g4,[%i0]
+/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4
+/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0
+/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2
+/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6
+/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3
+/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2
+/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4
+/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5
+/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1
+/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2
+/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3
+/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4
+/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4]
+/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223]
+/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2
+/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3
+/* 0x0ed0 1217 (104 105) */ add %g5,%g2,%g2
+/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5
+/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2
+/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231]
+/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0
+/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4
+/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5
+/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2
+/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8]
+/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0
+/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3
+/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5
+/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3
+/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1
+/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3
+/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12]
+/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5
+/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0
+/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2
+/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5
+/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4
+/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2
+/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1
+/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5
+/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2
+/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16]
+/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0
+/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3
+/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5
+/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3
+/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4
+/* 0x0f4c 1248 (117 118) */ add %g3,%o0,%g3
+/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1
+/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5
+/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20]
+/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2
+/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5
+/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0
+/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2
+/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4
+/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2
+/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1
+/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5
+/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24]
+/* 0x0f80 1261 (123 124) */ add %g4,%g5,%g3
+/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5
+/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0
+/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3
+/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4
+/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3
+/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5
+/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1
+/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2
+/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5
+/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4
+/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0
+/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2
+/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28]
+/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5
+/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2
+/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1
+/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3
+/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5
+/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0
+/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4
+/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3
+/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5
+/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32]
+/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3
+/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2
+/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5
+/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4
+/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0
+/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1
+/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5
+/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36]
+/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3
+/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5
+/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2
+/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4
+/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2
+/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1
+/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5
+/* 0x101c 1303 (141 142) */ st %g2,[%i0+40]
+/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0
+/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2
+/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5
+/* 0x102c 1307 (142 143) */ add %g3,%o1,%g3
+/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4
+/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3
+/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1
+/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5
+/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0
+/* 0x1044 1313 (145 146) */ st %g3,[%i0+44]
+/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3
+/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5
+/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2
+/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1
+/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2
+/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4
+/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5
+/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0
+/* 0x1068 1322 (149 150) */ st %g2,[%i0+48]
+/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3
+/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1
+/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2
+/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3
+/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5
+/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0
+/* 0x1084 1329 (152 154) */ ldx [%sp+2231],%g4
+/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2
+/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5
+/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1
+/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2
+/* 0x1098 1334 (154 155) */ st %g3,[%i0+52]
+/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3
+/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56]
+/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2
+/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3
+/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2
+/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60]
+/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3
+/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0
+/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x10c0 (161 162) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+
+
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+
+/* 0x124c 1476 ( 0 0) */ .type mul_add,2
+/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x124c 1480 ( 0 0) */ .align 8
+/* 0x1250 1486 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */ /* wrapper: calls mul_add with incoming arg 0 passed as both of mul_add's first two arguments */
+/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp /* new register window and a 176-byte stack frame */
+/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3 /* outgoing arg 3 = incoming arg 2, low 32 bits sign-extended */
+/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2 /* outgoing arg 2 = incoming arg 1 */
+/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0 /* outgoing arg 0 = incoming arg 0 */
+/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1 /* outgoing arg 1 = incoming arg 0 again */
+/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result =
+/* 0x1268 ( 4 5) */ srl %i3,0,%o4 /* delay slot of the call: outgoing arg 4 = incoming arg 3, low 32 bits zero-extended */
+/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0 /* return value = low 32 bits of mul_add's result, zero-extended */
+/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0 /* delay slot of ret: pop the register window */
+/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2
+/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
+
+ .section ".data",#alloc,#write
+/* 0x1278 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160 /* = 0x8000000080000000 as a 64-bit pattern: bit 31 set in each 32-bit half */
+/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object
+/* 0x1280 10 ( 0 0) */ .size mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 0000000000..94e86eedb9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo) /* Phi:Plo = a * b (double-width product) */ \
+ { \
+ Plo = asm("mulq %a0, %a1, %v0", a, b); /* mulq: low 64 bits */ \
+ Phi = asm("umulh %a0, %a1, %v0", a, b); /* umulh: high 64 bits, unsigned */ \
+ }
+
+/* This is empty for the loop in s_mpv_mul_d */
+#define CARRY_ADD
+
+#define ONE_MUL /* one digit step; uses a, b, c, carry, a_i, a0b0, a1b1 from the expansion site */ \
+ a_i = *a++; \
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+ a0b0 += carry; /* fold the incoming carry into the low half */ \
+ if (a0b0 < carry) /* the addition wrapped: carry into the high half */ \
+ ++a1b1; \
+ CARRY_ADD /* expands to nothing or to an extra "+= *c" step, per the definition in effect */ \
+ *c++ = a0b0; \
+ carry = a1b1; /* the high half is the carry for the next digit */
+
+#define FOUR_MUL /* 4 x ONE_MUL */ \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL
+
+#define SIXTEEN_MUL /* 16 x ONE_MUL */ \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL
+
+#define THIRTYTWO_MUL /* 32 x ONE_MUL */ \
+ SIXTEEN_MUL \
+ SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL /* 128 x ONE_MUL */ \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL
+
+#define EXPAND_256(CALL) /* function-body fragment: declares carry, then processes all a_len digits */ \
+ mp_digit carry = 0; \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ if (a_len & 255) { /* the a_len mod 256 digits are unrolled inline, one bit of a_len per branch */ \
+ if (a_len & 1) { \
+ ONE_MUL \
+ } \
+ if (a_len & 2) { \
+ ONE_MUL \
+ ONE_MUL \
+ } \
+ if (a_len & 4) { \
+ FOUR_MUL \
+ } \
+ if (a_len & 8) { \
+ FOUR_MUL \
+ FOUR_MUL \
+ } \
+ if (a_len & 16) { \
+ SIXTEEN_MUL \
+ } \
+ if (a_len & 32) { \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 64) { \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 128) { \
+ ONETWENTYEIGHT_MUL \
+ } \
+ a_len = a_len & (-256); /* what remains is a multiple of 256 */ \
+ } \
+ if (a_len >= 256) { \
+ carry = CALL(a, a_len, b, c, carry); /* CALL receives a and c by value, already advanced past the inline part */ \
+ c += a_len; /* so c is advanced here; the user of this macro then stores or propagates carry at *c */ \
+ }
+
+#define FUNC_NAME(NAME) /* prototype shared by the generated *_MUL256 routines */ \
+ mp_digit NAME(const mp_digit *a, \
+ mp_size a_len, \
+ mp_digit b, mp_digit *c, \
+ mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) /* defines FNAME using whichever CARRY_ADD is in effect at the expansion site */ \
+ FUNC_NAME(FNAME) \
+ { \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ while (a_len) { /* expects a_len to be a multiple of 256; EXPAND_256 masks it with -256 before calling */ \
+ ONETWENTYEIGHT_MUL \
+ ONETWENTYEIGHT_MUL \
+ a_len -= 256; \
+ } \
+ return carry; /* carry out of the last digit processed */ \
+ }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+ (admittedly) small number of tests (i.e., timetest) used to
+ measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+ the routine that uses it; this helps locality somewhat */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b: writes a_len product digits to c, then the final carry as one more digit */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND) /* plain one-digit-per-iteration loop */
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); /* a1b1:a0b0 = a_i * b */
+
+ a0b0 += carry;
+ if (a0b0 < carry) /* the addition wrapped: carry into the high half */
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+#else /* unrolled variant built from EXPAND_256; it declares carry itself */
+ EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+ *c = carry; /* c now points one past the last product digit */
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loops in s_mpv_mul_d_add and s_mpv_mul_d_add_prop */
+#define CARRY_ADD /* accumulate the existing digit *c into the low half */ \
+ a0b0 += a_i = *c; /* a_i is no longer needed by ONE_MUL here, so it is reused to hold the old *c */ \
+ if (a0b0 < a_i) /* the sum wrapped: carry into the high half */ \
+ ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+ two routines that use it; this helps locality somewhat */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b over a_len digits; the final carry is stored into (not added to) the next digit of c */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256) /* declares carry and leaves c just past the last digit updated */
+ *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b over a_len digits, then the final carry is added into the following digits of c */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256) /* declares carry and leaves c just past the last digit updated */
+ while (carry) { /* NOTE(review): no length bound here; presumably the caller guarantees room in c - confirm */
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i; /* 1 if the addition wrapped, otherwise 0 and the loop ends */
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c
new file mode 100644
index 0000000000..461d40ab36
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mulsqr.c
@@ -0,0 +1,84 @@
+/*
+ * Test whether to include squaring code given the current settings
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_SQUARE 1 /* make sure squaring code is included */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+int
+main(int argc, char *argv[])
+{ /* Times ntests rounds of mp_mul(a, a) against mp_sqr(a) and reports which was faster. */
+ int ntests, prec, ix;
+ unsigned int seed;
+ clock_t start, stop;
+ double multime, sqrtime;
+ mp_int a, c;
+
+ seed = (unsigned int)time(NULL); /* the same seed is reused before both timing loops */
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
+ return 1;
+ }
+
+ if ((ntests = abs(atoi(argv[1]))) == 0) { /* sign is ignored; text atoi cannot convert yields 0 */
+ fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
+ return 1;
+ }
+ if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
+ fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
+ CHAR_BIT);
+ return 1;
+ }
+
+ prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; /* bits -> digits, rounding up */
+
+ mp_init_size(&a, prec); /* NOTE(review): return value is not checked */
+ mp_init_size(&c, 2 * prec); /* twice the operand size; NOTE(review): return value is not checked */
+
+ /* Test multiplication by self */
+ srand(seed); /* presumably mpp_random_size draws from rand() - confirm */
+ start = clock();
+ for (ix = 0; ix < ntests; ix++) {
+ mpp_random_size(&a, prec); /* operand generation is inside the timed region */
+ mp_mul(&a, &a, &c);
+ }
+ stop = clock();
+
+ multime = (double)(stop - start) / CLOCKS_PER_SEC; /* processor time in seconds */
+
+ /* Test squaring */
+ srand(seed); /* reseed identically to the multiply loop */
+ start = clock();
+ for (ix = 0; ix < ntests; ix++) {
+ mpp_random_size(&a, prec);
+ mp_sqr(&a, &c);
+ }
+ stop = clock();
+
+ sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+ printf("Multiply: %.4f\n", multime);
+ printf("Square: %.4f\n", sqrtime);
+ if (multime < sqrtime) {
+ printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime));
+ printf("Prefer: multiply\n");
+ } else { /* also taken on a tie; NOTE(review): if both timings are 0 this divides 0.0 by 0.0 */
+ printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime));
+ printf("Prefer: square\n");
+ }
+
+ mp_clear(&a);
+ mp_clear(&c);
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c
new file mode 100644
index 0000000000..3e64a2acaa
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/primes.c
@@ -0,0 +1,841 @@
+/*
+ * These tables of primes were generated using the 'sieve' program
+ * (sieve.c) and converted to this format with 'ptab.pl'.
+ *
+ * The 'small' table is just the first 128 primes. The 'large' table
+ * is a table of all the prime values that will fit into a single
+ * mp_digit (given the current size of an mp_digit, which is two bytes).
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if SMALL_TABLE
+#define MP_PRIME_TAB_SIZE 128 /* only the first 128 primes */
+#else
+#define MP_PRIME_TAB_SIZE 6542 /* every prime below 2^16, i.e. all that fit in two bytes */
+#endif
+
+const int prime_tab_size = MP_PRIME_TAB_SIZE; /* entry count of prime_tab for the selected configuration */
+const mp_digit prime_tab[] = {
+ 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+ 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+ 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+ 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+ 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+ 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+ 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+ 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+ 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+ 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+ 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+ 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+ 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+ 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+ 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+ 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+#if !SMALL_TABLE
+ 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+ 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+ 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+ 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+ 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+ 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+ 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+ 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+ 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+ 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+ 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+ 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+ 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+ 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+ 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+ 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+ 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D,
+ 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3,
+ 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713,
+ 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755,
+ 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D,
+ 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3,
+ 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815,
+ 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851,
+ 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B,
+ 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB,
+ 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907,
+ 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949,
+ 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977,
+ 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7,
+ 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13,
+ 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61,
+ 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85,
+ 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5,
+ 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1,
+ 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D,
+ 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B,
+ 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3,
+ 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07,
+ 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F,
+ 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95,
+ 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5,
+ 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13,
+ 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55,
+ 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3,
+ 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5,
+ 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17,
+ 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57,
+ 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F,
+ 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5,
+ 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17,
+ 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53,
+ 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3,
+ 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9,
+ 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021,
+ 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073,
+ 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3,
+ 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1,
+ 0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139,
+ 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181,
+ 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3,
+ 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D,
+ 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241,
+ 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F,
+ 0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD,
+ 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D,
+ 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367,
+ 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393,
+ 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF,
+ 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F,
+ 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471,
+ 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD,
+ 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517,
+ 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543,
+ 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583,
+ 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5,
+ 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615,
+ 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F,
+ 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F,
+ 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9,
+ 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709,
+ 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777,
+ 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9,
+ 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5,
+ 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843,
+ 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F,
+ 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9,
+ 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB,
+ 0x18F5, 0x18FD, 0x1915, 0x191B, 0x1931, 0x1933, 0x1945, 0x1949,
+ 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3,
+ 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED,
+ 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D,
+ 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B,
+ 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1,
+ 0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB,
+ 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B,
+ 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73,
+ 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1,
+ 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27,
+ 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55,
+ 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5,
+ 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21,
+ 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D,
+ 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89,
+ 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5,
+ 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B,
+ 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D,
+ 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD,
+ 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF,
+ 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49,
+ 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97,
+ 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1,
+ 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027,
+ 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063,
+ 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1,
+ 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB,
+ 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159,
+ 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1,
+ 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5,
+ 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B,
+ 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263,
+ 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D,
+ 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED,
+ 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333,
+ 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383,
+ 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9,
+ 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B,
+ 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F,
+ 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1,
+ 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9,
+ 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513,
+ 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573,
+ 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1,
+ 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605,
+ 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F,
+ 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681,
+ 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9,
+ 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735,
+ 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773,
+ 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3,
+ 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807,
+ 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F,
+ 0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881,
+ 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB,
+ 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923,
+ 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983,
+ 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3,
+ 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13,
+ 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B,
+ 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9,
+ 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB,
+ 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F,
+ 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99,
+ 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3,
+ 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35,
+ 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87,
+ 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB,
+ 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F,
+ 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89,
+ 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3,
+ 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25,
+ 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F,
+ 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5,
+ 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB,
+ 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45,
+ 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81,
+ 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB,
+ 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023,
+ 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071,
+ 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9,
+ 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF,
+ 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D,
+ 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D,
+ 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3,
+ 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215,
+ 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B,
+ 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7,
+ 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9,
+ 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335,
+ 0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379,
+ 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB,
+ 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419,
+ 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D,
+ 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB,
+ 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D,
+ 0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571,
+ 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B,
+ 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7,
+ 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637,
+ 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D,
+ 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1,
+ 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F,
+ 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D,
+ 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3,
+ 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847,
+ 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D,
+ 0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5,
+ 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923,
+ 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B,
+ 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1,
+ 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5,
+ 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27,
+ 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D,
+ 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1,
+ 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21,
+ 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71,
+ 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD,
+ 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3,
+ 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29,
+ 0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71,
+ 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1,
+ 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B,
+ 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F,
+ 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D,
+ 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3,
+ 0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F,
+ 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87,
+ 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7,
+ 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37,
+ 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79,
+ 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD,
+ 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B,
+ 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D,
+ 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB,
+ 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115,
+ 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B,
+ 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB,
+ 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F,
+ 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261,
+ 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D,
+ 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307,
+ 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F,
+ 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF,
+ 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED,
+ 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B,
+ 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F,
+ 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF,
+ 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513,
+ 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577,
+ 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7,
+ 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609,
+ 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645,
+ 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D,
+ 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF,
+ 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F,
+ 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771,
+ 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF,
+ 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807,
+ 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855,
+ 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1,
+ 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919,
+ 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969,
+ 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1,
+ 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41,
+ 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89,
+ 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF,
+ 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B,
+ 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD,
+ 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF,
+ 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D,
+ 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55,
+ 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93,
+ 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05,
+ 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51,
+ 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B,
+ 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9,
+ 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35,
+ 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85,
+ 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5,
+ 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09,
+ 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D,
+ 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9,
+ 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF,
+ 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047,
+ 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7,
+ 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B,
+ 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149,
+ 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7,
+ 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7,
+ 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243,
+ 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297,
+ 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9,
+ 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341,
+ 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387,
+ 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9,
+ 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413,
+ 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455,
+ 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D,
+ 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7,
+ 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D,
+ 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585,
+ 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED,
+ 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F,
+ 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B,
+ 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD,
+ 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707,
+ 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761,
+ 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5,
+ 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F,
+ 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D,
+ 0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB,
+ 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1,
+ 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B,
+ 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F,
+ 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED,
+ 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17,
+ 0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77,
+ 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB,
+ 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19,
+ 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79,
+ 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1,
+ 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29,
+ 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F,
+ 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF,
+ 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05,
+ 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47,
+ 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95,
+ 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7,
+ 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B,
+ 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49,
+ 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5,
+ 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27,
+ 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65,
+ 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5,
+ 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F,
+ 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085,
+ 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9,
+ 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B,
+ 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191,
+ 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1,
+ 0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241,
+ 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295,
+ 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD,
+ 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F,
+ 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377,
+ 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB,
+ 0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427,
+ 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475,
+ 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7,
+ 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521,
+ 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F,
+ 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD,
+ 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607,
+ 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B,
+ 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B,
+ 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5,
+ 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F,
+ 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791,
+ 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809,
+ 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B,
+ 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871,
+ 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7,
+ 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917,
+ 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B,
+ 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB,
+ 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07,
+ 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B,
+ 0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1,
+ 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39,
+ 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93,
+ 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF,
+ 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59,
+ 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83,
+ 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3,
+ 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25,
+ 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61,
+ 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7,
+ 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15,
+ 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B,
+ 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3,
+ 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F,
+ 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D,
+ 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5,
+ 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9,
+ 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027,
+ 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079,
+ 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7,
+ 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121,
+ 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183,
+ 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB,
+ 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219,
+ 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277,
+ 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB,
+ 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303,
+ 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F,
+ 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1,
+ 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D,
+ 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D,
+ 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1,
+ 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D,
+ 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D,
+ 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5,
+ 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633,
+ 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685,
+ 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1,
+ 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741,
+ 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD,
+ 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF,
+ 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853,
+ 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B,
+ 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD,
+ 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F,
+ 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993,
+ 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF,
+ 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11,
+ 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59,
+ 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1,
+ 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D,
+ 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71,
+ 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7,
+ 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19,
+ 0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81,
+ 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7,
+ 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F,
+ 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D,
+ 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5,
+ 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F,
+ 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B,
+ 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7,
+ 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7,
+ 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D,
+ 0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87,
+ 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED,
+ 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F,
+ 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095,
+ 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB,
+ 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D,
+ 0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D,
+ 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB,
+ 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231,
+ 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F,
+ 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7,
+ 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B,
+ 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D,
+ 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F,
+ 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7,
+ 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425,
+ 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483,
+ 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF,
+ 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D,
+ 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9,
+ 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9,
+ 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639,
+ 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699,
+ 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1,
+ 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B,
+ 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777,
+ 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5,
+ 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F,
+ 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B,
+ 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED,
+ 0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927,
+ 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981,
+ 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB,
+ 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35,
+ 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79,
+ 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD,
+ 0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13,
+ 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF,
+ 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF,
+ 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53,
+ 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F,
+ 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9,
+ 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41,
+ 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7,
+ 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5,
+ 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69,
+ 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF,
+ 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB,
+ 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45,
+ 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D,
+ 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9,
+ 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023,
+ 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053,
+ 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF,
+ 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113,
+ 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167,
+ 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9,
+ 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215,
+ 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275,
+ 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD,
+ 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7,
+ 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D,
+ 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5,
+ 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409,
+ 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469,
+ 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1,
+ 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535,
+ 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589,
+ 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD,
+ 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635,
+ 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9,
+ 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD,
+ 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739,
+ 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787,
+ 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9,
+ 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F,
+ 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887,
+ 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD,
+ 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929,
+ 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D,
+ 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7,
+ 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13,
+ 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75,
+ 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF,
+ 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F,
+ 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D,
+ 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD,
+ 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23,
+ 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67,
+ 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1,
+ 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15,
+ 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97,
+ 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1,
+ 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47,
+ 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95,
+ 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05,
+ 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D,
+ 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3,
+ 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5,
+ 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057,
+ 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5,
+ 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1,
+ 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129,
+ 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D,
+ 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED,
+ 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243,
+ 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B,
+ 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5,
+ 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B,
+ 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387,
+ 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5,
+ 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F,
+ 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459,
+ 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3,
+ 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3,
+ 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553,
+ 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D,
+ 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9,
+ 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D,
+ 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691,
+ 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD,
+ 0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F,
+ 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759,
+ 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9,
+ 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805,
+ 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F,
+ 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7,
+ 0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937,
+ 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3,
+ 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15,
+ 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59,
+ 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB,
+ 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07,
+ 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73,
+ 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5,
+ 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09,
+ 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57,
+ 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9,
+ 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED,
+ 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D,
+ 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1,
+ 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05,
+ 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61,
+ 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7,
+ 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD,
+ 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57,
+ 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3,
+ 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015,
+ 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069,
+ 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF,
+ 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119,
+ 0xB11D, 0xB11F, 0xB131, 0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173,
+ 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD,
+ 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D,
+ 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281,
+ 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9,
+ 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D,
+ 0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B,
+ 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B,
+ 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B,
+ 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5,
+ 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501,
+ 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F,
+ 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF,
+ 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609,
+ 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659,
+ 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5,
+ 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF,
+ 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755,
+ 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3,
+ 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827,
+ 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893,
+ 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5,
+ 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F,
+ 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D,
+ 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1,
+ 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25,
+ 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65,
+ 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF,
+ 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F,
+ 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61,
+ 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF,
+ 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33,
+ 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F,
+ 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3,
+ 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F,
+ 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F,
+ 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3,
+ 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27,
+ 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D,
+ 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9,
+ 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39,
+ 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89,
+ 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD,
+ 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029,
+ 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D,
+ 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7,
+ 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119,
+ 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179,
+ 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD,
+ 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221,
+ 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B,
+ 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD,
+ 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313,
+ 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371,
+ 0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5,
+ 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF,
+ 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461,
+ 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7,
+ 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509,
+ 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F,
+ 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3,
+ 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641,
+ 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1,
+ 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1,
+ 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757,
+ 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD,
+ 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803,
+ 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857,
+ 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7,
+ 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF,
+ 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937,
+ 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989,
+ 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9,
+ 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37,
+ 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F,
+ 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9,
+ 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29,
+ 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87,
+ 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7,
+ 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B,
+ 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91,
+ 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15,
+ 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57,
+ 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB,
+ 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD,
+ 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65,
+ 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7,
+ 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07,
+ 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61,
+ 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9,
+ 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7,
+ 0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F,
+ 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB,
+ 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D,
+ 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171,
+ 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD,
+ 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217,
+ 0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279,
+ 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3,
+ 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315,
+ 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B,
+ 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD,
+ 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B,
+ 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D,
+ 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1,
+ 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F,
+ 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565,
+ 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1,
+ 0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D,
+ 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687,
+ 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1,
+ 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F,
+ 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7,
+ 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB,
+ 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D,
+ 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF,
+ 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933,
+ 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F,
+ 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9,
+ 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09,
+ 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51,
+ 0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D,
+ 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5,
+ 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43,
+ 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD,
+ 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D,
+ 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F,
+ 0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9,
+ 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF,
+ 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53,
+ 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B,
+ 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9,
+ 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D,
+ 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77,
+ 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1,
+ 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F,
+ 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67,
+ 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5,
+ 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005,
+ 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071,
+ 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7,
+ 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135,
+ 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F,
+ 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1,
+ 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219,
+ 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279,
+ 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD,
+ 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327,
+ 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363,
+ 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9,
+ 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419,
+ 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455,
+ 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD,
+ 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537,
+ 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585,
+ 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615,
+ 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657,
+ 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695,
+ 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3,
+ 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F,
+ 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B,
+ 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF,
+ 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B,
+ 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869,
+ 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB,
+ 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915,
+ 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963,
+ 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7,
+ 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D,
+ 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3,
+ 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB,
+ 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B,
+ 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1,
+ 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B,
+ 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B,
+ 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD,
+ 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F,
+ 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B,
+ 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3,
+ 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19,
+ 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81,
+ 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1,
+ 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71,
+ 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5,
+ 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B,
+ 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071,
+ 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF,
+ 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7,
+ 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175,
+ 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7,
+ 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227,
+ 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277,
+ 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF,
+ 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319,
+ 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B,
+ 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409,
+ 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455,
+ 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9,
+ 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B,
+ 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B,
+ 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF,
+ 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D,
+ 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F,
+ 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB,
+ 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755,
+ 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D,
+ 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7,
+ 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D,
+ 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877,
+ 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7,
+ 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9,
+ 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941,
+ 0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3,
+ 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13,
+ 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B,
+ 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF,
+ 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F,
+ 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7,
+ 0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37,
+ 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73,
+ 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD,
+ 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51,
+ 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99,
+ 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03,
+ 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D,
+ 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93,
+ 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3,
+ 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D,
+ 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D,
+ 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1
+#endif
+};
diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il
new file mode 100644
index 0000000000..d2e8024ac2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_32.il
@@ -0,0 +1,1291 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! The interface to the VIS instructions as declared below (and in the VIS
+! User's Manual) will not change, but the macro implementation might change
+! in the future.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,8
+ edge8 %o0,%o1,%o0 ! %o0 = edge8(%o0, %o1); the int result is returned in %o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8l,8
+ edge8l %o0,%o1,%o0 ! %o0 = edge8l(%o0, %o1); the int result is returned in %o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,8
+ edge16 %o0,%o1,%o0 ! %o0 = edge16(%o0, %o1); the int result is returned in %o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,8
+ edge16l %o0,%o1,%o0 ! %o0 = edge16l(%o0, %o1); the int result is returned in %o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,8
+ edge32 %o0,%o1,%o0 ! %o0 = edge32(%o0, %o1); the int result is returned in %o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,8
+ edge32l %o0,%o1,%o0 ! %o0 = edge32l(%o0, %o1); the int result is returned in %o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8cc,8
+ edge8 %o0,%o1,%o0 ! %o0 = edge8(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8lcc,8
+ edge8l %o0,%o1,%o0 ! %o0 = edge8l(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16cc,8
+ edge16 %o0,%o1,%o0 ! %o0 = edge16(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16lcc,8
+ edge16l %o0,%o1,%o0 ! %o0 = edge16l(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32cc,8
+ edge32 %o0,%o1,%o0 ! %o0 = edge32(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32lcc,8
+ edge32l %o0,%o1,%o0 ! %o0 = edge32l(%o0, %o1); movgu below tests %icc after this
+ mov 0,%o1 ! %o1 = 0, i.e. leave the result unchanged by default
+ movgu %icc,-1024,%o1 ! if %icc says "greater, unsigned": %o1 = -1024
+ or %o1,%o0,%o0 ! merge %o1 into the result; -1024 makes the return value negative
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,8
+ alignaddr %o0,%o1,%o0 ! %o0 = alignaddr(rs1, rs2); the pointer result is returned in %o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,8
+ alignaddrl %o0,%o1,%o0 ! %o0 = alignaddrl(rs1, rs2); the pointer result is returned in %o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_faligndata,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ faligndata %f4,%f10,%f0 ! %f0:%f1 = faligndata(frs1, frs2)
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmple16 %f4,%f10,%o0 ! %o0 = fcmple16(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpne16 %f4,%f10,%o0 ! %o0 = fcmpne16(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmple32 %f4,%f10,%o0 ! %o0 = fcmple32(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpne32 %f4,%f10,%o0 ! %o0 = fcmpne32(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpgt16 %f4,%f10,%o0 ! %o0 = fcmpgt16(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpeq16 %f4,%f10,%o0 ! %o0 = fcmpeq16(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpgt32 %f4,%f10,%o0 ! %o0 = fcmpgt32(frs1, frs2); int result returned in %o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fcmpeq32 %f4,%f10,%o0 ! %o0 = fcmpeq32(frs1, frs2); int result returned in %o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16,12
+ st %o0,[%sp+0x44] ! spill float frs1 (%o0) to stack scratch
+ ld [%sp+0x44],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! double frs2 starts in odd register %o1, so store its first word alone
+ st %o2,[%sp+0x4c] ! store the second word of frs2 (%o2) right behind it
+ ldd [%sp+0x48],%f10 ! reload both words as %f10:%f11
+ fmul8x16 %f4,%f10,%f0 ! %f0:%f1 = fmul8x16(frs1, frs2)
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16_dummy,16
+ st %o0,[%sp+0x44] ! spill float frs1 (%o0) to stack scratch
+ ld [%sp+0x44],%f4 ! reload frs1 into %f4
+ std %o2,[%sp+0x48] ! the dummy int occupies %o1, so frs2 sits in %o2:%o3 and one std suffices
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fmul8x16 %f4,%f10,%f0 ! %f0:%f1 = fmul8x16(frs1, frs2)
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fmul8x16au %f4,%f10,%f0 ! %f0:%f1 = fmul8x16au(frs1, frs2)
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ st %o0,[%sp+0x44] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x44],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1) to the next scratch word
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fmul8x16al %f4,%f10,%f0 ! %f0:%f1 = fmul8x16al(frs1, frs2)
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fmul8sux16 %f4,%f10,%f0 ! %f0:%f1 = fmul8sux16(frs1, frs2)
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fmul8ulx16 %f4,%f10,%f0 ! %f0:%f1 = fmul8ulx16(frs1, frs2)
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fmuld8sux16 %f4,%f10,%f0 ! %f0:%f1 = fmuld8sux16(frs1, frs2)
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fmuld8ulx16 %f4,%f10,%f0 ! %f0:%f1 = fmuld8ulx16(frs1, frs2)
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to the 0x48 scratch slot, as in vis_fpadd32/vis_fpsub16
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fpadd16 %f4,%f10,%f0 ! %f0:%f1 = fpadd16(frs1, frs2)
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd16s,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fpadd16s %f4,%f10,%f0 ! %f0 = fpadd16s(frs1, frs2)
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fpadd32 %f4,%f10,%f0 ! %f0:%f1 = fpadd32(frs1, frs2)
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fpadd32s %f4,%f10,%f0 ! %f0 = fpadd32s(frs1, frs2)
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fpsub16 %f4,%f10,%f0 ! %f0:%f1 = fpsub16(frs1, frs2)
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fpsub16s %f4,%f10,%f0 ! %f0 = fpsub16s(frs1, frs2)
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fpsub32 %f4,%f10,%f0 ! %f0:%f1 = fpsub32(frs1, frs2)
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fpsub32s %f4,%f10,%f0 ! %f0 = fpsub32s(frs1, frs2)
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ std %o0,[%sp+0x48] ! spill the double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ fpack16 %f4,%f0 ! %f0 = fpack16(arg); the float result is returned in %f0
+ .end
+
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpack16_pair,16
+ std %o0,[%sp+0x48] ! spill the 1st double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ std %o2,[%sp+0x48] ! spill the 2nd double argument (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload it as %f10:%f11
+ fpack16 %f4,%f0 ! %f0 = fpack16(1st argument)
+ fpack16 %f10,%f1 ! %f1 = fpack16(2nd argument); the pair is returned as %f0:%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+ .inline vis_st2_fpack16,20
+ std %o0,[%sp+0x48] ! spill the 1st double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ std %o2,[%sp+0x48] ! spill the 2nd double argument (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload it as %f10:%f11
+ fpack16 %f4,%f0 ! %f0 = fpack16(1st argument)
+ fpack16 %f10,%f1 ! %f1 = fpack16(2nd argument)
+ st %f0,[%o4+0] ! store the first packed word at the pointer argument (%o4)
+ st %f1,[%o4+4] ! store the second packed word 4 bytes after it
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+ .inline vis_std_fpack16,20
+ std %o0,[%sp+0x48] ! spill the 1st double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ std %o2,[%sp+0x48] ! spill the 2nd double argument (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload it as %f10:%f11
+ fpack16 %f4,%f0 ! %f0 = fpack16(1st argument)
+ fpack16 %f10,%f1 ! %f1 = fpack16(2nd argument)
+ std %f0,[%o4] ! store %f0:%f1 with a single 8-byte store at the pointer argument (%o4)
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+ .inline vis_st2_fpackfix,20
+ std %o0,[%sp+0x48] ! spill the 1st double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ std %o2,[%sp+0x48] ! spill the 2nd double argument (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload it as %f10:%f11
+ fpackfix %f4,%f0 ! %f0 = fpackfix(1st argument)
+ fpackfix %f10,%f1 ! %f1 = fpackfix(2nd argument)
+ st %f0,[%o4+0] ! store the first packed word at the pointer argument (%o4)
+ st %f1,[%o4+4] ! store the second packed word 4 bytes after it
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_hi,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! load frs1 straight into the return pair %f0:%f1
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f4 ! reload frs2 as %f4:%f5
+ fpack16 %f4,%f0 ! overwrite only %f0 (high word) with fpack16(frs2); %f1 keeps frs1's low word
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack16_to_lo,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! load frs1 straight into the return pair %f0:%f1
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f4 ! reload frs2 as %f4:%f5
+ fpack16 %f4,%f3 ! %f3 = fpack16(frs2), staged in a temporary instead of written to %f1 directly
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fpack32 %f4,%f10,%f0 ! %f0:%f1 = fpack32(frs1, frs2)
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ std %o0,[%sp+0x48] ! spill the double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ fpackfix %f4,%f0 ! %f0 = fpackfix(arg); the float result is returned in %f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpackfix_pair,16
+ std %o0,[%sp+0x48] ! spill the 1st double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ std %o2,[%sp+0x48] ! spill the 2nd double argument (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f6 ! reload it as %f6:%f7 (this template uses %f6, not %f10)
+ fpackfix %f4,%f0 ! %f0 = fpackfix(1st argument)
+ fpackfix %f6,%f1 ! %f1 = fpackfix(2nd argument); the pair is returned as %f0:%f1
+ .end
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+!
+ .inline vis_pdist,24
+ std %o4,[%sp+0x48] ! spill the 3rd argument frd (%o4:%o5) to stack scratch
+ ldd [%sp+0x48],%f0 ! preload frd into the destination pair %f0:%f1
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1), reusing the slot
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ pdist %f4,%f10,%f0 ! pdist on frs1/frs2 with %f0:%f1 (holding frd) as destination; result returned there
+ .end
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fpmerge %f4,%f10,%f0 ! %f0:%f1 = fpmerge(frs1, frs2)
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ st %o0,[%sp+0x48] ! spill the float argument (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload it into %f4
+ fexpand %f4,%f0 ! %f0:%f1 = fexpand(arg)
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+ .inline vis_fexpand_hi,8
+ std %o0,[%sp+0x48] ! spill the double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ fexpand %f4,%f0 ! %f0:%f1 = fexpand of the high word (%f4) only
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+ .inline vis_fexpand_lo,8
+ std %o0,[%sp+0x48] ! spill the double argument (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload it as %f4:%f5
+ fmovs %f5, %f2 ! copy the low word (%f5) into %f2
+ fexpand %f2,%f0 ! %f0:%f1 = fexpand of that low word
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fnor %f4,%f10,%f0 ! %f0:%f1 = fnor(frs1, frs2)
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fnors %f4,%f10,%f0 ! %f0 = fnors(frs1, frs2)
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fandnot,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fandnot1 %f4,%f10,%f0 ! %f0:%f1 = fandnot1(frs1, frs2); note the "1" form of the instruction
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fandnots,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fandnot1s %f4,%f10,%f0 ! %f0 = fandnot1s(frs1, frs2); note the "1" form of the instruction
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+!
+ .inline vis_fnot,8
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ fnot1 %f4,%f0 ! %f0:%f1 = fnot1(frs1)
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+!
+ .inline vis_fnots,4
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ fnot1s %f4,%f0 ! %f0 = fnot1s(frs1)
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fxor %f4,%f10,%f0 ! %f0:%f1 = fxor(frs1, frs2)
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fxors %f4,%f10,%f0 ! %f0 = fxors(frs1, frs2)
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fnand %f4,%f10,%f0 ! %f0:%f1 = fnand(frs1, frs2)
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fnands %f4,%f10,%f0 ! %f0 = fnands(frs1, frs2)
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fand %f4,%f10,%f0 ! %f0:%f1 = fand(frs1, frs2)
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fands %f4,%f10,%f0 ! %f0 = fands(frs1, frs2)
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fxnor %f4,%f10,%f0 ! %f0:%f1 = fxnor(frs1, frs2)
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fxnors %f4,%f10,%f0 ! %f0 = fxnors(frs1, frs2)
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+ .inline vis_fsrc,8
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ fsrc1 %f4,%f0 ! %f0:%f1 = fsrc1(frs1)
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+ .inline vis_fsrcs,4
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ fsrc1s %f4,%f0 ! %f0 = fsrc1s(frs1)
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fornot,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ fornot1 %f4,%f10,%f0 ! %f0:%f1 = fornot1(frs1, frs2); note the "1" form of the instruction
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fornots,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fornot1s %f4,%f10,%f0 ! %f0 = fornot1s(frs1, frs2); note the "1" form of the instruction
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ std %o0,[%sp+0x48] ! spill frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frs1 as %f4:%f5
+ std %o2,[%sp+0x48] ! spill frs2 (%o2:%o3), reusing the slot
+ ldd [%sp+0x48],%f10 ! reload frs2 as %f10:%f11
+ for %f4,%f10,%f0 ! %f0:%f1 = for(frs1, frs2)
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ st %o0,[%sp+0x48] ! spill frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs1 into %f4
+ st %o1,[%sp+0x48] ! spill frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f10 ! reload frs2 into %f10
+ fors %f4,%f10,%f0 ! %f0 = fors(frs1, frs2)
+ .end
+!
+! double vis_fzero(/* void */)
+!
+ .inline vis_fzero,0
+ fzero %f0 ! no arguments; fzero writes the double result to %f0:%f1
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0 ! no arguments; fzeros writes the float result to %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0 ! no arguments; fone writes the double result to %f0:%f1
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0 ! no arguments; fones writes the float result to %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+ .inline vis_stdfa_ASI_PST8P_int_pair,16
+ ld [%o0],%f4 ! %f4 = 32-bit word read through rs1 (%o0)
+ ld [%o1],%f5 ! %f5 = 32-bit word read through rs2 (%o1); %f4:%f5 now form the pair to store
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P: store %f4:%f5 at rs3 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S: store frd at rs1 (%o2) under rmask (%o3)
+ .end
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd0 ! ASI_FL8_P: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P: store from frd to rs1 + index (%o2 + %o3)
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd1 ! ASI_FL8_S: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd2 ! ASI_FL16_P: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,16
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P: store from frd to rs1 + index (%o2 + %o3)
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd3 ! ASI_FL16_S: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd8 ! ASI_FL8_PL: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xd9 ! ASI_FL8_SL: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xda ! ASI_FL16_PL: store from frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0xdb ! ASI_FL16_SL: store from frd to rs1 (%o2)
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8P,4
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1; compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8P_index,8
+ ldda [%o0+%o1]0xd0,%f4 ! ASI 0xd0 load from rs1 + index (%o0 + %o1) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_hi,8
+ sra %o1,16,%o1 ! %o1 = upper 16 bits of index, via an arithmetic (sign-keeping) shift
+ ldda [%o0+%o1]0xd0,%f4 ! ASI 0xd0 load from rs1 + that offset into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_lo,8
+ sll %o1,16,%o1 ! move the lower 16 bits of index to the top of the 32-bit word
+ sra %o1,16,%o1 ! shift back arithmetically: %o1 = lower 16 bits, sign-extended
+ ldda [%o0+%o1]0xd0,%f4 ! ASI 0xd0 load from rs1 + that offset into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,4
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,4
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,8
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P: load from rs1 + index (%o0 + %o1) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,4
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,4
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,8
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL: load from rs1 + index (%o0 + %o1) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,4
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,4
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,8
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL: load from rs1 + index (%o0 + %o1) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,4
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL: load from rs1 (%o0) into %f4
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1
+ .end
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0 ! read the graphics status register; value returned in %o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr ! write the argument (%o0) to %gsr; %g0 is the other wr operand
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+ .inline vis_array8,12
+ sllx %o0,32,%o0 ! shift the first word of the 64-bit rs1 (%o0) up into bits 63:32
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array8 %o3,%o2,%o0 ! %o0 = array8(rs1, rs2 in %o2); result returned in %o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ sllx %o0,32,%o0 ! shift the first word of the 64-bit rs1 (%o0) up into bits 63:32
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array16 %o3,%o2,%o0 ! %o0 = array16(rs1, rs2 in %o2); result returned in %o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ sllx %o0,32,%o0 ! shift the first word of the 64-bit rs1 (%o0) up into bits 63:32
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array32 %o3,%o2,%o0 ! %o0 = array32(rs1, rs2 in %o2); result returned in %o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ std %o0,[%sp+0x48] ! store double frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; the float return value is just %f0, the high word
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ std %o0,[%sp+0x48] ! store double frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ fmovs %f1,%f0 ! %f0 = low word (frs1); the float return value is %f0
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_hi,12
+ std %o0,[%sp+0x48] ! store double frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2 (%o2) to a separate scratch word
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f0 ! %f0 = float frs2 (high word replaced); return %f0:%f1
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_lo,12
+ std %o0,[%sp+0x48] ! store double frs1 (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2 (%o2) to a separate scratch word
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f1 ! %f1 = float frs2 (low word replaced); return %f0:%f1
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+ .inline vis_freg_pair,8
+ st %o0,[%sp+0x48] ! store float frs1 (%o0) to stack scratch
+ ld [%sp+0x48],%f0 ! %f0 = frs1, the high word of the returned pair
+ st %o1,[%sp+0x48] ! store float frs2 (%o1), reusing the slot
+ ld [%sp+0x48],%f1 ! %f1 = frs2, the low word; the double is returned as %f0:%f1
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+ .inline vis_to_float,4
+ st %o0,[%sp+0x48] ! store the integer bit pattern (%o0) to stack scratch
+ ld [%sp+0x48],%f0 ! reload the same 32 bits into %f0; bits are moved, not converted
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+ .inline vis_to_double,8
+ std %o0,[%sp+0x48] ! store value1 (%o0) and value2 (%o1) as one 8-byte pair
+ ldd [%sp+0x48],%f0 ! reload as %f0:%f1, so value1 is the high word and value2 the low word
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+0x48] ! store value (%o0) to stack scratch
+ ld [%sp+0x48],%f1 ! %f1 = value (low word of the result)
+ fmovs %f1,%f0 ! duplicate value into %f0, so %f0:%f1 holds it twice
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+ .inline vis_ll_to_double,8
+ std %o0,[%sp+0x48] ! store the 64-bit value (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f0 ! reload the same 64 bits as %f0:%f1; bits are moved, not converted
+ .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0 ! read the ASI register; value returned in %o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi ! write the argument (%o0) to %asi; %g0 is the other wr operand
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,4
+ lda [%o0+0]%asi,%f4 ! load a float from rs1 (%o0) using the ASI currently held in %asi
+ fmovs %f4,%f0 ! copy to the return register %f0; compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,4
+ lda [%o0]0x80,%f4 ! ASI_P: load a float from rs1 (%o0) into %f4
+ fmovs %f4,%f0 ! copy to the return register %f0; compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,4
+ lda [%o0]0x88,%f4 ! ASI_PL: load a float from rs1 (%o0) into %f4
+ fmovs %f4,%f0 ! copy to the return register %f0; compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,4
+ ldda [%o0+0]%asi,%f4 ! load a double from rs1 (%o0) using the ASI currently held in %asi
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1; compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,4
+ ldda [%o0]0x80,%f4 ! ASI_P: load a double from rs1 (%o0) into %f4:%f5
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1; compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,4
+ ldda [%o0]0x88,%f4 ! ASI_PL: load a double from rs1 (%o0) into %f4:%f5
+ fmovd %f4,%f0 ! copy to the return pair %f0:%f1; compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_REG,8
+ st %o0,[%sp+0x48] ! spill float frs (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs into %f4
+ sta %f4,[%o1+0]%asi ! store frs to rs1 (%o1) using the ASI currently held in %asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,8
+ st %o0,[%sp+0x48] ! spill float frs (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs into %f4
+ sta %f4,[%o1]0x80 ! ASI_P: store frs to rs1 (%o1)
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,8
+ st %o0,[%sp+0x48] ! spill float frs (%o0) to stack scratch
+ ld [%sp+0x48],%f4 ! reload frs into %f4
+ sta %f4,[%o1]0x88 ! ASI_PL: store frs to rs1 (%o1)
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_REG,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2+0]%asi ! store frd to rs1 (%o2) using the ASI currently held in %asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0x80 ! ASI_P: store frd to rs1 (%o2)
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,12
+ std %o0,[%sp+0x48] ! spill frd (%o0:%o1) to stack scratch
+ ldd [%sp+0x48],%f4 ! reload frd as %f4:%f5
+ stda %f4,[%o2]0x88 ! ASI_PL: store frd to rs1 (%o2)
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+ .inline vis_lduha_ASI_REG,4
+ lduha [%o0+0]%asi,%o0 ! %o0 = unsigned halfword at rs1, loaded using the ASI currently held in %asi
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,4
+ lduha [%o0]0x80,%o0 ! ASI_P: %o0 = unsigned halfword at rs1
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,4
+ lduha [%o0]0x88,%o0 ! ASI_PL: %o0 = unsigned halfword at rs1
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_P_index,8
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P: %o0 = unsigned halfword at rs1 + index (%o0 + %o1)
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,8
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL: %o0 = unsigned halfword at rs1 + index (%o0 + %o1)
+ .end
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,4
+ prefetch [%o0+0],0 ! prefetch the address in %o0 with function code 0 (the read variant here)
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,4
+ prefetch [%o0+0],2 ! prefetch the address in %o0 with function code 2 (the write variant here)
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il
new file mode 100644
index 0000000000..cbe2b5aa27
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_64.il
@@ -0,0 +1,997 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! This file is to be used in place of vis.il in 64-bit builds.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+! (both pointer arguments are read from %o0/%o1; the result is left in %o0)
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,16
+ edge8 %o0,%o1,%o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+! (same as vis_edge8 but emits the edge8l form)
+ .inline vis_edge8l,16
+ edge8l %o0,%o1,%o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,16
+ edge16 %o0,%o1,%o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,16
+ edge16l %o0,%o1,%o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,16
+ edge32 %o0,%o1,%o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,16
+ edge32l %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+! (each template ORs -1024 into the edge result when movgu on %xcc fires)
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8cc,16
+ edge8 %o0,%o1,%o0
+ mov 0,%o1 ! default flag value: nothing to OR in
+ movgu %xcc,-1024,%o1 ! flag = -1024 when %xcc satisfies "greater unsigned"
+ or %o1,%o0,%o0 ! merge flag into the result; -1024 makes it negative
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+! (same sequence as vis_edge8cc, built on edge8l)
+ .inline vis_edge8lcc,16
+ edge8l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+! (same sequence as vis_edge8cc, built on edge16)
+ .inline vis_edge16cc,16
+ edge16 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+! (same sequence as vis_edge8cc, built on edge16l)
+ .inline vis_edge16lcc,16
+ edge16l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+! (same sequence as vis_edge8cc, built on edge32)
+ .inline vis_edge32cc,16
+ edge32 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+! (same sequence as vis_edge8cc, built on edge32l)
+ .inline vis_edge32lcc,16
+ edge32l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+! (alignaddr forms work on integer registers %o0/%o1; faligndata on %f0/%f2)
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,12
+ alignaddr %o0,%o1,%o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,12
+ alignaddrl %o0,%o1,%o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+! (result overwrites %f0)
+ .inline vis_faligndata,16
+ faligndata %f0,%f2,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+! (double operands are read from %f0/%f2; the integer result is left in %o0)
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ fcmple16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ fcmpne16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ fcmple32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ fcmpne32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ fcmpgt16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ fcmpeq16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ fcmpgt32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ fcmpeq32 %f0,%f2,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+! (float operands are read from %f1/%f3, double operands from %f0/%f2)
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16,12
+ fmul8x16 %f1,%f2,%f0
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+! (with the extra int argument in between, frs2 is read from %f4)
+ .inline vis_fmul8x16_dummy,16
+ fmul8x16 %f1,%f4,%f0
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ fmul8x16au %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ fmul8x16al %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ fmul8sux16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ fmul8ulx16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ fmuld8sux16 %f1,%f3,%f0
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ fmuld8ulx16 %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd16,16
+ fpadd16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+! (single-precision form: operands %f1/%f3, result written to %f0)
+ .inline vis_fpadd16s,8
+ fpadd16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ fpadd32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ fpadd32s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ fpsub16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ fpsub16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ fpsub32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ fpsub32s %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+! (each pack writes one 32-bit register; the pair forms fill %f0 then %f1)
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ fpack16 %f0,%f0
+ .end
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+! (first argument packed into %f0, second into %f1)
+ .inline vis_fpack16_pair,16
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+! (packs both doubles, then writes them with two 32-bit stores at +0 and +4)
+ .inline vis_st2_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+! (as vis_st2_fpack16, but writes the %f0 pair with a single std)
+ .inline vis_std_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ std %f0,[%o2]
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+! (fpackfix counterpart of vis_st2_fpack16)
+ .inline vis_st2_fpackfix,24
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+! (packs frs2 into %f0 only; %f1 is left untouched)
+ .inline vis_fpack16_to_hi,16
+ fpack16 %f2,%f0
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+! (packs frs2 into scratch %f3, then copies it into %f1; %f0 is left untouched)
+ .inline vis_fpack16_to_lo,16
+ fpack16 %f2,%f3
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ fpack32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ fpackfix %f0,%f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+! (first argument packed into %f0, second into %f1)
+ .inline vis_fpackfix_pair,16
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ .end
+
+!--------------------------------------------------------------------
+! Motion estimation
+! (pdist reads pxls1/pxls2 from %f2/%f4; %f0 carries accum and the result)
+! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/,
+! double pxls2 /*frs2*/);
+!
+ .inline vis_pxldist64,24
+ pdist %f2,%f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Channel merging
+! (two float operands in %f1/%f3 produce a double result in %f0)
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ fpmerge %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+! (fexpand takes one 32-bit source register and writes a double to %f0)
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ fexpand %f1,%f0
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+! (expands %f0, the first 32-bit register of the double argument)
+ .inline vis_fexpand_hi,8
+ fexpand %f0,%f0
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+! (expands %f1, the second 32-bit register of the double argument)
+ .inline vis_fexpand_lo,8
+ fexpand %f1,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+! (double forms: %f0,%f2 -> %f0; single-precision "s" forms: %f1,%f3 -> %f0)
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ fnor %f0,%f2,%f0
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ fnors %f1,%f3,%f0
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+! (implemented with the fandnot1 form of the instruction)
+ .inline vis_fandnot,16
+ fandnot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+! (implemented with the fandnot1s form of the instruction)
+ .inline vis_fandnots,8
+ fandnot1s %f1,%f3,%f0
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+! (implemented with fnot1)
+ .inline vis_fnot,8
+ fnot1 %f0,%f0
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+! (implemented with fnot1s)
+ .inline vis_fnots,4
+ fnot1s %f1,%f0
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ fxor %f0,%f2,%f0
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ fxors %f1,%f3,%f0
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ fnand %f0,%f2,%f0
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ fnands %f1,%f3,%f0
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ fand %f0,%f2,%f0
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ fands %f1,%f3,%f0
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ fxnor %f0,%f2,%f0
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ fxnors %f1,%f3,%f0
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+! (implemented with fsrc1)
+ .inline vis_fsrc,8
+ fsrc1 %f0,%f0
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+! (implemented with fsrc1s)
+ .inline vis_fsrcs,4
+ fsrc1s %f1,%f0
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+! (implemented with the fornot1 form of the instruction)
+ .inline vis_fornot,16
+ fornot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+! (implemented with the fornot1s form of the instruction)
+ .inline vis_fornots,8
+ fornot1s %f1,%f3,%f0
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ for %f0,%f2,%f0
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ fors %f1,%f3,%f0
+ .end
+!
+! double vis_fzero(/* void */)
+! (no arguments; the constant is written straight into %f0)
+ .inline vis_fzero,0
+ fzero %f0
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+! (data in %f0, destination address in %o1, mask in %o2; ASI is an immediate)
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,20
+ stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,20
+ stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+! (loads one word each from rs1 and rs2 into %f4/%f5, then stores to rs3)
+ .inline vis_stdfa_ASI_PST8P_int_pair,28
+ ld [%o0],%f4
+ ld [%o1],%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,20
+ stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,20
+ stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,20
+ stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,20
+ stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,20
+ stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S
+ .end
+
+!--------------------------------------------------------------------
+! Short store instructions
+! (data in %f0, address in %o1; the _index forms add %o2 to the address)
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,16
+ stda %f0,[%o1]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,24
+ stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,16
+ stda %f0,[%o1]0xd1 ! ASI_FL8_S
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,16
+ stda %f0,[%o1]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,24
+ stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,16
+ stda %f0,[%o1]0xd3 ! ASI_FL16_S
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,16
+ stda %f0,[%o1]0xd8 ! ASI_FL8_PL
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,16
+ stda %f0,[%o1]0xd9 ! ASI_FL8_SL
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,16
+ stda %f0,[%o1]0xda ! ASI_FL16_PL
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,16
+ stda %f0,[%o1]0xdb ! ASI_FL16_SL
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+! (every template loads into scratch %f4 and then copies it to %f0)
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8P,8
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+! (index in %o1 is added to the base address in %o0; ASI 0xd0 as above)
+ .inline vis_lddfa_ASI_FL8P_index,16
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+! (offset is index arithmetically shifted right by 16 bits)
+ .inline vis_lddfa_ASI_FL8P_hi,12
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+! (offset is the low 16 bits of index, sign-extended by the sll/sra pair)
+ .inline vis_lddfa_ASI_FL8P_lo,12
+ sll %o1,16,%o1
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,8
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,8
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,16
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,8
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,8
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,16
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,8
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,8
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,16
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,8
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL
+ fmovd %f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Graphics status register
+! (read returns %gsr in %o0; write copies %o0 into %gsr)
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+! (operands are read from %o0/%o1 and the result is left in %o0)
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+ .inline vis_array8,12
+ array8 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ array16 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ array32 %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+! (a double occupies the %f0:%f1 pair; these templates move 32-bit halves)
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ fmovs %f0,%f0
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0;
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_hi,12
+ fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_lo,12
+ fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+ .inline vis_freg_pair,8
+ fmovs %f1,%f0 ! %f1 = float frs1; put in hi;
+ fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1;
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+! (integer bits go to an FP register via the scratch slot at %sp+2183)
+ .inline vis_to_float,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+! (value1 -> %f0, value2 -> %f1; the same scratch slot is reused for both)
+ .inline vis_to_double,8
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ st %o1,[%sp+2183]
+ ld [%sp+2183],%f1
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+! (value is loaded into %f1 and then copied into %f0 as well)
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f1
+ fmovs %f1,%f0 ! duplicate value
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+! (64-bit store with stx, then a 64-bit FP load from the same slot)
+ .inline vis_ll_to_double,8
+ stx %o0,[%sp+2183]
+ ldd [%sp+2183],%f0
+ .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+! (read returns %asi in %o0; write copies %o0 into %asi)
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+! (_REG forms use the ASI held in %asi; _P/_PL forms use immediates 0x80/0x88)
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,8
+ lda [%o0+0]%asi,%f4
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,8
+ lda [%o0]0x80,%f4 ! ASI_P
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,8
+ lda [%o0]0x88,%f4 ! ASI_PL
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,8
+ ldda [%o0+0]%asi,%f4
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,8
+ ldda [%o0]0x80,%f4 ! ASI_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,8
+ ldda [%o0]0x88,%f4 ! ASI_PL
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+! (float data is stored directly from %f1; the address is read from %o1)
+ .inline vis_stfa_ASI_REG,12
+ sta %f1,[%o1+0]%asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,12
+ sta %f1,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,12
+ sta %f1,[%o1]0x88 ! ASI_PL
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+! (double data is stored directly from %f0; the address is read from %o1)
+ .inline vis_stdfa_ASI_REG,16
+ stda %f0,[%o1+0]%asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,16
+ stda %f0,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,16
+ stda %f0,[%o1]0x88 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+! (unsigned halfword load; address in %o0, result returned in %o0)
+ .inline vis_lduha_ASI_REG,8
+ lduha [%o0+0]%asi,%o0
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,8
+ lduha [%o0]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,8
+ lduha [%o0]0x88,%o0 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+! (index in %o1 is added to the base address in %o0)
+ .inline vis_lduha_ASI_P_index,16
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,16
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL
+ .end
+
+!--------------------------------------------------------------------
+! Prefetch
+! (address in %o0; the read template passes function code 0, the write one 2)
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,8
+ prefetch [%o0+0],0
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,8
+ prefetch [%o0+0],2
+ .end
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h
new file mode 100644
index 0000000000..275de59df8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_proto.h
@@ -0,0 +1,234 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Prototypes for the inline templates in vis.il (vis_64.il in 64-bit builds)
+ */
+
+#ifndef VIS_PROTO_H
+#define VIS_PROTO_H
+
+#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Pure edge handling instructions */
+int vis_edge8(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32l(void * /*frs1*/, void * /*frs2*/);
+
+/* Edge handling instructions with negative return values if cc set. */
+int vis_edge8cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/);
+
+/* Alignment instructions. */
+void *vis_alignaddr(void * /*rs1*/, int /*rs2*/);
+void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/);
+double vis_faligndata(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned comparison instructions. */
+int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned multiplication. The direct vis_fmul8x16 prototype is compiled out; a macro near the end of this header supplies that name. */
+#if 0
+double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+#endif
+double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+
+/* Partitioned addition & subtraction. */
+double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+
+/* Pixel packing & clamping. */
+float vis_fpack16(double /*frs2*/);
+double vis_fpack32(double /*frs1*/, double /*frs2*/);
+float vis_fpackfix(double /*frs2*/);
+
+/* Combined pack ops. */
+double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+void vis_st2_fpack16(double, double, double *);
+void vis_std_fpack16(double, double, double *);
+void vis_st2_fpackfix(double, double, double *);
+
+double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+
+/* Motion estimation. NOTE(review): vis_64.il defines its pdist template as vis_pxldist64(accum, pxls1, pxls2) and has no vis_pdist template - confirm this name. */
+double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+
+/* Channel merging. */
+double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+
+/* Pixel expansion. */
+double vis_fexpand(float /*frs2*/);
+double vis_fexpand_hi(double /*frs2*/);
+double vis_fexpand_lo(double /*frs2*/);
+
+/* Bitwise logical operators. */
+double vis_fnor(double /*frs1*/, double /*frs2*/);
+float vis_fnors(float /*frs1*/, float /*frs2*/);
+double vis_fandnot(double /*frs1*/, double /*frs2*/);
+float vis_fandnots(float /*frs1*/, float /*frs2*/);
+double vis_fnot(double /*frs1*/);
+float vis_fnots(float /*frs1*/);
+double vis_fxor(double /*frs1*/, double /*frs2*/);
+float vis_fxors(float /*frs1*/, float /*frs2*/);
+double vis_fnand(double /*frs1*/, double /*frs2*/);
+float vis_fnands(float /*frs1*/, float /*frs2*/);
+double vis_fand(double /*frs1*/, double /*frs2*/);
+float vis_fands(float /*frs1*/, float /*frs2*/);
+double vis_fxnor(double /*frs1*/, double /*frs2*/);
+float vis_fxnors(float /*frs1*/, float /*frs2*/);
+double vis_fsrc(double /*frs1*/);
+float vis_fsrcs(float /*frs1*/);
+double vis_fornot(double /*frs1*/, double /*frs2*/);
+float vis_fornots(float /*frs1*/, float /*frs2*/);
+double vis_for(double /*frs1*/, double /*frs2*/);
+float vis_fors(float /*frs1*/, float /*frs2*/);
+double vis_fzero(void);
+float vis_fzeros(void);
+double vis_fone(void);
+float vis_fones(void);
+
+/* Partial stores. */
+void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/,
+ void * /*rs3*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+
+/* Byte & short stores. */
+void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/);
+
+/* Byte & short loads. NOTE(review): vis_64.il also has vis_lddfa_ASI_FL8PL_index and vis_lddfa_ASI_FL16PL_index templates that are not declared here. */
+double vis_lddfa_ASI_FL8P(void * /*rs1*/);
+double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8S(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL16S(void * /*rs1*/);
+double vis_lddfa_ASI_FL8PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL8SL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16SL(void * /*rs1*/);
+
+/* Direct write to GSR, read from GSR */
+void vis_write_gsr(unsigned int /*GSR*/);
+unsigned int vis_read_gsr(void);
+
+/* Voxel texture mapping. Declared only when long long is available (_NO_LONGLONG undefined). */
+#if !defined(_NO_LONGLONG)
+unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Register aliasing and type casts. */
+float vis_read_hi(double /*frs1*/);
+float vis_read_lo(double /*frs1*/);
+double vis_write_hi(double /*frs1*/, float /*frs2*/);
+double vis_write_lo(double /*frs1*/, float /*frs2*/);
+double vis_freg_pair(float /*frs1*/, float /*frs2*/);
+float vis_to_float(unsigned int /*value*/);
+double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+double vis_to_double_dup(unsigned int /*value*/);
+#if !defined(_NO_LONGLONG)
+double vis_ll_to_double(unsigned long long /*value*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Miscellany (no inlines) */
+void vis_error(char * /*fmt*/, int /*a0*/);
+void vis_sim_init(void);
+
+/* For better performance: vis_fmul8x16 expands to the _dummy template with a literal 0 as the placeholder int argument. */
+#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg))
+
+/* Nicknames for explicit ASI loads and stores. */
+#define vis_st_u8 vis_stdfa_ASI_FL8P
+#define vis_st_u8_i vis_stdfa_ASI_FL8P_index
+#define vis_st_u8_le vis_stdfa_ASI_FL8PL
+#define vis_st_u16 vis_stdfa_ASI_FL16P
+#define vis_st_u16_i vis_stdfa_ASI_FL16P_index
+#define vis_st_u16_le vis_stdfa_ASI_FL16PL
+
+#define vis_ld_u8 vis_lddfa_ASI_FL8P
+#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index
+#define vis_ld_u8_le vis_lddfa_ASI_FL8PL
+#define vis_ld_u16 vis_lddfa_ASI_FL16P
+#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index
+#define vis_ld_u16_le vis_lddfa_ASI_FL16PL
+
+#define vis_pst_8 vis_stdfa_ASI_PST8P
+#define vis_pst_16 vis_stdfa_ASI_PST16P
+#define vis_pst_32 vis_stdfa_ASI_PST32P
+
+#define vis_st_u8s vis_stdfa_ASI_FL8S
+#define vis_st_u8s_le vis_stdfa_ASI_FL8SL
+#define vis_st_u16s vis_stdfa_ASI_FL16S
+#define vis_st_u16s_le vis_stdfa_ASI_FL16SL
+
+#define vis_ld_u8s vis_lddfa_ASI_FL8S
+#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL
+#define vis_ld_u16s vis_lddfa_ASI_FL16S
+#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL
+
+#define vis_pst_8s vis_stdfa_ASI_PST8S
+#define vis_pst_16s vis_stdfa_ASI_PST16S
+#define vis_pst_32s vis_stdfa_ASI_PST32S
+
+/* "<" and ">=" may be implemented in terms of ">" and "<=" by swapping the two operands. */
+#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a))
+#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a))
+#define vis_fcmpge16(a, b) vis_fcmple16((b), (a))
+#define vis_fcmpge32(a, b) vis_fcmple32((b), (a))
+
+#ifdef __cplusplus
+} // End of extern "C"
+#endif /* __cplusplus */
+
+#endif /* VIS_PROTO_H */
diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c
new file mode 100644
index 0000000000..cf9e8ac523
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.c
@@ -0,0 +1,131 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "prenv.h"
+#include "secerr.h"
+#include "blapi.h"
+#include "hasht.h"
+#include "plhash.h"
+#include "nsslowhash.h"
+#include "blapii.h"
+
+/* Library-initialization handle type returned by NSSLOW_Init(). The only
+ * instance in this file is the static dummyContext. */
+struct NSSLOWInitContextStr {
+ int count; /* not read or modified by any function in this file */
+};
+
+/* Per-hash state allocated by NSSLOWHASH_NewContext() and released by
+ * NSSLOWHASH_Destroy(). */
+struct NSSLOWHASHContextStr {
+ const SECHashObject *hashObj; /* algorithm descriptor from HASH_GetRawHashObject() */
+ void *hashCtxt; /* algorithm-specific state from hashObj->create() */
+};
+
+/* The single init handle handed out by NSSLOW_Init(). */
+static NSSLOWInitContext dummyContext = { 0 };
+/* PR_TRUE until NSSLOW_Init() completes successfully; while it is set,
+ * NSSLOWHASH_NewContext() refuses to create hash contexts. */
+static PRBool post_failed = PR_TRUE;
+
+/*
+ * Initialize the low-level hashing interface.
+ *
+ * Returns the shared init handle on success. When the system reports FIPS
+ * mode and the FIPS entry check fails, SEC_ERROR_LIBRARY_FAILURE is set,
+ * the library stays marked as failed and NULL is returned.
+ */
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+#ifdef FREEBL_NO_DEPEND
+    (void)FREEBL_InitStubs();
+#endif
+
+#ifndef NSS_FIPS_DISABLED
+    /* Going into FIPS mode requires the FIPS product to be installed;
+     * the entry check is only evaluated when FIPS mode is enabled. */
+    if (NSS_GetSystemFIPSEnabled() &&
+        BL_FIPSEntryOK(PR_TRUE, PR_FALSE) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        post_failed = PR_TRUE;
+        return NULL;
+    }
+#endif
+
+    post_failed = PR_FALSE;
+    return &dummyContext;
+}
+
+/* Shut the interface down. There is nothing to release; the handle is only
+ * checked (via PORT_Assert) to be the one NSSLOW_Init() returned. */
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    PORT_Assert(context == &dummyContext);
+}
+
+/* Reset the interface. No state is changed; the handle is only checked
+ * (via PORT_Assert) to be the one NSSLOW_Init() returned. */
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    PORT_Assert(context == &dummyContext);
+}
+
+/*
+ * Allocate a hash context for the given algorithm.
+ *
+ * initContext must be the handle returned by NSSLOW_Init(); otherwise
+ * SEC_ERROR_INVALID_ARGS is set. If initialization has not succeeded,
+ * SEC_ERROR_PKCS11_DEVICE_ERROR is set. Returns NULL on any failure,
+ * including allocation failure or an unavailable hash object.
+ */
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(NSSLOWInitContext *initContext,
+                      HASH_HashType hashType)
+{
+    NSSLOWHASHContext *ctx;
+
+    if (post_failed) {
+        PORT_SetError(SEC_ERROR_PKCS11_DEVICE_ERROR);
+        return NULL;
+    }
+    if (initContext != &dummyContext) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    ctx = PORT_ZNew(NSSLOWHASHContext);
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    ctx->hashObj = HASH_GetRawHashObject(hashType);
+    if (ctx->hashObj != NULL) {
+        ctx->hashCtxt = ctx->hashObj->create();
+    }
+    /* hashCtxt is only inspected when a hash object was found. */
+    if (ctx->hashObj == NULL || ctx->hashCtxt == NULL) {
+        PORT_Free(ctx);
+        return NULL;
+    }
+
+    return ctx;
+}
+
+/* Start (or restart) a hash computation by invoking the algorithm's
+ * begin() routine on this context's state. context must be non-NULL. */
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    /* No `return <expr>;` here: ISO C forbids a return statement with an
+     * expression in a function whose return type is void. */
+    context->hashObj->begin(context->hashCtxt);
+}
+
+/* Feed len bytes starting at buf into the hash by invoking the algorithm's
+ * update() routine. context must be non-NULL. */
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf,
+                  unsigned int len)
+{
+    /* No `return <expr>;` here: ISO C forbids a return statement with an
+     * expression in a function whose return type is void. */
+    context->hashObj->update(context->hashCtxt, buf, len);
+}
+
+/* Finish the hash by invoking the algorithm's end() routine; buf, ret and
+ * len are forwarded to it unchanged. context must be non-NULL. */
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    /* No `return <expr>;` here: ISO C forbids a return statement with an
+     * expression in a function whose return type is void. */
+    context->hashObj->end(context->hashCtxt, buf, ret, len);
+}
+
+/* Destroy a context created by NSSLOWHASH_NewContext(): the algorithm state
+ * is released through the hash object's destroy() routine (with its free
+ * flag set), then the wrapper itself is freed. A NULL context is ignored. */
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    /* NSSLOWHASH_NewContext() may return NULL; tolerate it here so callers
+     * can clean up unconditionally. */
+    if (context == NULL) {
+        return;
+    }
+    context->hashObj->destroy(context->hashCtxt, PR_TRUE);
+    PORT_Free(context);
+}
+
+/* Report the `length` field of the hash object bound to this context.
+ * context must be non-NULL. */
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    const SECHashObject *obj = context->hashObj;
+
+    return obj->length;
+}
diff --git a/security/nss/lib/freebl/nsslowhash.h b/security/nss/lib/freebl/nsslowhash.h
new file mode 100644
index 0000000000..d8f058715b
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Provide FIPS validated hashing for applications that only need hashing.
+ * NOTE: mac'ing requires keys and will not work in this interface.
+ * Also NOTE: this only works with Hashing. Only the FIPS interface is enabled.
+ */
+
+#ifndef _NSSLOWHASH_H_
+#define _NSSLOWHASH_H_
+
+/* Opaque handles; the structures are defined in nsslowhash.c. */
+typedef struct NSSLOWInitContextStr NSSLOWInitContext;
+typedef struct NSSLOWHASHContextStr NSSLOWHASHContext;
+
+/* Initialize the interface. Returns NULL (with an error code set) when the
+ * system is in FIPS mode and the FIPS entry check fails. */
+NSSLOWInitContext *NSSLOW_Init(void);
+/* Both expect the handle returned by NSSLOW_Init(). */
+void NSSLOW_Shutdown(NSSLOWInitContext *context);
+void NSSLOW_Reset(NSSLOWInitContext *context);
+/* Create a hash context for hashType. Returns NULL on failure, including
+ * when NSSLOW_Init() has not succeeded or initContext is not its result. */
+NSSLOWHASHContext *NSSLOWHASH_NewContext(
+ NSSLOWInitContext *initContext,
+ HASH_HashType hashType);
+/* Begin / Update / End forward to the selected algorithm's begin(),
+ * update() and end() routines. */
+void NSSLOWHASH_Begin(NSSLOWHASHContext *context);
+void NSSLOWHASH_Update(NSSLOWHASHContext *context,
+ const unsigned char *buf,
+ unsigned int len);
+/* NOTE(review): buf/ret/len are presumably the digest buffer, the returned
+ * digest length and the buffer capacity -- confirm against SECHashObject. */
+void NSSLOWHASH_End(NSSLOWHASHContext *context,
+ unsigned char *buf,
+ unsigned int *ret, unsigned int len);
+/* Release the algorithm state and the context itself. */
+void NSSLOWHASH_Destroy(NSSLOWHASHContext *context);
+/* Return the `length` field of the selected hash object. */
+unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context);
+
+#endif
diff --git a/security/nss/lib/freebl/ppc-crypto.h b/security/nss/lib/freebl/ppc-crypto.h
new file mode 100644
index 0000000000..4d283895f2
--- /dev/null
+++ b/security/nss/lib/freebl/ppc-crypto.h
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef PPC_CRYPTO_H
+#define PPC_CRYPTO_H 1
+
+#if defined(__powerpc64__) && defined(__ALTIVEC__) && \
+ !defined(NSS_DISABLE_ALTIVEC)
+#include "altivec-types.h"
+
+/* The ghash freebl test tries to use this in C++, and gcc defines conflict. */
+#ifdef __cplusplus
+#undef pixel
+#undef vector
+#undef bool
+#endif
+
+/*
+ * PPC CRYPTO requires at least gcc 8 or clang, and a target with VSX
+ * enabled. The LE check is purely because it's only been tested on LE.
+ * If you're interested in BE, please send a patch.
+ */
+#if (defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 8)) && \
+ defined(IS_LITTLE_ENDIAN) && defined(__VSX__)
+#define USE_PPC_CRYPTO
+#endif
+
+#endif /* defined(__powerpc64__) && !defined(NSS_DISABLE_ALTIVEC) && defined(__ALTIVEC__) */
+
+#endif
diff --git a/security/nss/lib/freebl/ppc-gcm-wrap.c b/security/nss/lib/freebl/ppc-gcm-wrap.c
new file mode 100644
index 0000000000..ac58744cbd
--- /dev/null
+++ b/security/nss/lib/freebl/ppc-gcm-wrap.c
@@ -0,0 +1,458 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Copyright(c) 2013, Intel Corp. */
+
+/* Wrapper functions for PowerPC optimized implementation of AES-GCM */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+#include <stdio.h>
+
+#include "ppc-gcm.h"
+#include "rijndael.h"
+
+/* State for one PowerPC-accelerated AES-GCM operation. */
+struct ppc_AES_GCMContextStr {
+ unsigned char Htbl[8 * AES_BLOCK_SIZE]; /* GHASH table filled by ppc_aes_gcmINIT() */
+ unsigned char X0[AES_BLOCK_SIZE]; /* encrypted initial counter block; passed to ppc_aes_gcmTAG() */
+ unsigned char T[AES_BLOCK_SIZE]; /* running GHASH accumulator */
+ unsigned char CTR[AES_BLOCK_SIZE]; /* current counter block */
+ AESContext *aes_context; /* caller's AES context; not freed by ppc_AES_GCM_DestroyContext() */
+ unsigned long tagBits; /* tag length in bits, validated in ppc_aes_gcmInitCounter() */
+ unsigned long Alen; /* number of AAD bytes hashed */
+ unsigned long Mlen; /* number of message bytes processed */
+ freeblCipherFunc cipher; /* cipher callback used to encrypt the initial counter */
+ PRBool ctr_context_init; /* PR_TRUE when IV/AAD were supplied at context creation */
+ gcmIVContext gcm_iv; /* IV-generation state used by gcm_GenerateIV() */
+};
+
+/* Forward declaration: defined later in this file and called from
+ * ppc_AES_GCM_CreateContext() and the AEAD entry points. */
+SECStatus ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm,
+ const unsigned char *iv,
+ unsigned long ivLen, unsigned long tagBits,
+ const unsigned char *aad, unsigned long aadLen);
+
+/*
+ * Create a GCM context on top of an already-keyed AES context.
+ *
+ *   context - the AESContext whose expanded key is used; stored, not copied
+ *   cipher  - block cipher callback used to encrypt the initial counter
+ *   params  - CK_NSS_GCM_PARAMS (IV, AAD, tag length), or NULL to create a
+ *             PKCS #11 message-style context whose IV/AAD are supplied with
+ *             each AEAD call
+ *
+ * Returns the new context, or NULL on allocation failure or when the
+ * parameters are rejected by ppc_aes_gcmInitCounter().
+ */
+ppc_AES_GCMContext *
+ppc_AES_GCM_CreateContext(void *context,
+                          freeblCipherFunc cipher,
+                          const unsigned char *params)
+{
+    ppc_AES_GCMContext *gcm = NULL;
+    AESContext *aes = (AESContext *)context;
+    const CK_NSS_GCM_PARAMS *gcmParams = (const CK_NSS_GCM_PARAMS *)params;
+    SECStatus rv;
+
+    gcm = PORT_ZNew(ppc_AES_GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+
+    /* initialize context fields */
+    gcm->aes_context = aes;
+    gcm->cipher = cipher;
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+    gcm->ctr_context_init = PR_FALSE;
+
+    /* first prepare H and its derivatives for ghash */
+    ppc_aes_gcmINIT(gcm->Htbl, aes->k.expandedKey, aes->Nr);
+
+    gcm_InitIVContext(&gcm->gcm_iv);
+
+    /* if gcmParams is NULL, then we are creating a PKCS #11 MESSAGE
+     * style context, in which we initialize the key once, then do separate
+     * iv/aad's for each message. If we are doing that kind of operation,
+     * we've finished with init here. We'll init the Counter in each AEAD
+     * call */
+    if (gcmParams == NULL) {
+        return gcm;
+    }
+
+    rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv,
+                                gcmParams->ulIvLen, gcmParams->ulTagBits,
+                                gcmParams->pAAD, gcmParams->ulAADLen);
+    if (rv != SECSuccess) {
+        /* Htbl (and possibly X0) now hold key-derived material, so wipe the
+         * context before releasing it, exactly as normal teardown does. */
+        ppc_AES_GCM_DestroyContext(gcm, PR_TRUE);
+        return NULL;
+    }
+    gcm->ctr_context_init = PR_TRUE;
+
+    return gcm;
+}
+
+/*
+ * (Re)initialize the per-message GCM state: validate the tag length, derive
+ * the initial counter block from the IV, encrypt it into X0, advance the
+ * counter by one and hash the AAD into T.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for an empty IV or an
+ * unsupported tag length, or SECFailure if the cipher callback fails.
+ */
+SECStatus
+ppc_aes_gcmInitCounter(ppc_AES_GCMContext *gcm,
+                       const unsigned char *iv, unsigned long ivLen,
+                       unsigned long tagBits,
+                       const unsigned char *aad, unsigned long aadLen)
+{
+    unsigned int cipherOutLen;
+    SECStatus rv;
+
+    if (ivLen == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Only these tag lengths (in bits) are accepted. */
+    switch (tagBits) {
+        case 128:
+        case 120:
+        case 112:
+        case 104:
+        case 96:
+        case 64:
+        case 32:
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    gcm->tagBits = tagBits;
+
+    /* Start a fresh message: no AAD or message bytes seen yet. */
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+
+    /* The running tag and X0 both start out as zero. */
+    PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE);
+    PORT_Memset(gcm->X0, 0, AES_BLOCK_SIZE);
+
+    /* Derive the initial counter block. */
+    if (ivLen != 12) {
+        /* If IV size is not 96 bits, then the initial counter value is
+         * GHASH of the IV */
+        ppc_aes_gcmHASH(gcm->Htbl, iv, ivLen, gcm->T);
+
+        ppc_aes_gcmTAG(gcm->Htbl, gcm->T, ivLen, 0, gcm->X0, gcm->CTR);
+
+        /* TAG should be zero again */
+        PORT_Memset(gcm->T, 0, AES_BLOCK_SIZE);
+    } else {
+        /* 96-bit IV: counter block is IV || 0x00000001. */
+        PORT_Memcpy(gcm->CTR, iv, AES_BLOCK_SIZE - 4);
+        gcm->CTR[12] = 0;
+        gcm->CTR[13] = 0;
+        gcm->CTR[14] = 0;
+        gcm->CTR[15] = 1;
+    }
+
+    /* Encrypt the initial counter; X0 is used at the end to encrypt the
+     * GHASH value. */
+    rv = (*gcm->cipher)(gcm->aes_context, gcm->X0, &cipherOutLen,
+                        AES_BLOCK_SIZE, gcm->CTR, AES_BLOCK_SIZE,
+                        AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Promote the counter by 1 (big-endian increment of bytes 12..15,
+     * propagating the carry only while the lower bytes wrapped to zero) */
+    gcm->CTR[14] += !(++gcm->CTR[15]);
+    gcm->CTR[13] += !(gcm->CTR[15]) && !(gcm->CTR[14]);
+    gcm->CTR[12] += !(gcm->CTR[15]) && !(gcm->CTR[14]) && !(gcm->CTR[13]);
+
+    /* Now hash AAD - it would actually make sense to separate the context
+     * creation from the AAD, because that would allow to reuse the H, which
+     * only changes when the AES key changes, and not every package, like the
+     * IV and AAD */
+    ppc_aes_gcmHASH(gcm->Htbl, aad, aadLen, gcm->T);
+    gcm->Alen += aadLen;
+    return SECSuccess;
+}
+
+/* Wipe the whole GCM context and, when freeit is set, release its memory.
+ * The referenced AES context is not touched. gcm must be non-NULL. */
+void
+ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit)
+{
+    PORT_Memset(gcm, 0, sizeof(*gcm));
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(gcm);
+}
+
+/*
+ * Encrypt inlen bytes from inbuf into outbuf and append the authentication
+ * tag, so *outlen becomes inlen + tag length. The context must have been
+ * created with IV/AAD parameters (ctr_context_init set).
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when the input is too long,
+ * SEC_ERROR_NOT_INITIALIZED when no IV was set up, and
+ * SEC_ERROR_OUTPUT_LEN (with *outlen set to the required size) when
+ * maxout is too small. blocksize is not used.
+ */
+SECStatus
+ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm,
+                          unsigned char *outbuf,
+                          unsigned int *outlen, unsigned int maxout,
+                          const unsigned char *inbuf, unsigned int inlen,
+                          unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char tag[AES_BLOCK_SIZE];
+
+    // GCM has a 16 octet block, with a 32-bit block counter
+    // Limit in accordance with SP800-38D
+    if (sizeof(inlen) > 4 &&
+        inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    /* Round the tag length up to whole bytes and make sure that
+     * inlen + tagBytes cannot wrap. */
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    if (UINT_MAX - inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagBytes) {
+        *outlen = inlen + tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Encrypt first, then authenticate the ciphertext. */
+    ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR,
+                     gcm->aes_context->k.expandedKey,
+                     gcm->aes_context->Nr);
+    ppc_aes_gcmHASH(gcm->Htbl, outbuf, inlen, gcm->T);
+
+    gcm->Mlen += inlen;
+
+    ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, tag);
+
+    *outlen = inlen + tagBytes;
+
+    /* The (possibly truncated) tag goes right after the ciphertext. */
+    PORT_Memcpy(outbuf + inlen, tag, tagBytes);
+    return SECSuccess;
+}
+
+/*
+ * Decrypt and verify a buffer that consists of ciphertext followed by the
+ * authentication tag. On success *outlen is the plaintext length
+ * (inlen minus the tag length).
+ *
+ * Fails with SEC_ERROR_NOT_INITIALIZED when no IV was set up,
+ * SEC_ERROR_INPUT_LEN when the input is shorter than the tag or too long,
+ * SEC_ERROR_OUTPUT_LEN (with *outlen set to the required size) when maxout
+ * is too small, and SEC_ERROR_BAD_DATA when the tag does not verify; in
+ * that last case the output buffer is zeroed. blocksize is not used.
+ */
+SECStatus
+ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm,
+                          unsigned char *outbuf,
+                          unsigned int *outlen, unsigned int maxout,
+                          const unsigned char *inbuf, unsigned int inlen,
+                          unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+    const unsigned char *intag;
+
+    if (!gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* get the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* From here on inlen is the ciphertext length only. */
+    inlen -= tagBytes;
+    intag = inbuf + inlen;
+
+    // GCM has a 16 octet block, with a 32-bit block counter
+    // Limit in accordance with SP800-38D
+    if (sizeof(inlen) > 4 &&
+        inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Hash the ciphertext before decrypting it, so the operation also works
+     * when inbuf and outbuf are the same buffer. */
+    ppc_aes_gcmHASH(
+        gcm->Htbl,
+        inbuf,
+        inlen,
+        gcm->T);
+    ppc_aes_gcmCRYPT(
+        inbuf,
+        outbuf,
+        inlen,
+        gcm->CTR,
+        gcm->aes_context->k.expandedKey,
+        gcm->aes_context->Nr);
+
+    gcm->Mlen += inlen;
+    ppc_aes_gcmTAG(
+        gcm->Htbl,
+        gcm->T,
+        gcm->Mlen,
+        gcm->Alen,
+        gcm->X0,
+        T);
+
+    if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+        /* Never hand unauthenticated plaintext back to the caller. Use the
+         * PORT_ wrapper like the rest of this file. */
+        PORT_Memset(outbuf, 0, inlen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+    *outlen = inlen;
+
+    return SECSuccess;
+}
+
+/*
+ * PKCS #11 message-style encryption: the IV, tag length and tag buffer come
+ * from the CK_GCM_MESSAGE_PARAMS in params, the AAD from aad/aadLen. The
+ * ciphertext (inlen bytes) is written to outbuf and the tag to
+ * gcmParams->pTag.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN for over-long input,
+ * SEC_ERROR_INVALID_ARGS for a wrong paramLen, SEC_ERROR_LIBRARY_FAILURE
+ * when the context was created with IV parameters, SEC_ERROR_OUTPUT_LEN
+ * (with *outlen set to inlen) when maxout is too small, or when IV
+ * generation / counter initialization fails. blocksize is not used.
+ */
+SECStatus
+ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm,
+                        unsigned char *outbuf,
+                        unsigned int *outlen, unsigned int maxout,
+                        const unsigned char *inbuf, unsigned int inlen,
+                        void *params, unsigned int paramLen,
+                        const unsigned char *aad, unsigned int aadLen,
+                        unsigned int blocksize)
+{
+    const CK_GCM_MESSAGE_PARAMS *msgParams =
+        (const CK_GCM_MESSAGE_PARAMS *)params;
+    unsigned char tag[AES_BLOCK_SIZE];
+    unsigned int tagBytes;
+    SECStatus rv;
+
+    // GCM has a 16 octet block, with a 32-bit block counter
+    // Limit in accordance with SP800-38D
+    if (sizeof(inlen) > 4 &&
+        inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* paramLen comes all the way from the application layer, make sure
+     * it's correct */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* if we were initialized with the C_EncryptInit, we shouldn't be in
+     * this function */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Produce this message's IV, then reset counter/tag state for it. */
+    rv = gcm_GenerateIV(&gcm->gcm_iv, msgParams->pIv, msgParams->ulIvLen,
+                        msgParams->ulIvFixedBits, msgParams->ivGenerator);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    rv = ppc_aes_gcmInitCounter(gcm, msgParams->pIv, msgParams->ulIvLen,
+                                msgParams->ulTagBits, aad, aadLen);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* Encrypt, then authenticate the ciphertext. */
+    ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR,
+                     gcm->aes_context->k.expandedKey,
+                     gcm->aes_context->Nr);
+    ppc_aes_gcmHASH(gcm->Htbl, outbuf, inlen, gcm->T);
+
+    gcm->Mlen += inlen;
+
+    ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, tag);
+
+    /* In message mode the tag is returned separately from the ciphertext. */
+    *outlen = inlen;
+    PORT_Memcpy(msgParams->pTag, tag, tagBytes);
+    return SECSuccess;
+}
+
+/*
+ * PKCS #11 message-style decryption: the IV, tag length and expected tag
+ * come from the CK_GCM_MESSAGE_PARAMS in params, the AAD from aad/aadLen.
+ * inbuf holds ciphertext only; on success *outlen equals inlen.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS for a wrong paramLen,
+ * SEC_ERROR_LIBRARY_FAILURE when the context was created with IV
+ * parameters, SEC_ERROR_INPUT_LEN for over-long input,
+ * SEC_ERROR_OUTPUT_LEN (with *outlen set to inlen) when maxout is too
+ * small, when counter initialization fails, and SEC_ERROR_BAD_DATA when the
+ * tag does not verify; in that last case the output buffer is zeroed.
+ * blocksize is not used.
+ */
+SECStatus
+ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm,
+                        unsigned char *outbuf,
+                        unsigned int *outlen, unsigned int maxout,
+                        const unsigned char *inbuf, unsigned int inlen,
+                        void *params, unsigned int paramLen,
+                        const unsigned char *aad, unsigned int aadLen,
+                        unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+    const unsigned char *intag;
+    const CK_GCM_MESSAGE_PARAMS *gcmParams =
+        (const CK_GCM_MESSAGE_PARAMS *)params;
+    SECStatus rv;
+
+    /* paramLen comes all the way from the application layer, make sure
+     * it's correct */
+    if (paramLen != sizeof(CK_GCM_MESSAGE_PARAMS)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* if we were initialized with the C_DecryptInit, we shouldn't be in this
+     * function */
+    if (gcm->ctr_context_init) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    // GCM has a 16 octet block, with a 32-bit block counter
+    // Limit in accordance with SP800-38D
+    if (sizeof(inlen) > 4 &&
+        inlen >= ((1ULL << 32) - 2) * AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    rv = ppc_aes_gcmInitCounter(gcm, gcmParams->pIv, gcmParams->ulIvLen,
+                                gcmParams->ulTagBits, aad, aadLen);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    intag = gcmParams->pTag;
+    PORT_Assert(tagBytes != 0);
+
+    /* Hash the ciphertext before decrypting it, so the operation also works
+     * when inbuf and outbuf are the same buffer. */
+    ppc_aes_gcmHASH(gcm->Htbl, inbuf, inlen, gcm->T);
+    ppc_aes_gcmCRYPT(inbuf, outbuf, inlen, gcm->CTR,
+                     gcm->aes_context->k.expandedKey,
+                     gcm->aes_context->Nr);
+
+    gcm->Mlen += inlen;
+    ppc_aes_gcmTAG(gcm->Htbl, gcm->T, gcm->Mlen, gcm->Alen, gcm->X0, T);
+
+    if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+        /* Never hand unauthenticated plaintext back to the caller. Use the
+         * PORT_ wrapper like the rest of this file. */
+        PORT_Memset(outbuf, 0, inlen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+    *outlen = inlen;
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ppc-gcm.h b/security/nss/lib/freebl/ppc-gcm.h
new file mode 100644
index 0000000000..1d94c0c338
--- /dev/null
+++ b/security/nss/lib/freebl/ppc-gcm.h
@@ -0,0 +1,76 @@
+/******************************************************************************/
+/* LICENSE: */
+/* This submission to NSS is to be made available under the terms of the */
+/* Mozilla Public License, v. 2.0. You can obtain one at http: */
+/* //mozilla.org/MPL/2.0/. */
+/******************************************************************************/
+
+#ifndef PPC_GCM_H
+#define PPC_GCM_H 1
+
+#include "blapii.h"
+
+/* Opaque GCM context; the structure is defined in ppc-gcm-wrap.c. */
+typedef struct ppc_AES_GCMContextStr ppc_AES_GCMContext;
+
+/* Create a GCM context over an AES context. params is a CK_NSS_GCM_PARAMS,
+ * or NULL for a message-style context used with the *AEAD functions.
+ * Returns NULL on failure. */
+ppc_AES_GCMContext *ppc_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *params);
+
+/* Zero the context and, if freeit is set, free it. */
+void ppc_AES_GCM_DestroyContext(ppc_AES_GCMContext *gcm, PRBool freeit);
+
+/* Encrypt inbuf and append the tag; *outlen = inlen + tag length. */
+SECStatus ppc_AES_GCM_EncryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+/* Decrypt and verify ciphertext followed by its tag; *outlen = inlen - tag
+ * length. */
+SECStatus ppc_AES_GCM_DecryptUpdate(ppc_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+/* Message-style encryption: params is a CK_GCM_MESSAGE_PARAMS; the tag is
+ * written to its pTag member rather than appended to outbuf. */
+SECStatus ppc_AES_GCM_EncryptAEAD(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+/* Message-style decryption: params is a CK_GCM_MESSAGE_PARAMS; the expected
+ * tag is read from its pTag member. */
+SECStatus ppc_AES_GCM_DecryptAEAD(ppc_AES_GCMContext *gcm,
+ unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ void *params, unsigned int paramLen,
+ const unsigned char *aad, unsigned int aadLen,
+ unsigned int blocksize);
+
+/* Prototypes of the functions defined in the assembler file. */
+
+/* Prepares the constants used in the aggregated reduction method */
+void ppc_aes_gcmINIT(unsigned char Htbl[8 * 16],
+ PRUint32 *KS,
+ int NR);
+
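+/* Folds the bit lengths of the AAD and message into the GHASH state Tp,
+ * XORs the result with X0 and writes the 16-byte result to TAG */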
+void ppc_aes_gcmTAG(unsigned char Htbl[8 * 16],
+ unsigned char *Tp,
+ unsigned long Mlen,
+ unsigned long Alen,
+ unsigned char *X0,
+ unsigned char *TAG);
+
+/* Folds data into the GHASH state Tp. Used for the Additional Authenticated
+ Data (before enc/dec) as well as for the ciphertext.
+ Operates on any length of data. Partial block is padded internally. */
+void ppc_aes_gcmHASH(unsigned char Htbl[8 * 16],
+ const unsigned char *AAD,
+ unsigned long Alen,
+ unsigned char *Tp);
+
+/* Crypt only, used in combination with ppc_aes_gcmHASH().
+ Operates on any length of data; however, a partial block should only be
+ encrypted at the last call, otherwise the result will be incorrect. */
+void ppc_aes_gcmCRYPT(const unsigned char *PT,
+ unsigned char *CT,
+ unsigned long len,
+ unsigned char *CTRP,
+ PRUint32 *KS,
+ int NR);
+
+#endif
diff --git a/security/nss/lib/freebl/ppc-gcm.s b/security/nss/lib/freebl/ppc-gcm.s
new file mode 100644
index 0000000000..06ad5862c1
--- /dev/null
+++ b/security/nss/lib/freebl/ppc-gcm.s
@@ -0,0 +1,1051 @@
+# This submission to NSS is to be made available under the terms of the
+# Mozilla Public License, v. 2.0. You can obtain one at http://mozilla.org/MPL/2.0/
+# Copyright(c) 2021, Niels Möller and Mamone Tarsha
+
+# Registers:
+
+.set SP, 1
+.set TOCP, 2
+
+# Load the 16-byte constant at symbol DATA into vector register VR.
+# The symbol's address is fetched from the GOT relative to r2 (TOCP) and
+# left in GPR, which is clobbered.
+.macro VEC_LOAD_DATA VR, DATA, GPR
+ addis \GPR, 2, \DATA@got@ha
+ ld \GPR, \DATA@got@l(\GPR)
+ lvx \VR, 0, \GPR
+.endm
+
+# Load 16 bytes from address GPR+IDX into vector register VR (addressed as
+# VSX register VR+32), then permute its bytes through SWAP_MASK.
+.macro VEC_LOAD VR, GPR, IDX
+ lxvd2x \VR+32, \IDX, \GPR
+ vperm \VR, \VR, \VR, SWAP_MASK
+.endm
+
+# Same as VEC_LOAD, but also advances the index register IDX by 16 so the
+# next use reads the following 16-byte block.
+.macro VEC_LOAD_INC VR, GPR, IDX
+ lxvd2x \VR+32, \IDX, \GPR
+ addi \IDX,\IDX,16
+ vperm \VR, \VR, \VR, SWAP_MASK
+.endm
+
+# Permute VR through SWAP_MASK and store the 16 bytes at address GPR+IDX.
+# Note that VR itself is left in its permuted form.
+.macro VEC_STORE VR, GPR, IDX
+ vperm \VR, \VR, \VR, SWAP_MASK
+ stxvd2x \VR+32, \IDX, \GPR
+.endm
+
+# 0 < LEN < 16, pad the remaining bytes with zeros
+#
+# Reads LEN bytes starting at DATA into two 64-bit GPRs: VAL1 receives
+# bytes 0-7 and VAL0 bytes 8-15, each with the lowest-addressed byte in the
+# most significant position (assuming little-endian mode for ldbrx).
+# TMP0, TMP1 and TMP2 are scratch GPRs and are clobbered.
+.macro LOAD_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
+ li \TMP0, 0
+ li \VAL1, 0
+ li \VAL0, 0
+ # at least 8 bytes: fetch the first doubleword in one go
+ andi. \TMP1, \LEN, 8
+ beq 1f
+ ldbrx \VAL1, 0, \DATA
+ li \TMP0, 8
+1:
+ # remaining LEN mod 8 bytes are gathered one at a time into VAL0,
+ # starting at the most significant byte (shift 56)
+ andi. \TMP1, \LEN, 7
+ beq 3f
+ li \TMP1, 56
+2:
+ lbzx \TMP2, \TMP0, \DATA
+ sld \TMP2, \TMP2, \TMP1
+ subi \TMP1, \TMP1, 8
+ or \VAL0, \VAL0, \TMP2
+ addi \TMP0, \TMP0, 1
+ cmpld \TMP0, \LEN
+ bne 2b
+ # fewer than 8 bytes in total: the gathered bytes belong in VAL1
+ andi. \TMP1, \LEN, 8
+ bne 3f
+ mr \VAL1, \VAL0
+ li \VAL0, 0
+3:
+.endm
+
+# 0 < LEN < 16
+#
+# Writes the first LEN bytes of the pair VAL1 (bytes 0-7) / VAL0 (bytes
+# 8-15) to DATA, most significant byte first. When LEN < 8, VAL0 is
+# overwritten with VAL1. TMP0, TMP1 and TMP2 are clobbered.
+.macro STORE_LEN DATA, LEN, VAL1, VAL0, TMP0, TMP1, TMP2
+ # at least 8 bytes: store the first doubleword in one go
+ andi. \TMP1, \LEN, 8
+ beq 1f
+ stdbrx \VAL1, 0, \DATA
+ li \TMP0, 8
+ b 2f
+1:
+ li \TMP0, 0
+ mr \VAL0, \VAL1
+2:
+ # remaining LEN mod 8 bytes are written one at a time from VAL0,
+ # starting with its most significant byte (shift 56)
+ andi. \TMP1, \LEN, 7
+ beq 4f
+ li \TMP1, 56
+3:
+ srd \TMP2, \VAL0, \TMP1
+ subi \TMP1, \TMP1, 8
+ stbx \TMP2, \TMP0, \DATA
+ addi \TMP0, \TMP0, 1
+ cmpld \TMP0, \LEN
+ bne 3b
+4:
+.endm
+
+.text
+
+################################################################################
+# Generates the H table
+# void ppc_aes_gcmINIT(uint8_t Htbl[16*8], uint32_t *KS, int NR);
+#
+# In:  r3 = Htbl (output, 8 x 16 bytes), r4 = KS (AES key schedule),
+#      r5 = NR (number of AES rounds; 10 and 12 are tested explicitly,
+#      any other value runs 14 rounds).
+# Out: Htbl holds, at 16-byte offsets 0..7, the pairs (L, M) derived from
+#      H, H^2, H^3 and H^4 in that order.
+# Uses GPRs r6-r10 and vector registers v0-v19 only.
+.globl ppc_aes_gcmINIT
+.type ppc_aes_gcmINIT,@function
+.align 5
+ppc_aes_gcmINIT:
+addis TOCP,12,(.TOC.-ppc_aes_gcmINIT)@ha
+addi TOCP,TOCP,(.TOC.-ppc_aes_gcmINIT)@l
+.localentry ppc_aes_gcmINIT, .-ppc_aes_gcmINIT
+
+.set Htbl, 3
+.set KS, 4
+.set NR, 5
+
+# NOTE: ZERO/MSB/ONE share register numbers with H4L/H4M/H3L below; the
+# former are only needed before the latter are written.
+.set ZERO, 19
+.set MSB, 18
+.set ONE, 17
+.set SWAP_MASK, 0
+.set POLY, 1
+.set K, 2
+.set H, 3
+.set H2, 4
+.set H3, 5
+.set H4, 6
+.set HP, 7
+.set HS, 8
+.set R, 9
+.set F, 10
+.set T, 11
+.set H1M, 12
+.set H1L, 13
+.set H2M, 14
+.set H2L, 15
+.set H3M, 16
+.set H3L, 17
+.set H4M, 18
+.set H4L, 19
+
+ VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 6
+ VEC_LOAD_DATA POLY, .Lpoly, 6
+
+ # r6 = running byte offset into the key schedule.
+ # H starts out as round key 0, i.e. the state of an all-zero block after
+ # the initial key addition; the rounds below then encrypt it.
+ li 6, 0
+ VEC_LOAD_INC H, KS, 6
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ # 9 rounds done; stop here for NR == 10
+ cmpwi NR, 10
+ beq .LH_done
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ # 11 rounds done; stop here for NR == 12
+ cmpwi NR, 12
+ beq .LH_done
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+ VEC_LOAD_INC K, KS, 6
+ vcipher H, H, K
+
+.LH_done:
+ # final AES round with the last round key
+ VEC_LOAD K, KS, 6
+ vcipherlast H, H, K
+
+ # Shift H left by one bit and, when the bit shifted out was set, XOR in
+ # POLY (MSB is a mask splat from the sign of H's leading byte).
+ vupkhsb MSB, H
+ vspltisb ONE, 1
+ vspltb MSB, MSB, 0
+ vsl H, H, ONE
+ vand MSB, MSB, POLY
+ vxor ZERO, ZERO, ZERO
+ vxor H, H, MSB
+ vsldoi POLY, ZERO, POLY, 8
+
+ # split H into the (H1L, H1M) pair used by the multiplication routines
+ vpmsumd HP, H, POLY
+ vsldoi HS, H, H, 8
+ vxor HP, HP, HS
+ vsldoi H1L, HP, HS, 8
+ vsldoi H1M, HS, HP, 8
+ vsldoi H1L, H1L, H1L, 8
+
+ # calculate H^2
+
+ vpmsumd F, H, H1L
+ vpmsumd R, H, H1M
+
+ vpmsumd T, F, POLY
+ vsldoi H2, F, F, 8
+ vxor R, R, T
+ vxor H2, H2, R
+
+ vpmsumd HP, H2, POLY
+ vsldoi HS, H2, H2, 8
+ vxor HP, HP, HS
+ vsldoi H2L, HP, HS, 8
+ vsldoi H2M, HS, HP, 8
+ vsldoi H2L, H2L, H2L, 8
+
+ # calculate H^3
+
+ vpmsumd F, H2, H1L
+ vpmsumd R, H2, H1M
+
+ vpmsumd T, F, POLY
+ vsldoi H3, F, F, 8
+ vxor R, R, T
+ vxor H3, H3, R
+
+ vpmsumd HP, H3, POLY
+ vsldoi HS, H3, H3, 8
+ vxor HP, HP, HS
+ vsldoi H3L, HP, HS, 8
+ vsldoi H3M, HS, HP, 8
+ vsldoi H3L, H3L, H3L, 8
+
+ # calculate H^4
+
+ vpmsumd F, H2, H2L
+ vpmsumd R, H2, H2M
+
+ vpmsumd T, F, POLY
+ vsldoi H4, F, F, 8
+ vxor R, R, T
+ vxor H4, H4, R
+
+ vpmsumd HP, H4, POLY
+ vsldoi HS, H4, H4, 8
+ vxor HP, HP, HS
+ vsldoi H4L, HP, HS, 8
+ vsldoi H4M, HS, HP, 8
+ vsldoi H4L, H4L, H4L, 8
+
+ # write the table: H1L, H1M, H2L, H2M, H3L, H3M, H4L, H4M
+ li 8, 16*1
+ li 9, 16*2
+ li 10, 16*3
+ stxvd2x H1L+32, 0, Htbl
+ stxvd2x H1M+32, 8, Htbl
+ stxvd2x H2L+32, 9, Htbl
+ stxvd2x H2M+32, 10, Htbl
+ li 7, 16*4
+ li 8, 16*5
+ li 9, 16*6
+ li 10, 16*7
+ stxvd2x H3L+32, 7, Htbl
+ stxvd2x H3M+32, 8, Htbl
+ stxvd2x H4L+32, 9, Htbl
+ stxvd2x H4M+32, 10, Htbl
+
+ blr
+.size ppc_aes_gcmINIT, . - ppc_aes_gcmINIT
+
+################################################################################
+# Authenticate only
+# void ppc_aes_gcmHASH(uint8_t Htbl[16*8], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
+#
+# In:  r3 = Htbl (table written by ppc_aes_gcmINIT), r4 = AAD (input data),
+#      r5 = Alen (input length in bytes), r6 = Tp (16-byte running digest).
+# Out: *Tp is updated in place with the input folded in.
+# The input is consumed 4 blocks at a time, then 2, then 1, and finally a
+# zero-padded partial block. With Alen == 0 the digest is loaded and
+# stored back without being multiplied.
+# v28-v31 are saved at negative offsets from SP (the stack pointer itself
+# is not moved) and restored before returning.
+.globl ppc_aes_gcmHASH
+.type ppc_aes_gcmHASH,@function
+.align 5
+ppc_aes_gcmHASH:
+addis TOCP,12,(.TOC.-ppc_aes_gcmHASH)@ha
+addi TOCP,TOCP,(.TOC.-ppc_aes_gcmHASH)@l
+.localentry ppc_aes_gcmHASH, .-ppc_aes_gcmHASH
+
+.set Htbl, 3
+.set AAD, 4
+.set Alen, 5
+.set Tp, 6
+
+.set SWAP_MASK, 0
+.set POLY, 1
+.set D, 2
+.set C0, 3
+.set C1, 4
+.set C2, 5
+.set C3, 6
+.set T, 7
+.set R, 8
+.set F, 9
+.set R2, 10
+.set F2, 11
+.set R3, 12
+.set F3, 13
+.set R4, 14
+.set F4, 15
+.set H1M, 16
+.set H1L, 17
+.set H2M, 18
+.set H2L, 19
+.set H3M, 28
+.set H3L, 29
+.set H4M, 30
+.set H4L, 31
+
+ # store non-volatile vector registers
+ addi 7, SP, -16
+ stvx 31, 0, 7
+ addi 7, SP, -32
+ stvx 30, 0, 7
+ addi 7, SP, -48
+ stvx 29, 0, 7
+ addi 7, SP, -64
+ stvx 28, 0, 7
+
+ VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 7
+ VEC_LOAD_DATA POLY, .Lpoly_r, 7
+
+ # D = current digest
+ VEC_LOAD D, Tp, 0
+
+ # --- process 4 blocks ---
+
+ srdi. 7, Alen, 6 # 4-blocks loop count
+ beq .L2x
+
+ mtctr 7 # set counter register
+
+ # load table elements
+ li 8, 1*16
+ li 9, 2*16
+ li 10, 3*16
+ lxvd2x H1L+32, 0, Htbl
+ lxvd2x H1M+32, 8, Htbl
+ lxvd2x H2L+32, 9, Htbl
+ lxvd2x H2M+32, 10, Htbl
+ li 7, 4*16
+ li 8, 5*16
+ li 9, 6*16
+ li 10, 7*16
+ lxvd2x H3L+32, 7, Htbl
+ lxvd2x H3M+32, 8, Htbl
+ lxvd2x H4L+32, 9, Htbl
+ lxvd2x H4M+32, 10, Htbl
+
+ # r8/r9/r10 = offsets of blocks 1..3 within each 64-byte group
+ li 8, 0x10
+ li 9, 0x20
+ li 10, 0x30
+.align 5
+.L4x_loop:
+ # load input
+ lxvd2x C0+32, 0, AAD
+ lxvd2x C1+32, 8, AAD
+ lxvd2x C2+32, 9, AAD
+ lxvd2x C3+32, 10, AAD
+
+ vperm C0, C0, C0, SWAP_MASK
+ vperm C1, C1, C1, SWAP_MASK
+ vperm C2, C2, C2, SWAP_MASK
+ vperm C3, C3, C3, SWAP_MASK
+
+ # digest combining
+ vxor C0, C0, D
+
+ # polynomial multiplication
+ # (oldest block pairs with the H4 entries, newest with the H1 entries)
+ vpmsumd F2, H3L, C1
+ vpmsumd R2, H3M, C1
+ vpmsumd F3, H2L, C2
+ vpmsumd R3, H2M, C2
+ vpmsumd F4, H1L, C3
+ vpmsumd R4, H1M, C3
+ vpmsumd F, H4L, C0
+ vpmsumd R, H4M, C0
+
+ # deferred recombination of partial products
+ vxor F3, F3, F4
+ vxor R3, R3, R4
+ vxor F, F, F2
+ vxor R, R, R2
+ vxor F, F, F3
+ vxor R, R, R3
+
+ # reduction
+ vpmsumd T, F, POLY
+ vsldoi D, F, F, 8
+ vxor R, R, T
+ vxor D, R, D
+
+ addi AAD, AAD, 0x40
+ bdnz .L4x_loop
+
+ # keep only the low 6 bits: Alen = Alen mod 64
+ clrldi Alen, Alen, 58
+.L2x:
+ # --- process 2 blocks ---
+
+ srdi. 7, Alen, 5
+ beq .L1x
+
+ # load table elements
+ li 8, 1*16
+ li 9, 2*16
+ li 10, 3*16
+ lxvd2x H1L+32, 0, Htbl
+ lxvd2x H1M+32, 8, Htbl
+ lxvd2x H2L+32, 9, Htbl
+ lxvd2x H2M+32, 10, Htbl
+
+ # load input
+ li 10, 0x10
+ lxvd2x C0+32, 0, AAD
+ lxvd2x C1+32, 10, AAD
+
+ vperm C0, C0, C0, SWAP_MASK
+ vperm C1, C1, C1, SWAP_MASK
+
+ # previous digest combining
+ vxor C0, C0, D
+
+ # polynomial multiplication
+ vpmsumd F2, H1L, C1
+ vpmsumd R2, H1M, C1
+ vpmsumd F, H2L, C0
+ vpmsumd R, H2M, C0
+
+ # deferred recombination of partial products
+ vxor F, F, F2
+ vxor R, R, R2
+
+ # reduction
+ vpmsumd T, F, POLY
+ vsldoi D, F, F, 8
+ vxor R, R, T
+ vxor D, R, D
+
+ addi AAD, AAD, 0x20
+ # keep only the low 5 bits: Alen = Alen mod 32
+ clrldi Alen, Alen, 59
+.L1x:
+ # --- process 1 block ---
+
+ srdi. 7, Alen, 4
+ beq .Ltail
+
+ # load table elements
+ li 8, 1*16
+ lxvd2x H1L+32, 0, Htbl
+ lxvd2x H1M+32, 8, Htbl
+
+ # load input
+ lxvd2x C0+32, 0, AAD
+
+ vperm C0, C0, C0, SWAP_MASK
+
+ # previous digest combining
+ vxor C0, C0, D
+
+ # polynomial multiplication
+ vpmsumd F, H1L, C0
+ vpmsumd R, H1M, C0
+
+ # reduction
+ vpmsumd T, F, POLY
+ vsldoi D, F, F, 8
+ vxor R, R, T
+ vxor D, R, D
+
+ addi AAD, AAD, 0x10
+ # keep only the low 4 bits: Alen = Alen mod 16
+ clrldi Alen, Alen, 60
+
+.Ltail:
+ cmpldi Alen, 0
+ beq .Lh_done
+ # --- process the final partial block ---
+
+ # load table elements
+ li 8, 1*16
+ lxvd2x H1L+32, 0, Htbl
+ lxvd2x H1M+32, 8, Htbl
+
+ # Gather the 1..15 trailing bytes, zero padded, into r10 (first half)
+ # and r9 (second half). r3 (Htbl) is reused as scratch here; the table
+ # entries needed below are already in registers.
+ LOAD_LEN AAD, Alen, 10, 9, 3, 7, 8
+ mtvrd C0, 10
+ mtvrd C1, 9
+ xxmrghd C0+32, C0+32, C1+32
+
+ # previous digest combining
+ vxor C0, C0, D
+
+ # polynomial multiplication
+ vpmsumd F, H1L, C0
+ vpmsumd R, H1M, C0
+
+ # reduction
+ vpmsumd T, F, POLY
+ vsldoi D, F, F, 8
+ vxor R, R, T
+ vxor D, R, D
+.Lh_done:
+ # write the updated digest back to *Tp
+ VEC_STORE D, Tp, 0
+
+ # restore non-volatile vector registers
+ addi 7, SP, -16
+ lvx 31, 0, 7
+ addi 7, SP, -32
+ lvx 30, 0, 7
+ addi 7, SP, -48
+ lvx 29, 0, 7
+ addi 7, SP, -64
+ lvx 28, 0, 7
+ blr
+.size ppc_aes_gcmHASH, . - ppc_aes_gcmHASH
+
+################################################################################
+# Generates the final GCM tag
+# void ppc_aes_gcmTAG(uint8_t Htbl[16*8], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
+#
+# In:  r3 = Htbl, r4 = Tp (16-byte running digest, read only here),
+#      r5 = Mlen and r6 = Alen (byte counts), r7 = X0 (16 bytes),
+#      r8 = TAG (16-byte output).
+# Out: TAG = X0 XOR (digest with the length block folded in).
+# Uses GPR r9 and vector registers v0-v10 only; Mlen/Alen registers are
+# overwritten with their bit counts.
+.globl ppc_aes_gcmTAG
+.type ppc_aes_gcmTAG,@function
+.align 5
+ppc_aes_gcmTAG:
+addis TOCP,12,(.TOC.-ppc_aes_gcmTAG)@ha
+addi TOCP,TOCP,(.TOC.-ppc_aes_gcmTAG)@l
+.localentry ppc_aes_gcmTAG, .-ppc_aes_gcmTAG
+
+.set Htbl, 3
+.set Tp, 4
+.set Mlen, 5
+.set Alen, 6
+.set X0, 7
+.set TAG, 8
+
+.set SWAP_MASK, 0
+.set POLY, 1
+.set D, 2
+.set C0, 3
+.set C1, 4
+.set T, 5
+.set R, 6
+.set F, 7
+.set H1M, 8
+.set H1L, 9
+.set X, 10
+
+ VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9
+ VEC_LOAD_DATA POLY, .Lpoly_r, 9
+
+ # D = current digest
+ VEC_LOAD D, Tp, 0
+
+ # load table elements
+ li 9, 1*16
+ lxvd2x H1L+32, 0, Htbl
+ lxvd2x H1M+32, 9, Htbl
+
+ # convert byte counts to bit counts and build the length block
+ # C0 = [Alen in bits | Mlen in bits]
+ sldi Alen, Alen, 3
+ sldi Mlen, Mlen, 3
+ mtvrd C0, Alen
+ mtvrd C1, Mlen
+ xxmrghd C0+32, C0+32, C1+32
+
+ # previous digest combining
+ vxor C0, C0, D
+
+ # polynomial multiplication
+ vpmsumd F, H1L, C0
+ vpmsumd R, H1M, C0
+
+ # reduction
+ vpmsumd T, F, POLY
+ vsldoi D, F, F, 8
+ vxor R, R, T
+ vxor D, R, D
+
+ # X0 is loaded without the byte swap, so swap D back to memory order
+ # before combining, then store the result to TAG
+ lxvd2x X+32, 0, X0
+ vperm D, D, D, SWAP_MASK
+ vxor X, X, D
+ stxvd2x X+32, 0, TAG
+
+ blr
+.size ppc_aes_gcmTAG, . - ppc_aes_gcmTAG
+
+################################################################################
+# Crypt only
+# void ppc_aes_gcmCRYPT(const uint8_t* PT, uint8_t* CT, uint64_t LEN, uint8_t *CTRP, uint32_t *KS, int NR);
+.globl ppc_aes_gcmCRYPT
+.type ppc_aes_gcmCRYPT,@function
+.align 5
+ppc_aes_gcmCRYPT:
+addis TOCP,12,(.TOC.-ppc_aes_gcmCRYPT)@ha
+addi TOCP,TOCP,(.TOC.-ppc_aes_gcmCRYPT)@l
+.localentry ppc_aes_gcmCRYPT, .-ppc_aes_gcmCRYPT
+
+.set PT, 3
+.set CT, 4
+.set LEN, 5
+.set CTRP, 6
+.set KS, 7
+.set NR, 8
+
+.set SWAP_MASK, 0
+.set K, 1
+.set CTR, 2
+.set CTR0, 3
+.set CTR1, 4
+.set CTR2, 5
+.set CTR3, 6
+.set CTR4, 7
+.set CTR5, 8
+.set CTR6, 9
+.set CTR7, 10
+.set ZERO, 11
+.set I1, 12
+.set I2, 13
+.set I3, 14
+.set I4, 15
+.set I5, 16
+.set I6, 17
+.set I7, 18
+.set I8, 19
+.set IN0, 24
+.set IN1, 25
+.set IN2, 26
+.set IN3, 27
+.set IN4, 28
+.set IN5, 29
+.set IN6, 30
+.set IN7, 31
+
+.macro ROUND_8
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+ vcipher CTR2, CTR2, K
+ vcipher CTR3, CTR3, K
+ vcipher CTR4, CTR4, K
+ vcipher CTR5, CTR5, K
+ vcipher CTR6, CTR6, K
+ vcipher CTR7, CTR7, K
+.endm
+
+.macro ROUND_4
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+ vcipher CTR2, CTR2, K
+ vcipher CTR3, CTR3, K
+.endm
+
+.macro ROUND_2
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+ vcipher CTR1, CTR1, K
+.endm
+
+# One AES round (vcipher) applied to the single block CTR0 with a freshly
+# loaded round key K (VEC_LOAD_INC is defined outside this excerpt).
+.macro ROUND_1
+ VEC_LOAD_INC K, KS, 10
+ vcipher CTR0, CTR0, K
+.endm
+
+ # store non-volatile general registers
+ # (r25-r31 are written at negative offsets from SP; SP itself is not moved)
+ std 31,-8(SP);
+ std 30,-16(SP);
+ std 29,-24(SP);
+ std 28,-32(SP);
+ std 27,-40(SP);
+ std 26,-48(SP);
+ std 25,-56(SP);
+
+ # store non-volatile vector registers
+ # (v24-v31, 16 bytes apart, from SP-80 down to SP-192; r9 is the address scratch)
+ addi 9, SP, -80
+ stvx 31, 0, 9
+ addi 9, SP, -96
+ stvx 30, 0, 9
+ addi 9, SP, -112
+ stvx 29, 0, 9
+ addi 9, SP, -128
+ stvx 28, 0, 9
+ addi 9, SP, -144
+ stvx 27, 0, 9
+ addi 9, SP, -160
+ stvx 26, 0, 9
+ addi 9, SP, -176
+ stvx 25, 0, 9
+ addi 9, SP, -192
+ stvx 24, 0, 9
+
+ # permute control used by the vperm instructions below (data at .Ldb_bswap_mask)
+ VEC_LOAD_DATA SWAP_MASK, .Ldb_bswap_mask, 9
+
+ # build the increment vectors: splat the constant n into every byte, then
+ # use vsldoi with ZERO to keep it only in the last byte of the vector
+ vxor ZERO, ZERO, ZERO
+ vspltisb I1, 1
+ vspltisb I2, 2
+ vspltisb I3, 3
+ vspltisb I4, 4
+ vspltisb I5, 5
+ vspltisb I6, 6
+ vspltisb I7, 7
+ vspltisb I8, 8
+ vsldoi I1, ZERO, I1, 1
+ vsldoi I2, ZERO, I2, 1
+ vsldoi I3, ZERO, I3, 1
+ vsldoi I4, ZERO, I4, 1
+ vsldoi I5, ZERO, I5, 1
+ vsldoi I6, ZERO, I6, 1
+ vsldoi I7, ZERO, I7, 1
+ vsldoi I8, ZERO, I8, 1
+
+ # current counter block
+ VEC_LOAD CTR, CTRP, 0
+
+ # r9 = LEN / 128 = number of full 8-block groups; skip the loop if none
+ srdi. 9, LEN, 7
+ beq .Lctr_4x
+
+ mtctr 9
+
+ # byte offsets of blocks 1..7 inside a 128-byte group
+ li 25, 0x10
+ li 26, 0x20
+ li 27, 0x30
+ li 28, 0x40
+ li 29, 0x50
+ li 30, 0x60
+ li 31, 0x70
+# Main loop: processes 128 bytes (eight 16-byte blocks) per iteration; the
+# iteration count was placed in CTR (the count register) by mtctr.
+.align 5
+.L8x_loop:
+ # restart at the first round key
+ li 10, 0
+ VEC_LOAD_INC K, KS, 10
+
+ # counter values CTR+1 .. CTR+7 (word-wise modular add)
+ vadduwm CTR1, CTR, I1
+ vadduwm CTR2, CTR, I2
+ vadduwm CTR3, CTR, I3
+ vadduwm CTR4, CTR, I4
+ vadduwm CTR5, CTR, I5
+ vadduwm CTR6, CTR, I6
+ vadduwm CTR7, CTR, I7
+
+ # initial key addition
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+ vxor CTR2, CTR2, K
+ vxor CTR3, CTR3, K
+ vxor CTR4, CTR4, K
+ vxor CTR5, CTR5, K
+ vxor CTR6, CTR6, K
+ vxor CTR7, CTR7, K
+
+ # nine rounds always; two more unless NR == 10; two more unless NR == 12
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ ROUND_8
+ cmpwi NR, 10
+ beq .Llast_8
+ ROUND_8
+ ROUND_8
+ cmpwi NR, 12
+ beq .Llast_8
+ ROUND_8
+ ROUND_8
+
+.Llast_8:
+ # final round with the last round key
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+ vcipherlast CTR2, CTR2, K
+ vcipherlast CTR3, CTR3, K
+ vcipherlast CTR4, CTR4, K
+ vcipherlast CTR5, CTR5, K
+ vcipherlast CTR6, CTR6, K
+ vcipherlast CTR7, CTR7, K
+
+ # load eight input blocks
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 25, PT
+ lxvd2x IN2+32, 26, PT
+ lxvd2x IN3+32, 27, PT
+ lxvd2x IN4+32, 28, PT
+ lxvd2x IN5+32, 29, PT
+ lxvd2x IN6+32, 30, PT
+ lxvd2x IN7+32, 31, PT
+
+ # reorder the keystream bytes with SWAP_MASK before combining with the input
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+ vperm CTR2, CTR2, CTR2, SWAP_MASK
+ vperm CTR3, CTR3, CTR3, SWAP_MASK
+ vperm CTR4, CTR4, CTR4, SWAP_MASK
+ vperm CTR5, CTR5, CTR5, SWAP_MASK
+ vperm CTR6, CTR6, CTR6, SWAP_MASK
+ vperm CTR7, CTR7, CTR7, SWAP_MASK
+
+ # input XOR keystream
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+ vxor IN2, IN2, CTR2
+ vxor IN3, IN3, CTR3
+ vxor IN4, IN4, CTR4
+ vxor IN5, IN5, CTR5
+ vxor IN6, IN6, CTR6
+ vxor IN7, IN7, CTR7
+
+ # store eight output blocks
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 25, CT
+ stxvd2x IN2+32, 26, CT
+ stxvd2x IN3+32, 27, CT
+ stxvd2x IN4+32, 28, CT
+ stxvd2x IN5+32, 29, CT
+ stxvd2x IN6+32, 30, CT
+ stxvd2x IN7+32, 31, CT
+
+ # advance the counter by 8 and both pointers by 128 bytes
+ vadduwm CTR, CTR, I8
+ addi PT, PT, 0x80
+ addi CT, CT, 0x80
+ bdnz .L8x_loop
+
+ # keep only the low 7 bits of LEN (bytes left after the 128-byte groups)
+ clrldi LEN, LEN, 57
+
+# Processes one group of four blocks (64 bytes) when at least 64 bytes remain.
+.Lctr_4x:
+ srdi. 9, LEN, 6
+ beq .Lctr_2x
+
+ # r10 = 0 restarts at the first round key; r29-r31 = offsets of blocks 1..3
+ li 10, 0
+ li 29, 0x10
+ li 30, 0x20
+ li 31, 0x30
+
+ VEC_LOAD_INC K, KS, 10
+
+ vadduwm CTR1, CTR, I1
+ vadduwm CTR2, CTR, I2
+ vadduwm CTR3, CTR, I3
+
+ # initial key addition
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+ vxor CTR2, CTR2, K
+ vxor CTR3, CTR3, K
+
+ # nine rounds always; two more unless NR == 10; two more unless NR == 12
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ ROUND_4
+ cmpwi NR, 10
+ beq .Llast_4
+ ROUND_4
+ ROUND_4
+ cmpwi NR, 12
+ beq .Llast_4
+ ROUND_4
+ ROUND_4
+
+.Llast_4:
+ # final round with the last round key
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+ vcipherlast CTR2, CTR2, K
+ vcipherlast CTR3, CTR3, K
+
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 29, PT
+ lxvd2x IN2+32, 30, PT
+ lxvd2x IN3+32, 31, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+ vperm CTR2, CTR2, CTR2, SWAP_MASK
+ vperm CTR3, CTR3, CTR3, SWAP_MASK
+
+ # input XOR keystream
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+ vxor IN2, IN2, CTR2
+ vxor IN3, IN3, CTR3
+
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 29, CT
+ stxvd2x IN2+32, 30, CT
+ stxvd2x IN3+32, 31, CT
+
+ # advance the counter by 4 and both pointers by 64 bytes
+ vadduwm CTR, CTR, I4
+ addi PT, PT, 0x40
+ addi CT, CT, 0x40
+
+ # keep only the low 6 bits of LEN
+ clrldi LEN, LEN, 58
+
+# Processes one group of two blocks (32 bytes) when at least 32 bytes remain.
+.Lctr_2x:
+ srdi. 9, LEN, 5
+ beq .Lctr_1x
+
+ # r10 = 0 restarts at the first round key; r31 = offset of the second block
+ li 10, 0
+ li 31, 0x10
+
+ VEC_LOAD_INC K, KS, 10
+
+ vadduwm CTR1, CTR, I1
+
+ # initial key addition
+ vxor CTR0, CTR, K
+ vxor CTR1, CTR1, K
+
+ # nine rounds always; two more unless NR == 10; two more unless NR == 12
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ ROUND_2
+ cmpwi NR, 10
+ beq .Llast_2
+ ROUND_2
+ ROUND_2
+ cmpwi NR, 12
+ beq .Llast_2
+ ROUND_2
+ ROUND_2
+
+.Llast_2:
+ # final round with the last round key
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+ vcipherlast CTR1, CTR1, K
+
+ lxvd2x IN0+32, 0, PT
+ lxvd2x IN1+32, 31, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+ vperm CTR1, CTR1, CTR1, SWAP_MASK
+
+ # input XOR keystream
+ vxor IN0, IN0, CTR0
+ vxor IN1, IN1, CTR1
+
+ stxvd2x IN0+32, 0, CT
+ stxvd2x IN1+32, 31, CT
+
+ # advance the counter by 2 and both pointers by 32 bytes
+ vadduwm CTR, CTR, I2
+ addi PT, PT, 0x20
+ addi CT, CT, 0x20
+
+ # keep only the low 5 bits of LEN
+ clrldi LEN, LEN, 59
+
+# Processes one full 16-byte block when at least 16 bytes remain.
+.Lctr_1x:
+ srdi. 9, LEN, 4
+ beq .Lctr_tail
+
+ # r10 = 0 restarts at the first round key
+ li 10, 0
+
+ VEC_LOAD_INC K, KS, 10
+ # initial key addition
+ vxor CTR0, CTR, K
+
+ # nine rounds always; two more unless NR == 10; two more unless NR == 12
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 10
+ beq .Llast_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 12
+ beq .Llast_1
+ ROUND_1
+ ROUND_1
+
+.Llast_1:
+ # final round with the last round key
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+
+ lxvd2x IN0+32, 0, PT
+
+ vperm CTR0, CTR0, CTR0, SWAP_MASK
+
+ # input XOR keystream
+ vxor IN0, IN0, CTR0
+
+ stxvd2x IN0+32, 0, CT
+
+ # advance the counter by 1 and both pointers by 16 bytes
+ vadduwm CTR, CTR, I1
+ addi PT, PT, 0x10
+ addi CT, CT, 0x10
+
+ # keep only the low 4 bits of LEN
+ clrldi LEN, LEN, 60
+
+# Handles the final partial block (LEN is now below 16); skipped when LEN == 0.
+.Lctr_tail:
+ cmpldi LEN, 0
+ beq .Lc_done
+
+ # r10 = 0 restarts at the first round key
+ li 10, 0
+
+ VEC_LOAD_INC K, KS, 10
+ # initial key addition
+ vxor CTR0, CTR, K
+
+ # nine rounds always; two more unless NR == 10; two more unless NR == 12
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 10
+ beq .Llast_tail
+ ROUND_1
+ ROUND_1
+ cmpwi NR, 12
+ beq .Llast_tail
+ ROUND_1
+ ROUND_1
+
+.Llast_tail:
+ # final round with the last round key
+ VEC_LOAD K, KS, 10
+ vcipherlast CTR0, CTR0, K
+
+ # LOAD_LEN is defined outside this excerpt; presumably it reads LEN bytes
+ # from PT into r10/r9 using r29-r31 as scratch -- confirm at its definition
+ LOAD_LEN PT, LEN, 10, 9, 29, 30, 31
+
+ # move both 64-bit halves of the keystream block into r31 and r30
+ vsldoi CTR1, CTR0, CTR0, 8
+ mfvrd 31, CTR0
+ mfvrd 30, CTR1
+
+ # input XOR keystream, done in general purpose registers
+ xor 10, 10, 31
+ xor 9, 9, 30
+
+ # STORE_LEN is defined outside this excerpt; presumably it writes LEN bytes
+ # from r10/r9 to CT -- confirm at its definition
+ STORE_LEN CT, LEN, 10, 9, 29, 30, 31
+
+ # the partial block still consumes one counter value
+ vadduwm CTR, CTR, I1
+
+# Common exit: write the updated counter back and restore saved registers.
+.Lc_done:
+ VEC_STORE CTR, CTRP, 0
+
+ # restore non-volatile vector registers
+ # (same SP-relative slots the prologue stored them to)
+ addi 9, SP, -80
+ lvx 31, 0, 9
+ addi 9, SP, -96
+ lvx 30, 0, 9
+ addi 9, SP, -112
+ lvx 29, 0, 9
+ addi 9, SP, -128
+ lvx 28, 0, 9
+ addi 9, SP, -144
+ lvx 27, 0, 9
+ addi 9, SP, -160
+ lvx 26, 0, 9
+ addi 9, SP, -176
+ lvx 25, 0, 9
+ addi 9, SP, -192
+ lvx 24, 0, 9
+
+ # restore non-volatile general registers
+ ld 31,-8(SP);
+ ld 30,-16(SP);
+ ld 29,-24(SP);
+ ld 28,-32(SP);
+ ld 27,-40(SP);
+ ld 26,-48(SP);
+ ld 25,-56(SP);
+ blr
+.size ppc_aes_gcmCRYPT, . - ppc_aes_gcmCRYPT
+
+# 16-byte vector constants, loaded with VEC_LOAD_DATA by the routines above.
+.data
+.align 4
+# NOTE(review): presumably the GHASH reduction polynomial constant; its users
+# are outside this excerpt -- confirm.
+.Lpoly:
+ .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+# loaded into POLY by ppc_aes_gcmTAG for its reduction step
+.Lpoly_r:
+ .byte 0,0,0,0,0,0,0,0xc2,0,0,0,0,0,0,0,0
+# vperm control loaded into SWAP_MASK: selects bytes 8..15 followed by bytes
+# 0..7, i.e. exchanges the two 8-byte halves of a vector
+.Ldb_bswap_mask:
+ .byte 8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7
diff --git a/security/nss/lib/freebl/pqg.c b/security/nss/lib/freebl/pqg.c
new file mode 100644
index 0000000000..4fb113f906
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.c
@@ -0,0 +1,1926 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * PQG parameter generation/verification. Based on FIPS 186-3.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+
+#define MAX_ITERATIONS 1000 /* Maximum number of iterations of primegen */
+
+typedef enum {
+ FIPS186_1_TYPE, /* Probabilistic */
+ FIPS186_3_TYPE, /* Probabilistic */
+ FIPS186_3_ST_TYPE /* Shawe-Taylor provable */
+} pqgGenType;
+
+/*
+ * Number of Miller-Rabin rounds to run when testing p.
+ *
+ * These counts are considerably larger than the ones used previously, because
+ * FIPS 186-3 is concerned about the primes used in PQG generation: it may be
+ * possible to purposefully construct composites which require more iterations
+ * of Miller-Rabin than normal randomly selected numbers do. There are three
+ * ways to counter this:
+ *   1) use one of the provably-prime algorithms (which would require a lot
+ *      more work than DSA-2 deserves),
+ *   2) add a Lucas primality test (which requires coding a Lucas test), or
+ *   3) use a larger M-R test count.
+ * The last option was chosen. It increases the time it takes to prove the
+ * selected prime, but it should not increase the overall run time of the
+ * algorithm (non-primes should still fail M-R relatively quickly). To get
+ * that last bit of performance, implement Lucas and adjust
+ * prime_testcount_p() and prime_testcount_q(). See FIPS 186-3 Appendix C
+ * and F for more information.
+ *
+ * L is the length of p in bits; N is accepted but does not affect the result.
+ */
+static int
+prime_testcount_p(int L, int N)
+{
+    if (L == 1024) {
+        return 40;
+    }
+    if (L == 2048) {
+        return 56;
+    }
+    if (L == 3072) {
+        return 64;
+    }
+    return 50; /* L = 512-960 */
+}
+
+/*
+ * Number of Miller-Rabin rounds to run when testing q.
+ *
+ * The q counts would differ from the p counts if M-R were followed by a
+ * Lucas test. This is kept as a separate function so that adding the Lucas
+ * check later is easy; for now it simply reuses the p counts.
+ */
+static int
+prime_testcount_q(int L, int N)
+{
+    int rounds = prime_testcount_p(L, N);
+
+    return rounds;
+}
+
+/*
+ * Check that an (L, N) pair is one of the combinations accepted for DSA2:
+ * (1024, DSA1_Q_BITS), (2048, 224), (2048, 256) or (3072, 256).
+ * Keeping the check in one function gives a single place to change if the
+ * requirements are ever raised.
+ *
+ * Returns SECSuccess for an accepted pair; otherwise sets
+ * SEC_ERROR_INVALID_ARGS and returns SECFailure.
+ */
+static SECStatus
+pqg_validate_dsa2(unsigned int L, unsigned int N)
+{
+    PRBool accepted;
+
+    if (L == 1024) {
+        accepted = (N == DSA1_Q_BITS) ? PR_TRUE : PR_FALSE;
+    } else if (L == 2048) {
+        accepted = (N == 224 || N == 256) ? PR_TRUE : PR_FALSE;
+    } else if (L == 3072) {
+        accepted = (N == 256) ? PR_TRUE : PR_FALSE;
+    } else {
+        accepted = PR_FALSE;
+    }
+
+    if (!accepted) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Return the default length of q in bits for a given length L of p in bits.
+ * For an unsupported L, sets SEC_ERROR_INVALID_ARGS and returns 0.
+ */
+static unsigned int
+pqg_get_default_N(unsigned int L)
+{
+    if (L == 1024) {
+        return DSA1_Q_BITS;
+    }
+    if (L == 2048) {
+        return 224;
+    }
+    if (L == 3072) {
+        return 256;
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return 0;
+}
+
+/*
+ * Pick the weakest hash algorithm that is usable for a q of N bits.
+ * L is accepted but does not influence the choice.
+ */
+static HASH_HashType
+getFirstHash(unsigned int L, unsigned int N)
+{
+    HASH_HashType choice;
+
+    if (N >= 512) {
+        choice = HASH_AlgSHA512;
+    } else if (N >= 384) {
+        choice = HASH_AlgSHA384;
+    } else if (N >= 256) {
+        choice = HASH_AlgSHA256;
+    } else if (N >= 224) {
+        choice = HASH_AlgSHA224;
+    } else {
+        choice = HASH_AlgSHA1;
+    }
+    return choice;
+}
+
+/*
+ * Return the next stronger hash algorithm after hashtype in the sequence
+ * SHA1, SHA224, SHA256, SHA384, SHA512. HASH_AlgTOTAL is returned once the
+ * sequence is exhausted (after SHA512) or for any other input.
+ */
+static HASH_HashType
+getNextHash(HASH_HashType hashtype)
+{
+    switch (hashtype) {
+        case HASH_AlgSHA1:
+            return HASH_AlgSHA224;
+        case HASH_AlgSHA224:
+            return HASH_AlgSHA256;
+        case HASH_AlgSHA256:
+            return HASH_AlgSHA384;
+        case HASH_AlgSHA384:
+            return HASH_AlgSHA512;
+        default: /* includes HASH_AlgSHA512 */
+            return HASH_AlgTOTAL;
+    }
+}
+
+/*
+ * Return the digest length in bytes of the given hash type. If no hash
+ * object is available for the type, sets SEC_ERROR_LIBRARY_FAILURE and
+ * returns 0 (and asserts in debug builds).
+ */
+static unsigned int
+HASH_ResultLen(HASH_HashType type)
+{
+    const SECHashObject *hashObject = HASH_GetRawHashObject(type);
+
+    PORT_Assert(hashObject != NULL);
+    if (hashObject != NULL) {
+        PORT_Assert(hashObject->length != 0);
+        return hashObject->length;
+    }
+    /* type is always a valid HashType, so a null object must be a bug */
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return 0;
+}
+
+/*
+ * Hash src_len bytes at src with the given hash type and write the digest
+ * to dest, which must have room for the digest length of that hash.
+ * Returns SECFailure if the hash object or its context cannot be obtained.
+ */
+SECStatus
+PQG_HashBuf(HASH_HashType type, unsigned char *dest,
+            const unsigned char *src, PRUint32 src_len)
+{
+    const SECHashObject *alg = HASH_GetRawHashObject(type);
+    unsigned int written; /* required by end(), otherwise unused */
+    void *ctx;
+
+    if (alg == NULL) {
+        return SECFailure;
+    }
+
+    ctx = alg->create();
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+
+    alg->begin(ctx);
+    alg->update(ctx, src, src_len);
+    alg->end(ctx, dest, &written, alg->length);
+    alg->destroy(ctx, PR_TRUE);
+
+    return SECSuccess;
+}
+
+/*
+ * Return the length in bytes of the integer stored in obj, not counting a
+ * single leading zero byte. Returns 0 when obj has no data.
+ */
+unsigned int
+PQG_GetLength(const SECItem *obj)
+{
+    if (obj->data == NULL) {
+        return 0;
+    }
+    /* drop one leading zero byte, but never shrink a one-byte item */
+    if (obj->len > 1 && obj->data[0] == 0) {
+        return obj->len - 1;
+    }
+    return obj->len;
+}
+
+/*
+ * Check that the sizes of p and q in params form a supported combination.
+ * Returns SECSuccess if so; otherwise sets SEC_ERROR_INVALID_ARGS and
+ * returns SECFailure.
+ */
+SECStatus
+PQG_Check(const PQGParams *params)
+{
+    unsigned int pBits, qBits;
+    int index;
+
+    if (params == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    pBits = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE;
+    qBits = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+
+    if (pBits >= 1024) {
+        /* DSA2 parameters (this includes DSA1 with 1024 bits) */
+        return pqg_validate_dsa2(pBits, qBits);
+    }
+
+    /* DSA1 parameters with less than 1024 bits */
+    if (qBits != DSA1_Q_BITS) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    index = PQG_PBITS_TO_INDEX(pBits);
+    if (index < 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Return the weakest hash algorithm usable with the given parameters, based
+ * on the sizes of p and q. For NULL params, sets SEC_ERROR_INVALID_ARGS and
+ * returns HASH_AlgNULL.
+ */
+HASH_HashType
+PQG_GetHashType(const PQGParams *params)
+{
+    unsigned int pBits, qBits;
+
+    if (params != NULL) {
+        pBits = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE;
+        qBits = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+        return getFirstHash(pBits, qBits);
+    }
+
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return HASH_AlgNULL;
+}
+
+/* Get a seed for generating P and Q.
+** Fills seed->data with seed->len bytes from the global random number
+** generator. If seed->data is NULL, a buffer of seed->len bytes is first
+** allocated from arena.
+*/
+static SECStatus
+getPQseed(SECItem *seed, PLArenaPool *arena)
+{
+    SECStatus status;
+
+    if (seed->data == NULL) {
+        seed->data = (unsigned char *)PORT_ArenaZAlloc(arena, seed->len);
+        if (seed->data == NULL) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+    }
+
+    status = RNG_GenerateGlobalRandomBytes(seed->data, seed->len);
+    /*
+     * NIST CMVP disallows a sequence of 20 bytes with the most
+     * significant byte equal to 0. Perhaps they interpret
+     * "a sequence of at least 160 bits" as "a number >= 2^159".
+     * So the most significant bit is always set to 1. (bug 334533)
+     */
+    seed->data[0] |= 0x80;
+
+    return status;
+}
+
+/* Generate a candidate h value.
+** When FIPS_186_1_A5_TEST is defined, the h value specified in FIPS 186-1
+** appendix 5 (h = 2) is used. Otherwise hit is filled with bytes from the
+** global random number generator. The bytes are then read into H as an
+** unsigned big-endian integer.
+*/
+static SECStatus
+generate_h_candidate(SECItem *hit, mp_int *H)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+
+#ifdef FIPS_186_1_A5_TEST
+    memset(hit->data, 0, hit->len);
+    hit->data[hit->len - 1] = 0x02;
+#else
+    rv = RNG_GenerateGlobalRandomBytes(hit->data, hit->len);
+#endif
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    err = mp_read_unsigned_octets(H, hit->data, hit->len);
+    if (err == MP_OKAY) {
+        return SECSuccess;
+    }
+    MP_TO_SEC_ERROR(err);
+    return SECFailure;
+}
+
+/*
+ * Compute seedout = (seed + addend) mod 2**seedlen.
+ *
+ *   seed     unsigned big-endian integer to add to.
+ *   addend   value added to the seed.
+ *   seedlen  width of the seed in bits (g in 186-1).
+ *   seedout  receives the result. Any buffer it already holds is zeroized
+ *            and freed first. It may be the same item as seed: the seed is
+ *            read into an mp_int before seedout is released.
+ *
+ * Returns SECSuccess, or SECFailure after mapping the MPI error to an NSS
+ * error code.
+ *
+ * NOTE: err and the cleanup label are used by the CHECK_MPI_OK,
+ * SECITEM_TO_MPINT and MPINT_TO_SECITEM macros, so they must keep their names.
+ */
+static SECStatus
+addToSeed(const SECItem *seed,
+          unsigned long addend,
+          int seedlen, /* g in 186-1 */
+          SECItem *seedout)
+{
+    mp_int s, sum, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+
+    /* mark everything as unallocated so cleanup is safe on every path */
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&sum) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&sum));
+    SECITEM_TO_MPINT(*seed, &s); /* s = seed */
+    /* seed += addend */
+    if (sizeof(addend) < sizeof(mp_digit) || addend < MP_DIGIT_MAX) {
+        /* addend fits in a single digit */
+        CHECK_MPI_OK(mp_add_d(&s, (mp_digit)addend, &s));
+    } else {
+        CHECK_MPI_OK(mp_init(&tmp));
+        CHECK_MPI_OK(mp_set_ulong(&tmp, addend));
+        CHECK_MPI_OK(mp_add(&s, &tmp, &s));
+    }
+    /* sum = s mod 2**seedlen (the remainder output of mp_div_2d) */
+    CHECK_MPI_OK(mp_div_2d(&s, (mp_digit)seedlen, NULL, &sum));
+    if (seedout->data != NULL) {
+        SECITEM_ZfreeItem(seedout, PR_FALSE);
+    }
+    MPINT_TO_SECITEM(&sum, seedout, NULL);
+cleanup:
+    mp_clear(&s);
+    mp_clear(&sum);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/* Compute Hash[(SEED + addend) mod 2**g]
+** The digest is written to hashOutBuf.
+** This computation is used in steps 2 and 7 of FIPS 186 Appendix 2.2 and
+** step 11.2 of FIPS 186-3 Appendix A.1.1.2 .
+*/
+static SECStatus
+addToSeedThenHash(HASH_HashType hashtype,
+                  const SECItem *seed,
+                  unsigned long addend,
+                  int seedlen, /* g in 186-1 */
+                  unsigned char *hashOutBuf)
+{
+    SECItem sum = { 0, 0, 0 };
+    SECStatus status = addToSeed(seed, addend, seedlen, &sum);
+
+    if (status == SECSuccess) {
+        /* hash the incremented seed, then wipe the temporary copy */
+        status = PQG_HashBuf(hashtype, hashOutBuf, sum.data, sum.len);
+        if (sum.data) {
+            SECITEM_ZfreeItem(&sum, PR_FALSE);
+        }
+    }
+    return status;
+}
+
+/*
+** Perform steps 2 and 3 of FIPS 186-1, appendix 2.2.
+** Derive Q from the seed using SHA-1.
+**
+** NOTE: rv, err and the cleanup label are used by the CHECK_SEC_OK macro
+** and the error path, so they must keep their names.
+*/
+static SECStatus
+makeQfromSeed(
+    unsigned int g,      /* input. Length of seed in bits. */
+    const SECItem *seed, /* input. */
+    mp_int *Q)           /* output. */
+{
+    unsigned char seedDigest[SHA1_LENGTH];
+    unsigned char nextDigest[SHA1_LENGTH];
+    unsigned char U[SHA1_LENGTH];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int pos = 0;
+
+    /* ******************************************************************
+    ** Step 2.
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]."
+    **/
+    CHECK_SEC_OK(SHA1_HashBuf(seedDigest, seed->data, seed->len));
+    CHECK_SEC_OK(addToSeedThenHash(HASH_AlgSHA1, seed, 1, g, nextDigest));
+    while (pos < SHA1_LENGTH) {
+        U[pos] = seedDigest[pos] ^ nextDigest[pos];
+        ++pos;
+    }
+    /* ******************************************************************
+    ** Step 3.
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    ** and the least significant bit to 1. In terms of boolean operations,
+    ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160."
+    */
+    U[0] |= 0x80; /* U is MSB first */
+    U[SHA1_LENGTH - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, U, SHA1_LENGTH);
+
+cleanup:
+    /* wipe the intermediate values on every path */
+    memset(U, 0, sizeof(U));
+    memset(seedDigest, 0, sizeof(seedDigest));
+    memset(nextDigest, 0, sizeof(nextDigest));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Perform steps 6 and 7 of FIPS 186-3, appendix A.1.1.2.
+** Derive Q from the seed using the selected hash.
+**
+** Returns SECFailure when the hash length is unknown, when N is smaller
+** than one byte, or when N is larger than the hash output. In those cases
+** the indexing below would fall outside the hash output in U.
+**
+** NOTE: rv, err and the cleanup label are used by the CHECK_SEC_OK macro
+** and the error path, so they must keep their names.
+*/
+static SECStatus
+makeQ2fromSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int N,         /* input. Length of q in bits. */
+    const SECItem *seed,    /* input. */
+    mp_int *Q)              /* output. */
+{
+    unsigned char U[HASH_LENGTH_MAX];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int N_bytes = N / PR_BITS_PER_BYTE; /* length of N in bytes rather than bits */
+    int hashLen = HASH_ResultLen(hashtype);
+    int offset;
+
+    if (hashLen <= 0) {
+        /* HASH_ResultLen has already set the error code */
+        return SECFailure;
+    }
+    if (N_bytes <= 0 || N_bytes > hashLen) {
+        /* q must be taken entirely from the hash output */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* ******************************************************************
+    ** Step 6.
+    ** "Compute U = hash[SEED] mod 2**(N-1)."
+    **/
+    CHECK_SEC_OK(PQG_HashBuf(hashtype, U, seed->data, seed->len));
+    /* mod 2**N: keep only the low-order N_bytes of the digest. Step 7 will
+     * explicitly set the top bit to 1, so there is no need to handle
+     * mod 2**(N-1) separately. */
+    offset = hashLen - N_bytes;
+    /* ******************************************************************
+    ** Step 7.
+    ** computed_q = 2**(N-1) + U + 1 - (U mod 2)
+    **
+    ** This is the same as:
+    ** computed_q = 2**(N-1) | U | 1;
+    */
+    U[offset] |= 0x80; /* U is MSB first */
+    U[hashLen - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, &U[offset], N_bytes);
+cleanup:
+    memset(U, 0, sizeof(U));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Perform steps from FIPS 186-3, Appendix A.1.2.1 and Appendix C.6
+**
+** This generates a provable prime from two smaller primes. The candidate
+** is built as c = 2*t*q*c0 + 1, so both q and c0 divide c-1. q can be 1.
+**
+** This implements steps 4 through 22 of FIPS 186-3 A.1.2.1 and
+** steps 16 through 34 of FIPS 186-3 C.6
+**
+** On failure, prime is set to zero, prime_seed is zeroized and freed, and
+** *prime_gen_counter is reset to 0.
+*/
+static SECStatus
+makePrimefromPrimesShaweTaylor(
+ HASH_HashType hashtype, /* selected Hashing algorithm */
+ unsigned int length, /* input. Length of prime in bits. */
+ unsigned int seedlen, /* input seed length in bits */
+ mp_int *c0, /* seed prime */
+ mp_int *q, /* sub prime, can be 1 */
+ mp_int *prime, /* output. */
+ SECItem *prime_seed, /* input/output. */
+ unsigned int *prime_gen_counter) /* input/output. */
+{
+ mp_int c;
+ mp_int c0_2;
+ mp_int t;
+ mp_int a;
+ mp_int z;
+ mp_int two_length_minus_1;
+ SECStatus rv = SECFailure;
+ int hashlen = HASH_ResultLen(hashtype);
+ int outlen = hashlen * PR_BITS_PER_BYTE;
+ int offset;
+ unsigned char bit, mask;
+ /* x needs to hold roundup(L/outlen)*outlen.
+ * This can be no larger than L+outlen-1, so we set its size to
+ * our max L + max outlen and know we are safe */
+ unsigned char x[DSA_MAX_P_BITS / 8 + HASH_LENGTH_MAX];
+ mp_err err = MP_OKAY;
+ int i;
+ int iterations;
+ int old_counter;
+
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&c0_2) = 0;
+ MP_DIGITS(&t) = 0;
+ MP_DIGITS(&a) = 0;
+ MP_DIGITS(&z) = 0;
+ MP_DIGITS(&two_length_minus_1) = 0;
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&c0_2));
+ CHECK_MPI_OK(mp_init(&t));
+ CHECK_MPI_OK(mp_init(&a));
+ CHECK_MPI_OK(mp_init(&z));
+ CHECK_MPI_OK(mp_init(&two_length_minus_1));
+
+ /*
+ ** There is a slight mapping of variable names depending on which
+ ** FIPS 186 steps are being carried out. The mapping is as follows:
+ ** variable A.1.2.1 C.6
+ ** c0 p0 c0
+ ** q q 1
+ ** c p c
+ ** c0_2 2*p0*q 2*c0
+ ** length L length
+ ** prime_seed pseed prime_seed
+ ** prime_gen_counter pgen_counter prime_gen_counter
+ **
+ ** Also note: our iterations variable is actually iterations+1, since
+ ** iterations+1 works better in C.
+ */
+
+ /* Step 4/16 iterations = ceiling(length/outlen)-1 */
+ iterations = (length + outlen - 1) / outlen; /* NOTE: iterations +1 */
+ /* Step 5/17 old_counter = prime_gen_counter */
+ old_counter = *prime_gen_counter;
+ /*
+ ** Comment: Generate a pseudorandom integer x in the interval
+ ** [2**(length-1), 2**length].
+ **
+ ** Step 6/18 x = 0
+ */
+ PORT_Memset(x, 0, sizeof(x));
+ /*
+ ** Step 7/19 for i = 0 to iterations do
+ ** x = x + (HASH(prime_seed + i) * 2^(i*outlen))
+ **
+ ** x is stored MSB first, so the hash for i = 0 goes into the last
+ ** hashlen bytes of the used part of the array.
+ */
+ for (i = 0; i < iterations; i++) {
+ /* is bigger than prime_seed should get to */
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+ seedlen, &x[(iterations - i - 1) * hashlen]));
+ }
+ /* Step 8/20 prime_seed = prime_seed + iterations + 1 */
+ CHECK_SEC_OK(addToSeed(prime_seed, iterations, seedlen, prime_seed));
+ /*
+ ** Step 9/21 x = 2 ** (length-1) + x mod 2 ** (length-1)
+ **
+ ** This step mathematically sets the high bit and clears out
+ ** all the other bits higher than length. 'x' is stored
+ ** in the x array, MSB first. The above formula gives us an 'x'
+ ** which is length bits long and has the high bit set. We also know
+ ** that length <= iterations*outlen since
+ ** iterations=ceiling(length/outlen). First we find the offset in
+ ** bytes into the array where the high bit is.
+ */
+ offset = (outlen * iterations - length) / PR_BITS_PER_BYTE;
+ /* now we want to set the 'high bit', since length may not be a
+ * multiple of 8,*/
+ bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+ /* we need to zero out the rest of the bits in the byte above */
+ mask = (bit - 1);
+ /* now we set it */
+ x[offset] = (mask & x[offset]) | bit;
+ /*
+ ** Comment: Generate a candidate prime c in the interval
+ ** [2**(length-1), 2**length].
+ **
+ ** Step 10 t = ceiling(x/(2q(p0)))
+ ** Step 22 t = ceiling(x/(2(c0)))
+ */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&t, &x[offset],
+ hashlen * iterations - offset)); /* t = x */
+ CHECK_MPI_OK(mp_mul(c0, q, &c0_2)); /* c0_2 is now c0*q */
+ CHECK_MPI_OK(mp_add(&c0_2, &c0_2, &c0_2)); /* c0_2 is now 2*q*c0 */
+ CHECK_MPI_OK(mp_add(&t, &c0_2, &t)); /* t = x+2*q*c0 */
+ CHECK_MPI_OK(mp_sub_d(&t, (mp_digit)1, &t)); /* t = x+2*q*c0 -1 */
+ /* t = floor((x+2qc0-1)/2qc0) = ceil(x/2qc0) */
+ CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+ /*
+ ** step 11: if (2tqp0 +1 > 2**length), then t = ceiling(2**(length-1)/2qp0)
+ ** step 12: t = 2tqp0 +1.
+ **
+ ** step 23: if (2tc0 +1 > 2**length), then t = ceiling(2**(length-1)/2c0)
+ ** step 24: t = 2tc0 +1.
+ */
+ CHECK_MPI_OK(mp_2expt(&two_length_minus_1, length - 1));
+step_23:
+ CHECK_MPI_OK(mp_mul(&t, &c0_2, &c)); /* c = t*2qc0 */
+ CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+ if (mpl_significant_bits(&c) > length) { /* if c > 2**length */
+ CHECK_MPI_OK(mp_sub_d(&c0_2, (mp_digit)1, &t)); /* t = 2qc0-1 */
+ /* t = 2**(length-1) + 2qc0 -1 */
+ CHECK_MPI_OK(mp_add(&two_length_minus_1, &t, &t));
+ /* t = floor((2**(length-1)+2qc0 -1)/2qc0)
+ * = ceil(2**(length-1)/2qc0) */
+ CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+ CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));
+ CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+ }
+ /* Step 13/25 prime_gen_counter = prime_gen_counter + 1*/
+ (*prime_gen_counter)++;
+ /*
+ ** Comment: Test the candidate prime c for primality; first pick an
+ ** integer a between 2 and c-2.
+ **
+ ** Step 14/26 a=0
+ */
+ PORT_Memset(x, 0, sizeof(x)); /* use x for a */
+ /*
+ ** Step 15/27 for i = 0 to iterations do
+ ** a = a + (HASH(prime_seed + i) * 2^(i*outlen))
+ **
+ ** NOTE: we reuse the x array for 'a' initially.
+ */
+ for (i = 0; i < iterations; i++) {
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+ seedlen, &x[(iterations - i - 1) * hashlen]));
+ }
+ /* Step 16/28 prime_seed = prime_seed + iterations + 1 */
+ CHECK_SEC_OK(addToSeed(prime_seed, iterations, seedlen, prime_seed));
+ /* Step 17/29 a = 2 + (a mod (c-3)). */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&a, x, iterations * hashlen));
+ CHECK_MPI_OK(mp_sub_d(&c, (mp_digit)3, &z)); /* z = c -3 */
+ CHECK_MPI_OK(mp_mod(&a, &z, &a)); /* a = a mod c -3 */
+ CHECK_MPI_OK(mp_add_d(&a, (mp_digit)2, &a)); /* a = 2 + a mod c -3 */
+ /*
+ ** Step 18 z = a**(2tq) mod p.
+ ** Step 30 z = a**(2t) mod c.
+ */
+ CHECK_MPI_OK(mp_mul(&t, q, &z)); /* z = tq */
+ CHECK_MPI_OK(mp_add(&z, &z, &z)); /* z = 2tq */
+ CHECK_MPI_OK(mp_exptmod(&a, &z, &c, &z)); /* z = a**(2tq) mod c */
+ /*
+ ** Step 19 if (( 1 == GCD(z-1,p)) and ( 1 == z**p0 mod p )), then
+ ** Step 31 if (( 1 == GCD(z-1,c)) and ( 1 == z**c0 mod c )), then
+ */
+ CHECK_MPI_OK(mp_sub_d(&z, (mp_digit)1, &a));
+ CHECK_MPI_OK(mp_gcd(&a, &c, &a));
+ if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+ CHECK_MPI_OK(mp_exptmod(&z, c0, &c, &a));
+ if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+ /* Step 31.1 prime = c */
+ CHECK_MPI_OK(mp_copy(&c, prime));
+ /*
+ ** Step 31.2 return Success, prime, prime_seed,
+ ** prime_gen_counter
+ */
+ rv = SECSuccess;
+ goto cleanup;
+ }
+ }
+ /*
+ ** Step 20/32 If (prime_gen_counter > 4 * length + old_counter then
+ ** return (FAILURE, 0, 0, 0).
+ ** NOTE: the test is reversed, so we fall through on failure to the
+ ** cleanup routine
+ */
+ if (*prime_gen_counter < (4 * length + old_counter)) {
+ /* Step 21/33 t = t + 1 */
+ CHECK_MPI_OK(mp_add_d(&t, (mp_digit)1, &t));
+ /* Step 22/34 Go to step 23/11 */
+ goto step_23;
+ }
+
+ /* if (prime_gen_counter > (4*length + old_counter), fall through to failure */
+ rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+ mp_clear(&c);
+ mp_clear(&c0_2);
+ mp_clear(&t);
+ mp_clear(&a);
+ mp_clear(&z);
+ mp_clear(&two_length_minus_1);
+ PORT_Memset(x, 0, sizeof(x));
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv == SECFailure) {
+ mp_zero(prime);
+ if (prime_seed->data) {
+ SECITEM_ZfreeItem(prime_seed, PR_FALSE);
+ }
+ *prime_gen_counter = 0;
+ }
+ return rv;
+}
+
+/*
+** Perform steps from FIPS 186-3, Appendix C.6
+**
+** This generates a provable prime from a seed.
+**
+** For length >= 33 the function recurses to build a prime c0 of about half
+** the length and then extends it with makePrimefromPrimesShaweTaylor().
+** For shorter lengths candidates are derived from the hash of the seed and
+** checked by trial division.
+**
+** On failure, prime is set to zero, prime_seed is zeroized and freed, and
+** *prime_gen_counter is reset to 0.
+*/
+static SECStatus
+makePrimefromSeedShaweTaylor(
+ HASH_HashType hashtype, /* selected Hashing algorithm */
+ unsigned int length, /* input. Length of prime in bits. */
+ const SECItem *input_seed, /* input. */
+ mp_int *prime, /* output. */
+ SECItem *prime_seed, /* output. */
+ unsigned int *prime_gen_counter) /* output. */
+{
+ mp_int c;
+ mp_int c0;
+ mp_int one;
+ SECStatus rv = SECFailure;
+ int hashlen = HASH_ResultLen(hashtype);
+ int outlen = hashlen * PR_BITS_PER_BYTE;
+ int offset;
+ int seedlen = input_seed->len * 8; /*seedlen is in bits */
+ unsigned char bit, mask;
+ unsigned char x[HASH_LENGTH_MAX * 2];
+ mp_digit dummy;
+ mp_err err = MP_OKAY;
+ int i;
+
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&c0) = 0;
+ MP_DIGITS(&one) = 0;
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&c0));
+ CHECK_MPI_OK(mp_init(&one));
+
+ /* Step 1. if length < 2 then return (FAILURE, 0, 0, 0) */
+ if (length < 2) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* Step 2. if length >= 33 then goto step 14 */
+ if (length >= 33) {
+ mp_zero(&one);
+ CHECK_MPI_OK(mp_add_d(&one, (mp_digit)1, &one));
+
+ /* Step 14 (status, c0, prime_seed, prime_gen_counter) =
+ ** (ST_Random_Prime((ceil(length/2)+1, input_seed)
+ */
+ rv = makePrimefromSeedShaweTaylor(hashtype, (length + 1) / 2 + 1,
+ input_seed, &c0, prime_seed, prime_gen_counter);
+ /* Step 15 if FAILURE is returned, return (FAILURE, 0, 0, 0). */
+ if (rv != SECSuccess) {
+ goto cleanup;
+ }
+ /* Steps 16-34 */
+ rv = makePrimefromPrimesShaweTaylor(hashtype, length, seedlen, &c0, &one,
+ prime, prime_seed, prime_gen_counter);
+ goto cleanup; /* we're done, one way or the other */
+ }
+ /* Step 3 prime_seed = input_seed */
+ CHECK_SEC_OK(SECITEM_CopyItem(NULL, prime_seed, input_seed));
+ /* Step 4 prime_gen_count = 0 */
+ *prime_gen_counter = 0;
+
+step_5:
+ /* Step 5 c = Hash(prime_seed) xor Hash(prime_seed+1). */
+ CHECK_SEC_OK(PQG_HashBuf(hashtype, x, prime_seed->data, prime_seed->len));
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, 1, seedlen, &x[hashlen]));
+ for (i = 0; i < hashlen; i++) {
+ x[i] = x[i] ^ x[i + hashlen];
+ }
+ /* Step 6 c = 2**(length-1) + (c mod 2**(length-1)) */
+ /* This step mathematically sets the high bit and clears out
+ ** all the other bits higher than length. Right now c is stored
+ ** in the x array, MSB first. The above formula gives us a c which
+ ** is length bits long and has the high bit set. We also know that
+ ** length < outlen since the smallest outlen is 160 bits and the largest
+ ** length at this point is 32 bits. So first we find the offset in bytes
+ ** into the array where the high bit is.
+ */
+ offset = (outlen - length) / PR_BITS_PER_BYTE;
+ /* now we want to set the 'high bit'. We have to calculate this since
+ * length may not be a multiple of 8.*/
+ bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+ /* we need to zero out the rest of the bits in the byte above */
+ mask = (bit - 1);
+ /* now we set it */
+ x[offset] = (mask & x[offset]) | bit;
+ /* Step 7 c = 2*floor(c/2) + 1 */
+ /* set the low bit. much easier to find (the end of the array) */
+ x[hashlen - 1] |= 1;
+ /* now that we've set our bits, we can create our candidate "c" */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&c, &x[offset], hashlen - offset));
+ /* Step 8 prime_gen_counter = prime_gen_counter + 1 */
+ (*prime_gen_counter)++;
+ /* Step 9 prime_seed = prime_seed + 2 */
+ CHECK_SEC_OK(addToSeed(prime_seed, 2, seedlen, prime_seed));
+ /* Step 10 Perform deterministic primality test on c. For example, since
+ ** c is small, its primality can be tested by trial division. See
+ ** Appendix C.7.
+ **
+ ** We in fact test with trial division. mpi has a built-in trial divider
+ ** that divides by all divisors up to 2^16.
+ */
+ if (prime_tab[prime_tab_size - 1] < 0xFFF1) {
+ /* we aren't testing all the primes between 0 and 2^16, we really
+ * can't use this construction. Just fail. */
+ rv = SECFailure;
+ goto cleanup;
+ }
+ dummy = prime_tab_size;
+ err = mpp_divis_primes(&c, &dummy);
+ /* Step 11 if c is prime then */
+ if (err == MP_NO) {
+ /* Step 11.1 prime = c */
+ CHECK_MPI_OK(mp_copy(&c, prime));
+ /* Step 11.2 return SUCCESS prime, prime_seed, prime_gen_counter */
+ err = MP_OKAY;
+ rv = SECSuccess;
+ goto cleanup;
+ } else if (err != MP_YES) {
+ goto cleanup; /* function failed, bail out */
+ } else {
+ /* reset mp_err */
+ err = MP_OKAY;
+ }
+ /*
+ ** Step 12 if (prime_gen_counter > (4*len))
+ ** then return (FAILURE, 0, 0, 0))
+ ** Step 13 goto step 5
+ */
+ if (*prime_gen_counter <= (4 * length)) {
+ goto step_5;
+ }
+ /* if (prime_gen_counter > 4*length), fall through to failure */
+ rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+ mp_clear(&c);
+ mp_clear(&c0);
+ mp_clear(&one);
+ PORT_Memset(x, 0, sizeof(x));
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv == SECFailure) {
+ mp_zero(prime);
+ if (prime_seed->data) {
+ SECITEM_ZfreeItem(prime_seed, PR_FALSE);
+ }
+ *prime_gen_counter = 0;
+ }
+ return rv;
+}
+
+/*
+ * Find a Q and algorithm from Seed.
+ *
+ * Re-derives q from the supplied seed with each generation method handled
+ * here and stops at the first candidate equal to Q. The methods are tried in
+ * this order:
+ *   1. FIPS 186-1 (makeQfromSeed), only when L <= 1024;
+ *   2. FIPS 186-3 probable primes (makeQ2fromSeed), once per hash returned
+ *      by getFirstHash()/getNextHash();
+ *   3. FIPS 186-3 Shawe-Taylor, using the first third of the seed as the
+ *      first seed, once per hash.
+ *
+ * On success, Q_ equals Q and *hashtypePtr / *typePtr describe the method
+ * that matched. *qseed_len and *qgen_counter are only given meaningful
+ * values for FIPS186_3_ST_TYPE (*qseed_len is 0 otherwise, *qgen_counter is
+ * left untouched otherwise).
+ * On failure, Q_ is zeroed and SECFailure is returned.
+ */
+static SECStatus
+findQfromSeed(
+    unsigned int L,             /* input.  Length of p in bits. */
+    unsigned int N,             /* input.  Length of q in bits. */
+    unsigned int g,             /* input.  Length of seed in bits. */
+    const SECItem *seed,        /* input. */
+    mp_int *Q,                  /* input. */
+    mp_int *Q_,                 /* output. */
+    unsigned int *qseed_len,    /* output */
+    HASH_HashType *hashtypePtr, /* output. Hash uses */
+    pqgGenType *typePtr,        /* output. Generation Type used */
+    unsigned int *qgen_counter) /* output. q_counter */
+{
+    HASH_HashType hashtype = HASH_AlgNULL;
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECStatus rv;
+
+    *qseed_len = 0; /* only set if FIPS186_3_ST_TYPE */
+
+    /* Anything up to 1024 bits may have been generated with FIPS186_1. */
+    if (L <= 1024) {
+        rv = makeQfromSeed(g, seed, Q_);
+        if ((rv == SECSuccess) && (mp_cmp(Q, Q_) == 0)) {
+            *hashtypePtr = HASH_AlgSHA1;
+            *typePtr = FIPS186_1_TYPE;
+            return SECSuccess;
+        }
+        if (L < 1024) {
+            /* legacy small DSA can only be FIPS186_1_TYPE */
+            mp_zero(Q_);
+            return SECFailure;
+        }
+        /* L == 1024 could also use the FIPS186_3 algorithms; fall through
+         * and try those as well. */
+    }
+    /* at this point we know we aren't using FIPS186_1, start trying FIPS186_3
+     * with appropriate hash types */
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        rv = makeQ2fromSeed(hashtype, N, seed, Q_);
+        if (rv != SECSuccess) {
+            continue;
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_TYPE;
+            return SECSuccess;
+        }
+    }
+    /*
+     * OK finally try FIPS186_3 Shawe-Taylor
+     */
+    firstseed = *seed; /* shallow copy: shares seed's data buffer */
+    firstseed.len = seed->len / 3;
+    for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL;
+         hashtype = getNextHash(hashtype)) {
+        unsigned int count;
+
+        rv = makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, Q_,
+                                          &qseed, &count);
+        if (rv != SECSuccess) {
+            continue;
+        }
+        if (mp_cmp(Q, Q_) == 0) {
+            /* check qseed as well: it must be the tail of the full seed */
+            int offset = seed->len - qseed.len;
+            if ((offset < 0) ||
+                (PORT_Memcmp(&seed->data[offset], qseed.data, qseed.len) != 0)) {
+                /* we found q, but the seeds don't match. This isn't an
+                 * accident, someone has been tweaking the seeds, just
+                 * fail at this point. */
+                /* zeroize on free, consistent with every other exit path */
+                SECITEM_ZfreeItem(&qseed, PR_FALSE);
+                mp_zero(Q_);
+                return SECFailure;
+            }
+            *qseed_len = qseed.len;
+            *hashtypePtr = hashtype;
+            *typePtr = FIPS186_3_ST_TYPE;
+            *qgen_counter = count;
+            SECITEM_ZfreeItem(&qseed, PR_FALSE);
+            return SECSuccess;
+        }
+        SECITEM_ZfreeItem(&qseed, PR_FALSE);
+    }
+    /* no hash algorithms found which match seed to Q, fail */
+    mp_zero(Q_);
+    return SECFailure;
+}
+
+/*
+** Perform steps 7, 8 and 9 of FIPS 186, appendix 2.2.
+** which are the same as steps 11.1-11.5 of FIPS 186-2, App A.1.1.2
+** Generate P from Q, seed, L, and offset.
+**
+** The value written to P is only a candidate: it is built so that
+** P = X - ((X mod 2Q) - 1), and no primality test is performed here.
+**
+** Returns SECSuccess with the candidate in P. On any failure P is zeroed
+** and SECFailure is returned. A hash type with a zero result length is
+** rejected with SEC_ERROR_INVALID_ARGS instead of dividing by zero.
+** N is accepted for interface symmetry and is not used by this function.
+*/
+static SECStatus
+makePfromQandSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int L,         /* Length of P in bits.  Per FIPS 186. */
+    unsigned int N,         /* Length of Q in bits.  Per FIPS 186. */
+    unsigned int offset,    /* Per FIPS 186, App 2.2. & 186-3 App A.1.1.2 */
+    unsigned int seedlen,   /* input. Length of seed in bits. (g in 186-1)*/
+    const SECItem *seed,    /* input.  */
+    const mp_int *Q,        /* input.  */
+    mp_int *P)              /* output. */
+{
+    unsigned int j;       /* Per FIPS 186-3 App. A.1.1.2  (k in 186-1)*/
+    unsigned int n;       /* Per FIPS 186, appendix 2.2. */
+    mp_digit b;           /* Per FIPS 186, appendix 2.2. */
+    unsigned int outlen;  /* Per FIPS 186-3 App. A.1.1.2 */
+    unsigned int hashlen; /* outlen in bytes */
+    unsigned char V_j[HASH_LENGTH_MAX];
+    mp_int W, X, c, twoQ, V_n, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    /* Initialize bignums */
+    MP_DIGITS(&W) = 0;
+    MP_DIGITS(&X) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&twoQ) = 0;
+    MP_DIGITS(&V_n) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&W));
+    CHECK_MPI_OK(mp_init(&X));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&twoQ));
+    CHECK_MPI_OK(mp_init(&tmp));
+    CHECK_MPI_OK(mp_init(&V_n));
+
+    hashlen = HASH_ResultLen(hashtype);
+    outlen = hashlen * PR_BITS_PER_BYTE;
+
+    PORT_Assert(outlen > 0);
+    if (outlen == 0) {
+        /* The assert above is compiled out of optimized builds; never
+         * divide by a zero hash length. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* L - 1 = n*outlen + b */
+    n = (L - 1) / outlen;
+    b = (L - 1) % outlen;
+
+    /* ******************************************************************
+    ** Step 11.1 (Step 7 in 186-1)
+    **  "for j = 0 ... n let
+    **           V_j = SHA[(SEED + offset + j) mod 2**seedlen]."
+    **
+    ** Step 11.2 (Step 8 in 186-1)
+    **   "W = V_0 + (V_1 * 2**outlen) + ... + (V_n-1 * 2**((n-1)*outlen))
+    **         + ((V_n mod 2**b) * 2**(n*outlen))
+    */
+    for (j = 0; j < n; ++j) { /* Do the first n terms of V_j */
+        /* Do step 11.1 for iteration j.
+        ** V_j = HASH[(seed + offset + j) mod 2**g]
+        */
+        CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + j, seedlen, V_j));
+        /* Do step 11.2 for iteration j.
+        ** W += V_j * 2**(j*outlen)
+        */
+        OCTETS_TO_MPINT(V_j, &tmp, hashlen);           /* get bignum V_j     */
+        CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, j * outlen)); /* tmp=V_j << j*outlen */
+        CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    }
+    /* Step 11.2, continued.
+    **   [W += ((V_n mod 2**b) * 2**(n*outlen))]
+    */
+    CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + n, seedlen, V_j));
+    OCTETS_TO_MPINT(V_j, &V_n, hashlen);           /* get bignum V_n     */
+    CHECK_MPI_OK(mp_div_2d(&V_n, b, NULL, &tmp));  /* tmp = V_n mod 2**b */
+    CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, n * outlen)); /* tmp = tmp << n*outlen */
+    CHECK_MPI_OK(mp_add(&W, &tmp, &W));            /* W += tmp           */
+    /* Step 11.3, (Step 8 in 186-1)
+    ** "X = W + 2**(L-1).
+    **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+    */
+    CHECK_MPI_OK(mpl_set_bit(&X, (mp_size)(L - 1), 1)); /* X = 2**(L-1) */
+    CHECK_MPI_OK(mp_add(&X, &W, &X));                   /* X += W       */
+    /*************************************************************
+    ** Step 11.4. (Step 9 in 186-1)
+    ** "c = X mod 2q"
+    */
+    CHECK_MPI_OK(mp_mul_2(Q, &twoQ));    /* 2q           */
+    CHECK_MPI_OK(mp_mod(&X, &twoQ, &c)); /* c = X mod 2q */
+    /*************************************************************
+    ** Step 11.5. (Step 9 in 186-1)
+    ** "p = X - (c - 1).
+    **  Note that p is congruent to 1 mod 2q."
+    */
+    CHECK_MPI_OK(mp_sub_d(&c, 1, &c)); /* c -= 1       */
+    CHECK_MPI_OK(mp_sub(&X, &c, P));   /* P = X - c    */
+cleanup:
+    /* scrub the last hash output before leaving */
+    PORT_Memset(V_j, 0, sizeof V_j);
+    mp_clear(&W);
+    mp_clear(&X);
+    mp_clear(&c);
+    mp_clear(&twoQ);
+    mp_clear(&V_n);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        mp_zero(P);
+        return SECFailure;
+    }
+    if (rv != SECSuccess) {
+        mp_zero(P);
+    }
+    return rv;
+}
+
+/*
+** Generate G from h, P, and Q.
+**
+** H is first reduced into range: if H >= P-1, then P-1 is subtracted from
+** it once (H is modified in place). G is then computed as
+** G = H ** ((P-1)/Q) mod P.
+**
+** *passed is PR_TRUE only when a G greater than 1 was produced. When H ends
+** up as 0 or 1, or G ends up as 0 or 1, SECFailure is returned so the
+** caller can pick another H. G is zeroed on every failure.
+*/
+static SECStatus
+makeGfromH(const mp_int *P, /* input.  */
+           const mp_int *Q, /* input.  */
+           mp_int *H,       /* input and output. */
+           mp_int *G,       /* output. */
+           PRBool *passed)
+{
+    mp_int cofactor, pLess1;
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+
+    *passed = PR_FALSE;
+    MP_DIGITS(&cofactor) = 0;
+    MP_DIGITS(&pLess1) = 0;
+    CHECK_MPI_OK(mp_init(&cofactor));
+    CHECK_MPI_OK(mp_init(&pLess1));
+
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pLess1)); /* pLess1 = P - 1 */
+    /* Let b = 2**n (smallest power of 2 greater than P).
+    ** Since P-1 >= b/2, and H < b, quotient(H/(P-1)) = 0 or 1
+    ** so a single subtraction safely computes H mod (P-1).
+    */
+    if (mp_cmp(H, &pLess1) >= 0) {
+        CHECK_MPI_OK(mp_sub(H, &pLess1, H));
+    }
+    /* H of 0 or 1 is unusable; report failure so the caller regenerates H. */
+    if (mp_cmp_d(H, 1) <= 0) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /* G = (H ** ((P-1)/Q)) mod P */
+    CHECK_MPI_OK(mp_div(&pLess1, Q, &cofactor, NULL));
+    CHECK_MPI_OK(mp_exptmod(H, &cofactor, P, G));
+    /* G of 0 or 1 is rejected as well. */
+    if (mp_cmp_d(G, 1) > 0) {
+        *passed = PR_TRUE;
+    } else {
+        rv = SECFailure;
+    }
+cleanup:
+    mp_clear(&cofactor);
+    mp_clear(&pLess1);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess) {
+        mp_zero(G);
+    }
+    return rv;
+}
+
+/*
+** Generate G from seed, index, P, and Q.
+**
+** For count = 1 .. 0xffff this hashes seed || "ggen" || index || count
+** (count as two big-endian bytes), raises the digest to (P-1)/Q modulo P,
+** and stops at the first result that is at least 2. SECFailure is returned
+** if no count in that range yields such a G, if the hash object cannot be
+** obtained or created, or if any bignum operation fails.
+*/
+static SECStatus
+makeGfromIndex(HASH_HashType hashtype,
+               const mp_int *P,     /* input.  */
+               const mp_int *Q,     /* input.  */
+               const SECItem *seed, /* input. */
+               unsigned char index, /* input. */
+               mp_int *G)           /* input/output */
+{
+    const SECHashObject *hasher = NULL;
+    void *ctx = NULL;
+    unsigned char buf[HASH_LENGTH_MAX]; /* count bytes, then the digest */
+    unsigned int digestLen;
+    unsigned int count;
+    mp_int e, pm1, W;
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&pm1) = 0;
+    MP_DIGITS(&W) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&pm1));
+    CHECK_MPI_OK(mp_init(&W));
+
+    /* set up the raw hash implementation */
+    hasher = HASH_GetRawHashObject(hashtype);
+    if (!hasher) {
+        /* shouldn't happen */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    ctx = hasher->create();
+    if (!ctx) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* Step 3: e = (p-1)/q */
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pm1));
+    CHECK_MPI_OK(mp_div(&pm1, Q, &e, NULL));
+/* Steps 4, 5, and 6 */
+/* count is a 16 bit value in the spec. We actually represent count
+ * as more than 16 bits so we can easily detect the 16 bit overflow */
+#define MAX_COUNT 0x10000
+    count = 1;
+    while (count < MAX_COUNT) {
+        /* step 7
+         * U = domain_param_seed || "ggen" || index || count
+         * step 8
+         * W = HASH(U)
+         */
+        hasher->begin(ctx);
+        hasher->update(ctx, seed->data, seed->len);
+        hasher->update(ctx, (unsigned char *)"ggen", 4);
+        hasher->update(ctx, &index, 1);
+        buf[0] = (count >> 8) & 0xff;
+        buf[1] = count & 0xff;
+        hasher->update(ctx, buf, 2);
+        hasher->end(ctx, buf, &digestLen, sizeof(buf));
+        OCTETS_TO_MPINT(buf, &W, digestLen);
+        /* step 9. g = W**e mod p */
+        CHECK_MPI_OK(mp_exptmod(&W, &e, P, G));
+        /* step 10. only g < 2 sends us back to step 5 for the next count */
+        if (mp_cmp_d(G, 2) >= 0) {
+            break; /* step 11: G is usable */
+        }
+        count++;
+    }
+    if (count >= MAX_COUNT) {
+        rv = SECFailure; /* last part of step 6: counter wrapped */
+    }
+/* step 11.
+ *    return valid G */
+cleanup:
+    PORT_Memset(buf, 0, sizeof(buf));
+    if (ctx) {
+        hasher->destroy(ctx, PR_TRUE);
+    }
+    mp_clear(&e);
+    mp_clear(&pm1);
+    mp_clear(&W);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* This code uses labels and gotos, so that it can follow the numbered
+** steps in the algorithms from FIPS 186-3 appendix A.1.1.2 very closely,
+** and so that the correctness of this code can be easily verified.
+** So, please forgive the ugly c code.
+**
+** Two modes:
+**  - *pParams == NULL: generate P, Q and G with the method selected by
+**    'type', and return freshly allocated PQGParams/PQGVerify through
+**    pParams/pVfy (each owns its own arena).
+**  - *pParams != NULL: P and Q are taken from *pParams, and only G is
+**    generated (index method) from the seed and index in *pVfy; the result
+**    is stored in (*pParams)->base. The caller keeps ownership of *pParams
+**    and *pVfy whether or not this succeeds.
+**
+** Every exit after the bignums are initialized goes through 'cleanup', and
+** every failure path sets rv to SECFailure before jumping there, so a
+** failure is never reported as success and nothing allocated here leaks.
+**/
+static SECStatus
+pqg_ParamGen(unsigned int L, unsigned int N, pqgGenType type,
+             unsigned int seedBytes, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int n;       /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int seedlen; /* Per FIPS 186-3 app A.1.1.2  (was 'g' 186-1)*/
+    unsigned int counter; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int offset;  /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int outlen;  /* Per FIPS 186-3, appendix A.1.1.2. */
+    unsigned int maxCount;
+    HASH_HashType hashtype = HASH_AlgNULL;
+    SECItem *seed; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    PLArenaPool *arena = NULL;
+    PQGParams *params = NULL; /* non-NULL only for objects allocated here */
+    PQGVerify *verify = NULL; /* non-NULL only for objects allocated here */
+    PRBool passed;
+    SECItem hit = { 0, 0, 0 };
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECItem pseed = { 0, 0, 0 };
+    mp_int P, Q, G, H, l, p0;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+    int iterations = 0;
+
+    /* Step 1. L and N already checked by caller*/
+    /* Step 2. if (seedlen < N) return INVALID; */
+    if (seedBytes < N / PR_BITS_PER_BYTE || !pParams || !pVfy) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure; /* nothing has been allocated yet */
+    }
+
+    /* Initialize bignums */
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&H) = 0;
+    MP_DIGITS(&l) = 0;
+    MP_DIGITS(&p0) = 0;
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&H));
+    CHECK_MPI_OK(mp_init(&l));
+    CHECK_MPI_OK(mp_init(&p0));
+
+    /* parameters have been passed in, only generate G */
+    if (*pParams != NULL) {
+        /* we only support G index generation if generating separate from PQ */
+        if ((*pVfy == NULL) || (type == FIPS186_1_TYPE) ||
+            ((*pVfy)->h.len != 1) || ((*pVfy)->h.data == NULL) ||
+            ((*pVfy)->seed.data == NULL) || ((*pVfy)->seed.len == 0)) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+            goto cleanup; /* release the bignums initialized above */
+        }
+        /* 'params' and 'verify' deliberately stay NULL here: the objects
+         * belong to the caller, so the failure handling in cleanup must
+         * not free their arenas. */
+
+        /* fill in P Q,  */
+        SECITEM_TO_MPINT((*pParams)->prime, &P);
+        SECITEM_TO_MPINT((*pParams)->subPrime, &Q);
+        hashtype = getFirstHash(L, N);
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &(*pVfy)->seed,
+                                    (*pVfy)->h.data[0], &G));
+        MPINT_TO_SECITEM(&G, &(*pParams)->base, (*pParams)->arena);
+        goto cleanup;
+    }
+    /* Initialize an arena for the params. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    params = (PQGParams *)PORT_ArenaZAlloc(arena, sizeof(PQGParams));
+    if (!params) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE); /* not yet reachable via params */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    params->arena = arena;
+    /* Initialize an arena for the verify. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup; /* cleanup frees params->arena */
+    }
+    verify = (PQGVerify *)PORT_ArenaZAlloc(arena, sizeof(PQGVerify));
+    if (!verify) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE); /* not yet reachable via verify */
+        rv = SECFailure;
+        goto cleanup; /* cleanup frees params->arena */
+    }
+    verify->arena = arena;
+    seed = &verify->seed;
+    arena = NULL;
+
+    /* Select Hash and Compute lengths. */
+    /* getFirstHash gives us the smallest acceptable hash for this key
+     * strength */
+    hashtype = getFirstHash(L, N);
+    outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+
+    /* Step 3: n = Ceil(L/outlen)-1; (same as n = Floor((L-1)/outlen)) */
+    n = (L - 1) / outlen;
+    /* Step 4: (skipped since we don't use b): b = L -1 - (n*outlen); */
+    seedlen = seedBytes * PR_BITS_PER_BYTE; /* bits in seed */
+step_5:
+    /* ******************************************************************
+    ** Step 5. (Step 1 in 186-1)
+    ** "Choose an arbitrary sequence of at least N bits and call it SEED.
+    **  Let g be the length of SEED in bits."
+    */
+    if (++iterations > MAX_ITERATIONS) { /* give up after a while */
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        /* rv may still hold SECSuccess from an earlier CHECK_SEC_OK */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    seed->len = seedBytes;
+    CHECK_SEC_OK(getPQseed(seed, verify->arena));
+    /* ******************************************************************
+    ** Step 6. (Step 2 in 186-1)
+    **
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g].  (186-1)"
+    ** "Compute U = HASH[SEED] mod 2**(N-1). (186-3)"
+    **
+    ** Step 7. (Step 3 in 186-1)
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    **  and the least significant bit to 1.  In terms of boolean operations,
+    **  Q = U OR 2**159 OR 1.  Note that 2**159 < Q < 2**160." (186-1)"
+    **
+    ** "q = 2**(N-1) + U + 1 - (U mod 2) (186-3)
+    **
+    ** Note: Both formulations are the same for U < 2**(N-1) and N=160
+    **
+    ** If using Shawe-Taylor, we do the entire A.1.2.1.2 steps in the block
+    ** FIPS186_3_ST_TYPE.
+    */
+    if (type == FIPS186_1_TYPE) {
+        CHECK_SEC_OK(makeQfromSeed(seedlen, seed, &Q));
+    } else if (type == FIPS186_3_TYPE) {
+        CHECK_SEC_OK(makeQ2fromSeed(hashtype, N, seed, &Q));
+    } else {
+        /* FIPS186_3_ST_TYPE */
+        unsigned int qgen_counter, pgen_counter;
+
+        /* Step 1 (L,N) already checked for acceptability */
+
+        firstseed = *seed;
+        qgen_counter = 0;
+        /* Step 2. Use N and firstseed to generate random prime q
+         * using Appendix C.6 */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, &Q,
+                                                  &qseed, &qgen_counter));
+        /* Step 3. Use ceil(L/2)+1 and qseed to generate random prime p0
+         * using Appendix C.6 */
+        pgen_counter = 0;
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed, &pgen_counter));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, seedBytes * 8,
+                                                    &p0, &Q, &P, &pseed, &pgen_counter));
+
+        /* combine all the seeds */
+        if ((qseed.len > firstseed.len) || (pseed.len > firstseed.len)) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); /* shouldn't happen */
+            rv = SECFailure; /* rv is SECSuccess from the call above */
+            goto cleanup;
+        }
+        /* If the seed overflows, then pseed and qseed may have leading zeros
+         * which the mpl code clamps. We want to make sure those are added
+         * back in so the individual seed lengths are predictable from the
+         * overall seed length */
+        seed->len = firstseed.len * 3;
+        seed->data = PORT_ArenaZAlloc(verify->arena, seed->len);
+        if (seed->data == NULL) {
+            rv = SECFailure; /* rv is SECSuccess from the call above */
+            goto cleanup;
+        }
+        PORT_Memcpy(seed->data, firstseed.data, firstseed.len);
+        PORT_Memcpy(seed->data + 2 * firstseed.len - pseed.len, pseed.data, pseed.len);
+        PORT_Memcpy(seed->data + 3 * firstseed.len - qseed.len, qseed.data, qseed.len);
+        counter = (qgen_counter << 16) | pgen_counter;
+
+        /* we've generated both P and Q now, skip to generating G */
+        goto generate_G;
+    }
+    /* ******************************************************************
+    ** Step 8. (Step 4 in 186-1)
+    ** "Use a robust primality testing algorithm to test whether q is prime."
+    **
+    ** Appendix 2.1 states that a Rabin test with at least 50 iterations
+    ** "will give an acceptable probability of error."
+    */
+    /*CHECK_SEC_OK( prm_RabinTest(&Q, &passed) );*/
+    err = mpp_pprime_secure(&Q, prime_testcount_q(L, N));
+    passed = (err == MP_YES) ? SECSuccess : SECFailure;
+    /* ******************************************************************
+    ** Step 9. (Step 5 in 186-1) "If q is not prime, goto step 5 (1 in 186-1)."
+    */
+    if (passed != SECSuccess)
+        goto step_5;
+    /* ******************************************************************
+    ** Step 10.
+    **      offset = 1;
+    **( Step 6b 186-1)"Let counter = 0 and offset = 2."
+    */
+    offset = (type == FIPS186_1_TYPE) ? 2 : 1;
+    /*
+    ** Step 11. (Step 6a,13a,14 in 186-1)
+    **  For counter - 0 to (4L-1) do
+    **
+    */
+    maxCount = L >= 1024 ? (4 * L - 1) : 4095;
+    for (counter = 0; counter <= maxCount; counter++) {
+        /* ******************************************************************
+        ** Step 11.1  (Step 7 in 186-1)
+        ** "for j = 0 ... n let
+        **          V_j = HASH[(SEED + offset + j) mod 2**seedlen]."
+        **
+        ** Step 11.2 (Step 8 in 186-1)
+        ** "W = V_0 + V_1*2**outlen+...+ V_n-1 * 2**((n-1)*outlen) +
+        **                               ((Vn* mod 2**b)*2**(n*outlen))"
+        ** Step 11.3 (Step 8 in 186-1)
+        ** "X = W + 2**(L-1)
+        **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+        **
+        ** Step 11.4 (Step 9 in 186-1).
+        ** "c = X mod 2q"
+        **
+        ** Step 11.5 (Step 9 in 186-1).
+        ** " p = X - (c - 1).
+        **  Note that p is congruent to 1 mod 2q."
+        */
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, seedlen,
+                                       seed, &Q, &P));
+        /*************************************************************
+        ** Step 11.6. (Step 10 in 186-1)
+        ** "if p < 2**(L-1), then goto step 11.9. (step 13 in 186-1)"
+        */
+        CHECK_MPI_OK(mpl_set_bit(&l, (mp_size)(L - 1), 1)); /* l = 2**(L-1) */
+        if (mp_cmp(&P, &l) < 0)
+            goto step_11_9;
+        /************************************************************
+        ** Step 11.7 (step 11 in 186-1)
+        ** "Perform a robust primality test on p."
+        */
+        /*CHECK_SEC_OK( prm_RabinTest(&P, &passed) );*/
+        err = mpp_pprime_secure(&P, prime_testcount_p(L, N));
+        passed = (err == MP_YES) ? SECSuccess : SECFailure;
+        /* ******************************************************************
+        ** Step 11.8. "If p is determined to be prime return VALID
+        ** values of p, q, seed and counter."
+        */
+        if (passed == SECSuccess)
+            break;
+    step_11_9:
+        /* ******************************************************************
+        ** Step 11.9.  "offset = offset + n + 1."
+        */
+        offset += n + 1;
+    }
+    /* ******************************************************************
+    ** Step 12.  "goto step 5."
+    **
+    ** NOTE: if counter <= maxCount, then we exited the loop at Step 11.8
+    ** and now need to return p,q, seed, and counter.
+    */
+    if (counter > maxCount)
+        goto step_5;
+
+generate_G:
+    /* ******************************************************************
+    ** returning p, q, seed and counter
+    */
+    if (type == FIPS186_1_TYPE) {
+        /* Generate g. This is called the "Unverifiable Generation of g"
+         * in FIPS 186-3 Appendix A.2.1. For compatibility we maintain
+         * this version of the code */
+        SECITEM_AllocItem(NULL, &hit, L / 8); /* h is no longer than p */
+        if (!hit.data) {
+            rv = SECFailure; /* rv is SECSuccess from makePfromQandSeed */
+            goto cleanup;
+        }
+        do {
+            /* loop generate h until 1<h<p-1 and (h**[(p-1)/q])mod p > 1 */
+            CHECK_SEC_OK(generate_h_candidate(&hit, &H));
+            CHECK_SEC_OK(makeGfromH(&P, &Q, &H, &G, &passed));
+        } while (passed != PR_TRUE);
+        MPINT_TO_SECITEM(&H, &verify->h, verify->arena);
+    } else {
+        unsigned char index = 1; /* default to 1 */
+        verify->h.data = (unsigned char *)PORT_ArenaZAlloc(verify->arena, 1);
+        if (verify->h.data == NULL) {
+            rv = SECFailure; /* rv is SECSuccess from an earlier call */
+            goto cleanup;
+        }
+        verify->h.len = 1;
+        verify->h.data[0] = index;
+        /* Generate g, using the FIPS 186-3 Appendix A.2.3 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, seed, index, &G));
+    }
+    /* All generation is done.  Now, save the PQG params.  */
+    MPINT_TO_SECITEM(&P, &params->prime, params->arena);
+    MPINT_TO_SECITEM(&Q, &params->subPrime, params->arena);
+    MPINT_TO_SECITEM(&G, &params->base, params->arena);
+    verify->counter = counter;
+    *pParams = params;
+    *pVfy = verify;
+cleanup:
+    if (pseed.data) {
+        SECITEM_ZfreeItem(&pseed, PR_FALSE);
+    }
+    if (qseed.data) {
+        SECITEM_ZfreeItem(&qseed, PR_FALSE);
+    }
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&H);
+    mp_clear(&l);
+    mp_clear(&p0);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        /* only objects allocated by this call are ever held in
+         * params/verify, so this never frees caller-owned arenas */
+        if (params) {
+            PORT_FreeArena(params->arena, PR_TRUE);
+        }
+        if (verify) {
+            PORT_FreeArena(verify->arena, PR_TRUE);
+        }
+    }
+    if (hit.data) {
+        SECITEM_ZfreeItem(&hit, PR_FALSE);
+    }
+    return rv;
+}
+
+/*
+ * Generate FIPS 186-1 style PQG parameters for index j (0..8).
+ * P is 512 + 64*j bits long and the seed is as long as P (L/8 bytes).
+ * Fails with SEC_ERROR_INVALID_ARGS for j > 8 or NULL output pointers.
+ */
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int pBits; /* Length of P in bits.  Per FIPS 186. */
+
+    if (pParams == NULL || pVfy == NULL || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    pBits = 512 + (j * 64);
+    return pqg_ParamGen(pBits, DSA1_Q_BITS, FIPS186_1_TYPE, pBits / 8,
+                        pParams, pVfy);
+}
+
+/*
+ * Generate FIPS 186-1 style PQG parameters for index j (0..8) with a
+ * caller-chosen seed length in bytes. P is 512 + 64*j bits long.
+ * Fails with SEC_ERROR_INVALID_ARGS for j > 8 or NULL output pointers.
+ */
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (pParams == NULL || pVfy == NULL || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* first argument is the length of P in bits */
+    return pqg_ParamGen(512 + (j * 64), DSA1_Q_BITS, FIPS186_1_TYPE,
+                        seedBytes, pParams, pVfy);
+}
+
+/*
+ * Generate PQG parameters for an (L, N) pair using Shawe-Taylor primes
+ * (FIPS186_3_ST_TYPE). N == 0 selects pqg_get_default_N(L); seedBytes == 0
+ * selects N/8. The (L, N) pair must pass pqg_validate_dsa2(), which sets the
+ * error code when it rejects the pair.
+ */
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+               PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int qBits = (N != 0) ? N : pqg_get_default_N(L);
+    /* seedBytes == L/8 for probable primes, N/8 for Shawe-Taylor Primes */
+    unsigned int seedLen = (seedBytes != 0) ? seedBytes : qBits / 8;
+
+    if (pqg_validate_dsa2(L, qBits) == SECSuccess) {
+        return pqg_ParamGen(L, qBits, FIPS186_3_ST_TYPE, seedLen,
+                            pParams, pVfy);
+    }
+    /* error code already set */
+    return SECFailure;
+}
+
+/*
+ * verify can use vfy structures returned from either FIPS186-1 or
+ * FIPS186-2, and can handle differences in selected Hash functions to
+ * generate the parameters.
+ *
+ * The return value reports whether the verification could be carried out:
+ * SECFailure means bad arguments or an internal error. The verdict itself
+ * is returned through *result:
+ *   SECSuccess    - every check that could be made passed;
+ *   SECFailure    - some check failed;
+ *   SECWouldBlock - G passed the partial checks, but no h/index was
+ *                   supplied for a non-FIPS186_1 parameter set, so G could
+ *                   not be fully verified.
+ * A vfy->counter of -1 means "no counter supplied".
+ */
+SECStatus
+PQG_VerifyParams(const PQGParams *params,
+                 const PQGVerify *vfy, SECStatus *result)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int g, n, L, N, offset, outlen;
+    mp_int p0, P, Q, G, P_, Q_, G_, r, h;
+    mp_err err = MP_OKAY;
+    int j;
+    unsigned int counter_max = 0; /* handle legacy L < 1024 */
+    unsigned int qseed_len;
+    unsigned int qgen_counter_ = 0;
+    SECItem pseed_ = { 0, 0, 0 };
+    HASH_HashType hashtype = HASH_AlgNULL;
+    pqgGenType type = FIPS186_1_TYPE;
+
+/* record a failed verification check and leave through cleanup */
+#define CHECKPARAM(cond)      \
+    if (!(cond)) {            \
+        *result = SECFailure; \
+        goto cleanup;         \
+    }
+    if (!params || !vfy || !result) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* always need at least p, q, and seed for any meaningful check */
+    if ((params->prime.len == 0) || (params->subPrime.len == 0) ||
+        (vfy->seed.len == 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* we want to either check PQ or G or both. If we don't have G, make
+     * sure we have count so we can check P. */
+    if ((params->base.len == 0) && (vfy->counter == -1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&p0) = 0;
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&P_) = 0;
+    MP_DIGITS(&Q_) = 0;
+    MP_DIGITS(&G_) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&h) = 0;
+    CHECK_MPI_OK(mp_init(&p0));
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&P_));
+    CHECK_MPI_OK(mp_init(&Q_));
+    CHECK_MPI_OK(mp_init(&G_));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&h));
+    *result = SECSuccess;
+    SECITEM_TO_MPINT(params->prime, &P);
+    SECITEM_TO_MPINT(params->subPrime, &Q);
+    /* if G isn't specified, just check P and Q */
+    if (params->base.len != 0) {
+        SECITEM_TO_MPINT(params->base, &G);
+    }
+    /* 1.  Check (L,N) pair */
+    N = mpl_significant_bits(&Q);
+    L = mpl_significant_bits(&P);
+    if (L < 1024) {
+        /* handle DSA1 pqg parameters with less than 1024 bits*/
+        CHECKPARAM(N == DSA1_Q_BITS);
+        j = PQG_PBITS_TO_INDEX(L);
+        CHECKPARAM(j >= 0 && j <= 8);
+        counter_max = 4096;
+    } else {
+        /* handle DSA2 parameters (includes DSA1, 1024 bits) */
+        CHECKPARAM(pqg_validate_dsa2(L, N) == SECSuccess);
+        counter_max = 4 * L;
+    }
+    /* 3.  G < P */
+    if (params->base.len != 0) {
+        CHECKPARAM(mp_cmp(&G, &P) < 0);
+    }
+    /* 4.  P % Q == 1 */
+    CHECK_MPI_OK(mp_mod(&P, &Q, &r));
+    CHECKPARAM(mp_cmp_d(&r, 1) == 0);
+    /* 5.  Q is prime */
+    CHECKPARAM(mpp_pprime_secure(&Q, prime_testcount_q(L, N)) == MP_YES);
+    /* 6.  P is prime */
+    CHECKPARAM(mpp_pprime_secure(&P, prime_testcount_p(L, N)) == MP_YES);
+    /* Steps 7-12 are done only if the optional PQGVerify is supplied. */
+    /* continue processing P */
+    /* 7.  counter < 4*L */
+    /* 8.  g >= N and g < 2*L   (g is length of seed in bits) */
+    /* step 7 and 8 are delayed until we determine which type of generation
+     * was used */
+    /* 9.  Q generated from SEED matches Q in PQGParams. */
+    /* This function checks all possible hash and generation types to
+     * find a Q_ which matches Q. */
+    g = vfy->seed.len * 8;
+    CHECKPARAM(findQfromSeed(L, N, g, &vfy->seed, &Q, &Q_, &qseed_len,
+                             &hashtype, &type, &qgen_counter_) == SECSuccess);
+    CHECKPARAM(mp_cmp(&Q, &Q_) == 0);
+    /* now we can do steps 7  & 8*/
+    if ((type == FIPS186_1_TYPE) || (type == FIPS186_3_TYPE)) {
+        CHECKPARAM((vfy->counter == -1) || (vfy->counter < counter_max));
+        CHECKPARAM(g >= N && g < counter_max / 2);
+    }
+    if (type == FIPS186_3_ST_TYPE) {
+        SECItem qseed = { 0, 0, 0 };
+        SECItem pseed = { 0, 0, 0 };
+        unsigned int first_seed_len;
+        unsigned int pgen_counter_ = 0;
+        unsigned int qgen_counter = (vfy->counter >> 16) & 0xffff;
+        unsigned int pgen_counter = (vfy->counter) & 0xffff;
+
+        /* extract pseed and qseed from domain_parameter_seed, which is
+         * first_seed || pseed || qseed. qseed is first_seed + small_integer
+         * mod the length of first_seed. pseed is qseed + small_integer mod
+         * the length of first_seed. This means most of the time
+         * first_seed.len == qseed.len == pseed.len. Rarely qseed.len and/or
+         * pseed.len will be smaller because mpi clamps them. pqgGen
+         * automatically adds the zero pad back though, so we can depend
+         * domain_parameter_seed.len to be a multiple of three. We only have
+         * to deal with the fact that the returned seeds from our functions
+         * could be shorter.
+         *   first_seed.len = domain_parameter_seed.len/3
+         * We can now find the offsets;
+         * first_seed.data = domain_parameter_seed.data + 0
+         * pseed.data = domain_parameter_seed.data + first_seed.len
+         * qseed.data = domain_parameter_seed.data
+         *         + domain_parameter_seed.len - qseed.len
+         * We deal with pseed possibly having zero pad in the pseed check later.
+         */
+        first_seed_len = vfy->seed.len / 3;
+        CHECKPARAM(qseed_len < vfy->seed.len);
+        CHECKPARAM(first_seed_len * 8 > N - 1);
+        CHECKPARAM(first_seed_len * 8 < counter_max / 2);
+        CHECKPARAM(first_seed_len >= qseed_len);
+        /* qseed and pseed alias vfy->seed; they are never freed here */
+        qseed.len = qseed_len;
+        qseed.data = vfy->seed.data + vfy->seed.len - qseed.len;
+        pseed.len = first_seed_len;
+        pseed.data = vfy->seed.data + first_seed_len;
+
+        /*
+         * now complete FIPS 186-3 A.1.2.1.2. Step 1 was completed
+         * above in our initial checks, Step 2 was completed by
+         * findQfromSeed */
+
+        /* Step 3 (status, c0, prime_seed, prime_gen_counter) =
+        ** (ST_Random_Prime((ceil(length/2)+1, input_seed)
+        */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed_, &pgen_counter_));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, first_seed_len * 8,
+                                                    &p0, &Q_, &P_, &pseed_, &pgen_counter_));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+        /* make sure pseed wasn't tampered with (since it is part of
+         * calculating G) */
+        if (pseed.len > pseed_.len) {
+            /* handle the case of zero pad for pseed */
+            int extra = pseed.len - pseed_.len;
+            int i;
+            for (i = 0; i < extra; i++) {
+                if (pseed.data[i] != 0) {
+                    *result = SECFailure;
+                    goto cleanup;
+                }
+            }
+            pseed.data += extra;
+            pseed.len -= extra;
+            /* the rest is handled in the normal compare below */
+        }
+        CHECKPARAM(SECITEM_CompareItem(&pseed, &pseed_) == SECEqual);
+        if (vfy->counter != -1) {
+            CHECKPARAM(pgen_counter < counter_max);
+            CHECKPARAM(qgen_counter < counter_max);
+            CHECKPARAM((pgen_counter_ == pgen_counter));
+            CHECKPARAM((qgen_counter_ == qgen_counter));
+        }
+    } else if (vfy->counter == -1) {
+        /* If counter is set to -1, we are really only verifying G, skip
+         * the remainder of the checks for P */
+        CHECKPARAM(type != FIPS186_1_TYPE); /* we only do this for DSA2 */
+    } else {
+        /* 10. P generated from (L, counter, g, SEED, Q) matches P
+         * in PQGParams. */
+        outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+        PORT_Assert(outlen > 0);
+        n = (L - 1) / outlen;
+        offset = vfy->counter * (n + 1) + ((type == FIPS186_1_TYPE) ? 2 : 1);
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, g, &vfy->seed,
+                                       &Q, &P_));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+    }
+
+    /* now check G, skip if don't have a g */
+    if (params->base.len == 0)
+        goto cleanup;
+
+    /* first Always check that G is OK  FIPS186-3 A.2.2  & A.2.4*/
+    /* 1. 2 < G < P-1 */
+    /* P is prime, p-1 == zero 1st bit */
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+    CHECKPARAM(mp_cmp_d(&G, 2) > 0 && mp_cmp(&G, &P) < 0);
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+    /* 2. verify g**q mod p == 1 */
+    CHECK_MPI_OK(mp_exptmod(&G, &Q, &P, &h)); /* h = G ** Q mod P */
+    CHECKPARAM(mp_cmp_d(&h, 1) == 0);
+
+    /* no h, the above is the best we can do */
+    if (vfy->h.len == 0) {
+        if (type != FIPS186_1_TYPE) {
+            *result = SECWouldBlock;
+        }
+        goto cleanup;
+    }
+
+    /*
+     * If h is one byte and FIPS186-3 was used to generate Q (we've verified
+     * Q was generated from seed already, then we assume that FIPS 186-3
+     * appendix A.2.3 was used to generate G. Otherwise we assume A.2.1 was
+     * used to generate G.
+     */
+    if ((vfy->h.len == 1) && (type != FIPS186_1_TYPE)) {
+        /* A.2.3 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &vfy->seed,
+                                    vfy->h.data[0], &G_));
+        CHECKPARAM(mp_cmp(&G, &G_) == 0);
+    } else {
+        PRBool passed = PR_FALSE;
+        /* A.2.1 */
+        SECITEM_TO_MPINT(vfy->h, &h);
+        /* 11. 1 < h < P-1 */
+        /* P is prime, p-1 == zero 1st bit */
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+        /* range-check h itself; G was already range-checked above */
+        CHECKPARAM(mp_cmp_d(&h, 1) > 0 && mp_cmp(&h, &P) < 0);
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+        /* 12. G generated from h matches G in PQGParams. */
+        CHECK_SEC_OK(makeGfromH(&P, &Q, &h, &G_, &passed));
+        CHECKPARAM(passed && mp_cmp(&G, &G_) == 0);
+    }
+cleanup:
+    mp_clear(&p0);
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&P_);
+    mp_clear(&Q_);
+    mp_clear(&G_);
+    mp_clear(&r);
+    mp_clear(&h);
+    if (pseed_.data) {
+        SECITEM_ZfreeItem(&pseed_, PR_FALSE);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/**************************************************************************
+ *  Free the PQGParams struct and the things it points to.                *
+ *  A NULL params is ignored. Arena-backed params are released by freeing *
+ *  the arena; otherwise each item's data is zeroized and freed, then the *
+ *  struct itself is freed.                                               *
+ **************************************************************************/
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    if (!params) {
+        return;
+    }
+    if (params->arena == NULL) {
+        /* PR_FALSE: the SECItems are embedded in params, so only their
+         * data buffers are released here */
+        SECITEM_ZfreeItem(&params->prime, PR_FALSE);
+        SECITEM_ZfreeItem(&params->subPrime, PR_FALSE);
+        SECITEM_ZfreeItem(&params->base, PR_FALSE);
+        PORT_Free(params);
+        return;
+    }
+    PORT_FreeArena(params->arena, PR_TRUE);
+}
+
+/**************************************************************************
+ *  Free the PQGVerify struct and the things it points to.                *
+ *  A NULL vfy is ignored. Arena-backed structs are released by freeing   *
+ *  the arena; otherwise each item's data is zeroized and freed, then the *
+ *  struct itself is freed.                                               *
+ **************************************************************************/
+
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    if (!vfy) {
+        return;
+    }
+    if (vfy->arena == NULL) {
+        /* PR_FALSE: the SECItems are embedded in vfy, so only their
+         * data buffers are released here */
+        SECITEM_ZfreeItem(&vfy->seed, PR_FALSE);
+        SECITEM_ZfreeItem(&vfy->h, PR_FALSE);
+        PORT_Free(vfy);
+        return;
+    }
+    PORT_FreeArena(vfy->arena, PR_TRUE);
+}
diff --git a/security/nss/lib/freebl/pqg.h b/security/nss/lib/freebl/pqg.h
new file mode 100644
index 0000000000..bb6696d469
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * pqg.h
+ *
+ * header file for pqg functions exported just to freebl
+ */
+
+#ifndef _PQG_H_
+#define _PQG_H_ 1
+
+/* NOTE(review): the implementation is not visible from this header; judging
+ * by the signature this hashes src_len bytes at src with the algorithm
+ * selected by type and writes the digest to dest -- confirm in pqg.c. */
+SECStatus
+PQG_HashBuf(HASH_HashType type, unsigned char *dest,
+ const unsigned char *src, PRUint32 src_len);
+/* PQG_GetLength returns the number of significant bytes in the SECItem
+ * object (that is, the length of the object minus any leading zeros). Any
+ * SECItem may be used, though this function is usually used for P, Q, or G
+ * values. */
+unsigned int PQG_GetLength(const SECItem *obj);
+/* Check that the PQG parameters match a NIST-defined DSA size;
+ * returns SECFailure and sets SEC_ERROR_INVALID_ARGS if they don't.
+ * See blapi.h for legal DSA PQG sizes. */
+SECStatus PQG_Check(const PQGParams *params);
+/* Return the preferred hash algorithm for the given PQGParams. */
+HASH_HashType PQG_GetHashType(const PQGParams *params);
+
+#endif /* _PQG_H_ */
diff --git a/security/nss/lib/freebl/rawhash.c b/security/nss/lib/freebl/rawhash.c
new file mode 100644
index 0000000000..07a34cf725
--- /dev/null
+++ b/security/nss/lib/freebl/rawhash.c
@@ -0,0 +1,198 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "nspr.h"
+#include "hasht.h"
+#include "blapi.h" /* below the line */
+#include "secerr.h"
+
+/* Context constructor for the null hash: there is no state to keep, so the
+ * "context" is always NULL. */
+static void *
+null_hash_new_context(void)
+{
+ return NULL;
+}
+
+/* Clone callback that never produces a copy: it asserts that the context
+ * handed in is NULL and returns NULL. */
+static void *
+null_hash_clone_context(void *v)
+{
+ PORT_Assert(v == NULL);
+ return NULL;
+}
+
+/* Begin callback for the null hash: intentionally does nothing; v is
+ * ignored. */
+static void
+null_hash_begin(void *v)
+{
+}
+
+/* Update callback for the null hash: intentionally does nothing; the input
+ * bytes are never read. */
+static void
+null_hash_update(void *v, const unsigned char *input, unsigned int length)
+{
+}
+
+/* End callback for the null hash: reports a zero-length digest through
+ * *outLen and writes nothing to output. outLen is dereferenced without a
+ * NULL check, so it must be a valid pointer. */
+static void
+null_hash_end(void *v, unsigned char *output, unsigned int *outLen,
+ unsigned int maxOut)
+{
+ *outLen = 0;
+}
+
+/* Destroy callback for the null hash: there is nothing to release; it only
+ * asserts that the context is NULL. The flag b is ignored. */
+static void
+null_hash_destroy_context(void *v, PRBool b)
+{
+ PORT_Assert(v == NULL);
+}
+
+/*
+ * Table of hash implementations. HASH_GetRawHashObject indexes it directly
+ * with a HASH_HashType value, so the order of the entries has to follow
+ * that enumeration.
+ *
+ * Each initializer lists, in order: digest length, new-context function,
+ * clone function, destroy function, begin, update, end, block length,
+ * HASH_Alg* tag, and the "end_raw" function (NULL where none is supplied:
+ * MD2 and the SHA-3 entries).
+ *
+ * NOTE(review): every entry, not just the null hash, puts
+ * null_hash_clone_context in the clone slot. That helper asserts its
+ * argument is NULL and returns NULL, so a real context cannot be cloned
+ * through this table -- confirm that no caller relies on it.
+ */
+const SECHashObject SECRawHashObjects[] = {
+ { 0,
+ (void *(*)(void))null_hash_new_context,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))null_hash_destroy_context,
+ (void (*)(void *))null_hash_begin,
+ (void (*)(void *, const unsigned char *, unsigned int))null_hash_update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))null_hash_end,
+ 0,
+ HASH_AlgNULL,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))null_hash_end },
+ {
+ MD2_LENGTH,
+ (void *(*)(void))MD2_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))MD2_DestroyContext,
+ (void (*)(void *))MD2_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))MD2_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD2_End,
+ MD2_BLOCK_LENGTH,
+ HASH_AlgMD2,
+ NULL /* end_raw */
+ },
+ { MD5_LENGTH,
+ (void *(*)(void))MD5_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))MD5_DestroyContext,
+ (void (*)(void *))MD5_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))MD5_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_End,
+ MD5_BLOCK_LENGTH,
+ HASH_AlgMD5,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_EndRaw },
+ { SHA1_LENGTH,
+ (void *(*)(void))SHA1_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA1_DestroyContext,
+ (void (*)(void *))SHA1_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA1_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))SHA1_End,
+ SHA1_BLOCK_LENGTH,
+ HASH_AlgSHA1,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))
+ SHA1_EndRaw },
+ { SHA256_LENGTH,
+ (void *(*)(void))SHA256_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA256_DestroyContext,
+ (void (*)(void *))SHA256_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA256_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA256_End,
+ SHA256_BLOCK_LENGTH,
+ HASH_AlgSHA256,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA256_EndRaw },
+ { SHA384_LENGTH,
+ (void *(*)(void))SHA384_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA384_DestroyContext,
+ (void (*)(void *))SHA384_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA384_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA384_End,
+ SHA384_BLOCK_LENGTH,
+ HASH_AlgSHA384,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA384_EndRaw },
+ { SHA512_LENGTH,
+ (void *(*)(void))SHA512_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA512_DestroyContext,
+ (void (*)(void *))SHA512_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA512_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA512_End,
+ SHA512_BLOCK_LENGTH,
+ HASH_AlgSHA512,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA512_EndRaw },
+ { SHA224_LENGTH,
+ (void *(*)(void))SHA224_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA224_DestroyContext,
+ (void (*)(void *))SHA224_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA224_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA224_End,
+ SHA224_BLOCK_LENGTH,
+ HASH_AlgSHA224,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA224_EndRaw },
+ { SHA3_224_LENGTH,
+ (void *(*)(void))SHA3_224_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA3_224_DestroyContext,
+ (void (*)(void *))SHA3_224_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA3_224_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA3_224_End,
+ SHA3_224_BLOCK_LENGTH,
+ HASH_AlgSHA3_224,
+ NULL },
+ { SHA3_256_LENGTH,
+ (void *(*)(void))SHA3_256_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA3_256_DestroyContext,
+ (void (*)(void *))SHA3_256_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA3_256_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA3_256_End,
+ SHA3_256_BLOCK_LENGTH,
+ HASH_AlgSHA3_256,
+ NULL },
+ { SHA3_384_LENGTH,
+ (void *(*)(void))SHA3_384_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA3_384_DestroyContext,
+ (void (*)(void *))SHA3_384_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA3_384_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA3_384_End,
+ SHA3_384_BLOCK_LENGTH,
+ HASH_AlgSHA3_384,
+ NULL },
+ { SHA3_512_LENGTH,
+ (void *(*)(void))SHA3_512_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA3_512_DestroyContext,
+ (void (*)(void *))SHA3_512_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA3_512_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA3_512_End,
+ SHA3_512_BLOCK_LENGTH,
+ HASH_AlgSHA3_512,
+ NULL },
+};
+
+/*
+ * Look up the raw hash object for hashType.
+ *
+ * Only values strictly between HASH_AlgNULL and HASH_AlgTOTAL are accepted;
+ * HASH_AlgNULL itself is rejected even though the table has an entry for
+ * it. For any other value SEC_ERROR_INVALID_ARGS is set and NULL is
+ * returned.
+ */
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+    if (hashType > HASH_AlgNULL && hashType < HASH_AlgTOTAL) {
+        return &SECRawHashObjects[hashType];
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return NULL;
+}
diff --git a/security/nss/lib/freebl/ret_cr16.s b/security/nss/lib/freebl/ret_cr16.s
new file mode 100644
index 0000000000..1f53fc9007
--- /dev/null
+++ b/security/nss/lib/freebl/ret_cr16.s
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef __LP64__
+ .LEVEL 2.0W
+#else
+ .LEVEL 1.1
+#endif
+
+ .CODE ; equivalent to the following two lines
+; .SPACE $TEXT$,SORT=8
+; .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ret_cr16: leaf procedure (FRAME=0, NO_CALLS) that copies control register
+; %cr16 into %ret0 and returns through %rp.
+; NOTE(review): %cr16 is presumably the PA-RISC interval timer -- confirm
+; against the architecture manual.
+ret_cr16
+ .PROC
+ .CALLINFO FRAME=0, NO_CALLS
+ .EXPORT ret_cr16,ENTRY
+ .ENTRY
+; BV %r0(%rp)
+; Return branch; NOTE(review): the MFCTL that follows is presumably executed
+; in this branch's delay slot, which is how %ret0 gets loaded before control
+; reaches the caller.
+ BV 0(%rp)
+ MFCTL %cr16,%ret0
+; NOTE(review): this second return branch and the NOP after .EXIT look
+; unreachable once the first BV has been taken -- confirm before removing.
+ BV %r0(%rp)
+ .EXIT
+ NOP
+ .PROCEND
+ .END
diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c
new file mode 100644
index 0000000000..82b1f419d9
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.c
@@ -0,0 +1,1265 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prenv.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "rijndael.h"
+
+#include "cts.h"
+#include "ctr.h"
+#include "gcm.h"
+#include "mpi.h"
+
+#if !defined(IS_LITTLE_ENDIAN) && !defined(NSS_X86_OR_X64)
+// not test yet on big endian platform of arm
+#undef USE_HW_AES
+#endif
+
+#ifdef __powerpc64__
+#include "ppc-crypto.h"
+#endif
+
+#ifdef USE_HW_AES
+#ifdef NSS_X86_OR_X64
+#include "intel-aes.h"
+#else
+#include "aes-armv8.h"
+#endif
+#endif /* USE_HW_AES */
+#ifdef INTEL_GCM
+#include "intel-gcm.h"
+#endif /* INTEL_GCM */
+#if defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
+#include "ppc-gcm.h"
+#endif
+
+/* Forward declarations */
+void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
+ unsigned int Nk);
+void rijndael_native_encryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input);
+void rijndael_native_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input);
+void native_xorBlock(unsigned char *out,
+ const unsigned char *a,
+ const unsigned char *b);
+
+/* Stub definitions for the above rijndael_native_* functions, which
+ * shouldn't be used unless NSS_X86_OR_X64 is defined */
+#ifndef NSS_X86_OR_X64
+/* Stub compiled only when NSS_X86_OR_X64 is not defined. It is not meant to
+ * be reached: it sets SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0).
+ * All arguments are ignored and cx is left untouched. */
+void
+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
+ unsigned int Nk)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+/* Stub compiled only when NSS_X86_OR_X64 is not defined. It is not meant to
+ * be reached: it sets SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0).
+ * Nothing is written to output. */
+void
+rijndael_native_encryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+/* Stub compiled only when NSS_X86_OR_X64 is not defined. It is not meant to
+ * be reached: it sets SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0).
+ * Nothing is written to output. */
+void
+rijndael_native_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+/* Stub compiled only when NSS_X86_OR_X64 is not defined. It is not meant to
+ * be reached: it sets SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0).
+ * Nothing is written to out. */
+void
+native_xorBlock(unsigned char *out, const unsigned char *a,
+ const unsigned char *b)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+#endif /* NSS_X86_OR_X64 */
+
+/*
+ * There are currently three ways to build this code, varying in performance
+ * and code size.
+ *
+ * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab
+ * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table
+ * values "on-the-fly", using gfm
+ * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros
+ *
+ * The default is RIJNDAEL_INCLUDE_TABLES.
+ */
+
+/*
+ * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],
+ * T**-1[0..4], IMXC[0..4]
+ * When building anything else, includes S, S**-1, Rcon
+ */
+#include "rijndael32.tab"
+
+#if defined(RIJNDAEL_INCLUDE_TABLES)
+/*
+ * RIJNDAEL_INCLUDE_TABLES
+ */
+#define T0(i) _T0[i]
+#define T1(i) _T1[i]
+#define T2(i) _T2[i]
+#define T3(i) _T3[i]
+#define TInv0(i) _TInv0[i]
+#define TInv1(i) _TInv1[i]
+#define TInv2(i) _TInv2[i]
+#define TInv3(i) _TInv3[i]
+#define IMXC0(b) _IMXC0[b]
+#define IMXC1(b) _IMXC1[b]
+#define IMXC2(b) _IMXC2[b]
+#define IMXC3(b) _IMXC3[b]
+/* The S-box can be recovered from the T-tables */
+#ifdef IS_LITTLE_ENDIAN
+#define SBOX(b) ((PRUint8)_T3[b])
+#else
+#define SBOX(b) ((PRUint8)_T1[b])
+#endif
+#define SINV(b) (_SInv[b])
+
+#else /* not RIJNDAEL_INCLUDE_TABLES */
+
+/*
+ * Code for generating T-table values.
+ */
+
+#ifdef IS_LITTLE_ENDIAN
+#define WORD4(b0, b1, b2, b3) \
+ ((((PRUint32)b3) << 24) | \
+ (((PRUint32)b2) << 16) | \
+ (((PRUint32)b1) << 8) | \
+ ((PRUint32)b0))
+#else
+#define WORD4(b0, b1, b2, b3) \
+ ((((PRUint32)b0) << 24) | \
+ (((PRUint32)b1) << 16) | \
+ (((PRUint32)b2) << 8) | \
+ ((PRUint32)b3))
+#endif
+
+/*
+ * Define the S and S**-1 tables (both have been stored)
+ */
+#define SBOX(b) (_S[b])
+#define SINV(b) (_SInv[b])
+
+/*
+ * The function xtime, used for Galois field multiplication
+ */
+#define XTIME(a) \
+ ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))
+
+/* Choose GFM method (macros or function) */
+#if defined(RIJNDAEL_GENERATE_VALUES_MACRO)
+
+/*
+ * Galois field GF(2**8) multipliers, in macro form
+ */
+#define GFM01(a) \
+ (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+ (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM04(a) \
+ (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
+#define GFM08(a) \
+ (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
+#define GFM03(a) \
+ (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
+#define GFM09(a) \
+ (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
+#define GFM0B(a) \
+ (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
+#define GFM0D(a) \
+ (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
+#define GFM0E(a) \
+ (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
+
+#else /* RIJNDAEL_GENERATE_VALUES */
+
+/* GF_MULTIPLY
+ *
+ * Multiply two bytes as elements of GF(2**8) using shift-and-add: for every
+ * set bit of b (lowest bit first) the current multiple of a is XORed into
+ * the product, and a is then doubled with XTIME. XTIME folds the carried-out
+ * bit back in with 0x1b, and storing the result in a PRUint8 drops bit 8,
+ * i.e. the reduction polynomial is x**8 + x**4 + x**3 + x + 1.
+ */
+PRUint8
+gfm(PRUint8 a, PRUint8 b)
+{
+    PRUint8 product = 0;
+    for (; b != 0; b >>= 1) {
+        if (b & 0x01) {
+            product ^= a;
+        }
+        a = XTIME(a);
+    }
+    return product;
+}
+
+#define GFM01(a) \
+ (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+ (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM03(a) \
+ (gfm(a, 0x03)) /* a * 03 */
+#define GFM09(a) \
+ (gfm(a, 0x09)) /* a * 09 */
+#define GFM0B(a) \
+ (gfm(a, 0x0B)) /* a * 0B */
+#define GFM0D(a) \
+ (gfm(a, 0x0D)) /* a * 0D */
+#define GFM0E(a) \
+ (gfm(a, 0x0E)) /* a * 0E */
+
+#endif /* choosing GFM function */
+
+/*
+ * The T-tables
+ */
+#define G_T0(i) \
+ (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
+#define G_T1(i) \
+ (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
+#define G_T2(i) \
+ (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
+#define G_T3(i) \
+ (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
+
+/*
+ * The inverse T-tables
+ */
+#define G_TInv0(i) \
+ (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
+#define G_TInv1(i) \
+ (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
+#define G_TInv2(i) \
+ (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
+#define G_TInv3(i) \
+ (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
+
+/*
+ * The inverse mix column tables
+ */
+#define G_IMXC0(i) \
+ (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
+#define G_IMXC1(i) \
+ (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
+#define G_IMXC2(i) \
+ (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
+#define G_IMXC3(i) \
+ (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
+
+/* Now choose the T-table indexing method */
+#if defined(RIJNDAEL_GENERATE_VALUES)
+/* generate values for the tables with a function */
+/*
+ * gen_TInvXi computes one word of inverse T-table number tx (0..3) for
+ * index i without a stored table. It looks up SINV(i), derives its GF(2**8)
+ * multiples by 02, 04 and 08 through repeated XTIME (each result is
+ * truncated to a byte by the PRUint8 assignment), combines them by XOR into
+ * the multiples 03, 09, 0B, 0D and 0E, and packs the four bytes selected by
+ * tx with WORD4.
+ */
+static PRUint32
+gen_TInvXi(PRUint8 tx, PRUint8 i)
+{
+ PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
+ si01 = SINV(i);
+ si02 = XTIME(si01);
+ si04 = XTIME(si02);
+ si08 = XTIME(si04);
+ si03 = si02 ^ si01;
+ si09 = si08 ^ si01;
+ si0B = si08 ^ si03;
+ si0D = si09 ^ si04;
+ si0E = si08 ^ si04 ^ si02;
+ /* each case is the previous byte order rotated by one position */
+ switch (tx) {
+ case 0:
+ return WORD4(si0E, si09, si0D, si0B);
+ case 1:
+ return WORD4(si0B, si0E, si09, si0D);
+ case 2:
+ return WORD4(si0D, si0B, si0E, si09);
+ case 3:
+ return WORD4(si09, si0D, si0B, si0E);
+ }
+ /* only reached for tx outside 0..3; -1 converts to the all-ones value */
+ return -1;
+}
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) gen_TInvXi(0, i)
+#define TInv1(i) gen_TInvXi(1, i)
+#define TInv2(i) gen_TInvXi(2, i)
+#define TInv3(i) gen_TInvXi(3, i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#else /* RIJNDAEL_GENERATE_VALUES_MACRO */
+/* generate values for the tables with macros */
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) G_TInv0(i)
+#define TInv1(i) G_TInv1(i)
+#define TInv2(i) G_TInv2(i)
+#define TInv3(i) G_TInv3(i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#endif /* choose T-table indexing method */
+
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+/**************************************************************************
+ *
+ * Stuff related to the Rijndael key schedule
+ *
+ *************************************************************************/
+
+#define SUBBYTE(w) \
+ ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \
+ (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \
+ (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) | \
+ (((PRUint32)SBOX((w)&0xff))))
+
+#ifdef IS_LITTLE_ENDIAN
+#define ROTBYTE(b) \
+ ((b >> 8) | (b << 24))
+#else
+#define ROTBYTE(b) \
+ ((b << 8) | (b >> 24))
+#endif
+
+/* rijndael_key_expansion7
+ *
+ * Straightforward, non-unrolled key expansion, used by
+ * rijndael_key_expansion for Nk == 7 (224 key bits).
+ *
+ * The first Nk words of the schedule are the raw key. Every later word is
+ * W[i] = W[i - Nk] ^ f(W[i - 1]), where f is
+ *   - SubByte(RotByte(x)) ^ Rcon[i / Nk - 1]  when i % Nk == 0,
+ *   - SubByte(x)                              when i % Nk == 4,
+ *   - the identity                            otherwise.
+ * Because 7 is not a multiple of 4, these special positions fall in a
+ * different place in each round key, which is why this case is kept apart
+ * from the unrolled code.
+ */
+static void
+rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+    PRUint32 *const W = cx->k.expandedKey;
+    unsigned int i;
+
+    /* 1. the first Nk words contain the cipher key */
+    memcpy(W, key, Nk * 4);
+
+    /* 2. derive the remaining words one at a time */
+    for (i = Nk; i < cx->Nb * (cx->Nr + 1); ++i) {
+        PRUint32 tmp = W[i - 1];
+        switch (i % Nk) {
+            case 0:
+                tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
+                break;
+            case 4:
+                tmp = SUBBYTE(tmp);
+                break;
+            default:
+                break;
+        }
+        W[i] = W[i - Nk] ^ tmp;
+    }
+}
+
+/* rijndael_key_expansion
+ *
+ * Generate the expanded key from the key input by the user.
+ *
+ * cx   supplies Nb and Nr (already set by the caller) and receives the
+ *      schedule in cx->k.expandedKey: Nb * (Nr + 1) words in total.
+ * key  the raw key, Nk * 4 bytes, copied verbatim into the first Nk words.
+ * Nk   key length in 32-bit words. Nk == 7 is handed to
+ *      rijndael_key_expansion7; the unrolled code below has paths for
+ *      4, 5, 6 and 8.
+ *
+ * Throughout, pW points at the most recently written word, so
+ * "tmp = *pW++" reads W[i - 1] and leaves pW at W[i], the word to write.
+ */
+static void
+rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+ unsigned int i;
+ PRUint32 *W;
+ PRUint32 *pW;
+ PRUint32 tmp;
+ unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
+ if (Nk == 7) {
+ rijndael_key_expansion7(cx, key, Nk);
+ return;
+ }
+ W = cx->k.expandedKey;
+ /* The first Nk words contain the input cipher key */
+ memcpy(W, key, Nk * 4);
+ i = Nk;
+ pW = W + i - 1;
+ /* Loop over all sets of Nk words, except the last */
+ while (i < round_key_words - Nk) {
+ /* first word of the set: RotByte, SubByte and the round constant */
+ tmp = *pW++;
+ tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
+ *pW = W[i++ - Nk] ^ tmp;
+ /* words 2..4 of the set: plain XOR chain */
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ if (Nk == 4)
+ continue;
+ /* Nk > 4: produce the remaining Nk - 4 words of the set. The case
+ * labels fall through on purpose, so entering at case Nk emits exactly
+ * Nk - 4 words. Only Nk == 8 applies the extra SubByte (to the fifth
+ * word). Since Nk == 7 returned early above, case 7 is only ever
+ * reached by falling through from case 8. */
+ switch (Nk) {
+ case 8:
+ tmp = *pW++;
+ tmp = SUBBYTE(tmp);
+ *pW = W[i++ - Nk] ^ tmp;
+ case 7:
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ case 6:
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ case 5:
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ }
+ }
+ /* Generate the last word */
+ /* (that is, the first word of the final, possibly partial, set) */
+ tmp = *pW++;
+ tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
+ *pW = W[i++ - Nk] ^ tmp;
+ /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
+ * since the above loop generated all but the last Nk key words, there
+ * is no more need for the SubByte transformation.
+ */
+ if (Nk < 8) {
+ for (; i < round_key_words; ++i) {
+ tmp = *pW++;
+ *pW = W[i - Nk] ^ tmp;
+ }
+ } else {
+ /* except in the case when Nk == 8. Then one more SubByte may have
+ * to be performed, at i % Nk == 4.
+ */
+ for (; i < round_key_words; ++i) {
+ tmp = *pW++;
+ if (i % Nk == 4)
+ tmp = SUBBYTE(tmp);
+ *pW = W[i - Nk] ^ tmp;
+ }
+ }
+}
+
+/* rijndael_invkey_expansion
+ *
+ * Generate the expanded key for the inverse cipher from the key input by
+ * the user.
+ *
+ * Runs the ordinary key expansion and then rewrites round keys 1..Nr-1 in
+ * place, replacing every word with its InvMixColumn image (computed from
+ * the IMXC0..IMXC3 lookups on the word's four bytes). Round keys 0 and Nr
+ * are left as generated.
+ */
+static void
+rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+ unsigned int r;
+ PRUint32 *roundkeyw;
+ PRUint8 *b;
+ int Nb = cx->Nb;
+ /* begins like usual key expansion ... */
+ rijndael_key_expansion(cx, key, Nk);
+ /* ... but has the additional step of InvMixColumn,
+ * excepting the first and last round keys.
+ */
+ /* skip round key 0: start at word Nb */
+ roundkeyw = cx->k.expandedKey + cx->Nb;
+ for (r = 1; r < cx->Nr; ++r) {
+ /* each key word, roundkeyw, represents a column in the key
+ * matrix. Each column is multiplied by the InvMixColumn matrix.
+ * [ 0E 0B 0D 09 ] [ b0 ]
+ * [ 09 0E 0B 0D ] * [ b1 ]
+ * [ 0D 09 0E 0B ] [ b2 ]
+ * [ 0B 0D 09 0E ] [ b3 ]
+ */
+ /* b aliases the bytes of the current word; all four bytes are read
+ * before the word is overwritten by the assignment. */
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ if (Nb <= 4)
+ continue;
+ /* Nb > 4: transform the remaining Nb - 4 words of this round key; the
+ * case labels fall through on purpose.
+ * NOTE(review): aes_InitContext sets Nb to AES_BLOCK_SIZE / 4, so this
+ * path is presumably never taken for AES -- confirm. */
+ switch (Nb) {
+ case 8:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]);
+ case 7:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]);
+ case 6:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]);
+ case 5:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]);
+ }
+ }
+}
+
+/**************************************************************************
+ *
+ * Stuff related to Rijndael encryption/decryption.
+ *
+ *************************************************************************/
+
+#ifdef IS_LITTLE_ENDIAN
+#define BYTE0WORD(w) ((w)&0x000000ff)
+#define BYTE1WORD(w) ((w)&0x0000ff00)
+#define BYTE2WORD(w) ((w)&0x00ff0000)
+#define BYTE3WORD(w) ((w)&0xff000000)
+#else
+#define BYTE0WORD(w) ((w)&0xff000000)
+#define BYTE1WORD(w) ((w)&0x00ff0000)
+#define BYTE2WORD(w) ((w)&0x0000ff00)
+#define BYTE3WORD(w) ((w)&0x000000ff)
+#endif
+
+typedef union {
+ PRUint32 w[4];
+ PRUint8 b[16];
+} rijndael_state;
+
+#define COLUMN_0(state) state.w[0]
+#define COLUMN_1(state) state.w[1]
+#define COLUMN_2(state) state.w[2]
+#define COLUMN_3(state) state.w[3]
+
+#define STATE_BYTE(i) state.b[i]
+
+// Byte-wise XOR of two AES blocks: out[k] = a[k] ^ b[k] for each of the
+// AES_BLOCK_SIZE bytes, in ascending order. out may be the same buffer as a
+// or b, since each byte is read before it is written.
+inline static void
+xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
+{
+    unsigned int idx = 0;
+    while (idx < AES_BLOCK_SIZE) {
+        out[idx] = a[idx] ^ b[idx];
+        ++idx;
+    }
+}
+
+/*
+ * Encrypt one 16-byte block with the table-driven software implementation.
+ *
+ * cx      supplies the round count (Nr) and the expanded key; the round key
+ *         words are consumed front to back, four per round.
+ * output  receives 16 bytes.
+ * input   16 bytes of plaintext.
+ *
+ * The block is handled as four 32-bit columns. With NSS_X86_OR_X64 defined,
+ * pIn/pOut are macros for input/output and the words are accessed in place
+ * regardless of alignment. Otherwise a buffer whose address is not a
+ * multiple of 4 is staged through the aligned locals inBuf/outBuf.
+ */
+static void NO_SANITIZE_ALIGNMENT
+rijndael_encryptBlock128(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ unsigned int r;
+ PRUint32 *roundkeyw;
+ rijndael_state state;
+ PRUint32 C0, C1, C2, C3;
+#if defined(NSS_X86_OR_X64)
+#define pIn input
+#define pOut output
+#else
+ unsigned char *pIn, *pOut;
+ PRUint32 inBuf[4], outBuf[4];
+
+ /* misaligned input: work on an aligned copy */
+ if ((ptrdiff_t)input & 0x3) {
+ memcpy(inBuf, input, sizeof inBuf);
+ pIn = (unsigned char *)inBuf;
+ } else {
+ pIn = (unsigned char *)input;
+ }
+ /* misaligned output: write to outBuf and copy out at the end */
+ if ((ptrdiff_t)output & 0x3) {
+ pOut = (unsigned char *)outBuf;
+ } else {
+ pOut = (unsigned char *)output;
+ }
+#endif
+ roundkeyw = cx->k.expandedKey;
+ /* Step 1: Add Round Key 0 to initial state */
+ COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
+ COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
+ COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
+ COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
+ /* Step 2: Loop over rounds [1..NR-1] */
+ for (r = 1; r < cx->Nr; ++r) {
+ /* Do ShiftRow, ByteSub, and MixColumn all at once */
+ /* The byte indices (0,5,10,15 / 4,9,14,3 / ...) implement ShiftRow;
+ * the new columns go into C0..C3 first so that every lookup still sees
+ * the state from before this round. */
+ C0 = T0(STATE_BYTE(0)) ^
+ T1(STATE_BYTE(5)) ^
+ T2(STATE_BYTE(10)) ^
+ T3(STATE_BYTE(15));
+ C1 = T0(STATE_BYTE(4)) ^
+ T1(STATE_BYTE(9)) ^
+ T2(STATE_BYTE(14)) ^
+ T3(STATE_BYTE(3));
+ C2 = T0(STATE_BYTE(8)) ^
+ T1(STATE_BYTE(13)) ^
+ T2(STATE_BYTE(2)) ^
+ T3(STATE_BYTE(7));
+ C3 = T0(STATE_BYTE(12)) ^
+ T1(STATE_BYTE(1)) ^
+ T2(STATE_BYTE(6)) ^
+ T3(STATE_BYTE(11));
+ /* Round key addition */
+ COLUMN_0(state) = C0 ^ *roundkeyw++;
+ COLUMN_1(state) = C1 ^ *roundkeyw++;
+ COLUMN_2(state) = C2 ^ *roundkeyw++;
+ COLUMN_3(state) = C3 ^ *roundkeyw++;
+ }
+ /* Step 3: Do the last round */
+ /* Final round does not employ MixColumn */
+ /* Each BYTEnWORD mask keeps the one byte lane in which the chosen
+ * T-table holds the plain S-box value (the GFM01 position in the G_T*
+ * definitions), so the OR of the four lanes is ShiftRow + ByteSub only. */
+ C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
+ (BYTE1WORD(T3(STATE_BYTE(5)))) |
+ (BYTE2WORD(T0(STATE_BYTE(10)))) |
+ (BYTE3WORD(T1(STATE_BYTE(15))))) ^
+ *roundkeyw++;
+ C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
+ (BYTE1WORD(T3(STATE_BYTE(9)))) |
+ (BYTE2WORD(T0(STATE_BYTE(14)))) |
+ (BYTE3WORD(T1(STATE_BYTE(3))))) ^
+ *roundkeyw++;
+ C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
+ (BYTE1WORD(T3(STATE_BYTE(13)))) |
+ (BYTE2WORD(T0(STATE_BYTE(2)))) |
+ (BYTE3WORD(T1(STATE_BYTE(7))))) ^
+ *roundkeyw++;
+ C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
+ (BYTE1WORD(T3(STATE_BYTE(1)))) |
+ (BYTE2WORD(T0(STATE_BYTE(6)))) |
+ (BYTE3WORD(T1(STATE_BYTE(11))))) ^
+ *roundkeyw++;
+ *((PRUint32 *)pOut) = C0;
+ *((PRUint32 *)(pOut + 4)) = C1;
+ *((PRUint32 *)(pOut + 8)) = C2;
+ *((PRUint32 *)(pOut + 12)) = C3;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+ /* deliver the staged result to a misaligned output buffer */
+ if ((ptrdiff_t)output & 0x3) {
+ memcpy(output, outBuf, sizeof outBuf);
+ }
+#endif
+}
+
+/*
+ * Decrypt one 16-byte block with the table-driven software implementation.
+ *
+ * cx      supplies Nb, Nr and the expanded key. The round key words are
+ *         consumed back to front, starting at the last word of round key
+ *         Nr and ending at word 0.
+ *         NOTE(review): the TInv tables fold InvMixColumn into each round,
+ *         so the schedule is presumably the one produced by
+ *         rijndael_invkey_expansion -- confirm in the context setup code.
+ * output  receives 16 bytes.
+ * input   16 bytes of ciphertext.
+ *
+ * Buffer alignment is handled as in rijndael_encryptBlock128: direct access
+ * when NSS_X86_OR_X64 is defined, otherwise misaligned buffers are staged
+ * through inBuf/outBuf.
+ */
+static void NO_SANITIZE_ALIGNMENT
+rijndael_decryptBlock128(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ int r;
+ PRUint32 *roundkeyw;
+ rijndael_state state;
+ PRUint32 C0, C1, C2, C3;
+#if defined(NSS_X86_OR_X64)
+#define pIn input
+#define pOut output
+#else
+ unsigned char *pIn, *pOut;
+ PRUint32 inBuf[4], outBuf[4];
+
+ /* misaligned input: work on an aligned copy */
+ if ((ptrdiff_t)input & 0x3) {
+ memcpy(inBuf, input, sizeof inBuf);
+ pIn = (unsigned char *)inBuf;
+ } else {
+ pIn = (unsigned char *)input;
+ }
+ /* misaligned output: write to outBuf and copy out at the end */
+ if ((ptrdiff_t)output & 0x3) {
+ pOut = (unsigned char *)outBuf;
+ } else {
+ pOut = (unsigned char *)output;
+ }
+#endif
+ /* last word of the last round key */
+ roundkeyw = cx->k.expandedKey + cx->Nb * cx->Nr + 3;
+ /* reverse the final key addition */
+ COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
+ COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
+ COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
+ COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
+ /* Loop over rounds in reverse [NR..1] */
+ /* (the body runs for r = Nr down to 2, i.e. Nr - 1 times) */
+ for (r = cx->Nr; r > 1; --r) {
+ /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
+ /* The byte indices (0,13,10,7 / 4,1,14,11 / ...) implement
+ * InvShiftRow; results go to C0..C3 first so every lookup still sees
+ * the state from before this round. */
+ C0 = TInv0(STATE_BYTE(0)) ^
+ TInv1(STATE_BYTE(13)) ^
+ TInv2(STATE_BYTE(10)) ^
+ TInv3(STATE_BYTE(7));
+ C1 = TInv0(STATE_BYTE(4)) ^
+ TInv1(STATE_BYTE(1)) ^
+ TInv2(STATE_BYTE(14)) ^
+ TInv3(STATE_BYTE(11));
+ C2 = TInv0(STATE_BYTE(8)) ^
+ TInv1(STATE_BYTE(5)) ^
+ TInv2(STATE_BYTE(2)) ^
+ TInv3(STATE_BYTE(15));
+ C3 = TInv0(STATE_BYTE(12)) ^
+ TInv1(STATE_BYTE(9)) ^
+ TInv2(STATE_BYTE(6)) ^
+ TInv3(STATE_BYTE(3));
+ /* Invert the key addition step */
+ COLUMN_3(state) = C3 ^ *roundkeyw--;
+ COLUMN_2(state) = C2 ^ *roundkeyw--;
+ COLUMN_1(state) = C1 ^ *roundkeyw--;
+ COLUMN_0(state) = C0 ^ *roundkeyw--;
+ }
+ /* inverse sub */
+ /* (combined with InvShiftRow via the source byte indices; there is no
+ * InvMixColumn in this last step) */
+ pOut[0] = SINV(STATE_BYTE(0));
+ pOut[1] = SINV(STATE_BYTE(13));
+ pOut[2] = SINV(STATE_BYTE(10));
+ pOut[3] = SINV(STATE_BYTE(7));
+ pOut[4] = SINV(STATE_BYTE(4));
+ pOut[5] = SINV(STATE_BYTE(1));
+ pOut[6] = SINV(STATE_BYTE(14));
+ pOut[7] = SINV(STATE_BYTE(11));
+ pOut[8] = SINV(STATE_BYTE(8));
+ pOut[9] = SINV(STATE_BYTE(5));
+ pOut[10] = SINV(STATE_BYTE(2));
+ pOut[11] = SINV(STATE_BYTE(15));
+ pOut[12] = SINV(STATE_BYTE(12));
+ pOut[13] = SINV(STATE_BYTE(9));
+ pOut[14] = SINV(STATE_BYTE(6));
+ pOut[15] = SINV(STATE_BYTE(3));
+ /* final key addition */
+ /* (round key 0, XORed word by word into the bytes just written) */
+ *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
+ *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
+ *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
+ *((PRUint32 *)pOut) ^= *roundkeyw--;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+ /* deliver the staged result to a misaligned output buffer */
+ if ((ptrdiff_t)output & 0x3) {
+ memcpy(output, outBuf, sizeof outBuf);
+ }
+#endif
+}
+
+/**************************************************************************
+ *
+ * Rijndael modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+/*
+ * ECB encryption: every AES_BLOCK_SIZE-byte block of input is encrypted
+ * independently into the matching position of output, using the native
+ * block routine when aesni_support() reports it and the table-driven one
+ * otherwise.
+ *
+ * outputLen and maxOutputLen are not used here. inputLen is stepped down by
+ * AES_BLOCK_SIZE until it reaches zero, so it has to be a multiple of the
+ * block size (presumably enforced by the caller). Always returns
+ * SECSuccess.
+ */
+static SECStatus
+rijndael_encryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    const PRBool useNative = aesni_support();
+    unsigned int remaining;
+
+    for (remaining = inputLen; remaining > 0; remaining -= AES_BLOCK_SIZE) {
+        if (!useNative) {
+            rijndael_encryptBlock128(cx, output, input);
+        } else {
+            rijndael_native_encryptBlock(cx, output, input);
+        }
+        input += AES_BLOCK_SIZE;
+        output += AES_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC encryption: each plaintext block is XORed with the previous
+ * ciphertext block (cx->iv for the first one) and then encrypted. When the
+ * call finishes, the last ciphertext block is stored back into cx->iv so a
+ * following call continues the chain.
+ *
+ * outputLen and maxOutputLen are not used here. A zero inputLen returns
+ * immediately and leaves cx->iv untouched; otherwise inputLen is stepped
+ * down by AES_BLOCK_SIZE until it reaches zero, so it has to be a multiple
+ * of the block size (presumably enforced by the caller). Always returns
+ * SECSuccess.
+ */
+static SECStatus
+rijndael_encryptCBC(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    unsigned char xored[AES_BLOCK_SIZE * 8];
+    unsigned char *prev = cx->iv;
+    const PRBool useNative = aesni_support();
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+    do {
+        if (!useNative) {
+            xorBlock(xored, input, prev);
+            rijndael_encryptBlock128(cx, output, xored);
+        } else {
+            /* chain with the previous block, then encrypt */
+            native_xorBlock(xored, input, prev);
+            rijndael_native_encryptBlock(cx, output, xored);
+        }
+
+        /* the block just produced feeds the next iteration */
+        prev = output;
+        input += AES_BLOCK_SIZE;
+        output += AES_BLOCK_SIZE;
+        inputLen -= AES_BLOCK_SIZE;
+    } while (inputLen > 0);
+
+    memcpy(cx->iv, prev, AES_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/*
+ * ECB decryption: every AES_BLOCK_SIZE-byte block of input is decrypted
+ * independently into the matching position of output, using the native
+ * block routine when aesni_support() reports it and the table-driven one
+ * otherwise.
+ *
+ * outputLen and maxOutputLen are not used here. inputLen is stepped down by
+ * AES_BLOCK_SIZE until it reaches zero, so it has to be a multiple of the
+ * block size (presumably enforced by the caller). Always returns
+ * SECSuccess.
+ */
+static SECStatus
+rijndael_decryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    const PRBool useNative = aesni_support();
+    unsigned int remaining;
+
+    for (remaining = inputLen; remaining > 0; remaining -= AES_BLOCK_SIZE) {
+        if (!useNative) {
+            rijndael_decryptBlock128(cx, output, input);
+        } else {
+            rijndael_native_decryptBlock(cx, output, input);
+        }
+        input += AES_BLOCK_SIZE;
+        output += AES_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC decryption. Each ciphertext block is decrypted and XORed with the
+ * preceding ciphertext block (cx->iv for the first block). On return cx->iv
+ * holds the last ciphertext block of this call, so a following call
+ * continues the chain.
+ *
+ * Blocks are processed from the last one to the first. That way the
+ * ciphertext block that precedes the current one has not been overwritten
+ * yet when it is needed, which is what makes output == input workable.
+ *
+ * outputLen and maxOutputLen are not used here. A zero inputLen returns
+ * immediately and leaves cx->iv untouched. Always returns SECSuccess.
+ *
+ * NOTE(review): inputLen has to be a multiple of AES_BLOCK_SIZE. Otherwise
+ * the backward walk never lands exactly on input, the first block is
+ * skipped, and &in[-AES_BLOCK_SIZE] can point before the buffer --
+ * presumably the caller guarantees this; confirm.
+ */
+static SECStatus
+rijndael_decryptCBC(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ const unsigned char *in;
+ unsigned char *out;
+ unsigned char newIV[AES_BLOCK_SIZE];
+ PRBool aesni = aesni_support();
+
+ if (!inputLen)
+ return SECSuccess;
+ /* output must not start before input unless the two ranges are disjoint */
+ PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
+ in = input + (inputLen - AES_BLOCK_SIZE);
+ /* save the last ciphertext block now: with in-place operation it is
+ * overwritten by the first block decrypted below */
+ memcpy(newIV, in, AES_BLOCK_SIZE);
+ out = output + (inputLen - AES_BLOCK_SIZE);
+ /* all blocks except the first, last to second */
+ while (inputLen > AES_BLOCK_SIZE) {
+ if (aesni) {
+ // Use hardware acceleration for normal AES parameters.
+ rijndael_native_decryptBlock(cx, out, in);
+ native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
+ } else {
+ rijndael_decryptBlock128(cx, out, in);
+ xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
+ }
+ out -= AES_BLOCK_SIZE;
+ in -= AES_BLOCK_SIZE;
+ inputLen -= AES_BLOCK_SIZE;
+ }
+ /* first block: chained with the IV kept in the context */
+ if (in == input) {
+ if (aesni) {
+ rijndael_native_decryptBlock(cx, out, in);
+ native_xorBlock(out, out, cx->iv);
+ } else {
+ rijndael_decryptBlock128(cx, out, in);
+ xorBlock(out, out, cx->iv);
+ }
+ }
+ memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
+ return SECSuccess;
+}
+
+/************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ * The following functions implement the encryption routines defined in
+ * BLAPI for the AES cipher, Rijndael.
+ *
+ ***********************************************************************/
+
+/*
+ * AES_AllocateContext
+ *
+ * Allocate an AESContext through PORT_ZNewAligned with an alignment of 16.
+ * "mem" names the AESContext member handed to that macro; it is the pointer
+ * AES_DestroyContext later passes to PORT_Free.
+ * Returns whatever PORT_ZNewAligned yields.
+ */
+AESContext *
+AES_AllocateContext(void)
+{
+    AESContext *cx = PORT_ZNewAligned(AESContext, 16, mem);
+
+    return cx;
+}
+
+/*
+** Initialize a new AES context suitable for AES encryption/decryption in
+** the ECB or CBC mode.
+** "cx" the context to initialize; NULL is rejected
+** "key" the raw key; NULL is rejected
+** "keysize" the key length in bytes; must be a multiple of 4 between
+** AES_BLOCK_SIZE and 32
+** "iv" the initialization vector; must be non-NULL for NSS_AES_CBC
+** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
+** "encrypt" non-zero selects the encrypt worker and key schedule, zero the
+** decrypt ones
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS for rejected arguments, or
+** with SEC_ERROR_LIBRARY_FAILURE if the expanded key would not fit.
+** Note: cx->mode is read below but never written here; AES_InitContext
+** stores it before calling this function.
+*/
+static SECStatus
+aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+ const unsigned char *iv, int mode, unsigned int encrypt)
+{
+ unsigned int Nk;
+ PRBool use_hw_aes;
+ /* According to AES, block lengths are 128 and key lengths are 128, 192, or
+ * 256 bits. We support other key sizes as well [128, 256] as long as the
+ * length in bytes is divisible by 4.
+ */
+
+ if (key == NULL ||
+ keysize < AES_BLOCK_SIZE ||
+ keysize > 32 ||
+ keysize % 4 != 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ if (mode != NSS_AES && mode != NSS_AES_CBC) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ if (mode == NSS_AES_CBC && iv == NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ if (!cx) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* Hardware AES is only considered for key sizes that are a multiple of
+ * 8 bytes, i.e. 16, 24 or 32 given the range check above. */
+#if defined(NSS_X86_OR_X64) || defined(USE_HW_AES)
+ use_hw_aes = (aesni_support() || arm_aes_support()) && (keysize % 8) == 0;
+#else
+ use_hw_aes = PR_FALSE;
+#endif
+ /* Nb = (block size in bits) / 32 */
+ cx->Nb = AES_BLOCK_SIZE / 4;
+ /* Nk = (key size in bits) / 32 */
+ Nk = keysize / 4;
+ /* Obtain number of rounds from "table" */
+ cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
+ /* copy in the iv, if necessary */
+ if (mode == NSS_AES_CBC) {
+ memcpy(cx->iv, iv, AES_BLOCK_SIZE);
+ /* When USE_HW_AES is defined, the trailing "else" binds to the braced
+ * block that follows the #endif; otherwise that block always runs. */
+#ifdef USE_HW_AES
+ if (use_hw_aes) {
+ cx->worker = (freeblCipherFunc)
+ native_aes_cbc_worker(encrypt, keysize);
+ } else
+#endif
+ {
+ cx->worker = (freeblCipherFunc)(encrypt
+ ? &rijndael_encryptCBC
+ : &rijndael_decryptCBC);
+ }
+ } else {
+ /* Same "else"-across-#endif pairing as above, for the ECB workers. */
+#ifdef USE_HW_AES
+ if (use_hw_aes) {
+ cx->worker = (freeblCipherFunc)
+ native_aes_ecb_worker(encrypt, keysize);
+ } else
+#endif
+ {
+ cx->worker = (freeblCipherFunc)(encrypt
+ ? &rijndael_encryptECB
+ : &rijndael_decryptECB);
+ }
+ }
+ /* The expanded key needs Nb * (Nr + 1) words; make sure it fits in the
+ * context's key storage (checked in both debug and release builds). */
+ PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
+ if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* Same "else"-across-#endif pairing: with USE_HW_AES and use_hw_aes set,
+ * native_aes_init is used instead of the key expansion block below. */
+#ifdef USE_HW_AES
+ if (use_hw_aes) {
+ native_aes_init(encrypt, keysize);
+ } else
+#endif
+ {
+ /* Generate expanded key */
+ if (encrypt) {
+ /* This test can only be true when USE_HW_AES is not defined,
+ * since otherwise this block is reached with use_hw_aes false. */
+ if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
+ cx->mode == NSS_AES_CTR)) {
+ PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
+ /* Prepare hardware key for normal AES parameters. */
+ rijndael_native_key_expansion(cx, key, Nk);
+ } else {
+ rijndael_key_expansion(cx, key, Nk);
+ }
+ } else {
+ rijndael_invkey_expansion(cx, key, Nk);
+ }
+ BLAPI_CLEAR_STACK(256)
+ }
+ /* The base workers installed above take the AESContext itself as their
+ * context and need no separate destroy function. */
+ cx->worker_cx = cx;
+ cx->destroy = NULL;
+ cx->isBlock = PR_TRUE;
+ return SECSuccess;
+}
+
+/*
+ * AES_InitContext
+ *
+ * Initialize a caller-allocated AES context for "mode".  The base cipher is
+ * set up by aes_InitContext (CBC for NSS_AES_CTS; ECB in the encrypt
+ * direction for NSS_AES_GCM and NSS_AES_CTR), and a CTS, GCM or CTR worker
+ * context is then layered on top for those three modes.
+ *
+ * "cx" the context to initialize; must not be NULL
+ * "key", "keysize" the raw key and its length in bytes
+ * "iv" handed to aes_InitContext and to the mode's CreateContext routine
+ * "mode" one of the NSS_AES_* modes
+ * "encrypt" non-zero to encrypt, zero to decrypt
+ * "blocksize" must equal AES_BLOCK_SIZE
+ *
+ * Returns SECSuccess on success.  Returns SECFailure with
+ * SEC_ERROR_INVALID_ARGS if cx is NULL or blocksize is wrong.  On any later
+ * failure the context is cleared with AES_DestroyContext(cx, PR_FALSE)
+ * before SECFailure is returned.
+ */
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int blocksize)
+{
+    int basemode = mode;
+    PRBool baseencrypt = encrypt;
+    SECStatus rv;
+
+    /* cx is written to below (and by AES_DestroyContext on the error paths)
+     * before aes_InitContext gets a chance to validate it, so a NULL
+     * context has to be rejected here. */
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Map the requested mode onto the base mode aes_InitContext accepts. */
+    switch (mode) {
+        case NSS_AES_CTS:
+            basemode = NSS_AES_CBC;
+            break;
+        case NSS_AES_GCM:
+        case NSS_AES_CTR:
+            /* These modes always run the base cipher as ECB encryption,
+             * whichever direction the caller asked for. */
+            basemode = NSS_AES;
+            baseencrypt = PR_TRUE;
+            break;
+    }
+    /* Make sure enough is initialized so we can safely call Destroy. */
+    cx->worker_cx = NULL;
+    cx->destroy = NULL;
+    cx->mode = mode;
+    rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);
+    if (rv != SECSuccess) {
+        AES_DestroyContext(cx, PR_FALSE);
+        return rv;
+    }
+
+    /* finally, set up any mode specific contexts */
+    cx->worker_aead = 0;
+    switch (mode) {
+        case NSS_AES_CTS:
+            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);
+            cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
+            cx->destroy = (freeblDestroyFunc)CTS_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        case NSS_AES_GCM:
+            /* Whichever "if" is compiled in, its trailing "else" binds to
+             * the generic block after the #endif. */
+#if defined(INTEL_GCM) && defined(USE_HW_AES)
+            if (aesni_support() && (keysize % 8) == 0 && avx_support() &&
+                clmul_support()) {
+                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);
+                cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate
+                                                        : intel_AES_GCM_DecryptUpdate);
+                cx->worker_aead = (freeblAeadFunc)(encrypt ? intel_AES_GCM_EncryptAEAD
+                                                           : intel_AES_GCM_DecryptAEAD);
+                cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            } else
+#elif defined(USE_PPC_CRYPTO) && defined(PPC_GCM)
+            if (ppc_crypto_support() && (keysize % 8) == 0) {
+                cx->worker_cx = ppc_AES_GCM_CreateContext(cx, cx->worker, iv);
+                cx->worker = (freeblCipherFunc)(encrypt ? ppc_AES_GCM_EncryptUpdate
+                                                        : ppc_AES_GCM_DecryptUpdate);
+                cx->worker_aead = (freeblAeadFunc)(encrypt ? ppc_AES_GCM_EncryptAEAD
+                                                           : ppc_AES_GCM_DecryptAEAD);
+                cx->destroy = (freeblDestroyFunc)ppc_AES_GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            } else
+#endif
+            {
+                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);
+                cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate
+                                                        : GCM_DecryptUpdate);
+                cx->worker_aead = (freeblAeadFunc)(encrypt ? GCM_EncryptAEAD
+                                                           : GCM_DecryptAEAD);
+
+                cx->destroy = (freeblDestroyFunc)GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            }
+            break;
+        case NSS_AES_CTR:
+            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);
+#if defined(USE_HW_AES) && defined(_MSC_VER) && defined(NSS_X86_OR_X64)
+            if (aesni_support() && (keysize % 8) == 0) {
+                cx->worker = (freeblCipherFunc)CTR_Update_HW_AES;
+            } else
+#endif
+            {
+                cx->worker = (freeblCipherFunc)CTR_Update;
+            }
+            cx->destroy = (freeblDestroyFunc)CTR_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        default:
+            /* everything has already been set up by aes_InitContext, just
+             * return */
+            return SECSuccess;
+    }
+    /* check to see if we succeeded in getting the worker context */
+    if (cx->worker_cx == NULL) {
+        /* no, just destroy the existing context */
+        cx->destroy = NULL; /* paranoia: AES_DestroyContext already skips
+                             * the destroy callback when worker_cx is NULL */
+        AES_DestroyContext(cx, PR_FALSE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* AES_CreateContext
+ *
+ * Allocate a context with AES_AllocateContext and initialize it with
+ * AES_InitContext.  Returns the new context, or NULL if the allocation
+ * fails or the initialization does not return SECSuccess (in which case
+ * the context is destroyed and freed first).
+ */
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keysize, unsigned int blocksize)
+{
+    AESContext *newCx = AES_AllocateContext();
+
+    if (newCx == NULL) {
+        return NULL;
+    }
+    if (AES_InitContext(newCx, key, keysize, iv, mode, encrypt,
+                        blocksize) != SECSuccess) {
+        AES_DestroyContext(newCx, PR_TRUE);
+        return NULL;
+    }
+    return newCx;
+}
+
+/*
+ * AES_DestroyContext
+ *
+ * Zero an AES cipher context.  If the context carries a mode-specific
+ * worker context together with a destroy function, that worker context is
+ * destroyed first.  If freeit is true, the allocation recorded in cx->mem
+ * is freed as well; otherwise cx->mem is preserved so that a later call
+ * can still free the context.
+ *
+ * A NULL cx is accepted and ignored.
+ */
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    void *mem;
+
+    /* Nothing to clear or free; avoid dereferencing a NULL context. */
+    if (cx == NULL) {
+        return;
+    }
+    /* Save the allocation pointer: the memset below wipes it. */
+    mem = cx->mem;
+    if (cx->worker_cx && cx->destroy) {
+        (*cx->destroy)(cx->worker_cx, PR_TRUE);
+        cx->worker_cx = NULL;
+        cx->destroy = NULL;
+    }
+    PORT_Memset(cx, 0, sizeof(AESContext));
+    if (freeit) {
+        PORT_Free(mem);
+    } else {
+        /* if we are not freeing the context, restore mem. We may get called
+         * again to actually free the context */
+        cx->mem = mem;
+    }
+}
+
+/*
+ * AES_Encrypt
+ *
+ * Encrypt an arbitrary-length buffer by dispatching to the worker installed
+ * in the context.  The output buffer must already be allocated to at least
+ * inputLen bytes.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS for a NULL cx or output, or a NULL
+ * input with a non-zero length; with SEC_ERROR_INPUT_LEN when a block-mode
+ * context is given a length that is not a multiple of AES_BLOCK_SIZE; and
+ * with SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than inputLen.
+ * Otherwise returns the worker's status.
+ */
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    /* Check args; a NULL input is only acceptable for a zero length. */
+    if (!cx || !output || (!input && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % AES_BLOCK_SIZE != 0) && cx->isBlock) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+#if UINT_MAX > MP_32BIT_MAX
+    /*
+     * We can guarantee that GCM won't overflow if we limit the input to
+     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
+     *
+     * The check lives here so that it covers both the hardware and the
+     * software GCM implementations.
+     */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
+    }
+    if ((inputLen > MP_32BIT_MAX) && (cx->mode == NSS_AES_GCM)) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#else
+    /* An unsigned int cannot exceed 32 bits here, so no check is needed. */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
+    }
+#endif
+
+    status = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
+                           input, inputLen, AES_BLOCK_SIZE);
+    BLAPI_CLEAR_STACK(256)
+    return status;
+}
+
+/*
+ * AES_Decrypt
+ *
+ * Decrypt an arbitrary-length buffer by dispatching to the worker installed
+ * in the context.  The output buffer must already be allocated to at least
+ * inputLen bytes; that size check is not applied when the context mode is
+ * NSS_AES_GCM.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS for a NULL cx or output, or a NULL
+ * input with a non-zero length; with SEC_ERROR_INPUT_LEN when a block-mode
+ * context is given a length that is not a multiple of AES_BLOCK_SIZE; and
+ * with SEC_ERROR_OUTPUT_LEN when the output buffer is too small.
+ * Otherwise returns the worker's status.
+ */
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    /* Check args; a NULL input is only acceptable for a zero length. */
+    if (!cx || !output || (!input && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % AES_BLOCK_SIZE != 0) && cx->isBlock) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if ((inputLen > maxOutputLen) && (cx->mode != NSS_AES_GCM)) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+    status = (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
+                           input, inputLen, AES_BLOCK_SIZE);
+    BLAPI_CLEAR_STACK(256)
+    return status;
+}
+
+/*
+ * AES_AEAD
+ *
+ * Run the context's AEAD worker (encrypt or decrypt, as chosen when the
+ * context was initialized) over input, passing params/paramsLen and the
+ * additional authenticated data aad/aadLen straight through to it.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS for a NULL cx, output or params, or a
+ * NULL input/aad paired with a non-zero length; with
+ * SEC_ERROR_NOT_INITIALIZED when the context has no AEAD worker; and with
+ * SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than inputLen.
+ * Otherwise returns the worker's status.
+ */
+SECStatus
+AES_AEAD(AESContext *cx, unsigned char *output,
+         unsigned int *outputLen, unsigned int maxOutputLen,
+         const unsigned char *input, unsigned int inputLen,
+         void *params, unsigned int paramsLen,
+         const unsigned char *aad, unsigned int aadLen)
+{
+    SECStatus status;
+
+    /* Check args; NULL buffers are only acceptable with a zero length. */
+    if (!cx || !output ||
+        (!input && inputLen != 0) ||
+        (!aad && aadLen != 0) ||
+        !params) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (!cx->worker_aead) {
+        PORT_SetError(SEC_ERROR_NOT_INITIALIZED);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+#if UINT_MAX > MP_32BIT_MAX
+    /*
+     * We can guarantee that GCM won't overflow if we limit the input to
+     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
+     *
+     * The check lives here so that it covers both the hardware and the
+     * software GCM implementations.
+     */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
+    }
+    if (inputLen > MP_32BIT_MAX) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#else
+    /* An unsigned int cannot exceed 32 bits here, so no check is needed. */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
+    }
+#endif
+
+    status = (*cx->worker_aead)(cx->worker_cx, output, outputLen, maxOutputLen,
+                                input, inputLen, params, paramsLen, aad, aadLen,
+                                AES_BLOCK_SIZE);
+    BLAPI_CLEAR_STACK(256)
+    return status;
+}
diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h
new file mode 100644
index 0000000000..6a69a38199
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.h
@@ -0,0 +1,80 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _RIJNDAEL_H_
+#define _RIJNDAEL_H_ 1
+
+#include "blapii.h"
+#include <stdint.h>
+
+#if defined(NSS_X86_OR_X64)
+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#undef NSS_DISABLE_SSE2
+#define NSS_DISABLE_SSE2 1
+#endif /* GCC <= 4.8 */
+
+#include <emmintrin.h> /* __m128i */
+
+#ifdef NSS_DISABLE_SSE2
+#undef NSS_DISABLE_SSE2
+#pragma GCC pop_options
+#endif /* NSS_DISABLE_SSE2 */
+#endif
+
+/* RIJNDAEL_NUM_ROUNDS
+ *
+ * Number of rounds per execution
+ * Nk - key size in 32-bit words (key length in bytes / 4)
+ * Nb - block size in 32-bit words (block length in bytes / 4)
+ */
+#define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \
+ (PR_MAX(Nk, Nb) + 6)
+
+/*
+ * This magic number is (Nb * (Nr_max + 1)), counted in 32-bit words,
+ * where Nb is the block size in 32-bit words (4 for the 128-bit AES block)
+ * and Nr_max is the maximum number of rounds, PR_MAX(Nk_max, Nb) + 6 = 14,
+ * reached with the largest accepted key of 32 bytes (Nk_max = 8).
+ */
+#define RIJNDAEL_MAX_EXP_KEY_SIZE (4 * 15)
+
+/* AESContextStr
+ *
+ * Values which maintain the state for Rijndael encryption/decryption.
+ *
+ * keySchedule - 128-bit registers for the key-schedule
+ * iv - initialization vector for CBC mode
+ * Nb - the block size in 32-bit words (set to AES_BLOCK_SIZE / 4)
+ * Nr - the number of rounds, computed with RIJNDAEL_NUM_ROUNDS
+ * expandedKey - the round keys in 4-byte words; Nb * (Nr + 1) words are
+ * needed, which must not exceed RIJNDAEL_MAX_EXP_KEY_SIZE
+ * worker - the encryption/decryption function to use with worker_cx
+ * worker_aead - the AEAD function to use with worker_cx, or NULL when the
+ * mode provides none
+ * destroy - if not NULL, the destroy function to use with worker_cx
+ * worker_cx - the context for worker and destroy
+ * isBlock - is the mode of operation a block cipher or a stream cipher?
+ * When set, input lengths must be a multiple of AES_BLOCK_SIZE.
+ * mode - the mode of operation the context was initialized with
+ * mem - start of the allocation backing this context; the pointer to free
+ */
+struct AESContextStr {
+ /* NOTE: Offsets to members in this struct are hardcoded in assembly.
+ * Don't change the struct without updating intel-aes.s and intel-gcm.s. */
+ union {
+#if defined(NSS_X86_OR_X64)
+ __m128i keySchedule[15];
+#endif
+ PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE];
+ } k;
+ unsigned int Nb;
+ unsigned int Nr;
+ freeblCipherFunc worker;
+ unsigned char iv[AES_BLOCK_SIZE];
+ freeblAeadFunc worker_aead;
+ freeblDestroyFunc destroy;
+ void *worker_cx;
+ PRBool isBlock;
+ int mode;
+ void *mem; /* Start of the allocated memory to free. */
+};
+
+#endif /* _RIJNDAEL_H_ */
diff --git a/security/nss/lib/freebl/rijndael32.tab b/security/nss/lib/freebl/rijndael32.tab
new file mode 100644
index 0000000000..59be7c2c09
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael32.tab
@@ -0,0 +1,1219 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RIJNDAEL_INCLUDE_TABLES
+/* AES forward S-box (the SubBytes substitution), indexed by byte value.
+ * Only defined here when RIJNDAEL_INCLUDE_TABLES is not set. */
+static const PRUint8 _S[256] =
+{
+ 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
+202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
+ 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
+ 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
+ 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
+208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
+ 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
+205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
+ 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
+224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
+231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
+186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
+112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
+225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
+140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22
+};
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+/* AES inverse S-box (the InvSubBytes substitution), indexed by byte value.
+ * Defined whether or not RIJNDAEL_INCLUDE_TABLES is set. */
+static const PRUint8 _SInv[256] =
+{
+ 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
+124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
+ 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
+ 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
+114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
+108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
+144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
+208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
+ 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
+ 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
+252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
+ 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
+ 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
+160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
+ 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
+};
+
+#ifdef RIJNDAEL_INCLUDE_TABLES
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T0[256] =
+{
+0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
+0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
+0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
+0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
+0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
+0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
+0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
+0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
+0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
+0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
+0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
+0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
+0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
+0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
+0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
+0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
+0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
+0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
+0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
+0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
+0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
+0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
+};
+#else
+static const PRUint32 _T0[256] =
+{
+0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd,
+0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d,
+0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7,
+0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4,
+0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1,
+0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e,
+0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e,
+0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46,
+0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7,
+0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe,
+0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a,
+0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2,
+0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e,
+0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256,
+0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4,
+0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa,
+0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1,
+0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42,
+0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158,
+0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22,
+0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631,
+0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T1[256] =
+{
+0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd,
+0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d,
+0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d,
+0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7,
+0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a,
+0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4,
+0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1,
+0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d,
+0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e,
+0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e,
+0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c,
+0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46,
+0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7,
+0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81,
+0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe,
+0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a,
+0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f,
+0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2,
+0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e,
+0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c,
+0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456,
+0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4,
+0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7,
+0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa,
+0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1,
+0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21,
+0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42,
+0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958,
+0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233,
+0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22,
+0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731,
+0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11,
+0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a
+};
+#else
+static const PRUint32 _T1[256] =
+{
+0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b,
+0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
+0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282,
+0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
+0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4,
+0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
+0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5,
+0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
+0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696,
+0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
+0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383,
+0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
+0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3,
+0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
+0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb,
+0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
+0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d,
+0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
+0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3,
+0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
+0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff,
+0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
+0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7,
+0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
+0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a,
+0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
+0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232,
+0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
+0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595,
+0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
+0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656,
+0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
+0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6,
+0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
+0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e,
+0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
+0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1,
+0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
+0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e,
+0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
+0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6,
+0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
+0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T2[256] =
+{
+0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b,
+0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
+0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82,
+0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4,
+0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
+0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5,
+0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196,
+0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
+0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83,
+0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3,
+0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
+0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb,
+0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d,
+0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
+0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3,
+0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff,
+0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
+0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7,
+0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a,
+0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
+0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632,
+0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495,
+0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
+0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56,
+0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6,
+0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
+0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e,
+0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1,
+0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
+0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e,
+0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6,
+0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
+0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16
+};
+#else
+static const PRUint32 _T2[256] =
+{
+0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b,
+0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
+0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82,
+0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
+0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4,
+0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
+0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5,
+0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
+0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796,
+0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
+0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83,
+0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
+0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3,
+0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
+0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb,
+0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
+0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d,
+0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
+0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3,
+0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
+0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff,
+0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
+0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7,
+0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
+0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a,
+0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
+0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432,
+0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
+0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195,
+0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
+0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56,
+0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
+0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6,
+0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
+0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e,
+0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
+0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1,
+0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
+0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, 0x9bb62d9b, 0x1e223c1e,
+0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
+0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6,
+0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
+0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T3[256] =
+{
+0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b,
+0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b,
+0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282,
+0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4,
+0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626,
+0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5,
+0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696,
+0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2,
+0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383,
+0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3,
+0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded,
+0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb,
+0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d,
+0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f,
+0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3,
+0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff,
+0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec,
+0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7,
+0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a,
+0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414,
+0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232,
+0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595,
+0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d,
+0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656,
+0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6,
+0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f,
+0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e,
+0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1,
+0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111,
+0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e,
+0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6,
+0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f,
+0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616
+};
+#else
+static const PRUint32 _T3[256] =
+{
+0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6,
+0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
+0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f,
+0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
+0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753,
+0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
+0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451,
+0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
+0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137,
+0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
+0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d,
+0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
+0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd,
+0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
+0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 0xcbcb468d,
+0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
+0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a,
+0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
+0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d,
+0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
+0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5,
+0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
+0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255,
+0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
+0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54,
+0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
+0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664,
+0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
+0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431,
+0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
+0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac,
+0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
+0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157,
+0xb4b4c773, 0xc6c65197, 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
+0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c,
+0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
+0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899,
+0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
+0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c,
+0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
+0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7,
+0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
+0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv0[256] =
+{
+0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
+0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
+0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
+0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
+0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
+0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
+0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
+0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
+0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
+0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
+0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
+0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
+0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
+0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
+0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
+0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
+0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
+0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
+0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
+0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
+0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
+0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0
+};
+#else
+static const PRUint32 _TInv0[256] =
+{
+0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1,
+0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67,
+0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3,
+0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182,
+0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2,
+0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492,
+0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa,
+0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997,
+0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48,
+0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f,
+0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad,
+0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc,
+0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3,
+0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1,
+0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8,
+0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4,
+0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331,
+0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d,
+0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252,
+0xe9105633, 0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f,
+0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c,
+0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv1[256] =
+{
+0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1,
+0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525,
+0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567,
+0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
+0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3,
+0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd,
+0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582,
+0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
+0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2,
+0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5,
+0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792,
+0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
+0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa,
+0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46,
+0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697,
+0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
+0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248,
+0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627,
+0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f,
+0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
+0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad,
+0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd,
+0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc,
+0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
+0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3,
+0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722,
+0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1,
+0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
+0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8,
+0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3,
+0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4,
+0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
+0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31,
+0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315,
+0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d,
+0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
+0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252,
+0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89,
+0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f,
+0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
+0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c,
+0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490,
+0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042
+};
+#else
+static const PRUint32 _TInv1[256] =
+{
+0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45,
+0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
+0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b,
+0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
+0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421,
+0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
+0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31,
+0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
+0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02,
+0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
+0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4,
+0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
+0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef,
+0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
+0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9,
+0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
+0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed,
+0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
+0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7,
+0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
+0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7,
+0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
+0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6,
+0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
+0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230,
+0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
+0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1,
+0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
+0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8,
+0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
+0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918,
+0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
+0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23,
+0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
+0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0,
+0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
+0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2,
+0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
+0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273,
+0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
+0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225,
+0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
+0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv2[256] =
+{
+0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145,
+0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c,
+0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b,
+0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
+0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321,
+0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71,
+0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231,
+0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
+0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202,
+0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508,
+0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4,
+0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
+0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef,
+0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd,
+0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9,
+0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
+0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed,
+0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739,
+0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7,
+0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
+0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7,
+0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60,
+0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6,
+0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
+0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330,
+0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264,
+0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1,
+0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
+0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8,
+0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312,
+0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418,
+0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
+0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123,
+0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8,
+0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0,
+0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
+0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2,
+0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c,
+0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73,
+0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
+0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25,
+0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1,
+0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257
+};
+#else
+static const PRUint32 _TInv2[256] =
+{
+0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d,
+0x58abacfa, 0x03934be3, 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
+0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, 0x5a49deb1, 0x1b6725ba,
+0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
+0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874,
+0x692949e0, 0xc8448ec9, 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
+0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, 0x4a1863df, 0x3182e51a,
+0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
+0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b,
+0x8f57e31f, 0xab2a6655, 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
+0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, 0x1c2b8acf, 0xb492a779,
+0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
+0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060,
+0x9f065e71, 0x1051bd6e, 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
+0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, 0xfb241998, 0xe997d6bd,
+0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
+0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b,
+0x70ac1e11, 0x724e6c5a, 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
+0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, 0x67b10c0a, 0xe70f9357,
+0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
+0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b,
+0xa8b92db6, 0xa9c8141e, 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
+0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, 0x29768b43, 0xc6dccb23,
+0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
+0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2,
+0x52ec0d86, 0xe3d077c1, 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
+0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, 0x4ec78749, 0xd1c1d938,
+0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
+0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8,
+0xf75e2e39, 0xaff582c3, 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
+0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, 0x7809cd26, 0x18f46e59,
+0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
+0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f,
+0x9430c6a5, 0x66c035a2, 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
+0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, 0xd68d764d, 0xb04d43ef,
+0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
+0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db,
+0x5633e910, 0x47136dd6, 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
+0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, 0xdf599cd2, 0x733f55f2,
+0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
+0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2,
+0x498b283c, 0x9541ff0d, 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
+0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv3[256] =
+{
+0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d,
+0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02,
+0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba,
+0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
+0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174,
+0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9,
+0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a,
+0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
+0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b,
+0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837,
+0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479,
+0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
+0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60,
+0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6,
+0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd,
+0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
+0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b,
+0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d,
+0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757,
+0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
+0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b,
+0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f,
+0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623,
+0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
+0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2,
+0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9,
+0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138,
+0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
+0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8,
+0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225,
+0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859,
+0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
+0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f,
+0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7,
+0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef,
+0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
+0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db,
+0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13,
+0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2,
+0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
+0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2,
+0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156,
+0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8
+};
+#else
+static const PRUint32 _TInv3[256] =
+{
+0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f,
+0xfa58abac, 0xe303934b, 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
+0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, 0xb15a49de, 0xba1b6725,
+0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
+0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358,
+0xe0692949, 0xc9c8448e, 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
+0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, 0xdf4a1863, 0x1a3182e5,
+0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
+0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272,
+0x1f8f57e3, 0x55ab2a66, 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
+0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, 0xcf1c2b8a, 0x79b492a7,
+0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
+0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40,
+0x719f065e, 0x6e1051bd, 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
+0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, 0x98fb2419, 0xbde997d6,
+0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
+0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832,
+0x1170ac1e, 0x5a724e6c, 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
+0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, 0x0a67b10c, 0x57e70f93,
+0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
+0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2,
+0xb6a8b92d, 0x1ea9c814, 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
+0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, 0x4329768b, 0x23c6dccb,
+0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
+0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc,
+0x8652ec0d, 0xc1e3d077, 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
+0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, 0x494ec787, 0x38d1c1d9,
+0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
+0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890,
+0x39f75e2e, 0xc3aff582, 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
+0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, 0x267809cd, 0x5918f46e,
+0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
+0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a,
+0xa59430c6, 0xa266c035, 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
+0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, 0x4dd68d76, 0xefb04d43,
+0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
+0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292,
+0x105633e9, 0xd647136d, 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
+0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, 0xd2df599c, 0xf2733f55,
+0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
+0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc,
+0x3c498b28, 0x0d9541ff, 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
+0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC0[256] =
+{
+0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36,
+0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 0xb0d090e0, 0xbbdd99ee,
+0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6,
+0xd296eeb4, 0xd99be7ba, 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5,
+0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d,
+0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 0xf66d76ad, 0xfd607fa3,
+0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb,
+0x942b08f9, 0x9f2601f7, 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733,
+0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040,
+0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 0x3d06dd96, 0x360bd498,
+0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0,
+0x5f40a3c2, 0x544daacc, 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f,
+0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197,
+0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 0x8c61d79a, 0x876cde94,
+0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc,
+0xee27a9ce, 0xe52aa0c0, 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604,
+0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 0x2d83bed4, 0x268eb7da,
+0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 0xb1670a0c, 0xba6a0302,
+0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a,
+0xd3217458, 0xd82c7d56, 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049,
+0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1,
+0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d
+};
+#else
+static const PRUint32 _IMXC0[256] =
+{
+0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927,
+0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45,
+0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb,
+0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381,
+0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf,
+0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66,
+0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28,
+0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012,
+0x3bab6bcb, 0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec,
+0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e,
+0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd,
+0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7,
+0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89,
+0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b,
+0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815,
+0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f,
+0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 0x4e69e2a1, 0x4060efaa,
+0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8,
+0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36,
+0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c,
+0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742,
+0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea,
+0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4,
+0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e,
+0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360,
+0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502,
+0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87,
+0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd,
+0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3,
+0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621,
+0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f,
+0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55,
+0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26,
+0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844,
+0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba,
+0x10187da7, 0x1e1170ac, 0x342e539d, 0x3a275e96, 0x283c498b, 0x26354480,
+0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce,
+0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67,
+0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929,
+0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713,
+0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed,
+0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f,
+0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC1[256] =
+{
+0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d, 0x3424382c, 0x392d3627,
+0x2e36243a, 0x233f2a31, 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245,
+0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69, 0xd090e0b0, 0xdd99eebb,
+0xca82fca6, 0xc78bf2ad, 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81,
+0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5, 0x8cfca8c4, 0x81f5a6cf,
+0x96eeb4d2, 0x9be7bad9, 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966,
+0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a, 0xd373ab23, 0xde7aa528,
+0xc961b735, 0xc468b93e, 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112,
+0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6, 0x5f8f03e7, 0x52860dec,
+0x459d1ff1, 0x489411fa, 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e,
+0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2, 0x6d76adf6, 0x607fa3fd,
+0x7764b1e0, 0x7a6dbfeb, 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7,
+0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3, 0x311ae582, 0x3c13eb89,
+0x2b08f994, 0x2601f79f, 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b,
+0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777, 0xd5ae3d1e, 0xd8a73315,
+0xcfbc2108, 0xc2b52f03, 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f,
+0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490, 0xe2694ea1, 0xef6040aa,
+0xf87b52b7, 0xf5725cbc, 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8,
+0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4, 0x06dd963d, 0x0bd49836,
+0x1ccf8a2b, 0x11c68420, 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c,
+0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478, 0x5ab1de49, 0x57b8d042,
+0x40a3c25f, 0x4daacc54, 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea,
+0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6, 0xb2a431af, 0xbfad3fa4,
+0xa8b62db9, 0xa5bf23b2, 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e,
+0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a, 0x3e58996b, 0x33519760,
+0x244a857d, 0x29438b76, 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302,
+0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e, 0x61d79a8c, 0x6cde9487,
+0x7bc5869a, 0x76cc8891, 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd,
+0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9, 0x3dbbd2f8, 0x30b2dcf3,
+0x27a9ceee, 0x2aa0c0e5, 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821,
+0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d, 0xd90f0a64, 0xd406046f,
+0xc31d1672, 0xce141879, 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055,
+0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c, 0x83bed42d, 0x8eb7da26,
+0x99acc83b, 0x94a5c630, 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44,
+0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668, 0x670a0cb1, 0x6a0302ba,
+0x7d1810a7, 0x70111eac, 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680,
+0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4, 0x3b6644c5, 0x366f4ace,
+0x217458d3, 0x2c7d56d8, 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567,
+0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b, 0x64e94722, 0x69e04929,
+0x7efb5b34, 0x73f2553f, 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13,
+0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7, 0xe815efe6, 0xe51ce1ed,
+0xf207f3f0, 0xff0efdfb, 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f,
+0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3
+};
+#else
+static const PRUint32 _IMXC1[256] =
+{
+0x00000000, 0x0b0e090d, 0x161c121a, 0x1d121b17, 0x2c382434, 0x27362d39,
+0x3a24362e, 0x312a3f23, 0x58704868, 0x537e4165, 0x4e6c5a72, 0x4562537f,
+0x74486c5c, 0x7f466551, 0x62547e46, 0x695a774b, 0xb0e090d0, 0xbbee99dd,
+0xa6fc82ca, 0xadf28bc7, 0x9cd8b4e4, 0x97d6bde9, 0x8ac4a6fe, 0x81caaff3,
+0xe890d8b8, 0xe39ed1b5, 0xfe8ccaa2, 0xf582c3af, 0xc4a8fc8c, 0xcfa6f581,
+0xd2b4ee96, 0xd9bae79b, 0x7bdb3bbb, 0x70d532b6, 0x6dc729a1, 0x66c920ac,
+0x57e31f8f, 0x5ced1682, 0x41ff0d95, 0x4af10498, 0x23ab73d3, 0x28a57ade,
+0x35b761c9, 0x3eb968c4, 0x0f9357e7, 0x049d5eea, 0x198f45fd, 0x12814cf0,
+0xcb3bab6b, 0xc035a266, 0xdd27b971, 0xd629b07c, 0xe7038f5f, 0xec0d8652,
+0xf11f9d45, 0xfa119448, 0x934be303, 0x9845ea0e, 0x8557f119, 0x8e59f814,
+0xbf73c737, 0xb47dce3a, 0xa96fd52d, 0xa261dc20, 0xf6ad766d, 0xfda37f60,
+0xe0b16477, 0xebbf6d7a, 0xda955259, 0xd19b5b54, 0xcc894043, 0xc787494e,
+0xaedd3e05, 0xa5d33708, 0xb8c12c1f, 0xb3cf2512, 0x82e51a31, 0x89eb133c,
+0x94f9082b, 0x9ff70126, 0x464de6bd, 0x4d43efb0, 0x5051f4a7, 0x5b5ffdaa,
+0x6a75c289, 0x617bcb84, 0x7c69d093, 0x7767d99e, 0x1e3daed5, 0x1533a7d8,
+0x0821bccf, 0x032fb5c2, 0x32058ae1, 0x390b83ec, 0x241998fb, 0x2f1791f6,
+0x8d764dd6, 0x867844db, 0x9b6a5fcc, 0x906456c1, 0xa14e69e2, 0xaa4060ef,
+0xb7527bf8, 0xbc5c72f5, 0xd50605be, 0xde080cb3, 0xc31a17a4, 0xc8141ea9,
+0xf93e218a, 0xf2302887, 0xef223390, 0xe42c3a9d, 0x3d96dd06, 0x3698d40b,
+0x2b8acf1c, 0x2084c611, 0x11aef932, 0x1aa0f03f, 0x07b2eb28, 0x0cbce225,
+0x65e6956e, 0x6ee89c63, 0x73fa8774, 0x78f48e79, 0x49deb15a, 0x42d0b857,
+0x5fc2a340, 0x54ccaa4d, 0xf741ecda, 0xfc4fe5d7, 0xe15dfec0, 0xea53f7cd,
+0xdb79c8ee, 0xd077c1e3, 0xcd65daf4, 0xc66bd3f9, 0xaf31a4b2, 0xa43fadbf,
+0xb92db6a8, 0xb223bfa5, 0x83098086, 0x8807898b, 0x9515929c, 0x9e1b9b91,
+0x47a17c0a, 0x4caf7507, 0x51bd6e10, 0x5ab3671d, 0x6b99583e, 0x60975133,
+0x7d854a24, 0x768b4329, 0x1fd13462, 0x14df3d6f, 0x09cd2678, 0x02c32f75,
+0x33e91056, 0x38e7195b, 0x25f5024c, 0x2efb0b41, 0x8c9ad761, 0x8794de6c,
+0x9a86c57b, 0x9188cc76, 0xa0a2f355, 0xabacfa58, 0xb6bee14f, 0xbdb0e842,
+0xd4ea9f09, 0xdfe49604, 0xc2f68d13, 0xc9f8841e, 0xf8d2bb3d, 0xf3dcb230,
+0xeecea927, 0xe5c0a02a, 0x3c7a47b1, 0x37744ebc, 0x2a6655ab, 0x21685ca6,
+0x10426385, 0x1b4c6a88, 0x065e719f, 0x0d507892, 0x640a0fd9, 0x6f0406d4,
+0x72161dc3, 0x791814ce, 0x48322bed, 0x433c22e0, 0x5e2e39f7, 0x552030fa,
+0x01ec9ab7, 0x0ae293ba, 0x17f088ad, 0x1cfe81a0, 0x2dd4be83, 0x26dab78e,
+0x3bc8ac99, 0x30c6a594, 0x599cd2df, 0x5292dbd2, 0x4f80c0c5, 0x448ec9c8,
+0x75a4f6eb, 0x7eaaffe6, 0x63b8e4f1, 0x68b6edfc, 0xb10c0a67, 0xba02036a,
+0xa710187d, 0xac1e1170, 0x9d342e53, 0x963a275e, 0x8b283c49, 0x80263544,
+0xe97c420f, 0xe2724b02, 0xff605015, 0xf46e5918, 0xc544663b, 0xce4a6f36,
+0xd3587421, 0xd8567d2c, 0x7a37a10c, 0x7139a801, 0x6c2bb316, 0x6725ba1b,
+0x560f8538, 0x5d018c35, 0x40139722, 0x4b1d9e2f, 0x2247e964, 0x2949e069,
+0x345bfb7e, 0x3f55f273, 0x0e7fcd50, 0x0571c45d, 0x1863df4a, 0x136dd647,
+0xcad731dc, 0xc1d938d1, 0xdccb23c6, 0xd7c52acb, 0xe6ef15e8, 0xede11ce5,
+0xf0f307f2, 0xfbfd0eff, 0x92a779b4, 0x99a970b9, 0x84bb6bae, 0x8fb562a3,
+0xbe9f5d80, 0xb591548d, 0xa8834f9a, 0xa38d4697
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC2[256] =
+{
+0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17, 0x24382c34, 0x2d362739,
+0x36243a2e, 0x3f2a3123, 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f,
+0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b, 0x90e0b0d0, 0x99eebbdd,
+0x82fca6ca, 0x8bf2adc7, 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3,
+0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af, 0xfca8c48c, 0xf5a6cf81,
+0xeeb4d296, 0xe7bad99b, 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac,
+0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98, 0x73ab23d3, 0x7aa528de,
+0x61b735c9, 0x68b93ec4, 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0,
+0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c, 0x8f03e75f, 0x860dec52,
+0x9d1ff145, 0x9411fa48, 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14,
+0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220, 0x76adf66d, 0x7fa3fd60,
+0x64b1e077, 0x6dbfeb7a, 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e,
+0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312, 0x1ae58231, 0x13eb893c,
+0x08f9942b, 0x01f79f26, 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa,
+0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e, 0xae3d1ed5, 0xa73315d8,
+0xbc2108cf, 0xb52f03c2, 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6,
+0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1, 0x694ea1e2, 0x6040aaef,
+0x7b52b7f8, 0x725cbcf5, 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9,
+0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d, 0xdd963d06, 0xd498360b,
+0xcf8a2b1c, 0xc6842011, 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25,
+0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879, 0xb1de495a, 0xb8d04257,
+0xa3c25f40, 0xaacc544d, 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd,
+0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9, 0xa431afb2, 0xad3fa4bf,
+0xb62db9a8, 0xbf23b2a5, 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91,
+0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d, 0x58996b3e, 0x51976033,
+0x4a857d24, 0x438b7629, 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275,
+0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41, 0xd79a8c61, 0xde94876c,
+0xc5869a7b, 0xcc889176, 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42,
+0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e, 0xbbd2f83d, 0xb2dcf330,
+0xa9ceee27, 0xa0c0e52a, 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6,
+0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92, 0x0f0a64d9, 0x06046fd4,
+0x1d1672c3, 0x141879ce, 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa,
+0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0, 0xbed42d83, 0xb7da268e,
+0xacc83b99, 0xa5c63094, 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8,
+0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc, 0x0a0cb167, 0x0302ba6a,
+0x1810a77d, 0x111eac70, 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044,
+0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418, 0x6644c53b, 0x6f4ace36,
+0x7458d321, 0x7d56d82c, 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b,
+0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f, 0xe9472264, 0xe0492969,
+0xfb5b347e, 0xf2553f73, 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347,
+0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb, 0x15efe6e8, 0x1ce1ede5,
+0x07f3f0f2, 0x0efdfbff, 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3,
+0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397
+};
+#else
+static const PRUint32 _IMXC2[256] =
+{
+0x00000000, 0x0d0b0e09, 0x1a161c12, 0x171d121b, 0x342c3824, 0x3927362d,
+0x2e3a2436, 0x23312a3f, 0x68587048, 0x65537e41, 0x724e6c5a, 0x7f456253,
+0x5c74486c, 0x517f4665, 0x4662547e, 0x4b695a77, 0xd0b0e090, 0xddbbee99,
+0xcaa6fc82, 0xc7adf28b, 0xe49cd8b4, 0xe997d6bd, 0xfe8ac4a6, 0xf381caaf,
+0xb8e890d8, 0xb5e39ed1, 0xa2fe8cca, 0xaff582c3, 0x8cc4a8fc, 0x81cfa6f5,
+0x96d2b4ee, 0x9bd9bae7, 0xbb7bdb3b, 0xb670d532, 0xa16dc729, 0xac66c920,
+0x8f57e31f, 0x825ced16, 0x9541ff0d, 0x984af104, 0xd323ab73, 0xde28a57a,
+0xc935b761, 0xc43eb968, 0xe70f9357, 0xea049d5e, 0xfd198f45, 0xf012814c,
+0x6bcb3bab, 0x66c035a2, 0x71dd27b9, 0x7cd629b0, 0x5fe7038f, 0x52ec0d86,
+0x45f11f9d, 0x48fa1194, 0x03934be3, 0x0e9845ea, 0x198557f1, 0x148e59f8,
+0x37bf73c7, 0x3ab47dce, 0x2da96fd5, 0x20a261dc, 0x6df6ad76, 0x60fda37f,
+0x77e0b164, 0x7aebbf6d, 0x59da9552, 0x54d19b5b, 0x43cc8940, 0x4ec78749,
+0x05aedd3e, 0x08a5d337, 0x1fb8c12c, 0x12b3cf25, 0x3182e51a, 0x3c89eb13,
+0x2b94f908, 0x269ff701, 0xbd464de6, 0xb04d43ef, 0xa75051f4, 0xaa5b5ffd,
+0x896a75c2, 0x84617bcb, 0x937c69d0, 0x9e7767d9, 0xd51e3dae, 0xd81533a7,
+0xcf0821bc, 0xc2032fb5, 0xe132058a, 0xec390b83, 0xfb241998, 0xf62f1791,
+0xd68d764d, 0xdb867844, 0xcc9b6a5f, 0xc1906456, 0xe2a14e69, 0xefaa4060,
+0xf8b7527b, 0xf5bc5c72, 0xbed50605, 0xb3de080c, 0xa4c31a17, 0xa9c8141e,
+0x8af93e21, 0x87f23028, 0x90ef2233, 0x9de42c3a, 0x063d96dd, 0x0b3698d4,
+0x1c2b8acf, 0x112084c6, 0x3211aef9, 0x3f1aa0f0, 0x2807b2eb, 0x250cbce2,
+0x6e65e695, 0x636ee89c, 0x7473fa87, 0x7978f48e, 0x5a49deb1, 0x5742d0b8,
+0x405fc2a3, 0x4d54ccaa, 0xdaf741ec, 0xd7fc4fe5, 0xc0e15dfe, 0xcdea53f7,
+0xeedb79c8, 0xe3d077c1, 0xf4cd65da, 0xf9c66bd3, 0xb2af31a4, 0xbfa43fad,
+0xa8b92db6, 0xa5b223bf, 0x86830980, 0x8b880789, 0x9c951592, 0x919e1b9b,
+0x0a47a17c, 0x074caf75, 0x1051bd6e, 0x1d5ab367, 0x3e6b9958, 0x33609751,
+0x247d854a, 0x29768b43, 0x621fd134, 0x6f14df3d, 0x7809cd26, 0x7502c32f,
+0x5633e910, 0x5b38e719, 0x4c25f502, 0x412efb0b, 0x618c9ad7, 0x6c8794de,
+0x7b9a86c5, 0x769188cc, 0x55a0a2f3, 0x58abacfa, 0x4fb6bee1, 0x42bdb0e8,
+0x09d4ea9f, 0x04dfe496, 0x13c2f68d, 0x1ec9f884, 0x3df8d2bb, 0x30f3dcb2,
+0x27eecea9, 0x2ae5c0a0, 0xb13c7a47, 0xbc37744e, 0xab2a6655, 0xa621685c,
+0x85104263, 0x881b4c6a, 0x9f065e71, 0x920d5078, 0xd9640a0f, 0xd46f0406,
+0xc372161d, 0xce791814, 0xed48322b, 0xe0433c22, 0xf75e2e39, 0xfa552030,
+0xb701ec9a, 0xba0ae293, 0xad17f088, 0xa01cfe81, 0x832dd4be, 0x8e26dab7,
+0x993bc8ac, 0x9430c6a5, 0xdf599cd2, 0xd25292db, 0xc54f80c0, 0xc8448ec9,
+0xeb75a4f6, 0xe67eaaff, 0xf163b8e4, 0xfc68b6ed, 0x67b10c0a, 0x6aba0203,
+0x7da71018, 0x70ac1e11, 0x539d342e, 0x5e963a27, 0x498b283c, 0x44802635,
+0x0fe97c42, 0x02e2724b, 0x15ff6050, 0x18f46e59, 0x3bc54466, 0x36ce4a6f,
+0x21d35874, 0x2cd8567d, 0x0c7a37a1, 0x017139a8, 0x166c2bb3, 0x1b6725ba,
+0x38560f85, 0x355d018c, 0x22401397, 0x2f4b1d9e, 0x642247e9, 0x692949e0,
+0x7e345bfb, 0x733f55f2, 0x500e7fcd, 0x5d0571c4, 0x4a1863df, 0x47136dd6,
+0xdccad731, 0xd1c1d938, 0xc6dccb23, 0xcbd7c52a, 0xe8e6ef15, 0xe5ede11c,
+0xf2f0f307, 0xfffbfd0e, 0xb492a779, 0xb999a970, 0xae84bb6b, 0xa38fb562,
+0x80be9f5d, 0x8db59154, 0x9aa8834f, 0x97a38d46
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC3[256] =
+{
+0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b, 0x382c3424, 0x3627392d,
+0x243a2e36, 0x2a31233f, 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53,
+0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77, 0xe0b0d090, 0xeebbdd99,
+0xfca6ca82, 0xf2adc78b, 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af,
+0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3, 0xa8c48cfc, 0xa6cf81f5,
+0xb4d296ee, 0xbad99be7, 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20,
+0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804, 0xab23d373, 0xa528de7a,
+0xb735c961, 0xb93ec468, 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c,
+0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0, 0x03e75f8f, 0x0dec5286,
+0x1ff1459d, 0x11fa4894, 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8,
+0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc, 0xadf66d76, 0xa3fd607f,
+0xb1e07764, 0xbfeb7a6d, 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49,
+0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225, 0xe582311a, 0xeb893c13,
+0xf9942b08, 0xf79f2601, 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd,
+0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9, 0x3d1ed5ae, 0x3315d8a7,
+0x2108cfbc, 0x2f03c2b5, 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691,
+0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156, 0x4ea1e269, 0x40aaef60,
+0x52b7f87b, 0x5cbcf572, 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e,
+0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a, 0x963d06dd, 0x98360bd4,
+0x8a2b1ccf, 0x842011c6, 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2,
+0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e, 0xde495ab1, 0xd04257b8,
+0xc25f40a3, 0xcc544daa, 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7,
+0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3, 0x31afb2a4, 0x3fa4bfad,
+0x2db9a8b6, 0x23b2a5bf, 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b,
+0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67, 0x996b3e58, 0x97603351,
+0x857d244a, 0x8b762943, 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f,
+0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b, 0x9a8c61d7, 0x94876cde,
+0x869a7bc5, 0x889176cc, 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8,
+0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84, 0xd2f83dbb, 0xdcf330b2,
+0xceee27a9, 0xc0e52aa0, 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c,
+0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278, 0x0a64d90f, 0x046fd406,
+0x1672c31d, 0x1879ce14, 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30,
+0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081, 0xd42d83be, 0xda268eb7,
+0xc83b99ac, 0xc63094a5, 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9,
+0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced, 0x0cb1670a, 0x02ba6a03,
+0x10a77d18, 0x1eac7011, 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435,
+0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859, 0x44c53b66, 0x4ace366f,
+0x58d32174, 0x56d82c7d, 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba,
+0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e, 0x472264e9, 0x492969e0,
+0x5b347efb, 0x553f73f2, 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6,
+0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a, 0xefe6e815, 0xe1ede51c,
+0xf3f0f207, 0xfdfbff0e, 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362,
+0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746
+};
+#else
+static const PRUint32 _IMXC3[256] =
+{
+0x00000000, 0x090d0b0e, 0x121a161c, 0x1b171d12, 0x24342c38, 0x2d392736,
+0x362e3a24, 0x3f23312a, 0x48685870, 0x4165537e, 0x5a724e6c, 0x537f4562,
+0x6c5c7448, 0x65517f46, 0x7e466254, 0x774b695a, 0x90d0b0e0, 0x99ddbbee,
+0x82caa6fc, 0x8bc7adf2, 0xb4e49cd8, 0xbde997d6, 0xa6fe8ac4, 0xaff381ca,
+0xd8b8e890, 0xd1b5e39e, 0xcaa2fe8c, 0xc3aff582, 0xfc8cc4a8, 0xf581cfa6,
+0xee96d2b4, 0xe79bd9ba, 0x3bbb7bdb, 0x32b670d5, 0x29a16dc7, 0x20ac66c9,
+0x1f8f57e3, 0x16825ced, 0x0d9541ff, 0x04984af1, 0x73d323ab, 0x7ade28a5,
+0x61c935b7, 0x68c43eb9, 0x57e70f93, 0x5eea049d, 0x45fd198f, 0x4cf01281,
+0xab6bcb3b, 0xa266c035, 0xb971dd27, 0xb07cd629, 0x8f5fe703, 0x8652ec0d,
+0x9d45f11f, 0x9448fa11, 0xe303934b, 0xea0e9845, 0xf1198557, 0xf8148e59,
+0xc737bf73, 0xce3ab47d, 0xd52da96f, 0xdc20a261, 0x766df6ad, 0x7f60fda3,
+0x6477e0b1, 0x6d7aebbf, 0x5259da95, 0x5b54d19b, 0x4043cc89, 0x494ec787,
+0x3e05aedd, 0x3708a5d3, 0x2c1fb8c1, 0x2512b3cf, 0x1a3182e5, 0x133c89eb,
+0x082b94f9, 0x01269ff7, 0xe6bd464d, 0xefb04d43, 0xf4a75051, 0xfdaa5b5f,
+0xc2896a75, 0xcb84617b, 0xd0937c69, 0xd99e7767, 0xaed51e3d, 0xa7d81533,
+0xbccf0821, 0xb5c2032f, 0x8ae13205, 0x83ec390b, 0x98fb2419, 0x91f62f17,
+0x4dd68d76, 0x44db8678, 0x5fcc9b6a, 0x56c19064, 0x69e2a14e, 0x60efaa40,
+0x7bf8b752, 0x72f5bc5c, 0x05bed506, 0x0cb3de08, 0x17a4c31a, 0x1ea9c814,
+0x218af93e, 0x2887f230, 0x3390ef22, 0x3a9de42c, 0xdd063d96, 0xd40b3698,
+0xcf1c2b8a, 0xc6112084, 0xf93211ae, 0xf03f1aa0, 0xeb2807b2, 0xe2250cbc,
+0x956e65e6, 0x9c636ee8, 0x877473fa, 0x8e7978f4, 0xb15a49de, 0xb85742d0,
+0xa3405fc2, 0xaa4d54cc, 0xecdaf741, 0xe5d7fc4f, 0xfec0e15d, 0xf7cdea53,
+0xc8eedb79, 0xc1e3d077, 0xdaf4cd65, 0xd3f9c66b, 0xa4b2af31, 0xadbfa43f,
+0xb6a8b92d, 0xbfa5b223, 0x80868309, 0x898b8807, 0x929c9515, 0x9b919e1b,
+0x7c0a47a1, 0x75074caf, 0x6e1051bd, 0x671d5ab3, 0x583e6b99, 0x51336097,
+0x4a247d85, 0x4329768b, 0x34621fd1, 0x3d6f14df, 0x267809cd, 0x2f7502c3,
+0x105633e9, 0x195b38e7, 0x024c25f5, 0x0b412efb, 0xd7618c9a, 0xde6c8794,
+0xc57b9a86, 0xcc769188, 0xf355a0a2, 0xfa58abac, 0xe14fb6be, 0xe842bdb0,
+0x9f09d4ea, 0x9604dfe4, 0x8d13c2f6, 0x841ec9f8, 0xbb3df8d2, 0xb230f3dc,
+0xa927eece, 0xa02ae5c0, 0x47b13c7a, 0x4ebc3774, 0x55ab2a66, 0x5ca62168,
+0x63851042, 0x6a881b4c, 0x719f065e, 0x78920d50, 0x0fd9640a, 0x06d46f04,
+0x1dc37216, 0x14ce7918, 0x2bed4832, 0x22e0433c, 0x39f75e2e, 0x30fa5520,
+0x9ab701ec, 0x93ba0ae2, 0x88ad17f0, 0x81a01cfe, 0xbe832dd4, 0xb78e26da,
+0xac993bc8, 0xa59430c6, 0xd2df599c, 0xdbd25292, 0xc0c54f80, 0xc9c8448e,
+0xf6eb75a4, 0xffe67eaa, 0xe4f163b8, 0xedfc68b6, 0x0a67b10c, 0x036aba02,
+0x187da710, 0x1170ac1e, 0x2e539d34, 0x275e963a, 0x3c498b28, 0x35448026,
+0x420fe97c, 0x4b02e272, 0x5015ff60, 0x5918f46e, 0x663bc544, 0x6f36ce4a,
+0x7421d358, 0x7d2cd856, 0xa10c7a37, 0xa8017139, 0xb3166c2b, 0xba1b6725,
+0x8538560f, 0x8c355d01, 0x97224013, 0x9e2f4b1d, 0xe9642247, 0xe0692949,
+0xfb7e345b, 0xf2733f55, 0xcd500e7f, 0xc45d0571, 0xdf4a1863, 0xd647136d,
+0x31dccad7, 0x38d1c1d9, 0x23c6dccb, 0x2acbd7c5, 0x15e8e6ef, 0x1ce5ede1,
+0x07f2f0f3, 0x0efffbfd, 0x79b492a7, 0x70b999a9, 0x6bae84bb, 0x62a38fb5,
+0x5d80be9f, 0x548db591, 0x4f9aa883, 0x4697a38d
+};
+#endif
+
+#endif /* RIJNDAEL_INCLUDE_TABLES */
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 Rcon[30] = {
+0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
+0x00000040, 0x00000080, 0x0000001b, 0x00000036, 0x0000006c, 0x000000d8,
+0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f, 0x0000005e, 0x000000bc,
+0x00000063, 0x000000c6, 0x00000097, 0x00000035, 0x0000006a, 0x000000d4,
+0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef, 0x000000c5, 0x00000091
+};
+#else
+static const PRUint32 Rcon[30] = {
+0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000,
+0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000,
+0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000,
+0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000
+};
+#endif
+
diff --git a/security/nss/lib/freebl/rijndael_tables.c b/security/nss/lib/freebl/rijndael_tables.c
new file mode 100644
index 0000000000..61316d13ea
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael_tables.c
@@ -0,0 +1,213 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "stdio.h"
+#include "prtypes.h"
+#include "blapi.h"
+
+/*
+ * what follows is code thrown together to generate the myriad of tables
+ * used by Rijndael, the AES cipher.
+ */
+
+/*
+ * Pack four byte values into one 32-bit word.
+ *   WORD_LE: b0 becomes the least significant byte, b3 the most significant.
+ *   WORD_BE: b0 becomes the most significant byte, b3 the least significant.
+ *
+ * Every argument is parenthesized and widened to PRUint32 before it is
+ * shifted, so a byte with its top bit set is never shifted into the sign
+ * bit of an int (which is undefined behaviour).
+ */
+#define WORD_LE(b0, b1, b2, b3)                            \
+    (((PRUint32)(b3) << 24) | ((PRUint32)(b2) << 16) | \
+     ((PRUint32)(b1) << 8) | (PRUint32)(b0))
+
+#define WORD_BE(b0, b1, b2, b3)                            \
+    (((PRUint32)(b0) << 24) | ((PRUint32)(b1) << 16) | \
+     ((PRUint32)(b2) << 8) | (PRUint32)(b3))
+
+/*
+ * Forward substitution table. main() writes it out as _S and passes it to
+ * make_T_Table() as the byte source for the _T0.._T3 tables.
+ */
+static const PRUint8 __S[256] = {
+ 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
+ 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+ 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
+ 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
+ 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
+ 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
+ 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
+ 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
+ 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
+ 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
+ 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
+ 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
+ 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
+ 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
+ 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
+ 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22
+};
+
+/*
+ * Inverse substitution table (for example __S[0] is 99 and __SInv[99] is 0).
+ * main() writes it out as _SInv and passes it to make_T_Table() as the byte
+ * source for the _TInv0.._TInv3 tables.
+ */
+static const PRUint8 __SInv[256] = {
+ 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
+ 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
+ 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
+ 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
+ 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
+ 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
+ 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
+ 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
+ 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+ 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
+ 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
+ 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
+ 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
+ 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
+ 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
+ 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
+};
+
+/* GF_MULTIPLY
+ *
+ * Multiply two bytes as polynomials over GF(2), reducing modulo
+ * x**8 + x**4 + x**3 + x + 1: whenever doubling `a` would carry out of
+ * bit 7, the low byte of that polynomial (0x1b) is folded back in.
+ *
+ * The loop is shift-and-add: each set bit of `b`, taken from the least
+ * significant end, adds (XORs) the current multiple of `a` into the result.
+ */
+PRUint8
+gf_multiply(PRUint8 a, PRUint8 b)
+{
+    PRUint8 product = 0;
+
+    for (; b != 0; b >>= 1) {
+        if (b & 0x01) {
+            product ^= a;
+        }
+        /* a = a * x, reduced back into 8 bits */
+        if (a & 0x80) {
+            a = (a << 1) ^ 0x1b;
+        } else {
+            a <<= 1;
+        }
+    }
+    return product;
+}
+
+/*
+ * Write one 256-entry table of 32-bit words, named _T<table>, to `file`.
+ *
+ * Entry i packs the four products Sx[i]*m0 .. Sx[i]*m3 (gf_multiply) into
+ * a word. The table is emitted twice: under "#ifdef IS_LITTLE_ENDIAN" packed
+ * with WORD_LE, and under "#else" packed with WORD_BE. Six entries are
+ * printed per line.
+ */
+void
+make_T_Table(char *table, const PRUint8 Sx[256], FILE *file,
+             unsigned char m0, unsigned char m1,
+             unsigned char m2, unsigned char m3)
+{
+    int littleEndian;
+
+    for (littleEndian = 1; littleEndian >= 0; littleEndian--) {
+        int idx;
+
+        if (littleEndian) {
+            fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n");
+        } else {
+            fprintf(file, "#else\n");
+        }
+        fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table);
+
+        for (idx = 0; idx < 256; idx++) {
+            const PRUint8 c0 = gf_multiply(Sx[idx], m0);
+            const PRUint8 c1 = gf_multiply(Sx[idx], m1);
+            const PRUint8 c2 = gf_multiply(Sx[idx], m2);
+            const PRUint8 c3 = gf_multiply(Sx[idx], m3);
+            const char sep = (idx == 255) ? ' ' : ',';
+            const char brk = (idx % 6 == 5) ? '\n' : ' ';
+            PRUint32 word;
+
+            if (littleEndian) {
+                word = WORD_LE(c0, c1, c2, c3);
+            } else {
+                word = WORD_BE(c0, c1, c2, c3);
+            }
+
+            /* "%#.8x" drops the 0x prefix for zero, so spell it out */
+            if (word == 0) {
+                fprintf(file, "0x00000000%c%c", sep, brk);
+            } else {
+                fprintf(file, "%#.8x%c%c", word, sep, brk);
+            }
+        }
+        fprintf(file, "\n};\n");
+    }
+    fprintf(file, "#endif\n\n");
+}
+
+/*
+ * Write one 256-entry table of 32-bit words, named _IMXC<num>, to `file`.
+ *
+ * Entry i holds the four products i*m0 .. i*m3 (gf_multiply). Under
+ * "#ifdef IS_LITTLE_ENDIAN" the hex digits are printed in the order
+ * b3 b2 b1 b0; under "#else" they are printed as b0 b1 b2 b3. Six entries
+ * are printed per line.
+ */
+void
+make_InvMixCol_Table(int num, FILE *file, PRUint8 m0, PRUint8 m1, PRUint8 m2, PRUint8 m3)
+{
+    int littleEndian;
+
+    for (littleEndian = 1; littleEndian >= 0; littleEndian--) {
+        PRUint16 x;
+
+        if (littleEndian) {
+            fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n");
+        } else {
+            fprintf(file, "#else\n");
+        }
+        fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num);
+
+        for (x = 0; x < 256; x++) {
+            const PRUint8 p0 = gf_multiply(x, m0);
+            const PRUint8 p1 = gf_multiply(x, m1);
+            const PRUint8 p2 = gf_multiply(x, m2);
+            const PRUint8 p3 = gf_multiply(x, m3);
+            const char sep = (x == 255) ? ' ' : ',';
+            const char brk = (x % 6 == 5) ? '\n' : ' ';
+
+            if (littleEndian) {
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", p3, p2, p1, p0, sep, brk);
+            } else {
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", p0, p1, p2, p3, sep, brk);
+            }
+        }
+        fprintf(file, "\n};\n");
+    }
+    fprintf(file, "#endif\n\n");
+}
+
+/*
+ * Generate "rijndael32.tab" in the current directory: the S and inverse-S
+ * byte tables, the eight T tables, the four inverse-MixColumn tables and the
+ * Rcon round constants, each word table in a little- and a big-endian form.
+ *
+ * Returns 0 on success and 1 if the output file cannot be opened or closed.
+ */
+int
+main(void)
+{
+    int i;
+    PRUint8 cur;
+    FILE *optfile;
+
+    optfile = fopen("rijndael32.tab", "w");
+    if (optfile == NULL) {
+        perror("rijndael32.tab");
+        return 1;
+    }
+    /* output S, if there are no T tables */
+    fprintf(optfile, "#ifndef RIJNDAEL_INCLUDE_TABLES\n");
+    fprintf(optfile, "static const PRUint8 _S[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __S[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n#endif /* not RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* output S**-1 */
+    fprintf(optfile, "static const PRUint8 _SInv[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __SInv[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n\n");
+    fprintf(optfile, "#ifdef RIJNDAEL_INCLUDE_TABLES\n");
+    /* The 32-bit word tables for optimized implementation */
+    /* T0 = [ S[a] * 02, S[a], S[a], S[a] * 03 ] */
+    make_T_Table("0", __S, optfile, 0x02, 0x01, 0x01, 0x03);
+    /* T1 = [ S[a] * 03, S[a] * 02, S[a], S[a] ] */
+    make_T_Table("1", __S, optfile, 0x03, 0x02, 0x01, 0x01);
+    /* T2 = [ S[a], S[a] * 03, S[a] * 02, S[a] ] */
+    make_T_Table("2", __S, optfile, 0x01, 0x03, 0x02, 0x01);
+    /* T3 = [ S[a], S[a], S[a] * 03, S[a] * 02 ] */
+    make_T_Table("3", __S, optfile, 0x01, 0x01, 0x03, 0x02);
+    /* TInv0 = [ Si[a] * 0E, Si[a] * 09, Si[a] * 0D, Si[a] * 0B ] */
+    make_T_Table("Inv0", __SInv, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    /* TInv1 = [ Si[a] * 0B, Si[a] * 0E, Si[a] * 09, Si[a] * 0D ] */
+    make_T_Table("Inv1", __SInv, optfile, 0x0b, 0x0e, 0x09, 0x0d);
+    /* TInv2 = [ Si[a] * 0D, Si[a] * 0B, Si[a] * 0E, Si[a] * 09 ] */
+    make_T_Table("Inv2", __SInv, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    /* TInv3 = [ Si[a] * 09, Si[a] * 0D, Si[a] * 0B, Si[a] * 0E ] */
+    make_T_Table("Inv3", __SInv, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    /* byte multiply tables for inverse key expansion (mimics InvMixColumn) */
+    make_InvMixCol_Table(0, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    make_InvMixCol_Table(1, optfile, 0x0b, 0x0E, 0x09, 0x0d);
+    make_InvMixCol_Table(2, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    make_InvMixCol_Table(3, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    fprintf(optfile, "#endif /* RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* round constants for key expansion */
+    fprintf(optfile, "#ifdef IS_LITTLE_ENDIAN\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        /* little-endian form: the constant sits in the lowest byte */
+        fprintf(optfile, "%#.8x%c%c", (unsigned int)cur,
+                (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#else\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        /* big-endian form: the constant sits in the highest byte; shift as
+         * unsigned so values >= 0x80 never reach the sign bit of an int */
+        fprintf(optfile, "%#.8x%c%c", (unsigned int)cur << 24,
+                (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#endif\n\n");
+    /* fclose flushes buffered output, so a failure here means a bad file */
+    if (fclose(optfile) != 0) {
+        perror("rijndael32.tab");
+        return 1;
+    }
+    return 0;
+}
diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c
new file mode 100644
index 0000000000..67d65ba2b4
--- /dev/null
+++ b/security/nss/lib/freebl/rsa.c
@@ -0,0 +1,1725 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA key generation, public key op, private key op.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "prclist.h"
+#include "nssilock.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+/* The minimal required randomness is 64 bits */
+/* EXP_BLINDING_RANDOMNESS_LEN is the length of the randomness in mp_digits */
+/* The expression rounds 128 bits up to a whole number of mp_digits: 4 digits
+ * when MP_DIGIT_BIT is 32 and 2 digits when MP_DIGIT_BIT is 64 */
+#define EXP_BLINDING_RANDOMNESS_LEN ((128 + MP_DIGIT_BIT - 1) / MP_DIGIT_BIT)
+/* The same length expressed in bytes */
+#define EXP_BLINDING_RANDOMNESS_LEN_BYTES (EXP_BLINDING_RANDOMNESS_LEN * sizeof(mp_digit))
+
+/*
+** Number of times to attempt to generate a prime (p or q) from a random
+** seed (the seed changes for each iteration).
+*/
+#define MAX_PRIME_GEN_ATTEMPTS 10
+/*
+** Number of times to attempt to generate a key. The primes p and q change
+** for each attempt.
+*/
+#define MAX_KEY_GEN_ATTEMPTS 10
+
+/* Blinding Parameters max cache size */
+#define RSA_BLINDING_PARAMS_MAX_CACHE_SIZE 20
+
+/* True when the lengths (in bytes) are unacceptable: the exponent is longer
+ * than the modulus, or either one exceeds its RSA_MAX_*_BITS limit */
+#define BAD_RSA_KEY_SIZE(modLen, expLen) \
+ ((expLen) > (modLen) || (modLen) > RSA_MAX_MODULUS_BITS / 8 || \
+ (expLen) > RSA_MAX_EXPONENT_BITS / 8)
+
+/* One (f, g) blinding pair; entries chain to each other through `next`. */
+struct blindingParamsStr;
+typedef struct blindingParamsStr blindingParams;
+
+struct blindingParamsStr {
+ blindingParams *next; /* next entry in the chain */
+ mp_int f, g; /* blinding parameter */
+ int counter; /* number of remaining uses of (f, g) */
+};
+
+/*
+** RSABlindingParamsStr
+**
+** Per-modulus record holding a fixed-size pool of blinding parameters.
+**
+** For discussion of Paul Kocher's timing attack against an RSA private key
+** operation, see http://www.cryptography.com/timingattack/paper.html. The
+** countermeasure to this attack, known as blinding, is also discussed in
+** the Handbook of Applied Cryptography, 11.118-11.119.
+*/
+struct RSABlindingParamsStr {
+ /* Blinding-specific parameters */
+ PRCList link; /* link to list of structs */
+ SECItem modulus; /* list element "key" */
+ blindingParams *free, *bp; /* Blinding parameters queue */
+ blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE];
+ /* precalculated Montgomery reduction value */
+ mp_digit n0i; /* n0i = -( n & MP_DIGIT) ** -1 mod mp_RADIX */
+};
+typedef struct RSABlindingParamsStr RSABlindingParams;
+
+/*
+** RSABlindingParamsListStr
+**
+** List of key-specific blinding params. The arena holds the volatile pool
+** of memory for each entry and the list itself. The lock is for list
+** operations, in this case insertions and iterations, as well as control
+** of the counter for each set of blinding parameters.
+*/
+struct RSABlindingParamsListStr {
+ PZLock *lock; /* Lock for the list */
+ PRCondVar *cVar; /* Condition Variable */
+ int waitCount; /* Number of threads waiting on cVar */
+ PRCList head; /* Pointer to the list */
+};
+
+/*
+** The master blinding params list. It starts out zero-initialized.
+*/
+static struct RSABlindingParamsListStr blindingParamsList = { 0 };
+
+/* Number of times to reuse (f, g). Suggested by Paul Kocher */
+#define RSA_BLINDING_PARAMS_MAX_REUSE 50
+
+/* Global, allows optional use of blinding. On by default. */
+/* Cannot be changed at the moment, due to thread-safety issues. */
+static PRBool nssRSAUseBlinding = PR_TRUE;
+
+/*
+ * Fill in an RSAPrivateKey from two primes plus one exponent.
+ *
+ * p, q: the primes. They must differ, and p*q must have exactly
+ *     keySizeInBits significant bits.
+ * e, needPublicExponent: when needPublicExponent is true, e is computed as
+ *     the inverse of d.
+ * d, needPrivateExponent: when needPrivateExponent is true (and
+ *     needPublicExponent is false), d is computed as the inverse of e.
+ * key: receives exponent1, exponent2, coefficient, modulus, both exponents
+ *     and both primes, stored through MPINT_TO_SECITEM with key->arena.
+ *
+ * Returns SECSuccess or SECFailure. SEC_ERROR_NEED_RANDOM is set when
+ * p == q, when the modulus has the wrong bit length, or when mp_invmod
+ * reports MP_UNDEF; other mpi errors are translated by MP_TO_SEC_ERROR.
+ */
+static SECStatus
+rsa_build_from_primes(const mp_int *p, const mp_int *q,
+ mp_int *e, PRBool needPublicExponent,
+ mp_int *d, PRBool needPrivateExponent,
+ RSAPrivateKey *key, unsigned int keySizeInBits)
+{
+ mp_int n, phi;
+ mp_int psub1, qsub1, tmp;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ /* mark every mp_int as unallocated so cleanup can clear them safely */
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&phi) = 0;
+ MP_DIGITS(&psub1) = 0;
+ MP_DIGITS(&qsub1) = 0;
+ MP_DIGITS(&tmp) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&phi));
+ CHECK_MPI_OK(mp_init(&psub1));
+ CHECK_MPI_OK(mp_init(&qsub1));
+ CHECK_MPI_OK(mp_init(&tmp));
+ /* p and q must be distinct. */
+ if (mp_cmp(p, q) == 0) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 1. Compute n = p*q */
+ CHECK_MPI_OK(mp_mul(p, q, &n));
+ /* verify that the modulus has the desired number of bits */
+ if ((unsigned)mpl_significant_bits(&n) != keySizeInBits) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+
+ /* at least one exponent must be given */
+ PORT_Assert(!(needPublicExponent && needPrivateExponent));
+
+ /* 2. Compute p-1 and q-1; "phi" below holds lcm(p-1, q-1) */
+ CHECK_MPI_OK(mp_sub_d(p, 1, &psub1));
+ CHECK_MPI_OK(mp_sub_d(q, 1, &qsub1));
+ if (needPublicExponent || needPrivateExponent) {
+ CHECK_MPI_OK(mp_lcm(&psub1, &qsub1, &phi));
+ /* 3. Compute d = e**-1 mod(phi) */
+ /* or e = d**-1 mod(phi) as necessary */
+ if (needPublicExponent) {
+ err = mp_invmod(d, &phi, e);
+ } else {
+ err = mp_invmod(e, &phi, d);
+ }
+ } else {
+ /* both exponents were supplied, nothing to invert */
+ err = MP_OKAY;
+ }
+ /* Verify that phi(n) and e have no common divisors */
+ if (err != MP_OKAY) {
+ if (err == MP_UNDEF) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ err = MP_OKAY; /* to keep PORT_SetError from being called again */
+ rv = SECFailure;
+ }
+ goto cleanup;
+ }
+
+ /* 4. Compute exponent1 = d mod (p-1) */
+ CHECK_MPI_OK(mp_mod(d, &psub1, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->exponent1, key->arena);
+ /* 5. Compute exponent2 = d mod (q-1) */
+ CHECK_MPI_OK(mp_mod(d, &qsub1, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->exponent2, key->arena);
+ /* 6. Compute coefficient = q**-1 mod p */
+ CHECK_MPI_OK(mp_invmod(q, p, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->coefficient, key->arena);
+
+ /* copy our calculated results, overwrite what is there */
+ key->modulus.data = NULL;
+ MPINT_TO_SECITEM(&n, &key->modulus, key->arena);
+ key->privateExponent.data = NULL;
+ MPINT_TO_SECITEM(d, &key->privateExponent, key->arena);
+ key->publicExponent.data = NULL;
+ MPINT_TO_SECITEM(e, &key->publicExponent, key->arena);
+ key->prime1.data = NULL;
+ MPINT_TO_SECITEM(p, &key->prime1, key->arena);
+ key->prime2.data = NULL;
+ MPINT_TO_SECITEM(q, &key->prime2, key->arena);
+cleanup:
+ mp_clear(&n);
+ mp_clear(&phi);
+ mp_clear(&psub1);
+ mp_clear(&qsub1);
+ mp_clear(&tmp);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+ * Generate a random prime of primeLen bytes into `prime`.
+ *
+ * Up to MAX_PRIME_GEN_ATTEMPTS random candidates are tried. Each candidate
+ * has its two high-order bits and its low-order bit forced to 1 before it
+ * is handed to mpp_make_prime_secure(); the loop stops as soon as that call
+ * returns anything other than MP_NO.
+ *
+ * Returns SECSuccess, or SECFailure with the PORT error set:
+ * SEC_ERROR_INVALID_ARGS when primeLen is not positive,
+ * SEC_ERROR_NO_MEMORY when the scratch buffer cannot be allocated, and an
+ * error translated by MP_TO_SEC_ERROR when an mpi call fails.
+ */
+SECStatus
+generate_prime(mp_int *prime, int primeLen)
+{
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    int piter;
+    unsigned char *pb = NULL;
+
+    /* pb[0] and pb[primeLen - 1] are written below, so need >= 1 byte */
+    if (primeLen <= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    pb = PORT_Alloc(primeLen);
+    if (!pb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure; /* without this the caller would see success */
+        goto cleanup;
+    }
+    for (piter = 0; piter < MAX_PRIME_GEN_ATTEMPTS; piter++) {
+        CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen));
+        pb[0] |= 0xC0;            /* set two high-order bits */
+        pb[primeLen - 1] |= 0x01; /* set low-order bit       */
+        CHECK_MPI_OK(mp_read_unsigned_octets(prime, pb, primeLen));
+        err = mpp_make_prime_secure(prime, primeLen * 8, PR_FALSE);
+        if (err != MP_NO)
+            goto cleanup;
+        /* keep going while err == MP_NO */
+    }
+cleanup:
+    /* the buffer held candidate prime material, so zero it on free */
+    if (pb)
+        PORT_ZFree(pb, primeLen);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+ * Check the generated key components against the fips186 size rules.
+ *
+ * Returns PR_TRUE when keySizeInBits is below 250 (no checks are applied),
+ * or when both p - q has at least keySizeInBits/2 - 100 significant bits
+ * and d has at least keySizeInBits/2 significant bits. Returns PR_FALSE
+ * otherwise, including when an mpi call fails.
+ */
+static PRBool
+rsa_fips186_verify(mp_int *p, mp_int *q, mp_int *d, int keySizeInBits)
+{
+    mp_int pq_diff;
+    mp_err err = MP_OKAY;
+    PRBool ret = PR_FALSE;
+
+    /* Not a valid FIPS length, so the remaining tests are pointless. A
+     * caller in FIPS mode that gets here is outside the security policy. */
+    if (keySizeInBits < 250) {
+        return PR_TRUE;
+    }
+
+    /* p & q are already known to be greater than sqrt(2)*2^(keySize/2-1),
+     * and gcd(p-1,e) = 1 and gcd(q-1,e) = 1 are known to hold because
+     * mp_invmod() would have failed otherwise. */
+    MP_DIGITS(&pq_diff) = 0;
+    CHECK_MPI_OK(mp_init(&pq_diff));
+    /* NSS always has p > q, so the difference is positive */
+    CHECK_MPI_OK(mp_sub(p, q, &pq_diff));
+
+    /* need p-q > 2^(keysize/2-100) and a large enough d */
+    if ((unsigned)mpl_significant_bits(&pq_diff) >= (keySizeInBits / 2 - 100) &&
+        (unsigned)mpl_significant_bits(d) >= (keySizeInBits / 2)) {
+        ret = PR_TRUE;
+    }
+
+cleanup:
+    mp_clear(&pq_diff);
+    return ret;
+}
+
+/*
+** Generate and return a new RSA public and private key.
+** Both keys are encoded in a single RSAPrivateKey structure.
+** "keySizeInBits" is the size of the key to be generated, in bits.
+**     512, 1024, etc. It must be a multiple of 16.
+** "publicExponent" must not be NULL. It points to the data that
+**     represents the public exponent to use. The data is a byte
+**     encoded integer, in "big endian" order. The exponent must be odd
+**     and greater than 2 (and at least 65537 unless NSS_FIPS_DISABLED
+**     is defined).
+** Returns the new key, allocated from its own arena, or NULL with the
+** PORT error set. On every failure path the arena is released.
+*/
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    unsigned int primeLen;
+    mp_int p = { 0, 0, 0, NULL };
+    mp_int q = { 0, 0, 0, NULL };
+    mp_int e = { 0, 0, 0, NULL };
+    mp_int d = { 0, 0, 0, NULL };
+    int kiter;
+    int max_attempts;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    int prerr = 0;
+    RSAPrivateKey *key = NULL;
+    PLArenaPool *arena = NULL;
+    /* Require key size to be a multiple of 16 bits. */
+    if (!publicExponent || keySizeInBits % 16 != 0 ||
+        BAD_RSA_KEY_SIZE((unsigned int)keySizeInBits / 8, publicExponent->len)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    /* 1. Set the public exponent and check if it's uneven and greater than 2.*/
+    MP_DIGITS(&e) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    SECITEM_TO_MPINT(*publicExponent, &e);
+    if (mp_iseven(&e) || !(mp_cmp_d(&e, 2) > 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        rv = SECFailure;
+        goto cleanup;
+    }
+#ifndef NSS_FIPS_DISABLED
+    /* Check that the exponent is not smaller than 65537 */
+    if (mp_cmp_d(&e, 0x10001) < 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        rv = SECFailure;
+        goto cleanup;
+    }
+#endif
+
+    /* 2. Allocate arena & key */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key = PORT_ArenaZNew(arena, RSAPrivateKey);
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure; /* makes cleanup release the arena */
+        goto cleanup;
+    }
+    key->arena = arena;
+    /* length of primes p and q (in bytes) */
+    primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&d) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&d));
+    /* 3. Set the version number (PKCS1 v1.5 says it should be zero) */
+    if (!SECITEM_AllocItem(arena, &key->version, 1)) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key->version.data[0] = 0;
+
+    kiter = 0;
+    max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */
+    do {
+        PORT_SetError(0);
+        CHECK_SEC_OK(generate_prime(&p, primeLen));
+        CHECK_SEC_OK(generate_prime(&q, primeLen));
+        /* Assure p > q */
+        /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any
+         * implementation optimization that requires p > q. We can remove
+         * this code in the future.
+         */
+        if (mp_cmp(&p, &q) < 0)
+            mp_exch(&p, &q);
+        /* Attempt to use these primes to generate a key */
+        rv = rsa_build_from_primes(&p, &q,
+                                   &e, PR_FALSE, /* needPublicExponent=false */
+                                   &d, PR_TRUE,  /* needPrivateExponent=true */
+                                   key, keySizeInBits);
+        if (rv == SECSuccess) {
+            if (rsa_fips186_verify(&p, &q, &d, keySizeInBits)) {
+                break;
+            }
+            /* Retry with different values. Record the failure now so that
+             * running out of attempts here does not hand back a key that
+             * failed the check. */
+            prerr = SEC_ERROR_NEED_RANDOM;
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            rv = SECFailure;
+        } else {
+            prerr = PORT_GetError();
+        }
+        kiter++;
+        /* loop until have primes */
+    } while (prerr == SEC_ERROR_NEED_RANDOM && kiter < max_attempts);
+
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&e);
+    mp_clear(&d);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess) {
+        /* the key lives inside the arena, so freeing the arena frees it */
+        if (arena) {
+            PORT_FreeArena(arena, PR_TRUE);
+        }
+        key = NULL;
+    }
+    return key;
+}
+
+/*
+ * Probabilistic primality check for p: a Fermat test first, then, only if
+ * that returns MP_OKAY, the Miller-Rabin tests of mpp_pprime_secure().
+ * The result of whichever test ran last is returned unchanged.
+ */
+mp_err
+rsa_is_prime(mp_int *p)
+{
+    int fermat;
+
+    fermat = mpp_fermat(p, 2);
+    if (fermat != MP_OKAY) {
+        /* failed (or errored) in the Fermat test, no need to go on */
+        return fermat;
+    }
+
+    return mpp_pprime_secure(p, 2);
+}
+
+/*
+ * Factorize a RSA modulus n into p and q by using the exponents e and d.
+ *
+ * In: e, d, n
+ * Out: p, q
+ *
+ * See Handbook of Applied Cryptography, 8.2.2(i).
+ *
+ * The algorithm is probabilistic: it tries the 64 even bases 2, 4, ..., 128
+ * and each one has a 50% chance of succeeding with a runtime of
+ * O(log(e*d)).
+ *
+ * Returns MP_OKAY with p and q set, MP_RANGE when none of the bases gave a
+ * factor, or the error code of an mpi call that failed (via CHECK_MPI_OK).
+ *
+ * The returned p might be smaller than q.
+ */
+static mp_err
+rsa_factorize_n_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+                               mp_int *n)
+{
+    /* lambda is the private modulus: e*d = 1 mod lambda */
+    /* so: e*d - 1 = k*lambda = t*2^s where t is odd */
+    mp_int klambda;
+    mp_int t, onetwentyeight;
+    unsigned long s = 0;
+    unsigned long i;
+
+    /* cand = a^(t * 2^i) mod n, next_cand = a^(t * 2^(i+1)) mod n */
+    mp_int a;
+    mp_int cand;
+    mp_int next_cand;
+
+    mp_int n_minus_one;
+    mp_err err = MP_OKAY;
+
+    MP_DIGITS(&klambda) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&cand) = 0;
+    MP_DIGITS(&n_minus_one) = 0;
+    MP_DIGITS(&next_cand) = 0;
+    MP_DIGITS(&onetwentyeight) = 0;
+    CHECK_MPI_OK(mp_init(&klambda));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&cand));
+    CHECK_MPI_OK(mp_init(&n_minus_one));
+    CHECK_MPI_OK(mp_init(&next_cand));
+    CHECK_MPI_OK(mp_init(&onetwentyeight));
+
+    /* upper bound for the base a; checked like every other mpi call */
+    CHECK_MPI_OK(mp_set_int(&onetwentyeight, 128));
+
+    /* calculate k*lambda = e*d - 1 */
+    CHECK_MPI_OK(mp_mul(e, d, &klambda));
+    CHECK_MPI_OK(mp_sub_d(&klambda, 1, &klambda));
+
+    /* factorize klambda into t*2^s */
+    CHECK_MPI_OK(mp_copy(&klambda, &t));
+    while (mpp_divis_d(&t, 2) == MP_YES) {
+        CHECK_MPI_OK(mp_div_2(&t, &t));
+        s += 1;
+    }
+
+    /* precompute n_minus_one = n - 1 */
+    CHECK_MPI_OK(mp_copy(n, &n_minus_one));
+    CHECK_MPI_OK(mp_sub_d(&n_minus_one, 1, &n_minus_one));
+
+    /* try bases a, each one has a 50% chance of leading to a factorization */
+    CHECK_MPI_OK(mp_set_int(&a, 2));
+    /* The following is equivalent to for (a=2, a <= 128, a+=2) */
+    while (mp_cmp(&a, &onetwentyeight) <= 0) {
+        /* compute the base cand = a^(t * 2^0) [i = 0] */
+        CHECK_MPI_OK(mp_exptmod(&a, &t, n, &cand));
+
+        for (i = 0; i < s; i++) {
+            /* condition 1: skip the base if we hit a trivial factor of n */
+            if (mp_cmp(&cand, &n_minus_one) == 0 || mp_cmp_d(&cand, 1) == 0) {
+                break;
+            }
+
+            /* increase i in a^(t * 2^i) by squaring the number */
+            CHECK_MPI_OK(mp_exptmod_d(&cand, 2, n, &next_cand));
+
+            /* condition 2: a^(t * 2^(i+1)) = 1 mod n */
+            if (mp_cmp_d(&next_cand, 1) == 0) {
+                /* conditions verified, gcd(a^(t * 2^i) - 1, n) is a factor */
+                CHECK_MPI_OK(mp_sub_d(&cand, 1, &cand));
+                CHECK_MPI_OK(mp_gcd(&cand, n, p));
+                if (mp_cmp_d(p, 1) == 0) {
+                    /* gcd was 1 after all: restore cand, try the next base */
+                    CHECK_MPI_OK(mp_add_d(&cand, 1, &cand));
+                    break;
+                }
+                CHECK_MPI_OK(mp_div(n, p, q, NULL));
+                goto cleanup;
+            }
+            CHECK_MPI_OK(mp_copy(&next_cand, &cand));
+        }
+
+        CHECK_MPI_OK(mp_add_d(&a, 2, &a));
+    }
+
+    /* if we reach here it's likely (2^64 - 1 / 2^64) that d is wrong */
+    err = MP_RANGE;
+
+cleanup:
+    mp_clear(&klambda);
+    mp_clear(&t);
+    mp_clear(&a);
+    mp_clear(&cand);
+    mp_clear(&n_minus_one);
+    mp_clear(&next_cand);
+    mp_clear(&onetwentyeight);
+    return err;
+}
+
+/*
+ * Try to find the two primes based on 2 exponents plus a prime.
+ *
+ * In: e, d and p.
+ * Out: p,q.
+ *
+ * Step 1, Since d = e**-1 mod phi, we know that d*e == 1 mod phi, or
+ * d*e = 1+k*phi, or d*e-1 = k*phi. since d is less than phi and e is
+ * usually less than d, then k must be an integer between e-1 and 1
+ * (probably on the order of e).
+ * Step 1a, We can divide k*phi by prime-1 and get k*(q-1). This will reduce
+ * the size of our division through the rest of the loop.
+ * Step 2, Loop through the values k=e-1 to 1 looking for k. k should be on
+ * the order or e, and e is typically small. This may take a while for
+ * a large random e. We are looking for a k that divides kphi
+ * evenly. Once we find a k that divides kphi evenly, we assume it
+ * is the true k. It's possible this k is not the 'true' k but has
+ * swapped factors of p-1 and/or q-1. Because of this, we
+ * tentatively continue Steps 3-6 inside this loop, and may return looking
+ * for another k on failure.
+ * Step 3, Calculate our tentative phi=kphi/k. Note: real phi is (p-1)*(q-1).
+ * Step 4a, kphi is k*(q-1), so phi is our tentative q-1. q = phi+1.
+ * If k is correct, q should be the right length and prime.
+ * Step 4b, It's possible q-1 and k could have swapped factors. We now have a
+ * possible solution that meets our criteria. It may not be the only
+ * solution, however, so we keep looking. If we find more than one,
+ * we will fail since we cannot determine which is the correct
+ * solution, and returning the wrong modulus will compromise both
+ * moduli. If no other solution is found, we return the unique solution.
+ *
+ * This will return p & q. q may be larger than p in the case that p was given
+ * and it was the smaller prime.
+ */
+/*
+ * Returns MP_OKAY with q set when exactly one candidate prime was found.
+ * Returns MP_RANGE when e has more than 23 significant bits, when the
+ * estimated size of k is too small, when p-1 does not divide e*d-1, when
+ * no candidate q is found, or when a second candidate q is found. Other
+ * mpi errors are returned as they occur.
+ */
+static mp_err
+rsa_get_prime_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+ mp_int *n, unsigned int keySizeInBits)
+{
+ mp_int kphi; /* k*phi */
+ mp_int k; /* current guess at 'k' */
+ mp_int phi; /* (p-1)(q-1) */
+ mp_int r; /* remainder */
+ mp_int tmp; /* p-1 if p is given */
+ mp_err err = MP_OKAY;
+ unsigned int order_k;
+
+ MP_DIGITS(&kphi) = 0;
+ MP_DIGITS(&phi) = 0;
+ MP_DIGITS(&k) = 0;
+ MP_DIGITS(&r) = 0;
+ MP_DIGITS(&tmp) = 0;
+ CHECK_MPI_OK(mp_init(&kphi));
+ CHECK_MPI_OK(mp_init(&phi));
+ CHECK_MPI_OK(mp_init(&k));
+ CHECK_MPI_OK(mp_init(&r));
+ CHECK_MPI_OK(mp_init(&tmp));
+
+ /* our algorithm looks for a factor k whose maximum size is dependent
+ * on the size of our smallest exponent, which had better be the public
+ * exponent (if it's the private, the key is vulnerable to a brute force
+ * attack).
+ *
+ * since our factor search is linear, we need to limit the maximum
+ * size of the public key. this should not be a problem normally, since
+ * public keys are usually small.
+ *
+ * if we want to handle larger public key sizes, we should have
+ * a version which tries to 'completely' factor k*phi (where completely
+ * means 'factor into primes, or composites with which are products of
+ * large primes). Once we have all the factors, we can sort them out and
+ * try different combinations to form our phi. The risk is if (p-1)/2,
+ * (q-1)/2, and k are all large primes. In any case if the public key
+ * is small (order of 20 some bits), then a linear search for k is
+ * manageable.
+ */
+ if (mpl_significant_bits(e) > 23) {
+ err = MP_RANGE;
+ goto cleanup;
+ }
+
+ /* calculate k*phi = e*d - 1 */
+ CHECK_MPI_OK(mp_mul(e, d, &kphi));
+ CHECK_MPI_OK(mp_sub_d(&kphi, 1, &kphi));
+
+ /* kphi is (e*d)-1, which is the same as k*(p-1)(q-1)
+ * d < (p-1)(q-1), therefore k must be less than e-1
+ * We can narrow down k even more, though. Since p and q are odd and both
+ * have their high bit set, then we know that phi must be on order of
+ * keySizeBits.
+ */
+ order_k = (unsigned)mpl_significant_bits(&kphi) - keySizeInBits;
+
+ if (order_k <= 1) {
+ err = MP_RANGE;
+ goto cleanup;
+ }
+
+ /* for (k=kinit; order(k) >= order_k; k--) { */
+ /* k=kinit: k can't be bigger than kphi/2^(keySizeInBits -1) */
+ CHECK_MPI_OK(mp_2expt(&k, keySizeInBits - 1));
+ CHECK_MPI_OK(mp_div(&kphi, &k, &k, NULL));
+ if (mp_cmp(&k, e) >= 0) {
+ /* also can't be bigger than e-1 */
+ CHECK_MPI_OK(mp_sub_d(e, 1, &k));
+ }
+
+ /* calculate our temp value */
+ /* This saves recalculating this value when the k guess is wrong, which
+ * is reasonably frequent. */
+ /* tmp = p-1 (used to calculate q-1= phi/tmp) */
+ CHECK_MPI_OK(mp_sub_d(p, 1, &tmp));
+ CHECK_MPI_OK(mp_div(&kphi, &tmp, &kphi, &r));
+ if (mp_cmp_z(&r) != 0) {
+ /* p-1 doesn't divide kphi, some parameter wasn't correct */
+ err = MP_RANGE;
+ goto cleanup;
+ }
+ /* q == 0 is used below as the marker for "no candidate found yet" */
+ mp_zero(q);
+ /* kphi is now k*(q-1) */
+
+ /* rest of the for loop */
+ for (; (err == MP_OKAY) && (mpl_significant_bits(&k) >= order_k);
+ err = mp_sub_d(&k, 1, &k)) {
+ CHECK_MPI_OK(err);
+ /* looking for k as a factor of kphi */
+ CHECK_MPI_OK(mp_div(&kphi, &k, &phi, &r));
+ if (mp_cmp_z(&r) != 0) {
+ /* not a factor, try the next one */
+ continue;
+ }
+ /* we have a possible phi, see if it works */
+ if ((unsigned)mpl_significant_bits(&phi) != keySizeInBits / 2) {
+ /* phi is not the right size */
+ continue;
+ }
+ /* phi should be divisible by 2, since
+ * q is odd and phi=(q-1). */
+ if (mpp_divis_d(&phi, 2) == MP_NO) {
+ /* phi is odd, so phi+1 would be even and cannot be our prime */
+ continue;
+ }
+ /* we now have a candidate for the second prime */
+ CHECK_MPI_OK(mp_add_d(&phi, 1, &tmp));
+
+ /* check to make sure it is prime */
+ err = rsa_is_prime(&tmp);
+ if (err != MP_OKAY) {
+ if (err == MP_NO) {
+ /* No, then we still have the wrong phi */
+ continue;
+ }
+ goto cleanup;
+ }
+ /*
+ * It is possible that we have the wrong phi if
+ * k_guess*(q_guess-1) = k*(q-1) (k and q-1 have swapped factors).
+ * since our q_guess is prime, however. We have found a valid
+ * rsa key because:
+ * q is the correct order of magnitude.
+ * phi = (p-1)(q-1) where p and q are both primes.
+ * e*d mod phi = 1.
+ * There is no way to know from the info given if this is the
+ * original key. We never want to return the wrong key because if
+ * two moduli with the same factor is known, then euclid's gcd
+ * algorithm can be used to find that factor. Even though the
+ * caller didn't pass the original modulus, it doesn't mean the
+ * modulus wasn't known or isn't available somewhere. So to be safe
+ * if we can't be sure we have the right q, we don't return any.
+ *
+ * So to make sure we continue looking for other valid q's. If none
+ * are found, then we can safely return this one, otherwise we just
+ * fail */
+ if (mp_cmp_z(q) != 0) {
+ /* this is the second valid q, don't return either,
+ * just fail */
+ err = MP_RANGE;
+ break;
+ }
+ /* we only have one q so far, save it and if no others are found,
+ * it's safe to return it */
+ CHECK_MPI_OK(mp_copy(&tmp, q));
+ continue;
+ }
+ /* the search ran to completion: fail if it never produced a q */
+ if ((unsigned)mpl_significant_bits(&k) < order_k) {
+ if (mp_cmp_z(q) == 0) {
+ /* If we get here, something was wrong with the parameters we
+ * were given */
+ err = MP_RANGE;
+ }
+ }
+cleanup:
+ mp_clear(&kphi);
+ mp_clear(&phi);
+ mp_clear(&k);
+ mp_clear(&r);
+ mp_clear(&tmp);
+ return err;
+}
+
+/*
+ * take a private key with only a few elements and fill out the missing pieces.
+ *
+ * All the entries will be overwritten with data allocated out of the arena
+ * If no arena is supplied, one will be created.
+ *
+ * The following fields must be supplied in order for this function
+ * to succeed:
+ * one of either publicExponent or privateExponent
+ * two more of the following 5 parameters.
+ * modulus (n)
+ * prime1 (p)
+ * prime2 (q)
+ * publicExponent (e)
+ * privateExponent (d)
+ *
+ * NOTE: if only the publicExponent, privateExponent, and one prime is given,
+ * then there may be more than one RSA key that matches that combination.
+ *
+ * All parameters will be replaced in the key structure with new parameters
+ * Allocated out of the arena. There is no attempt to free the old structures.
+ * Prime1 will always be greater than prime2 (even if the caller supplies the
+ * smaller prime as prime1 or the larger prime as prime2). The parameters are
+ * not overwritten on failure.
+ *
+ * How it works:
+ * We can generate all the parameters from one of the exponents, plus the
+ * two primes. (rsa_build_from_primes)
+ * If we are given one of the exponents and both primes, we are done.
+ * If we are given one of the exponents, the modulus and one prime, we
+ * calculate the second prime by dividing the modulus by the given
+ * prime, giving us an exponent and 2 primes.
+ * If we are given 2 exponents and one of the primes we calculate
+ * k*phi = d*e-1, where k is an integer less than d which
+ * divides d*e-1. We find factor k so we can isolate phi.
+ * phi = (p-1)(q-1)
+ * We can use phi to find the other prime as follows:
+ * q = (phi/(p-1)) + 1. We now have 2 primes and an exponent.
+ * (NOTE: if more than one prime meets this condition, the operation
+ * will fail. See comments elsewhere in this file about this).
+ * (rsa_get_prime_from_exponents)
+ * If we are given 2 exponents and the modulus we factor the modulus to
+ * get the 2 missing primes (rsa_factorize_n_from_exponents)
+ *
+ */
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+ /* 'arena' is only non-NULL when this function created it; that is what
+ * decides whether it is freed again on failure (see cleanup). */
+ PLArenaPool *arena = NULL;
+ PRBool needPublicExponent = PR_TRUE;
+ PRBool needPrivateExponent = PR_TRUE;
+ PRBool hasModulus = PR_FALSE;
+ unsigned int keySizeInBits = 0;
+ int prime_count = 0;
+ /* standard RSA nomenclature */
+ mp_int p, q, e, d, n;
+ /* remainder */
+ mp_int r;
+ mp_err err = 0;
+ SECStatus rv = SECFailure;
+
+ /* Digit pointers are zeroed before any mp_init so that the mp_clear
+ * calls at cleanup see a defined value even when an early mp_init
+ * fails (presumably mp_clear tolerates a zero digit pointer). */
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&r) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&d));
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&r));
+
+ /* if the key didn't already have an arena, create one. */
+ if (key->arena == NULL) {
+ arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+ if (!arena) {
+ /* err is still 0 here; the failure is reported through the
+ * initial rv == SECFailure */
+ goto cleanup;
+ }
+ key->arena = arena;
+ }
+
+ /* load up the known exponents */
+ if (key->publicExponent.data) {
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ needPublicExponent = PR_FALSE;
+ }
+ if (key->privateExponent.data) {
+ SECITEM_TO_MPINT(key->privateExponent, &d);
+ needPrivateExponent = PR_FALSE;
+ }
+ if (needPrivateExponent && needPublicExponent) {
+ /* Not enough information, we need at least one exponent */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+
+ /* load up the known primes. If only one prime is given, it will be
+ * assigned 'p'. Once we have both primes, we'll make sure p is the larger.
+ * The value prime_count tells us how many we have acquired.
+ */
+ if (key->prime1.data) {
+ int primeLen = key->prime1.len;
+ /* do not count a leading zero byte */
+ if (key->prime1.data[0] == 0) {
+ primeLen--;
+ }
+ /* key size estimate: twice the length of one prime */
+ keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+ SECITEM_TO_MPINT(key->prime1, &p);
+ prime_count++;
+ }
+ if (key->prime2.data) {
+ int primeLen = key->prime2.len;
+ if (key->prime2.data[0] == 0) {
+ primeLen--;
+ }
+ /* overrides any estimate taken from prime1 above */
+ keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+ /* prime2 lands in 'p' when it is the only prime supplied */
+ SECITEM_TO_MPINT(key->prime2, prime_count ? &q : &p);
+ prime_count++;
+ }
+ /* load up the modulus */
+ if (key->modulus.data) {
+ int modLen = key->modulus.len;
+ if (key->modulus.data[0] == 0) {
+ modLen--;
+ }
+ /* when present, the modulus length overrides the prime-based
+ * estimates above */
+ keySizeInBits = modLen * PR_BITS_PER_BYTE;
+ SECITEM_TO_MPINT(key->modulus, &n);
+ hasModulus = PR_TRUE;
+ }
+ /* if we have the modulus and one prime, calculate the second. */
+ if ((prime_count == 1) && (hasModulus)) {
+ if (mp_div(&n, &p, &q, &r) != MP_OKAY || mp_cmp_z(&r) != 0) {
+ /* p is not a factor of n, fail */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+ prime_count++;
+ }
+
+ /* If we didn't have enough primes try to calculate the primes from
+ * the exponents */
+ if (prime_count < 2) {
+ /* if we don't have at least 2 primes at this point, then we need both
+ * exponents and one prime or a modulus*/
+ if (!needPublicExponent && !needPrivateExponent &&
+ (prime_count > 0)) {
+ CHECK_MPI_OK(rsa_get_prime_from_exponents(&e, &d, &p, &q, &n,
+ keySizeInBits));
+ } else if (!needPublicExponent && !needPrivateExponent && hasModulus) {
+ CHECK_MPI_OK(rsa_factorize_n_from_exponents(&e, &d, &p, &q, &n));
+ } else {
+ /* not enough given parameters to get both primes */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+ }
+
+ /* Assure p > q */
+ /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any
+ * implementation optimization that requires p > q. We can remove
+ * this code in the future.
+ */
+ if (mp_cmp(&p, &q) < 0)
+ mp_exch(&p, &q);
+
+ /* we now have our 2 primes and at least one exponent, we can fill
+ * in the key */
+ rv = rsa_build_from_primes(&p, &q,
+ &e, needPublicExponent,
+ &d, needPrivateExponent,
+ key, keySizeInBits);
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&e);
+ mp_clear(&d);
+ mp_clear(&n);
+ mp_clear(&r);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ /* on failure, release only an arena that was created by this call and
+ * detach it from the key; a caller-supplied arena is left untouched */
+ if (rv && arena) {
+ PORT_FreeArena(arena, PR_TRUE);
+ key->arena = NULL;
+ }
+ return rv;
+}
+
+/*
+ * Length in bytes of the value held in 'modulus', not counting one leading
+ * zero byte if the first byte is zero. An empty item yields 0.
+ */
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    /* An empty item carries no significant bytes at all. */
+    if (modulus->len == 0) {
+        return 0;
+    }
+    /* A single leading zero byte is not counted towards the length. */
+    return (modulus->data[0] == 0) ? modulus->len - 1 : modulus->len;
+}
+
+/*
+** Perform a raw public-key operation
+** Length of input and output buffers are equal to key's modulus len.
+**
+** Computes output = input**e mod n with (n, e) taken from 'key'.
+** Returns SECFailure (with an error code set) when an argument is NULL,
+** the modulus is empty, the key sizes are rejected by BAD_RSA_KEY_SIZE,
+** the input is not numerically smaller than the modulus, or an MPI
+** operation fails.
+*/
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ unsigned int modLen, expLen, offset;
+ mp_int n, e, m, c;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ if (!key || !output || !input) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&c) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&m));
+ CHECK_MPI_OK(mp_init(&c));
+ /* both lengths exclude a single leading zero byte */
+ modLen = rsa_modulusLen(&key->modulus);
+ expLen = rsa_modulusLen(&key->publicExponent);
+
+ if (modLen == 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ rv = SECFailure;
+ goto cleanup;
+ }
+
+ /* 1. Obtain public key (n, e) */
+ if (BAD_RSA_KEY_SIZE(modLen, expLen)) {
+ PORT_SetError(SEC_ERROR_INVALID_KEY);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ /* coarse check: compares the number of digits in use, not the values */
+ if (e.used > n.used) {
+ /* exponent should not be greater than modulus */
+ PORT_SetError(SEC_ERROR_INVALID_KEY);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 2. check input out of range (needs to be in range [0..n-1]) */
+ offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+ /* input and modulus are compared as modLen-byte strings; this matches a
+ * numeric comparison if both are big-endian (presumably, as they are
+ * read with mp_read_unsigned_octets) */
+ if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 2 bis. Represent message as integer in range [0..n-1] */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen));
+/* 3. Compute c = m**e mod n */
+#ifdef USE_MPI_EXPT_D
+ /* XXX see which is faster */
+ if (MP_USED(&e) == 1) {
+ CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c));
+ } else
+#endif
+ CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c));
+ /* 4. result c is ciphertext */
+ err = mp_to_fixlen_octets(&c, output, modLen);
+ /* any non-negative return is treated as success */
+ if (err >= 0)
+ err = MP_OKAY;
+cleanup:
+ mp_clear(&n);
+ mp_clear(&e);
+ mp_clear(&m);
+ mp_clear(&c);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+** RSA Private key operation (no CRT).
+**
+** Computes m = c**d mod n, with d read from key->privateExponent.
+** 'modLen' is accepted but not used by this function.
+** Returns SECSuccess, or SECFailure when an MPI operation fails.
+*/
+static SECStatus
+rsa_PrivateKeyOpNoCRT(RSAPrivateKey *key, mp_int *m, mp_int *c, mp_int *n,
+                      unsigned int modLen)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    mp_int privExp;
+
+    MP_DIGITS(&privExp) = 0;
+    CHECK_MPI_OK(mp_init(&privExp));
+    SECITEM_TO_MPINT(key->privateExponent, &privExp);
+    /* the whole operation: m = c**d mod n */
+    CHECK_MPI_OK(mp_exptmod(c, &privExp, n, m));
+
+cleanup:
+    mp_clear(&privExp);
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** RSA Private key operation using CRT.
+**
+** Computes m from c using the CRT components of 'key' (prime1, prime2,
+** exponent1, exponent2, coefficient). Before exponentiation each CRT
+** exponent is blinded by adding a random multiple of (prime - 1):
+**     d_p' = d_p + r1 * (p - 1),   d_q' = d_q + r2 * (q - 1)
+**
+** Returns SECSuccess, or SECFailure when an MPI operation or the random
+** number generator fails. No check of the result is performed here.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTNoCheck(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+    mp_int p, q, d_p, d_q, qInv;
+    /*
+      The length of the randomness comes from the papers:
+      https://link.springer.com/chapter/10.1007/978-3-642-29912-4_7
+      https://link.springer.com/chapter/10.1007/978-3-642-21554-4_5.
+    */
+    mp_int blinding_dp, blinding_dq, r1, r2;
+    unsigned char random_block[EXP_BLINDING_RANDOMNESS_LEN_BYTES];
+    mp_int m1, m2, h, ctmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&d_p) = 0;
+    MP_DIGITS(&d_q) = 0;
+    MP_DIGITS(&qInv) = 0;
+    MP_DIGITS(&m1) = 0;
+    MP_DIGITS(&m2) = 0;
+    MP_DIGITS(&h) = 0;
+    MP_DIGITS(&ctmp) = 0;
+    MP_DIGITS(&blinding_dp) = 0;
+    MP_DIGITS(&blinding_dq) = 0;
+    MP_DIGITS(&r1) = 0;
+    MP_DIGITS(&r2) = 0;
+
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&d_p));
+    CHECK_MPI_OK(mp_init(&d_q));
+    CHECK_MPI_OK(mp_init(&qInv));
+    CHECK_MPI_OK(mp_init(&m1));
+    CHECK_MPI_OK(mp_init(&m2));
+    CHECK_MPI_OK(mp_init(&h));
+    CHECK_MPI_OK(mp_init(&ctmp));
+    CHECK_MPI_OK(mp_init(&blinding_dp));
+    CHECK_MPI_OK(mp_init(&blinding_dq));
+    /* NOTE(review): the memcpy calls below assume EXP_BLINDING_RANDOMNESS_LEN
+     * digits hold at least EXP_BLINDING_RANDOMNESS_LEN_BYTES bytes; both
+     * constants are defined outside this function - confirm. */
+    CHECK_MPI_OK(mp_init_size(&r1, EXP_BLINDING_RANDOMNESS_LEN));
+    CHECK_MPI_OK(mp_init_size(&r2, EXP_BLINDING_RANDOMNESS_LEN));
+
+    /* copy private key parameters into mp integers */
+    SECITEM_TO_MPINT(key->prime1, &p);         /* p */
+    SECITEM_TO_MPINT(key->prime2, &q);         /* q */
+    SECITEM_TO_MPINT(key->exponent1, &d_p);    /* d_p = d mod (p-1) */
+    SECITEM_TO_MPINT(key->exponent2, &d_q);    /* d_q = d mod (q-1) */
+    SECITEM_TO_MPINT(key->coefficient, &qInv); /* qInv = q**-1 mod p */
+
+    // blinding_dp = 1
+    CHECK_MPI_OK(mp_set_int(&blinding_dp, 1));
+    // blinding_dp = p - 1
+    CHECK_MPI_OK(mp_sub(&p, &blinding_dp, &blinding_dp));
+    // generating a random value; a failing RNG must abort the operation,
+    // otherwise uninitialized stack bytes would be used as the blinding
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(random_block,
+                                               EXP_BLINDING_RANDOMNESS_LEN_BYTES));
+    memcpy(MP_DIGITS(&r1), random_block, sizeof(random_block));
+    MP_USED(&r1) = EXP_BLINDING_RANDOMNESS_LEN;
+    // blinding_dp = random * (p - 1)
+    CHECK_MPI_OK(mp_mul(&blinding_dp, &r1, &blinding_dp));
+    //d_p = d_p + random * (p - 1)
+    CHECK_MPI_OK(mp_add(&d_p, &blinding_dp, &d_p));
+
+    // blinding_dq = 1
+    CHECK_MPI_OK(mp_set_int(&blinding_dq, 1));
+    // blinding_dq = q - 1
+    CHECK_MPI_OK(mp_sub(&q, &blinding_dq, &blinding_dq));
+    // generating a second, independent random value (same failure handling)
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(random_block,
+                                               EXP_BLINDING_RANDOMNESS_LEN_BYTES));
+    memcpy(MP_DIGITS(&r2), random_block, sizeof(random_block));
+    MP_USED(&r2) = EXP_BLINDING_RANDOMNESS_LEN;
+    // blinding_dq = random * (q - 1)
+    CHECK_MPI_OK(mp_mul(&blinding_dq, &r2, &blinding_dq));
+    //d_q = d_q + random * (q-1)
+    CHECK_MPI_OK(mp_add(&d_q, &blinding_dq, &d_q));
+
+    /* 1. m1 = c**d_p mod p */
+    CHECK_MPI_OK(mp_mod(c, &p, &ctmp));
+    CHECK_MPI_OK(mp_exptmod(&ctmp, &d_p, &p, &m1));
+    /* 2. m2 = c**d_q mod q */
+    CHECK_MPI_OK(mp_mod(c, &q, &ctmp));
+    CHECK_MPI_OK(mp_exptmod(&ctmp, &d_q, &q, &m2));
+    /* 3. h = (m1 - m2) * qInv mod p */
+    CHECK_MPI_OK(mp_submod(&m1, &m2, &p, &h));
+    CHECK_MPI_OK(mp_mulmod(&h, &qInv, &p, &h));
+    /* 4. m = m2 + h * q */
+    CHECK_MPI_OK(mp_mul(&h, &q, m));
+    CHECK_MPI_OK(mp_add(m, &m2, m));
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&d_p);
+    mp_clear(&d_q);
+    mp_clear(&qInv);
+    mp_clear(&m1);
+    mp_clear(&m2);
+    mp_clear(&h);
+    mp_clear(&ctmp);
+    mp_clear(&blinding_dp);
+    mp_clear(&blinding_dq);
+    mp_clear(&r1);
+    mp_clear(&r2);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** An attack against RSA CRT was described by Boneh, DeMillo, and Lipton in:
+** "On the Importance of Eliminating Errors in Cryptographic Computations",
+** http://theory.stanford.edu/~dabo/papers/faults.ps.gz
+**
+** As a defense against the attack, carry out the private key operation,
+** followed up with a public key operation to invert the result.
+** Verify that result against the input.
+**
+** Returns SECFailure when the CRT operation fails, when an MPI operation
+** fails, or when m**e mod n does not equal c.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTCheckedPubKey(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+ mp_int n, e, v;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&v) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&v));
+ /* the unchecked CRT operation produces the candidate result in m */
+ CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, m, c));
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ /* Perform a public key operation v = m ** e mod n */
+ CHECK_MPI_OK(mp_exptmod(m, &e, &n, &v));
+ /* a mismatch is reported as failure; no error code is set on this path */
+ if (mp_cmp(&v, c) != 0) {
+ rv = SECFailure;
+ }
+cleanup:
+ mp_clear(&n);
+ mp_clear(&e);
+ mp_clear(&v);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+static PRCallOnceType coBPInit = { 0, 0, 0 };
+/*
+ * Initializer for the global blindingParamsList (run through PR_CallOnce
+ * from RSA_Init): creates its lock and condition variable, resets the
+ * waiter count and empties the list.
+ *
+ * Returns PR_SUCCESS, or PR_FAILURE with SEC_ERROR_NO_MEMORY set. On
+ * failure neither the lock nor the condition variable is left allocated.
+ */
+static PRStatus
+init_blinding_params_list(void)
+{
+    blindingParamsList.lock = PZ_NewLock(nssILockOther);
+    if (!blindingParamsList.lock) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE;
+    }
+    blindingParamsList.cVar = PR_NewCondVar(blindingParamsList.lock);
+    if (!blindingParamsList.cVar) {
+        /* Do not leave a half-initialized list behind: code that only
+         * tests lock != NULL would otherwise go on to use a NULL
+         * condition variable, and the lock itself would be leaked. */
+        PZ_DestroyLock(blindingParamsList.lock);
+        blindingParamsList.lock = NULL;
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE;
+    }
+    blindingParamsList.waitCount = 0;
+    PR_INIT_CLIST(&blindingParamsList.head);
+    return PR_SUCCESS;
+}
+
+/*
+ * Generate a fresh pair of RSA blinding values for modulus n:
+ *   f = k**e mod n          (e is key->publicExponent)
+ *   g = k**-1 mod n, converted to Montgomery form
+ * where k is modLen random bytes reduced mod n.
+ *
+ * Returns SECSuccess with f and g filled in, or SECFailure when the
+ * allocation, the random number generator or an MPI operation fails.
+ */
+static SECStatus
+generate_blinding_params(RSAPrivateKey *key, mp_int *f, mp_int *g, mp_int *n,
+                         unsigned int modLen)
+{
+    SECStatus rv = SECSuccess;
+    mp_int e, k;
+    mp_err err = MP_OKAY;
+    unsigned char *kb = NULL;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&k) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&k));
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    /* generate random k < n */
+    kb = PORT_Alloc(modLen);
+    if (!kb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* err is still MP_OKAY here, so the failure has to be recorded in
+         * rv explicitly; otherwise the caller would be told that f and g
+         * are valid although they were never computed */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
+    /* k < n */
+    CHECK_MPI_OK(mp_mod(&k, n, &k));
+    /* f = k**e mod n */
+    CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
+    /* g = k**-1 mod n */
+    CHECK_MPI_OK(mp_invmod(&k, n, g));
+    /* g in montgomery form.. */
+    CHECK_MPI_OK(mp_to_mont(g, n, g));
+cleanup:
+    /* kb held the secret k: zero it while freeing */
+    if (kb)
+        PORT_ZFree(kb, modLen);
+    mp_clear(&k);
+    mp_clear(&e);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+ * Prepare a freshly allocated RSABlindingParams entry for 'key': chain all
+ * slots of rsabp->array into the free list, leave the list of ready
+ * parameters empty, precompute the Montgomery constant for n and copy the
+ * key's modulus, which identifies the entry. 'modLen' is not used.
+ *
+ * Returns the result of copying the modulus.
+ */
+static SECStatus
+init_blinding_params(RSABlindingParams *rsabp, RSAPrivateKey *key,
+                     mp_int *n, unsigned int modLen)
+{
+    int idx;
+
+    /* Initialize the list pointer for the element */
+    PR_INIT_CLIST(&rsabp->link);
+
+    /* Every slot points at its successor; the final slot ends the chain. */
+    for (idx = 0; idx < RSA_BLINDING_PARAMS_MAX_CACHE_SIZE; ++idx) {
+        blindingParams *slot = &rsabp->array[idx];
+
+        slot->next = (idx + 1 < RSA_BLINDING_PARAMS_MAX_CACHE_SIZE)
+                         ? slot + 1
+                         : NULL;
+        MP_DIGITS(&slot->f) = 0;
+        MP_DIGITS(&slot->g) = 0;
+        slot->counter = 0;
+    }
+
+    /* no ready parameters yet, every slot is free */
+    rsabp->bp = NULL;
+    rsabp->free = rsabp->array;
+
+    /* precalculate montgomery reduction parameter */
+    rsabp->n0i = mp_calculate_mont_n0i(n);
+
+    /* List elements are keyed using the modulus */
+    return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
+}
+
+/*
+ * Obtain blinding values (f, g) and the Montgomery constant n0i for 'key'.
+ *
+ * The global blindingParamsList holds one RSABlindingParams entry per
+ * modulus; a missing entry is created and inserted at the position found
+ * by the SECITEM_CompareItem walk. Each entry has a list of ready values
+ * (rsabp->bp) and a list of free slots (rsabp->free). A ready value is
+ * handed out until its counter runs out; when none is ready and a slot is
+ * free, new values are generated with the list lock released; when neither
+ * is available the thread waits on the condition variable and retries.
+ *
+ * Returns SECSuccess with f, g and *n0i set. On failure returns SECFailure
+ * with *n0i set to 0.
+ */
+static SECStatus
+get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
+ mp_int *f, mp_int *g, mp_digit *n0i)
+{
+ RSABlindingParams *rsabp = NULL;
+ /* slot taken off the free list but not yet published; must be put back
+ * on failure */
+ blindingParams *bpUnlinked = NULL;
+ blindingParams *bp;
+ PRCList *el;
+ SECStatus rv = SECSuccess;
+ mp_err err = MP_OKAY;
+ /* stays non-zero when the list is empty or the key is not found */
+ int cmp = -1;
+ PRBool holdingLock = PR_FALSE;
+
+ do {
+ /* the lock is created by init_blinding_params_list */
+ if (blindingParamsList.lock == NULL) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* Acquire the list lock */
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+
+ /* Walk the list looking for the private key */
+ for (el = PR_NEXT_LINK(&blindingParamsList.head);
+ el != &blindingParamsList.head;
+ el = PR_NEXT_LINK(el)) {
+ rsabp = (RSABlindingParams *)el;
+ cmp = SECITEM_CompareItem(&rsabp->modulus, &key->modulus);
+ if (cmp >= 0) {
+ /* The key is found or not in the list. */
+ break;
+ }
+ }
+
+ if (cmp) {
+ /* At this point, the key is not in the list. el should point to
+ ** the list element before which this key should be inserted.
+ */
+ rsabp = PORT_ZNew(RSABlindingParams);
+ if (!rsabp) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ goto cleanup;
+ }
+
+ rv = init_blinding_params(rsabp, key, n, modLen);
+ if (rv != SECSuccess) {
+ PORT_ZFree(rsabp, sizeof(RSABlindingParams));
+ goto cleanup;
+ }
+
+ /* Insert the new element into the list
+ ** If inserting in the middle of the list, el points to the link
+ ** to insert before. Otherwise, the link needs to be appended to
+ ** the end of the list, which is the same as inserting before the
+ ** head (since el would have looped back to the head).
+ */
+ PR_INSERT_BEFORE(&rsabp->link, el);
+ }
+
+ /* We've found (or created) the RSAblindingParams struct for this key.
+ * Now, search its list of ready blinding params for a usable one.
+ */
+ *n0i = rsabp->n0i;
+ /* every path through this loop body returns or jumps to cleanup, so
+ * only the head of the ready list is ever examined */
+ while (0 != (bp = rsabp->bp)) {
+#ifdef UNSAFE_FUZZER_MODE
+ /* Found a match and there are still remaining uses left */
+ /* Return the parameters */
+ /* in this mode the counter is never decremented, so the same values
+ * are handed out every time */
+ CHECK_MPI_OK(mp_copy(&bp->f, f));
+ CHECK_MPI_OK(mp_copy(&bp->g, g));
+
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+#else
+ if (--(bp->counter) > 0) {
+ /* Found a match and there are still remaining uses left */
+ /* Return the parameters */
+ CHECK_MPI_OK(mp_copy(&bp->f, f));
+ CHECK_MPI_OK(mp_copy(&bp->g, g));
+
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+ }
+ /* exhausted this one, give its values to caller, and
+ * then retire it.
+ */
+ mp_exch(&bp->f, f);
+ mp_exch(&bp->g, g);
+ mp_clear(&bp->f);
+ mp_clear(&bp->g);
+ bp->counter = 0;
+ /* Move to free list */
+ rsabp->bp = bp->next;
+ bp->next = rsabp->free;
+ rsabp->free = bp;
+ /* In case there're threads waiting for new blinding
+ * value - notify 1 thread the value is ready
+ */
+ if (blindingParamsList.waitCount > 0) {
+ PR_NotifyCondVar(blindingParamsList.cVar);
+ blindingParamsList.waitCount--;
+ }
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+#endif
+ }
+ /* We did not find a usable set of blinding params. Can we make one? */
+ /* Find a free bp struct. */
+ if ((bp = rsabp->free) != NULL) {
+ /* unlink this bp */
+ rsabp->free = bp->next;
+ bp->next = NULL;
+ bpUnlinked = bp; /* In case we fail */
+
+ /* the lock is released while the new values are generated */
+ PZ_Unlock(blindingParamsList.lock);
+ holdingLock = PR_FALSE;
+ /* generate blinding parameter values for the current thread */
+ CHECK_SEC_OK(generate_blinding_params(key, f, g, n, modLen));
+
+ /* put the blinding parameter values into cache */
+ CHECK_MPI_OK(mp_init(&bp->f));
+ CHECK_MPI_OK(mp_init(&bp->g));
+ CHECK_MPI_OK(mp_copy(f, &bp->f));
+ CHECK_MPI_OK(mp_copy(g, &bp->g));
+
+ /* Put this at head of queue of usable params. */
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+ (void)holdingLock;
+ /* initialize RSABlindingParamsStr */
+ bp->counter = RSA_BLINDING_PARAMS_MAX_REUSE;
+ bp->next = rsabp->bp;
+ rsabp->bp = bp;
+ bpUnlinked = NULL;
+ /* In case there're threads waiting for new blinding value
+ * just notify them the value is ready
+ */
+ if (blindingParamsList.waitCount > 0) {
+ PR_NotifyAllCondVar(blindingParamsList.cVar);
+ blindingParamsList.waitCount = 0;
+ }
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+ }
+ /* Here, there are no usable blinding parameters available,
+ * and no free bp blocks, presumably because they're all
+ * actively having parameters generated for them.
+ * So, we need to wait here and not eat up CPU until some
+ * change happens.
+ */
+ blindingParamsList.waitCount++;
+ PR_WaitCondVar(blindingParamsList.cVar, PR_INTERVAL_NO_TIMEOUT);
+ /* unlock and start over: the next iteration re-acquires the lock and
+ * repeats the whole lookup */
+ PZ_Unlock(blindingParamsList.lock);
+ holdingLock = PR_FALSE;
+ (void)holdingLock;
+ } while (1);
+
+cleanup:
+ /* It is possible to reach this after the lock is already released. */
+ if (bpUnlinked) {
+ if (!holdingLock) {
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+ }
+ bp = bpUnlinked;
+ mp_clear(&bp->f);
+ mp_clear(&bp->g);
+ bp->counter = 0;
+ /* Must put the unlinked bp back on the free list */
+ bp->next = rsabp->free;
+ rsabp->free = bp;
+ }
+ if (holdingLock) {
+ PZ_Unlock(blindingParamsList.lock);
+ }
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ }
+ /* this label is only reached on failure */
+ *n0i = 0;
+ return SECFailure;
+}
+
+/*
+** Perform a raw private-key operation
+** Length of input and output buffers are equal to key's modulus len.
+**
+** The input must be numerically smaller than the modulus. When
+** nssRSAUseBlinding is set, the input is multiplied by a blinding factor
+** before the operation and the result is unblinded afterwards. CRT is used
+** when prime1, prime2, exponent1, exponent2 and coefficient are all
+** present; 'check' additionally requests that the CRT result be verified
+** with a public key operation.
+**
+** Returns SECSuccess with the result written to 'output', or SECFailure.
+*/
+static SECStatus
+rsa_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input,
+                 PRBool check)
+{
+    unsigned int modLen;
+    unsigned int offset;
+    SECStatus rv = SECSuccess;
+    /* initialized like in the sibling functions so that the cleanup code
+     * never depends on an earlier macro having assigned it */
+    mp_err err = MP_OKAY;
+    mp_int n, c, m;
+    mp_int f, g;
+    /* only assigned by get_blinding_params; given a defined value in case
+     * the blinding flag differs between the two tests below */
+    mp_digit n0i = 0;
+    if (!key || !output || !input) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* check input out of range (needs to be in range [0..n-1]) */
+    modLen = rsa_modulusLen(&key->modulus);
+    if (modLen == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+    if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&f) = 0;
+    MP_DIGITS(&g) = 0;
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&m));
+    CHECK_MPI_OK(mp_init(&f));
+    CHECK_MPI_OK(mp_init(&g));
+    SECITEM_TO_MPINT(key->modulus, &n);
+    OCTETS_TO_MPINT(input, &c, modLen);
+    /* If blinding, compute pre-image of ciphertext by multiplying by
+    ** blinding factor
+    */
+    if (nssRSAUseBlinding) {
+        CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g, &n0i));
+        /* c' = c*f mod n */
+        CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
+    }
+    /* Do the private key operation m = c**d mod n */
+    if (key->prime1.len == 0 ||
+        key->prime2.len == 0 ||
+        key->exponent1.len == 0 ||
+        key->exponent2.len == 0 ||
+        key->coefficient.len == 0) {
+        /* a CRT parameter is missing: use the private exponent directly */
+        CHECK_SEC_OK(rsa_PrivateKeyOpNoCRT(key, &m, &c, &n, modLen));
+    } else if (check) {
+        CHECK_SEC_OK(rsa_PrivateKeyOpCRTCheckedPubKey(key, &m, &c));
+    } else {
+        CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
+    }
+    /* If blinding, compute post-image of plaintext by multiplying by
+    ** blinding factor
+    */
+    if (nssRSAUseBlinding) {
+        /* m = m'*g mod n */
+        CHECK_MPI_OK(mp_mulmontmodCT(&m, &g, &n, n0i, &m));
+    }
+    err = mp_to_fixlen_octets(&m, output, modLen);
+    /* any non-negative return is treated as success */
+    if (err >= 0)
+        err = MP_OKAY;
+cleanup:
+    mp_clear(&n);
+    mp_clear(&c);
+    mp_clear(&m);
+    mp_clear(&f);
+    mp_clear(&g);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Raw private-key operation without the follow-up public-key check
+** (passes check = PR_FALSE to rsa_PrivateKeyOp).
+*/
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ return rsa_PrivateKeyOp(key, output, input, PR_FALSE);
+}
+
+/*
+** Raw private-key operation with the follow-up public-key check requested
+** (passes check = PR_TRUE to rsa_PrivateKeyOp).
+*/
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ return rsa_PrivateKeyOp(key, output, input, PR_TRUE);
+}
+
+/*
+** Check that the components of a fully populated private key are
+** consistent with each other: p != q, n == p*q, e is coprime to p-1 and
+** q-1, d*e == 1 modulo p-1 and q-1, the CRT exponents match d, and the
+** coefficient is the inverse of q modulo p.
+**
+** Returns SECSuccess when every check passes. Returns SECFailure when a
+** component is missing (MP_BADARG mapped to an error code), when an MPI
+** operation fails, or when a check fails (no error code is set for a
+** failed check).
+*/
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+ mp_int p, q, n, psub1, qsub1, e, d, d_p, d_q, qInv, res;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&psub1) = 0;
+ MP_DIGITS(&qsub1) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&d_p) = 0;
+ MP_DIGITS(&d_q) = 0;
+ MP_DIGITS(&qInv) = 0;
+ MP_DIGITS(&res) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&psub1));
+ CHECK_MPI_OK(mp_init(&qsub1));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&d));
+ CHECK_MPI_OK(mp_init(&d_p));
+ CHECK_MPI_OK(mp_init(&d_q));
+ CHECK_MPI_OK(mp_init(&qInv));
+ CHECK_MPI_OK(mp_init(&res));
+
+ if (!key->modulus.data || !key->prime1.data || !key->prime2.data ||
+ !key->publicExponent.data || !key->privateExponent.data ||
+ !key->exponent1.data || !key->exponent2.data ||
+ !key->coefficient.data) {
+ /* call RSA_PopulatePrivateKey first, if the application wishes to
+ * recover these parameters */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->prime1, &p);
+ SECITEM_TO_MPINT(key->prime2, &q);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ SECITEM_TO_MPINT(key->privateExponent, &d);
+ SECITEM_TO_MPINT(key->exponent1, &d_p);
+ SECITEM_TO_MPINT(key->exponent2, &d_q);
+ SECITEM_TO_MPINT(key->coefficient, &qInv);
+ /* p and q must be distinct. */
+ if (mp_cmp(&p, &q) == 0) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+/* Both helper macros fail the check (rv = SECFailure) and jump to cleanup
+ * on a mismatch; they are not #undef'd after the function. */
+#define VERIFY_MPI_EQUAL(m1, m2) \
+ if (mp_cmp(m1, m2) != 0) { \
+ rv = SECFailure; \
+ goto cleanup; \
+ }
+#define VERIFY_MPI_EQUAL_1(m) \
+ if (mp_cmp_d(m, 1) != 0) { \
+ rv = SECFailure; \
+ goto cleanup; \
+ }
+ /* n == p * q */
+ CHECK_MPI_OK(mp_mul(&p, &q, &res));
+ VERIFY_MPI_EQUAL(&res, &n);
+ /* gcd(e, p-1) == 1 */
+ CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+ CHECK_MPI_OK(mp_gcd(&e, &psub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* gcd(e, q-1) == 1 */
+ CHECK_MPI_OK(mp_sub_d(&q, 1, &qsub1));
+ CHECK_MPI_OK(mp_gcd(&e, &qsub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d*e == 1 mod p-1 */
+ CHECK_MPI_OK(mp_mulmod(&d, &e, &psub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d*e == 1 mod q-1 */
+ CHECK_MPI_OK(mp_mulmod(&d, &e, &qsub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d_p == d mod p-1 */
+ CHECK_MPI_OK(mp_mod(&d, &psub1, &res));
+ VERIFY_MPI_EQUAL(&res, &d_p);
+ /* d_q == d mod q-1 */
+ CHECK_MPI_OK(mp_mod(&d, &qsub1, &res));
+ VERIFY_MPI_EQUAL(&res, &d_q);
+ /* q * q**-1 == 1 mod p */
+ CHECK_MPI_OK(mp_mulmod(&q, &qInv, &p, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+
+cleanup:
+ mp_clear(&n);
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&psub1);
+ mp_clear(&qsub1);
+ mp_clear(&e);
+ mp_clear(&d);
+ mp_clear(&d_p);
+ mp_clear(&d_q);
+ mp_clear(&qInv);
+ mp_clear(&res);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+ * Set up the global blinding parameter list. The initializer is run through
+ * PR_CallOnce; any result other than PR_SUCCESS is reported as
+ * SEC_ERROR_LIBRARY_FAILURE.
+ */
+SECStatus
+RSA_Init(void)
+{
+    PRStatus status = PR_CallOnce(&coBPInit, init_blinding_params_list);
+
+    if (status == PR_SUCCESS) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * cleanup at shutdown: free every cached blinding entry, destroy the
+ * condition variable and the lock, and reset the call-once state. Does
+ * nothing if the call-once state is not marked initialized.
+ */
+void
+RSA_Cleanup(void)
+{
+    if (!coBPInit.initialized) {
+        return;
+    }
+
+    /* Drain the list from the head until nothing is left. */
+    while (!PR_CLIST_IS_EMPTY(&blindingParamsList.head)) {
+        RSABlindingParams *entry =
+            (RSABlindingParams *)PR_LIST_HEAD(&blindingParamsList.head);
+        blindingParams *cached;
+
+        PR_REMOVE_LINK(&entry->link);
+        /* clear parameters cache */
+        for (cached = entry->bp; cached != NULL; cached = entry->bp) {
+            entry->bp = cached->next;
+            mp_clear(&cached->f);
+            mp_clear(&cached->g);
+        }
+        SECITEM_ZfreeItem(&entry->modulus, PR_FALSE);
+        PORT_Free(entry);
+    }
+
+    if (blindingParamsList.cVar) {
+        PR_DestroyCondVar(blindingParamsList.cVar);
+        blindingParamsList.cVar = NULL;
+    }
+
+    if (blindingParamsList.lock) {
+        SKIP_AFTER_FORK(PZ_DestroyLock(blindingParamsList.lock));
+        blindingParamsList.lock = NULL;
+    }
+
+    /* reset the call-once state */
+    coBPInit.initialized = 0;
+    coBPInit.inProgress = 0;
+    coBPInit.status = 0;
+}
+
+/*
+ * need a central place for this function to free up all the memory that
+ * free_bl may have allocated along the way. Currently only RSA does this,
+ * so I've put it here for now.
+ *
+ * At present this only calls RSA_Cleanup().
+ */
+void
+BL_Cleanup(void)
+{
+ RSA_Cleanup();
+}
+
+PRBool bl_parentForkedAfterC_Initialize;
+
+/*
+ * Set fork flag so it can be tested in SKIP_AFTER_FORK on relevant platforms.
+ *
+ * Stores 'forked' in the global bl_parentForkedAfterC_Initialize.
+ */
+void
+BL_SetForkState(PRBool forked)
+{
+ bl_parentForkedAfterC_Initialize = forked;
+}
diff --git a/security/nss/lib/freebl/rsa_blind.c b/security/nss/lib/freebl/rsa_blind.c
new file mode 100644
index 0000000000..167a59f880
--- /dev/null
+++ b/security/nss/lib/freebl/rsa_blind.c
@@ -0,0 +1,471 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Implementation of RSA Blind Signatures.
+ * (https://datatracker.ietf.org/doc/draft-irtf-cfrg-rsa-blind-signatures/)
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "secitem.h"
+#include "prerr.h"
+#include "blapii.h"
+#include "secmpi.h"
+#include "mpi-priv.h"
+#include "pqg.h"
+
+/*#define RSA_DEBUG*/
+
+#define MP_DIGIT_BYTE (MP_DIGIT_BIT / PR_BITS_PER_BYTE)
+
+#ifdef RSA_DEBUG
+
+/*
+ * Debug helper (RSA_DEBUG builds only): dumps t bytes of m to stdout as
+ * two-digit hex, 16 bytes per row.
+ */
+void
+rsaBlind_Print(PRUint8* m, size_t t)
+{
+    /* Index with size_t so it matches the type of the length argument. */
+    for (size_t i = 0; i < t; i++) {
+        if (i % 16 == 0) {
+            printf("\n");
+        }
+        printf("%02x ", m[i]);
+    }
+    printf("\n \n");
+}
+
+/*
+ * Debug helper (RSA_DEBUG builds only): prints the digits of mp to stdout,
+ * most significant first, each as 16 hex characters, breaking the line
+ * before every odd-indexed digit.
+ */
+void
+mp_print_buf(mp_int* mp)
+{
+    int idx = MP_USED(mp) - 1;
+
+    while (idx >= 0) {
+        if (idx % 2 == 1) {
+            printf("\n");
+        }
+        printf("%016lx ", (long unsigned int)MP_DIGIT(mp, idx));
+        idx--;
+    }
+    printf("\n \n");
+}
+#endif
+
+/*
+ * 4.1. Prepare
+ * There are two types of preparation functions:
+ * an identity preparation function, and a randomized preparation function.
+ * The identity preparation function returns the input message without transformation,
+ * i.e., msg = PrepareIdentity(msg).
+ * The randomized preparation function augments the input message with fresh randomness.
+ *
+ * Inputs:
+ * - msg, message to be signed, a byte string
+ *
+ * Outputs:
+ * - input_msg, a byte string that is 32 bytes longer than msg
+ *
+ * Steps:
+ * 1. msgPrefix = random(32)
+ * 2. input_msg = concat(msgPrefix, msg)
+ * 3. output input_msg
+ */
+
+/*
+ * Prepare: builds the message that is subsequently blinded.
+ *
+ * preparedMessage     output buffer
+ * preparedMessageLen  capacity of preparedMessage in bytes; must be at least
+ *                     msgLen (identity) or msgLen + 32 (randomized)
+ * msg, msgLen         message to be signed
+ * isDeterministic     PR_TRUE selects the identity preparation (plain copy);
+ *                     PR_FALSE selects the randomized preparation, which
+ *                     writes 32 random bytes followed by msg
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS (NULL pointer
+ * or output buffer too small) or SEC_ERROR_INPUT_LEN (msgLen too large).
+ * SECFailure is also returned when the random generator call fails.
+ */
+SECStatus
+RSABlinding_Prepare(PRUint8* preparedMessage, size_t preparedMessageLen, const PRUint8* msg,
+                    size_t msgLen, PRBool isDeterministic)
+{
+    /* Length of the random prefix used by the randomized preparation. */
+    const PRUint8 lenRandom = 32;
+    SECStatus rv;
+
+    if (!preparedMessage || !msg) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* The identity preparation function: */
+    if (isDeterministic) {
+        if (preparedMessageLen < msgLen) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        PORT_Memcpy(preparedMessage, msg, msgLen);
+        return SECSuccess;
+    }
+
+    /* The randomized preparation function: */
+    if (msgLen > UINT32_MAX - lenRandom) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (preparedMessageLen < msgLen + lenRandom) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* 1. msgPrefix = random(32)
+     * A failed RNG call must not be reported as success: the prefix would
+     * then be whatever the caller's buffer happened to contain. */
+    rv = RNG_GenerateGlobalRandomBytes(preparedMessage, lenRandom);
+    if (rv != SECSuccess) {
+        return rv;
+    }
+
+    /* 2. input_msg = concat(msgPrefix, msg) */
+    PORT_Memcpy(preparedMessage + lenRandom, msg, msgLen);
+
+    return SECSuccess;
+}
+
+/* RSA Blind Signatures
+ * Blind(pkS, msg)
+ * Parameters:
+ * - kLen, the length in bytes of the RSA modulus n
+ * - Hash, the hash function used to hash the message
+ * - MGF, the mask generation function
+ * - sLen, the length in bytes of the salt
+ *
+ * Inputs:
+ * - pkS, server public key (n, e)
+ * - msg, message to be signed, a byte string
+ *
+ * Outputs:
+ * - blinded_msg, a byte string of length kLen
+ * - inv, an integer used to unblind the signature in Finalize
+ */
+
+/*
+ * Blind: encodes msg with EMSA-PSS and multiplies it by r^e mod n.
+ *
+ * hashAlg                hash used for the message digest and as the PSS/MGF hash
+ * blindedMsg/-Len        output, must be exactly modulus-length bytes
+ * inv/invLen             output r^-1 mod n, must be exactly modulus-length bytes
+ * msg/msgLen             (prepared) message to be signed
+ * salt/saltLen           passed through to RSA_EMSAEncodePSS
+ * pkS                    server public key (n, e)
+ * randomBuf/-Len         optional blind r as a big-endian integer; when given
+ *                        (non-NULL and randomBufLen != 0) it must be exactly
+ *                        modulus-length bytes. Otherwise r is generated here.
+ *
+ * Returns SECSuccess, or SECFailure with an NSS error code set.
+ * NOTE(review): like the rest of this file, this assumes the modulus is
+ * stored without a leading zero octet -- confirm against callers.
+ */
+SECStatus
+RSABlinding_Blind(HASH_HashType hashAlg, PRUint8* blindedMsg, size_t blindedMsgLen,
+                  PRUint8* inv, size_t invLen, const PRUint8* msg, size_t msgLen,
+                  const PRUint8* salt, size_t saltLen,
+                  RSAPublicKey* pkS, const PRUint8* randomBuf, size_t randomBufLen)
+{
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    PRUint8* encoded_msg = NULL;
+    PRUint8* rBuf = NULL;
+    PRUint8* xBuf = NULL;
+    PRUint8 msgHash[HASH_LENGTH_MAX] = { 0 };
+    mp_int m, n, r, invR, rsavp1, blindSign;
+
+    /* blindedMsg itself (not only its length) must be valid: it is written
+     * to at the end of the function. */
+    if (!blindedMsg || !blindedMsgLen || !inv || !msg || !pkS) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    const size_t modulus_len = pkS->modulus.len;
+
+    if (blindedMsgLen != modulus_len || invLen != modulus_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (randomBufLen != 0 && randomBufLen != modulus_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if ((PRUint64)pkS->modulus.len * PR_BITS_PER_BYTE - 1 > UINT32_MAX) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    const size_t bit_len_n = modulus_len * PR_BITS_PER_BYTE - 1;
+
+    /* Mark every mp_int as unallocated before the first possible jump to
+     * cleanup, so mp_clear() there is always safe. */
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&invR) = 0;
+    MP_DIGITS(&rsavp1) = 0;
+    MP_DIGITS(&blindSign) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&r) = 0;
+
+    encoded_msg = PORT_ZAlloc(modulus_len);
+    rBuf = PORT_ZAlloc(modulus_len);
+    xBuf = PORT_ZAlloc(modulus_len);
+    if (!encoded_msg || !rBuf || !xBuf) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&m));
+    CHECK_MPI_OK(mp_init(&invR));
+    CHECK_MPI_OK(mp_init(&rsavp1));
+    CHECK_MPI_OK(mp_init(&blindSign));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&n));
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&n, pkS->modulus.data, pkS->modulus.len));
+
+    if (!randomBuf || randomBufLen == 0) {
+        /* Draw r uniformly from [1, n) by rejection sampling over byte
+         * strings that are no longer than n. */
+        size_t lead = 0;
+        PRUint8 topMask = 0xff;
+
+        while (lead < modulus_len && pkS->modulus.data[lead] == 0) {
+            lead++;
+        }
+        if (lead == modulus_len) {
+            /* An all-zero modulus cannot be used. */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            goto cleanup;
+        }
+        /* Smallest 2^k - 1 that still covers the top non-zero octet of n;
+         * keeps the expected number of retries below two. */
+        while ((topMask >> 1) >= pkS->modulus.data[lead]) {
+            topMask >>= 1;
+        }
+
+        do {
+            rv = RNG_GenerateGlobalRandomBytes(rBuf, modulus_len);
+            if (rv != SECSuccess) {
+                goto cleanup;
+            }
+            PORT_Memset(rBuf, 0, lead);
+            rBuf[lead] &= topMask;
+            CHECK_MPI_OK(mp_read_unsigned_octets(&r, rBuf, modulus_len));
+        } while (mp_cmp_z(&r) == 0 || mp_cmp(&r, &n) != MP_LT);
+    } else {
+        CHECK_MPI_OK(mp_read_unsigned_octets(&r, randomBuf, pkS->modulus.len));
+    }
+
+    /* 1. encoded_msg = EMSA-PSS-ENCODE(msg, bit_len(n)). */
+    rv = PQG_HashBuf(hashAlg, msgHash, msg, msgLen);
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+
+    rv = RSA_EMSAEncodePSS(encoded_msg, pkS->modulus.len, bit_len_n, msgHash, hashAlg, hashAlg, salt, saltLen);
+
+    /* 2. If EMSA-PSS-ENCODE raises an error, raise the error and stop. */
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_FAILED_TO_ENCODE_DATA);
+        goto cleanup;
+    }
+
+#ifdef RSA_DEBUG
+    printf("encoded_msg: \n");
+    rsaBlind_Print(encoded_msg, modulus_len);
+#endif
+
+    /* 3. m = bytes_to_int(encoded_msg). */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&m, encoded_msg, pkS->modulus.len));
+
+    /* 7. inv = inverse_mod(r, n)
+    ** A blind that has no inverse modulo n is rejected as invalid input.
+    */
+    err = mp_invmod(&r, &n, &invR);
+    if (err == MP_UNDEF) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        /* Clear err so the generic MP_TO_SEC_ERROR translation in cleanup
+         * does not replace the more specific code set above. */
+        err = MP_OKAY;
+        rv = SECFailure;
+        goto cleanup;
+    } else if (err) {
+        goto cleanup;
+    }
+
+#ifdef RSA_DEBUG
+    printf("inverse r: \n");
+    mp_print_buf(&invR);
+#endif
+
+    /* 9. x = RSAVP1(pkS, r)*/
+    CHECK_MPI_OK(mp_to_fixlen_octets(&r, rBuf, pkS->modulus.len));
+    rv = RSA_PublicKeyOp(pkS, xBuf, rBuf);
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+    CHECK_MPI_OK(mp_read_unsigned_octets(&rsavp1, xBuf, pkS->modulus.len));
+
+#ifdef RSA_DEBUG
+    printf("x (RSAVP1): \n");
+    mp_print_buf(&rsavp1);
+#endif
+
+    /* 10. z = m * x mod n*/
+    CHECK_MPI_OK(mp_mulmod(&m, &rsavp1, &n, &blindSign));
+
+#ifdef RSA_DEBUG
+    printf("blindSign: \n");
+    mp_print_buf(&blindSign);
+#endif
+
+    CHECK_MPI_OK(mp_to_fixlen_octets(&blindSign, blindedMsg, blindedMsgLen));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&invR, inv, invLen));
+
+cleanup:
+    mp_clear(&m);
+    mp_clear(&n);
+    mp_clear(&r);
+    mp_clear(&invR);
+    mp_clear(&rsavp1);
+    mp_clear(&blindSign);
+
+    /* These buffers hold the encoded message and the blind; wipe them. */
+    if (encoded_msg) {
+        PORT_ZFree(encoded_msg, modulus_len);
+    }
+    if (rBuf) {
+        PORT_ZFree(rBuf, modulus_len);
+    }
+    if (xBuf) {
+        PORT_ZFree(xBuf, modulus_len);
+    }
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+
+    return rv;
+}
+
+/* 4.3. BlindSign
+ * BlindSign(skS, blinded_msg)
+ *
+ * Parameters:
+ * - kLen, the length in bytes of the RSA modulus n
+ *
+ * Inputs:
+ * - skS, server private key
+ * - blinded_msg, encoded and blinded message to be signed, a byte string
+ */
+
+/*
+ * BlindSign: applies the private-key operation to a blinded message and
+ * confirms the result with the public key before releasing it.
+ *
+ * blindSig/blindSigLen      output, must be exactly modulus-length bytes
+ * blindedMsg/blindedMsgLen  input from the client, exactly modulus-length bytes
+ * skS, pkS                  server key pair; their modulus lengths must match
+ *
+ * Returns SECSuccess with blindSig filled in. Returns SECFailure with
+ * SEC_ERROR_INVALID_ARGS for bad arguments, SEC_ERROR_NO_MEMORY when a
+ * scratch buffer cannot be allocated, SEC_ERROR_LIBRARY_FAILURE when the
+ * public-key check does not reproduce blindedMsg, or the error left by the
+ * underlying RSA / MPI operation.
+ */
+SECStatus
+RSABlinding_BlindSign(PRUint8* blindSig, size_t blindSigLen,
+                      const PRUint8* blindedMsg, size_t blindedMsgLen, RSAPrivateKey* skS, RSAPublicKey* pkS)
+{
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    PRUint8* sBuf = NULL;
+    PRUint8* mPrimeBuf = NULL;
+    mp_int z, mPrime;
+
+    /* pkS is dereferenced below, so it needs the same check as skS. */
+    if (!blindSig || !blindedMsg || !skS || !pkS) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if ((blindSigLen != skS->modulus.len) || (skS->modulus.len != pkS->modulus.len) || (blindedMsgLen != skS->modulus.len)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&z) = 0;
+    MP_DIGITS(&mPrime) = 0;
+
+    sBuf = (PRUint8*)PORT_Alloc(skS->modulus.len);
+    mPrimeBuf = (PRUint8*)PORT_Alloc(pkS->modulus.len);
+    if (!sBuf || !mPrimeBuf) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&z));
+    CHECK_MPI_OK(mp_init(&mPrime));
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&z, blindedMsg, skS->modulus.len));
+
+    /* 2. s = rsasp1(skS, z). */
+    rv = RSA_PrivateKeyOp(skS, sBuf, blindedMsg);
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+
+#ifdef RSA_DEBUG
+    printf("Blinded Signature: \n");
+    rsaBlind_Print(sBuf, skS->modulus.len);
+#endif
+
+    /* 3. mPrime = rsavp1(pkS, s). */
+    rv = RSA_PublicKeyOp(pkS, mPrimeBuf, sBuf);
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&mPrime, mPrimeBuf, skS->modulus.len));
+
+#ifdef RSA_DEBUG
+    printf("mPrime: \n");
+    mp_print_buf(&mPrime);
+#endif
+
+    /* 4. If m != m', raise "signing failure" and stop.
+     * Reporting success here would hand the caller an unwritten blindSig. */
+    if (mp_cmp(&mPrime, &z) != 0) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* 5. blind_sig = int_to_bytes(s, kLen). */
+    PORT_Memcpy(blindSig, sBuf, skS->modulus.len);
+    rv = SECSuccess;
+
+cleanup:
+    mp_clear(&z);
+    mp_clear(&mPrime);
+
+    /* sBuf may hold a signature that failed the check; wipe it. */
+    if (sBuf) {
+        PORT_ZFree(sBuf, skS->modulus.len);
+    }
+    if (mPrimeBuf) {
+        PORT_Free(mPrimeBuf);
+    }
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * 4.4. Finalize.
+ * Finalize validates the server's response, unblinds the message to produce a signature,
+ * verifies it for correctness, and outputs the signature upon success.
+ *
+ * Parameters:
+ * - kLen, the length in bytes of the RSA modulus n
+ * - Hash, the hash function used to hash the message
+ * - MGF, the mask generation function
+ * - sLen, the length in bytes of the salt
+ *
+ * Inputs:
+ * - pkS, server public key (n, e)
+ * - msg, message to be signed, a byte string
+ * - blind_sig, signed and blinded element, a byte string of
+ * length kLen
+ * - inv, inverse of the blind, an integer
+ *
+ * Outputs:
+ * - sig, a byte string of length kLen
+ *
+ * Blinded Signature Len should be the same as modulus len.
+ */
+
+/*
+ * Finalize: unblinds the server's response and verifies the result.
+ *
+ * hashAlg               hash used for the message digest and as the PSS/MGF hash
+ * signature             output buffer of at least pkS->modulus.len bytes
+ * msg/msgLen            the (prepared) message that was blinded; msgLen != 0
+ * blindSig/blindSigLen  server response, exactly modulus-length bytes
+ * inv/invLen            inverse of the blind from RSABlinding_Blind, exactly
+ *                       modulus-length bytes
+ * pkS                   server public key (n, e)
+ * saltLen               PSS salt length used when verifying
+ *
+ * Returns SECSuccess when the unblinded signature verifies. On verification
+ * failure returns SECFailure with SEC_ERROR_BAD_SIGNATURE; note that the
+ * signature buffer has already been written at that point.
+ */
+SECStatus
+RSABlinding_Finalize(HASH_HashType hashAlg, PRUint8* signature, const PRUint8* msg, PRUint32 msgLen,
+                     const PRUint8* blindSig, size_t blindSigLen,
+                     const PRUint8* inv, size_t invLen, RSAPublicKey* pkS, size_t saltLen)
+{
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+    PRUint8 mHash[HASH_LENGTH_MAX] = { 0 };
+    mp_int inv_mp, blindSig_mp, n_mp, sig_mp;
+
+    if (!signature || !msg || !blindSig || !inv || !pkS || msgLen == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (blindSigLen != pkS->modulus.len || invLen != pkS->modulus.len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&inv_mp) = 0;
+    MP_DIGITS(&blindSig_mp) = 0;
+    MP_DIGITS(&n_mp) = 0;
+    MP_DIGITS(&sig_mp) = 0;
+
+    CHECK_MPI_OK(mp_init(&n_mp));
+    CHECK_MPI_OK(mp_init(&inv_mp));
+    CHECK_MPI_OK(mp_init(&blindSig_mp));
+    CHECK_MPI_OK(mp_init(&sig_mp));
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&n_mp, pkS->modulus.data, pkS->modulus.len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&blindSig_mp, blindSig, pkS->modulus.len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&inv_mp, inv, pkS->modulus.len));
+
+    /* 3. s = z * inv mod n. */
+    CHECK_MPI_OK(mp_mulmod(&blindSig_mp, &inv_mp, &n_mp, &sig_mp));
+
+#ifdef RSA_DEBUG
+    printf("Computed Signature : \n");
+    mp_print_buf(&sig_mp);
+#endif
+
+    CHECK_MPI_OK(mp_to_fixlen_octets(&sig_mp, signature, pkS->modulus.len));
+
+    rv = PQG_HashBuf(hashAlg, mHash, msg, msgLen);
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto cleanup;
+    }
+
+    /* 5. result = RSASSA-PSS-VERIFY(pkS, msg, sig) with Hash, MGF, and sLen as defined in the parameters.
+     * The signature was just serialised as exactly pkS->modulus.len octets,
+     * so that is its length. Deriving the length from the number of used
+     * MPI digits is wrong whenever the modulus length is not a multiple of
+     * the digit size or the top digit of the signature happens to be zero. */
+    rv = RSA_CheckSignPSS(pkS, hashAlg, hashAlg, saltLen, signature, pkS->modulus.len, mHash, 0);
+
+    /* If result = "valid signature", output sig, else raise "invalid signature" and stop. */
+    if (rv != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+    }
+
+#ifdef RSA_DEBUG
+    if (rv == SECFailure) {
+        printf("%s\n", "RSA CheckSignPSS has failed. ");
+    } else {
+        printf("%s\n", "RSA CheckSignPSS has succeeded. ");
+    }
+#endif
+
+cleanup:
+    mp_clear(&inv_mp);
+    mp_clear(&blindSig_mp);
+    mp_clear(&n_mp);
+    mp_clear(&sig_mp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/rsapkcs.c b/security/nss/lib/freebl/rsapkcs.c
new file mode 100644
index 0000000000..47706992d7
--- /dev/null
+++ b/security/nss/lib/freebl/rsapkcs.c
@@ -0,0 +1,1706 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA PKCS#1 v2.1 (RFC 3447) operations
+ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "blapi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+#define RSA_BLOCK_MIN_PAD_LEN 8
+#define RSA_BLOCK_FIRST_OCTET 0x00
+#define RSA_BLOCK_PRIVATE_PAD_OCTET 0xff
+#define RSA_BLOCK_AFTER_PAD_OCTET 0x00
+
+/*
+ * RSA block types
+ *
+ * The values of RSA_BlockPrivate and RSA_BlockPublic are fixed.
+ * The value of RSA_BlockRaw isn't fixed by definition, but we are keeping
+ * the value that NSS has been using in the past.
+ *
+ * For the two padded types the numeric value is also written into the
+ * block itself as its second octet (see rsa_FormatOneBlock).
+ */
+typedef enum {
+ RSA_BlockPrivate = 1, /* pad for a private-key operation */
+ RSA_BlockPublic = 2, /* pad for a public-key operation */
+ RSA_BlockRaw = 4 /* simply justify the block appropriately */
+} RSA_BlockType;
+
+/* Needed for RSA-PSS functions: a constant run of eight zero octets.
+ * NOTE(review): its users are outside this part of the file. */
+static const unsigned char eightZeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/* Branch-free equality test for two bytes.
+ * Yields 1 when a == b and 0 otherwise.
+ * Note: For ranges of bytes, use constantTimeCompare.
+ */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    /* Bit 7 of (a - b) | (b - a) is set exactly when the bytes differ. */
+    unsigned char differs = (unsigned char)((a - b) | (b - a));
+
+    return (unsigned char)((differs >> 7) ^ 1);
+}
+
+/* Branch-free comparison of two byte ranges of equal length.
+ * Yields 1 when all len bytes of a match the corresponding bytes of b,
+ * otherwise 0. Every byte is always examined.
+ */
+static unsigned char
+constantTimeCompare(const unsigned char *a,
+                    const unsigned char *b,
+                    unsigned int len)
+{
+    unsigned char accumulated = 0;
+    unsigned int remaining = len;
+
+    /* OR together the XOR of each byte pair; any mismatch leaves a bit set. */
+    while (remaining != 0) {
+        accumulated |= *a ^ *b;
+        ++a;
+        ++b;
+        --remaining;
+    }
+    return constantTimeEQ8(0x00, accumulated);
+}
+
+/* Branch-free select.
+ * Yields a when c is 1 and b when c is 0. The result is undefined for any
+ * other value of c.
+ */
+static unsigned int
+constantTimeCondition(unsigned int c,
+                      unsigned int a,
+                      unsigned int b)
+{
+    /* c - 1 is all-ones when c == 0 and zero when c == 1. */
+    unsigned int selectB = c - 1;
+
+    return (a & ~selectB) | (b & selectB);
+}
+
+/*
+ * Length of the modulus in bytes, not counting a single leading zero octet.
+ * An empty item yields 0.
+ */
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    if (modulus->len == 0) {
+        return 0;
+    }
+
+    if (modulus->data[0] == 0) {
+        return modulus->len - 1;
+    }
+    return modulus->len;
+}
+
+/*
+ * Bit length of the modulus: after skipping at most one leading zero octet,
+ * counts eight bits for every octet below the first one plus the number of
+ * bits up to the highest set bit of that first octet.
+ * An empty item, or a single zero octet, yields 0.
+ */
+static unsigned int
+rsa_modulusBits(SECItem *modulus)
+{
+    const unsigned char *top;
+    unsigned int octets;
+    unsigned int bits;
+    unsigned char msb;
+
+    if (modulus->len == 0) {
+        return 0;
+    }
+
+    top = modulus->data;
+    octets = modulus->len;
+    if (*top == 0) {
+        if (octets == 1) {
+            return 0;
+        }
+        /* Skip the single leading zero octet. */
+        ++top;
+        --octets;
+    }
+
+    bits = (octets - 1) * 8;
+    for (msb = *top; msb > 0; msb >>= 1) {
+        ++bits;
+    }
+
+    return bits;
+}
+
+/*
+ * Format one block of data for public/private key encryption using
+ * the rules defined in PKCS #1.
+ *
+ * modulusLen: size in bytes of the block to build.
+ * blockType: RSA_BlockPrivate or RSA_BlockPublic; any other value asserts
+ * and fails.
+ * data: payload copied to the end of the block.
+ *
+ * Returns a newly allocated block of modulusLen bytes, or NULL on failure.
+ * The pad length is computed with unsigned arithmetic, so the caller must
+ * already have ensured that data->len + 3 + RSA_BLOCK_MIN_PAD_LEN fits in
+ * modulusLen (rsa_FormatBlock does this before calling).
+ */
+static unsigned char *
+rsa_FormatOneBlock(unsigned modulusLen,
+ RSA_BlockType blockType,
+ SECItem *data)
+{
+ unsigned char *block;
+ unsigned char *bp;
+ unsigned int padLen;
+ unsigned int i, j;
+ SECStatus rv;
+
+ block = (unsigned char *)PORT_Alloc(modulusLen);
+ if (block == NULL)
+ return NULL;
+
+ bp = block;
+
+ /*
+ * All RSA blocks start with two octets:
+ * 0x00 || BlockType
+ */
+ *bp++ = RSA_BLOCK_FIRST_OCTET;
+ *bp++ = (unsigned char)blockType;
+
+ switch (blockType) {
+
+ /*
+ * Blocks intended for private-key operation.
+ */
+ case RSA_BlockPrivate: /* preferred method */
+ /*
+ * 0x00 || BT || Pad || 0x00 || ActualData
+ * 1 1 padLen 1 data->len
+ * padLen must be at least RSA_BLOCK_MIN_PAD_LEN (8) bytes.
+ * Pad is either all 0x00 or all 0xff bytes, depending on blockType.
+ */
+ padLen = modulusLen - data->len - 3;
+ PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+ if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+ PORT_ZFree(block, modulusLen);
+ return NULL;
+ }
+ PORT_Memset(bp, RSA_BLOCK_PRIVATE_PAD_OCTET, padLen);
+ bp += padLen;
+ *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+ PORT_Memcpy(bp, data->data, data->len);
+ break;
+
+ /*
+ * Blocks intended for public-key operation.
+ */
+ case RSA_BlockPublic:
+ /*
+ * 0x00 || BT || Pad || 0x00 || ActualData
+ * 1 1 padLen 1 data->len
+ * Pad is 8 or more non-zero random bytes.
+ *
+ * Build the block left to right.
+ * Fill the entire block from Pad to the end with random bytes.
+ * Use the bytes after Pad as a supply of extra random bytes from
+ * which to find replacements for the zero bytes in Pad.
+ * If we need more than that, refill the bytes after Pad with
+ * new random bytes as necessary.
+ */
+
+ padLen = modulusLen - (data->len + 3);
+ PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+ if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+ PORT_ZFree(block, modulusLen);
+ return NULL;
+ }
+ /* j indexes one past the last unused spare random byte; spare bytes
+ * are consumed from the end of the block downwards. */
+ j = modulusLen - 2;
+ rv = RNG_GenerateGlobalRandomBytes(bp, j);
+ if (rv == SECSuccess) {
+ for (i = 0; i < padLen;) {
+ unsigned char repl;
+ /* Pad with non-zero random data. */
+ if (bp[i] != RSA_BLOCK_AFTER_PAD_OCTET) {
+ ++i;
+ continue;
+ }
+ /* Spare supply exhausted: refill the region after Pad. */
+ if (j <= padLen) {
+ rv = RNG_GenerateGlobalRandomBytes(bp + padLen,
+ modulusLen - (2 + padLen));
+ if (rv != SECSuccess)
+ break;
+ j = modulusLen - 2;
+ }
+ /* Take spare bytes until a non-zero one turns up or the
+ * supply runs out. */
+ do {
+ repl = bp[--j];
+ } while (repl == RSA_BLOCK_AFTER_PAD_OCTET && j > padLen);
+ /* If only zero bytes were found, i is left unchanged and the
+ * next iteration refills the supply. */
+ if (repl != RSA_BLOCK_AFTER_PAD_OCTET) {
+ bp[i++] = repl;
+ }
+ }
+ }
+ if (rv != SECSuccess) {
+ PORT_ZFree(block, modulusLen);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return NULL;
+ }
+ bp += padLen;
+ *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+ PORT_Memcpy(bp, data->data, data->len);
+ break;
+
+ default:
+ PORT_Assert(0);
+ PORT_ZFree(block, modulusLen);
+ return NULL;
+ }
+
+ return block;
+}
+
+/*
+ * Builds a modulusLen-byte block from data according to blockType and stores
+ * it in result (result->data is newly allocated and owned by the caller).
+ *
+ * For RSA_BlockPrivate / RSA_BlockPublic, modulusLen must be at least
+ * RSA_BLOCK_MIN_PAD_LEN + 3 and data->len must not exceed
+ * modulusLen - (RSA_BLOCK_MIN_PAD_LEN + 3).
+ * For RSA_BlockRaw, data->len must not exceed modulusLen; the data is
+ * right-justified and padded on the left with zeros.
+ *
+ * Returns SECSuccess or SECFailure. When the length checks fail, result is
+ * left untouched.
+ */
+static SECStatus
+rsa_FormatBlock(SECItem *result,
+                unsigned modulusLen,
+                RSA_BlockType blockType,
+                SECItem *data)
+{
+    switch (blockType) {
+        case RSA_BlockPrivate:
+        case RSA_BlockPublic:
+            /*
+             * 0x00 || BT || Pad || 0x00 || ActualData
+             *
+             * The "3" below is the first octet + the second octet + the 0x00
+             * octet that always comes just before the ActualData.
+             */
+            if (modulusLen < (3 + RSA_BLOCK_MIN_PAD_LEN) || data->len > (modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN))) {
+                return SECFailure;
+            }
+            result->data = rsa_FormatOneBlock(modulusLen, blockType, data);
+            if (result->data == NULL) {
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+
+            break;
+
+        case RSA_BlockRaw:
+            /*
+             * Pad || ActualData
+             * Pad is zeros. The application is responsible for recovering
+             * the actual data.
+             */
+            if (data->len > modulusLen) {
+                return SECFailure;
+            }
+            result->data = (unsigned char *)PORT_ZAlloc(modulusLen);
+            if (result->data == NULL) {
+                /* Allocation failed: report it instead of copying into NULL. */
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+            PORT_Memcpy(result->data + (modulusLen - data->len),
+                        data->data, data->len);
+            break;
+
+        default:
+            PORT_Assert(0);
+            result->data = NULL;
+            result->len = 0;
+            return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Mask generation function MGF1 as defined in PKCS #1 v2.1 / RFC 3447.
+ *
+ * hashAlg     hash to build the mask with
+ * mask        output buffer receiving exactly maskLen bytes
+ * maskLen     number of mask bytes wanted
+ * mgfSeed     seed input, mgfSeedLen bytes
+ *
+ * Returns SECSuccess, or SECFailure when the hash is unknown, the hash
+ * context cannot be created (SEC_ERROR_NO_MEMORY) or the scratch buffer for
+ * the final partial block cannot be allocated.
+ */
+static SECStatus
+MGF1(HASH_HashType hashAlg,
+     unsigned char *mask,
+     unsigned int maskLen,
+     const unsigned char *mgfSeed,
+     unsigned int mgfSeedLen)
+{
+    unsigned int digestLen;
+    PRUint32 counter;
+    PRUint32 rounds;
+    unsigned char *tempHash;
+    unsigned char *temp;
+    const SECHashObject *hash;
+    void *hashContext;
+    unsigned char C[4];
+    SECStatus rv = SECSuccess;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        return SECFailure;
+    }
+
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        /* Same handling as the OAEP routines in this file. */
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    /* Number of digests needed to cover maskLen bytes (rounded up). */
+    rounds = (maskLen + hash->length - 1) / hash->length;
+    for (counter = 0; counter < rounds; counter++) {
+        /* C = counter as a 4-octet big-endian string. */
+        C[0] = (unsigned char)((counter >> 24) & 0xff);
+        C[1] = (unsigned char)((counter >> 16) & 0xff);
+        C[2] = (unsigned char)((counter >> 8) & 0xff);
+        C[3] = (unsigned char)(counter & 0xff);
+
+        /* This could be optimized when the clone functions in
+         * rawhash.c are implemented. */
+        (*hash->begin)(hashContext);
+        (*hash->update)(hashContext, mgfSeed, mgfSeedLen);
+        (*hash->update)(hashContext, C, sizeof C);
+
+        tempHash = mask + counter * hash->length;
+        if (counter != (rounds - 1)) {
+            (*hash->end)(hashContext, tempHash, &digestLen, hash->length);
+        } else { /* we're in the last round and need to cut the hash */
+            temp = (unsigned char *)PORT_Alloc(hash->length);
+            if (!temp) {
+                rv = SECFailure;
+                goto done;
+            }
+            (*hash->end)(hashContext, temp, &digestLen, hash->length);
+            PORT_Memcpy(tempHash, temp, maskLen - counter * hash->length);
+            PORT_Free(temp);
+        }
+    }
+
+done:
+    (*hash->destroy)(hashContext, PR_TRUE);
+    return rv;
+}
+
+/*
+ * Raw (unpadded) private-key signature: data is right-justified in a
+ * modulus-sized block and passed to RSA_PrivateKeyOpDoubleChecked.
+ * output must hold at least the modulus length; *outputLen receives it.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *data,
+            unsigned int dataLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem rawInput;
+    SECItem block;
+    SECStatus rv;
+
+    if (maxOutputLen < modulusLen) {
+        return SECFailure;
+    }
+
+    rawInput.data = (unsigned char *)data;
+    rawInput.len = dataLen;
+    block.data = NULL;
+
+    rv = rsa_FormatBlock(&block, modulusLen, RSA_BlockRaw, &rawInput);
+    if (rv == SECSuccess) {
+        rv = RSA_PrivateKeyOpDoubleChecked(key, output, block.data);
+        *outputLen = modulusLen;
+    }
+
+    if (block.data != NULL) {
+        PORT_ZFree(block.data, modulusLen);
+    }
+    return rv;
+}
+
+/*
+ * Raw (unpadded) signature check: applies the public-key operation to sig
+ * and compares the trailing hashLen bytes of the result with hash.
+ * sigLen must equal the modulus length and hashLen must not exceed it.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECStatus verdict = SECFailure;
+    unsigned char *recovered;
+
+    if (sigLen != modulusLen || hashLen > modulusLen) {
+        return SECFailure;
+    }
+
+    recovered = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (recovered == NULL) {
+        return SECFailure;
+    }
+
+    /*
+     * make sure we get the same results
+     */
+    /* XXX(rsleevi): Constant time */
+    /* NOTE: should we verify the leading zeros? */
+    if (RSA_PublicKeyOp(key, recovered, sig) == SECSuccess &&
+        PORT_Memcmp(recovered + (modulusLen - hashLen), hash, hashLen) == 0) {
+        verdict = SECSuccess;
+    }
+
+    PORT_Free(recovered);
+    return verdict;
+}
+
+/*
+ * Raw signature recovery: writes the result of the public-key operation on
+ * sig into data and reports the modulus length through *dataLen.
+ * sigLen must equal the modulus length; maxDataLen must be at least that.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data,
+                        unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig,
+                        unsigned int sigLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+
+    if (sigLen != modulusLen) {
+        return SECFailure;
+    }
+    if (maxDataLen < modulusLen) {
+        return SECFailure;
+    }
+    if (RSA_PublicKeyOp(key, data, sig) != SECSuccess) {
+        return SECFailure;
+    }
+
+    *dataLen = modulusLen;
+    return SECSuccess;
+}
+
+/*
+ * Raw (unpadded) public-key encryption: input is right-justified in a
+ * modulus-sized block and passed to RSA_PublicKeyOp.
+ * output must hold at least the modulus length; on success *outputLen
+ * receives it.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECStatus result = SECFailure;
+    SECItem rawInput;
+    SECItem block;
+
+    block.data = NULL;
+    if (maxOutputLen < modulusLen) {
+        return SECFailure;
+    }
+
+    rawInput.data = (unsigned char *)input;
+    rawInput.len = inputLen;
+
+    if (rsa_FormatBlock(&block, modulusLen, RSA_BlockRaw, &rawInput) == SECSuccess &&
+        RSA_PublicKeyOp(key, output, block.data) == SECSuccess) {
+        *outputLen = modulusLen;
+        result = SECSuccess;
+    }
+
+    if (block.data != NULL) {
+        PORT_ZFree(block.data, modulusLen);
+    }
+    return result;
+}
+
+/*
+ * Raw (unpadded) private-key decryption: applies RSA_PrivateKeyOp to input
+ * and reports the modulus length through *outputLen.
+ * inputLen must equal the modulus length; maxOutputLen must be at least that.
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+
+    if (modulusLen > maxOutputLen) {
+        return SECFailure;
+    }
+    if (inputLen != modulusLen) {
+        return SECFailure;
+    }
+    if (RSA_PrivateKeyOp(key, output, input) != SECSuccess) {
+        return SECFailure;
+    }
+
+    *outputLen = modulusLen;
+    return SECSuccess;
+}
+
+/*
+ * Decodes an EME-OAEP encoded block, validating the encoding in constant
+ * time.
+ * Described in RFC 3447, section 7.1.2.
+ * input contains the encoded block, after decryption.
+ * label is the optional value L that was associated with the message.
+ * On success, the original message and message length will be stored in
+ * output and outputLen.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ALGORITHM for an unknown hashAlg,
+ * SEC_ERROR_INPUT_LEN for a block that is too short, SEC_ERROR_NO_MEMORY on
+ * allocation failure, SEC_ERROR_LIBRARY_FAILURE when mask generation fails,
+ * SEC_ERROR_BAD_DATA for an invalid encoding and SEC_ERROR_OUTPUT_LEN when
+ * the message does not fit in maxOutputLen.
+ */
+static SECStatus
+eme_oaep_decode(unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv = SECFailure;
+    unsigned char labelHash[HASH_LENGTH_MAX];
+    unsigned int i;
+    unsigned int maskLen = 0;
+    unsigned int paddingOffset;
+    unsigned char *mask = NULL;
+    unsigned char *tmpOutput = NULL;
+    unsigned char isGood;
+    unsigned char foundPaddingEnd;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        /* Unknown hash: fail before dereferencing the hash object. */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* 1.c */
+    if (inputLen < (hash->length * 2) + 2) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Step 3.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0)
+        (*hash->update)(hashContext, label, labelLen);
+    (*hash->end)(hashContext, labelHash, &i, sizeof(labelHash));
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    tmpOutput = (unsigned char *)PORT_Alloc(inputLen);
+    if (tmpOutput == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    maskLen = inputLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(maskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    PORT_Memcpy(tmpOutput, input, inputLen);
+
+    /* 3.c - Generate seedMask
+     * A failed MGF1 leaves mask unfilled, so it must stop the decode. This
+     * check depends only on the mask hash and on allocation, not on the
+     * decrypted data. */
+    if (MGF1(maskHashAlg, mask, hash->length, &tmpOutput[1 + hash->length],
+             inputLen - hash->length - 1) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        goto done;
+    }
+    /* 3.d - Unmask seed */
+    for (i = 0; i < hash->length; ++i)
+        tmpOutput[1 + i] ^= mask[i];
+
+    /* 3.e - Generate dbMask */
+    if (MGF1(maskHashAlg, mask, maskLen, &tmpOutput[1], hash->length) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        goto done;
+    }
+    /* 3.f - Unmask DB */
+    for (i = 0; i < maskLen; ++i)
+        tmpOutput[1 + hash->length + i] ^= mask[i];
+
+    /* 3.g - Compare Y, lHash, and PS in constant time
+     * Warning: This code is timing dependent and must not disclose which of
+     * these were invalid.
+     */
+    paddingOffset = 0;
+    isGood = 1;
+    foundPaddingEnd = 0;
+
+    /* Compare Y */
+    isGood &= constantTimeEQ8(0x00, tmpOutput[0]);
+
+    /* Compare lHash and lHash' */
+    isGood &= constantTimeCompare(&labelHash[0],
+                                  &tmpOutput[1 + hash->length],
+                                  hash->length);
+
+    /* Compare that the padding is zero or more zero octets, followed by a
+     * 0x01 octet */
+    for (i = 1 + (hash->length * 2); i < inputLen; ++i) {
+        unsigned char isZero = constantTimeEQ8(0x00, tmpOutput[i]);
+        unsigned char isOne = constantTimeEQ8(0x01, tmpOutput[i]);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01 && !foundPaddingEnd)
+         *     paddingOffset = i;
+         */
+        paddingOffset = constantTimeCondition(isOne & ~foundPaddingEnd, i,
+                                              paddingOffset);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01)
+         *    foundPaddingEnd = true;
+         *
+         * Note: This may yield false positives, as it will be set whenever
+         * a 0x01 byte is encountered. If there was bad padding (eg:
+         * 0x03 0x02 0x01), foundPaddingEnd will still be set to true, and
+         * paddingOffset will still be set to 2.
+         */
+        foundPaddingEnd = constantTimeCondition(isOne, 1, foundPaddingEnd);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] != 0x00 && tmpOutput[i] != 0x01 &&
+         *     !foundPaddingEnd) {
+         *    isGood = false;
+         * }
+         *
+         * Note: This may yield false positives, as a message (and padding)
+         * that is entirely zeros will result in isGood still being true. Thus
+         * it's necessary to check foundPaddingEnd is positive below.
+         */
+        isGood = constantTimeCondition(~foundPaddingEnd & ~isZero, 0, isGood);
+    }
+
+    /* While both isGood and foundPaddingEnd may have false positives, they
+     * cannot BOTH have false positives. If both are not true, then an invalid
+     * message was received. Note, this comparison must still be done in constant
+     * time so as not to leak either condition.
+     */
+    if (!(isGood & foundPaddingEnd)) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto done;
+    }
+
+    /* End timing dependent code */
+
+    ++paddingOffset; /* Skip the 0x01 following the end of PS */
+
+    *outputLen = inputLen - paddingOffset;
+    if (*outputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto done;
+    }
+
+    if (*outputLen)
+        PORT_Memcpy(output, &tmpOutput[paddingOffset], *outputLen);
+    rv = SECSuccess;
+
+done:
+    if (mask)
+        PORT_ZFree(mask, maskLen);
+    if (tmpOutput)
+        PORT_ZFree(tmpOutput, inputLen);
+    return rv;
+}
+
+/*
+ * Generate an EME-OAEP encoded block for encryption
+ * Described in RFC 3447, section 7.1.1
+ * We use input instead of M for the message to be encrypted
+ * label is the optional value L to be associated with the message.
+ *
+ * em/emLen receive the encoded block. seed is normally NULL, in which case a
+ * random seed is generated; a non-NULL seed (Known Answer Tests) must be
+ * exactly as long as the hashAlg digest.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ALGORITHM for an unknown hashAlg,
+ * SEC_ERROR_INVALID_ARGS for a seed of the wrong length, SEC_ERROR_INPUT_LEN
+ * when the message does not fit, SEC_ERROR_NO_MEMORY on allocation failure
+ * and SEC_ERROR_LIBRARY_FAILURE when mask generation fails.
+ */
+static SECStatus
+eme_oaep_encode(unsigned char *em,
+                unsigned int emLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv;
+    unsigned char *mask;
+    unsigned int reservedLen;
+    unsigned int dbMaskLen;
+    unsigned int i;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        /* Unknown hash: fail before dereferencing the hash object. */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    PORT_Assert(seed == NULL || seedLen == hash->length);
+    /* seedLen is used as a copy length into the seed field of em, which is
+     * exactly hash->length bytes wide, so enforce it in every build. */
+    if (seed != NULL && seedLen != hash->length) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Step 1.b */
+    reservedLen = (2 * hash->length) + 2;
+    if (emLen < reservedLen || inputLen > (emLen - reservedLen)) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /*
+     * From RFC 3447, Section 7.1
+     *                      +----------+---------+-------+
+     *                 DB = |  lHash   |    PS   |   M   |
+     *                      +----------+---------+-------+
+     *                                     |
+     *           +----------+              V
+     *           |   seed   |--> MGF ---> xor
+     *           +----------+              |
+     *                 |                   |
+     *        +--+     V                   |
+     *        |00|    xor <----- MGF <-----|
+     *        +--+     |                   |
+     *          |      |                   |
+     *          V      V                   V
+     *        +--+----------+----------------------------+
+     *  EM =  |00|maskedSeed|          maskedDB          |
+     *        +--+----------+----------------------------+
+     *
+     * We use mask to hold the result of the MGF functions, and all other
+     * values are generated in their final resting place.
+     */
+    *em = 0x00;
+
+    /* Step 2.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0)
+        (*hash->update)(hashContext, label, labelLen);
+    (*hash->end)(hashContext, &em[1 + hash->length], &i, hash->length);
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    /* Step 2.b - Generate PS */
+    if (emLen - reservedLen - inputLen > 0) {
+        PORT_Memset(em + 1 + (hash->length * 2), 0x00,
+                    emLen - reservedLen - inputLen);
+    }
+
+    /* Step 2.c. - Generate DB
+     * DB = lHash || PS || 0x01 || M
+     * Note that PS and lHash have already been placed into em at their
+     * appropriate offsets. This just copies M into place
+     */
+    em[emLen - inputLen - 1] = 0x01;
+    if (inputLen)
+        PORT_Memcpy(em + emLen - inputLen, input, inputLen);
+
+    if (seed == NULL) {
+        /* Step 2.d - Generate seed */
+        rv = RNG_GenerateGlobalRandomBytes(em + 1, hash->length);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    } else {
+        /* For Known Answer Tests, copy the supplied seed. */
+        PORT_Memcpy(em + 1, seed, seedLen);
+    }
+
+    /* Step 2.e - Generate dbMask*/
+    dbMaskLen = emLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* A failed MGF1 leaves mask unfilled; encoding with it would emit an
+     * improperly masked block, so stop instead. */
+    if (MGF1(maskHashAlg, mask, dbMaskLen, em + 1, hash->length) != SECSuccess) {
+        PORT_ZFree(mask, dbMaskLen);
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* Step 2.f - Compute maskedDB*/
+    for (i = 0; i < dbMaskLen; ++i)
+        em[1 + hash->length + i] ^= mask[i];
+
+    /* Step 2.g - Generate seedMask */
+    if (MGF1(maskHashAlg, mask, hash->length, &em[1 + hash->length], dbMaskLen) != SECSuccess) {
+        PORT_ZFree(mask, dbMaskLen);
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* Step 2.h - Compute maskedSeed */
+    for (i = 0; i < hash->length; ++i)
+        em[1 + i] ^= mask[i];
+
+    PORT_ZFree(mask, dbMaskLen);
+    return SECSuccess;
+}
+
+/*
+ * Encrypt |input| for the public key |key| using OAEP padding
+ * (eme_oaep_encode) followed by RSA_PublicKeyOp.
+ *
+ * hashAlg / maskHashAlg: hash for the label and for MGF1; neither may be
+ *     HASH_AlgNULL.
+ * label / labelLen: must be consistent (both empty or both present).
+ * seed / seedLen: forwarded unchanged to eme_oaep_encode.
+ * output / maxOutputLen: destination; must hold at least modulusLen bytes.
+ * outputLen: set to modulusLen on success only.
+ *
+ * Returns SECSuccess, or a failure status. The checks performed here set
+ * SEC_ERROR_OUTPUT_LEN, SEC_ERROR_INVALID_ALGORITHM or SEC_ERROR_NO_MEMORY;
+ * a failing helper's status is returned as-is.
+ */
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    SECStatus rv = SECFailure;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *oaepEncoded = NULL;
+
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if ((labelLen == 0 && label != NULL) ||
+        (labelLen > 0 && label == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen);
+    if (oaepEncoded == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rv = eme_oaep_encode(oaepEncoded, modulusLen, input, inputLen,
+                         hashAlg, maskHashAlg, label, labelLen, seed, seedLen);
+    if (rv != SECSuccess)
+        goto done;
+
+    rv = RSA_PublicKeyOp(key, output, oaepEncoded);
+    if (rv != SECSuccess)
+        goto done;
+    *outputLen = modulusLen;
+
+done:
+    /* The scratch buffer holds the padded plaintext; wipe it before
+     * handing the memory back, as RSA_DecryptOAEP does. */
+    PORT_ZFree(oaepEncoded, modulusLen);
+    return rv;
+}
+
+/*
+ * Decrypt an OAEP ciphertext: run the (double-checked) private key
+ * operation on |input| and strip the OAEP encoding with eme_oaep_decode.
+ *
+ * input must be exactly modulusLen bytes. label and labelLen must be
+ * consistent (both empty or both present). The decoded message and its
+ * length are delivered through output / outputLen by eme_oaep_decode.
+ *
+ * Returns the status of the failing step, or SECSuccess.
+ */
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    SECStatus status;
+    unsigned int keyBytes = rsa_modulusLen(&key->modulus);
+    unsigned char *encodedMsg;
+
+    if (hashAlg == HASH_AlgNULL || maskHashAlg == HASH_AlgNULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (inputLen != keyBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* A label pointer without a length, or a length without a pointer,
+     * is rejected. */
+    if ((label == NULL) != (labelLen == 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    encodedMsg = (unsigned char *)PORT_Alloc(keyBytes);
+    if (!encodedMsg) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    status = RSA_PrivateKeyOpDoubleChecked(key, encodedMsg, input);
+    if (status == SECSuccess) {
+        status = eme_oaep_decode(output, outputLen, maxOutputLen, encodedMsg,
+                                 keyBytes, hashAlg, maskHashAlg, label,
+                                 labelLen);
+    }
+
+    /* The buffer held the decrypted encoding; clear it on every path. */
+    PORT_ZFree(encodedMsg, keyBytes);
+    return status;
+}
+
+/*
+ * Encrypt |input| with block-type RSA_BlockPublic padding
+ * (rsa_FormatBlock) followed by RSA_PublicKeyOp.
+ *
+ * output must hold at least modulusLen bytes (maxOutputLen); on success
+ * *outputLen is set to modulusLen. Returns SECSuccess or SECFailure.
+ *
+ * XXX Only the output-length check sets an error code here
+ * (SEC_ERROR_OUTPUT_LEN); for the other failures the error code is
+ * whatever the failing helper left behind.
+ */
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem formatted;
+    SECItem unformatted;
+
+    /* Must be NULL before any jump to failure: the cleanup tests it. */
+    formatted.data = NULL;
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto failure;
+    }
+
+    unformatted.len = inputLen;
+    unformatted.data = (unsigned char *)input;
+    rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPublic,
+                         &unformatted);
+    if (rv != SECSuccess)
+        goto failure;
+
+    rv = RSA_PublicKeyOp(key, output, formatted.data);
+    if (rv != SECSuccess)
+        goto failure;
+
+    /* The formatted block contains the plaintext; wipe it when freeing. */
+    PORT_ZFree(formatted.data, modulusLen);
+    *outputLen = modulusLen;
+    return SECSuccess;
+
+failure:
+    if (formatted.data != NULL)
+        PORT_ZFree(formatted.data, modulusLen);
+    return SECFailure;
+}
+
+/*
+ * Create an HMAC context keyed with a value derived from the private
+ * exponent and the input; RSA_DecryptBlock uses it to generate the
+ * deterministic output returned when decryption fails.
+ *
+ * Derivation, as implemented below:
+ *   keyHash = hash(zero padding || privateExponent), where the padding is
+ *             (inputLen - privKeyLen) zero bytes when the exponent is
+ *             shorter than the input;
+ *   key     = HMAC(keyHash, input);
+ *   the returned context is re-initialised with |key|.
+ *
+ * Returns the context (the caller destroys it with HMAC_Destroy), or NULL
+ * on failure.
+ */
+static HMACContext *
+rsa_GetHMACContext(const SECHashObject *hash, RSAPrivateKey *key,
+ const unsigned char *input, unsigned int inputLen)
+{
+ unsigned char keyHash[HASH_LENGTH_MAX];
+ void *hashContext;
+ HMACContext *hmac = NULL;
+ unsigned int privKeyLen = key->privateExponent.len;
+ unsigned int keyLen;
+ SECStatus rv;
+
+ /* first get the key hash (should store in the key structure) */
+ PORT_Memset(keyHash, 0, sizeof(keyHash));
+ hashContext = (*hash->create)();
+ if (hashContext == NULL) {
+ return NULL;
+ }
+ (*hash->begin)(hashContext);
+ if (privKeyLen < inputLen) {
+ /* keyHash is still all zeros here, so it doubles as the source of
+ * the zero padding fed to the hash. */
+ int padLen = inputLen - privKeyLen;
+ while (padLen > sizeof(keyHash)) {
+ (*hash->update)(hashContext, keyHash, sizeof(keyHash));
+ padLen -= sizeof(keyHash);
+ }
+ (*hash->update)(hashContext, keyHash, padLen);
+ }
+ (*hash->update)(hashContext, key->privateExponent.data, privKeyLen);
+ (*hash->end)(hashContext, keyHash, &keyLen, sizeof(keyHash));
+ (*hash->destroy)(hashContext, PR_TRUE);
+
+ /* now create the hmac key */
+ hmac = HMAC_Create(hash, keyHash, keyLen, PR_TRUE);
+ if (hmac == NULL) {
+ /* keyHash holds key-derived material; clear it on every exit. */
+ PORT_Memset(keyHash, 0, sizeof(keyHash));
+ return NULL;
+ }
+ HMAC_Begin(hmac);
+ HMAC_Update(hmac, input, inputLen);
+ /* keyHash is reused: it now receives HMAC(keyHash, input). */
+ rv = HMAC_Finish(hmac, keyHash, &keyLen, sizeof(keyHash));
+ if (rv != SECSuccess) {
+ PORT_Memset(keyHash, 0, sizeof(keyHash));
+ HMAC_Destroy(hmac, PR_TRUE);
+ return NULL;
+ }
+ /* Finally set the new key into the hash context. We
+ * reuse the original context allocated above so we don't
+ * need to allocate and free another one */
+ rv = HMAC_ReInit(hmac, hash, keyHash, keyLen, PR_TRUE);
+ PORT_Memset(keyHash, 0, sizeof(keyHash));
+ if (rv != SECSuccess) {
+ HMAC_Destroy(hmac, PR_TRUE);
+ return NULL;
+ }
+
+ return hmac;
+}
+
+/*
+ * Fill |output| with |length| bytes of HMAC-based pseudo-random data.
+ *
+ * Each block is HMAC(iterator || label || encodedLen), where iterator is
+ * a two-byte big-endian block counter starting at 0 and encodedLen is the
+ * requested length in bits, encoded in two bytes. |hmac| must already be
+ * keyed; |hashLength| is the number of bytes taken from each HMAC result.
+ *
+ * Returns SECSuccess, or the status of the failing HMAC_Finish call.
+ */
+static SECStatus
+rsa_HMACPrf(HMACContext *hmac, const char *label, int labelLen,
+ int hashLength, unsigned char *output, int length)
+{
+ unsigned char iterator[2] = { 0, 0 };
+ unsigned char encodedLen[2] = { 0, 0 };
+ unsigned char hmacLast[HASH_LENGTH_MAX];
+ unsigned int left = length;
+ unsigned int hashReturn;
+ SECStatus rv = SECSuccess;
+
+ /* encodedLen is in bits, length is in bytes, thus the shifts
+ * do an implied multiply by 8 */
+ encodedLen[0] = (length >> 5) & 0xff;
+ encodedLen[1] = (length << 3) & 0xff;
+
+ /* Full blocks are written straight into the caller's buffer. The
+ * comparison is strict, so a final block of exactly hashLength bytes is
+ * handled by the tail code below. */
+ while (left > hashLength) {
+ HMAC_Begin(hmac);
+ HMAC_Update(hmac, iterator, 2);
+ HMAC_Update(hmac, (const unsigned char *)label, labelLen);
+ HMAC_Update(hmac, encodedLen, 2);
+ rv = HMAC_Finish(hmac, output, &hashReturn, hashLength);
+ if (rv != SECSuccess) {
+ return rv;
+ }
+ /* increment the two-byte counter, carrying into the high byte */
+ iterator[1]++;
+ if (iterator[1] == 0)
+ iterator[0]++;
+ left -= hashLength;
+ output += hashLength;
+ }
+ if (left) {
+ /* The last block goes through a local buffer so that only |left|
+ * bytes are copied to the caller; the local copy is cleared. */
+ HMAC_Begin(hmac);
+ HMAC_Update(hmac, iterator, 2);
+ HMAC_Update(hmac, (const unsigned char *)label, labelLen);
+ HMAC_Update(hmac, encodedLen, 2);
+ rv = HMAC_Finish(hmac, hmacLast, &hashReturn, sizeof(hmacLast));
+ if (rv != SECSuccess) {
+ return rv;
+ }
+ PORT_Memcpy(output, hmacLast, left);
+ PORT_Memset(hmacLast, 0, sizeof(hmacLast));
+ }
+ return rv;
+}
+
+/*
+ * Return the smallest all-ones mask that covers every set bit of a
+ * 16-bit value, i.e. the value with all bits below its highest set bit
+ * filled in. Examples:
+ *   0x81  -> 0xff
+ *   0x1af -> 0x1ff
+ *   0x4d1 -> 0x7ff
+ */
+static int
+makeMask16(int len)
+{
+    int mask = len;
+    int shift;
+
+    /* Smear the top bit downwards by 1, 2, 4 and finally 8 positions. */
+    for (shift = 1; shift <= 8; shift *= 2) {
+        mask |= (mask >> shift);
+    }
+    return mask;
+}
+
+/* Expands a string literal into two arguments: the literal and its length
+ * without the terminating NUL. */
+#define STRING_AND_LENGTH(s) s, sizeof(s) - 1
+/*
+ * Derive the length reported by RSA_DecryptBlock when decryption fails.
+ *
+ * 256 bytes are drawn from the PRF with the label "length" and read as
+ * 128 big-endian 16-bit candidates. Each candidate is reduced with the
+ * mask from makeMask16(maxLegalLen); the last candidate that is below
+ * maxLegalLen is kept (0 if none qualifies). The selection uses the
+ * PORT_CT_* macros rather than branches (presumably constant-time; they
+ * are defined outside this block).
+ *
+ * Returns the chosen length, or -1 if the PRF fails.
+ */
+static int
+rsa_GetErrorLength(HMACContext *hmac, int hashLen, int maxLegalLen)
+{
+ unsigned char out[128 * 2];
+ unsigned char *outp;
+ int outLength = 0;
+ int lengthMask;
+ SECStatus rv;
+
+ lengthMask = makeMask16(maxLegalLen);
+ rv = rsa_HMACPrf(hmac, STRING_AND_LENGTH("length"), hashLen,
+ out, sizeof(out));
+ if (rv != SECSuccess) {
+ return -1;
+ }
+ /* every candidate is examined, whatever the earlier ones yielded */
+ for (outp = out; outp < out + sizeof(out); outp += 2) {
+ int candidate = outp[0] << 8 | outp[1];
+ candidate = candidate & lengthMask;
+ outLength = PORT_CT_SEL(PORT_CT_LT(candidate, maxLegalLen),
+ candidate, outLength);
+ }
+ PORT_Memset(out, 0, sizeof(out));
+ return outLength;
+}
+
+/*
+ * Decrypt a block padded with block type RSA_BlockPublic
+ * (0x00 || BT || Pad || 0x00 || data) and return the data.
+ *
+ * This function can only fail in environmental cases: Programming errors
+ * and out of memory situations. It can't fail if the keys are valid and
+ * the inputs are the proper size. If the actual RSA decryption fails, a
+ * fake value and a fake length, both of which have already been generated
+ * based on the key and input, are returned.
+ * Applications are expected to detect decryption failures based on the fact
+ * that the decrypted value (usually a key) doesn't validate. This prevents
+ * Bleichenbacher style attacks against the key.
+ *
+ * input must be exactly modulusLen bytes. At most maxOutputLen bytes are
+ * written to output and *outputLen receives the number written.
+ * Returns SECFailure only for the environmental failures described above
+ * (SEC_ERROR_INVALID_ARGS is set for a bad input or modulus length).
+ */
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    SECStatus rv;
+    PRUint32 fail;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer = NULL;
+    unsigned char *errorBuffer = NULL;
+    unsigned char *bp = NULL;
+    unsigned char *ep = NULL;
+    unsigned int outLen = modulusLen;
+    unsigned int maxLegalLen = modulusLen - 10;
+    unsigned int errorLength;
+    const SECHashObject *hashObj;
+    HMACContext *hmac = NULL;
+
+    /* failures in the top section indicate failures in the environment
+     * (memory) or the library. OK to return errors in these cases because
+     * it doesn't provide any oracle information to attackers. */
+    if (inputLen != modulusLen || modulusLen < 10) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Allocate enough space to decrypt */
+    buffer = PORT_ZAlloc(modulusLen);
+    if (!buffer) {
+        goto loser;
+    }
+    errorBuffer = PORT_ZAlloc(modulusLen);
+    if (!errorBuffer) {
+        goto loser;
+    }
+    hashObj = HASH_GetRawHashObject(HASH_AlgSHA256);
+    if (hashObj == NULL) {
+        goto loser;
+    }
+
+    /* calculate the values to return in the error case rather than
+     * the actual returned values. This data is the same for the
+     * same input and private key. */
+    hmac = rsa_GetHMACContext(hashObj, key, input, inputLen);
+    if (hmac == NULL) {
+        goto loser;
+    }
+    errorLength = rsa_GetErrorLength(hmac, hashObj->length, maxLegalLen);
+    if (((int)errorLength) < 0) {
+        goto loser;
+    }
+    /* we always have to generate a full moduluslen error string. Otherwise
+     * we create a timing dependency on errorLength, which could be used to
+     * determine the difference between errorLength and outputLen and tell
+     * us that there was a pkcs1 decryption failure */
+    rv = rsa_HMACPrf(hmac, STRING_AND_LENGTH("message"),
+                     hashObj->length, errorBuffer, modulusLen);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    HMAC_Destroy(hmac, PR_TRUE);
+    hmac = NULL;
+
+    /* From here on out, we will always return success. If there is
+     * an error, we will return deterministic output based on the key
+     * and the input data. */
+    rv = RSA_PrivateKeyOp(key, buffer, input);
+
+    fail = PORT_CT_NE(rv, SECSuccess);
+    fail |= PORT_CT_NE(buffer[0], RSA_BLOCK_FIRST_OCTET) | PORT_CT_NE(buffer[1], RSA_BlockPublic);
+
+    /* There have to be at least 8 bytes of padding. */
+    for (i = 2; i < 10; i++) {
+        fail |= PORT_CT_EQ(buffer[i], RSA_BLOCK_AFTER_PAD_OCTET);
+    }
+
+    /* Locate the first separator octet without branching on the data:
+     * outLen is replaced only while it still has its initial value. */
+    for (i = 10; i < modulusLen; i++) {
+        unsigned int newLen = modulusLen - i - 1;
+        PRUint32 condition = PORT_CT_EQ(buffer[i], RSA_BLOCK_AFTER_PAD_OCTET) & PORT_CT_EQ(outLen, modulusLen);
+        outLen = PORT_CT_SEL(condition, newLen, outLen);
+    }
+    // this can only happen if a zero wasn't found above
+    fail |= PORT_CT_GE(outLen, modulusLen);
+
+    outLen = PORT_CT_SEL(fail, errorLength, outLen);
+
+    /* index into the correct buffer. Do it before we truncate outLen if the
+     * application was asking for less data than we can return */
+    bp = buffer + modulusLen - outLen;
+    ep = errorBuffer + modulusLen - outLen;
+
+    /* at this point, outLen returns no information about decryption failures,
+     * no need to hide its value. maxOutputLen is how much data the
+     * application is expecting, which is also not sensitive. */
+    if (outLen > maxOutputLen) {
+        outLen = maxOutputLen;
+    }
+
+    /* we can't use PORT_Memcpy because caching could create a time dependency
+     * on the status of fail. */
+    for (i = 0; i < outLen; i++) {
+        output[i] = PORT_CT_SEL(fail, ep[i], bp[i]);
+    }
+
+    *outputLen = outLen;
+
+    /* buffer holds the decrypted block and errorBuffer holds key-derived
+     * PRF output: wipe both before releasing them. The full modulusLen is
+     * always cleared, so this does not depend on |fail|. */
+    PORT_ZFree(buffer, modulusLen);
+    PORT_ZFree(errorBuffer, modulusLen);
+
+    return SECSuccess;
+
+loser:
+    if (hmac) {
+        HMAC_Destroy(hmac, PR_TRUE);
+    }
+    if (buffer) {
+        PORT_ZFree(buffer, modulusLen);
+    }
+    if (errorBuffer) {
+        PORT_ZFree(errorBuffer, modulusLen);
+    }
+
+    return SECFailure;
+}
+
+/*
+ * Encode a RSA-PSS signature.
+ * Described in RFC 3447, section 9.1.1.
+ * We use mHash instead of M as input.
+ * emBits from the RFC is just modBits - 1, see section 8.1.1.
+ * We only support MGF1 as the MGF.
+ *
+ * em / emLen: output buffer for the encoded message and its size.
+ * mHash: message digest; hash->length bytes of it are read.
+ * salt / saltLen: salt to use; when salt is NULL, saltLen random bytes
+ *     are generated instead.
+ *
+ * Returns SECSuccess, or a failure status with SEC_ERROR_INVALID_ALGORITHM
+ * (no hash object for hashAlg), SEC_ERROR_OUTPUT_LEN (em too small for
+ * hash + salt) or SEC_ERROR_NO_MEMORY set; a random generator failure is
+ * returned as-is.
+ */
+SECStatus
+RSA_EMSAEncodePSS(unsigned char *em,
+                  unsigned int emLen,
+                  unsigned int emBits,
+                  const unsigned char *mHash,
+                  HASH_HashType hashAlg,
+                  HASH_HashType maskHashAlg,
+                  const unsigned char *salt,
+                  unsigned int saltLen)
+{
+    const SECHashObject *hash;
+    void *hash_context;
+    unsigned char *dbMask;
+    unsigned int dbMaskLen;
+    unsigned int i;
+    SECStatus rv;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* Step 3: emLen >= hLen + sLen + 2. Written without adding saltLen so
+     * that a very large saltLen cannot wrap the sum and slip through. */
+    if (emLen < hash->length + 2 ||
+        saltLen > emLen - hash->length - 2) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    dbMaskLen = emLen - hash->length - 1;
+
+    /* Step 4 */
+    if (salt == NULL) {
+        rv = RNG_GenerateGlobalRandomBytes(&em[dbMaskLen - saltLen], saltLen);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    } else {
+        PORT_Memcpy(&em[dbMaskLen - saltLen], salt, saltLen);
+    }
+
+    /* Step 5 + 6 */
+    /* Compute H and store it at its final location &em[dbMaskLen]. */
+    hash_context = (*hash->create)();
+    if (hash_context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hash_context);
+    (*hash->update)(hash_context, eightZeros, 8);
+    (*hash->update)(hash_context, mHash, hash->length);
+    (*hash->update)(hash_context, &em[dbMaskLen - saltLen], saltLen);
+    (*hash->end)(hash_context, &em[dbMaskLen], &i, hash->length);
+    (*hash->destroy)(hash_context, PR_TRUE);
+
+    /* Step 7 + 8 */
+    PORT_Memset(em, 0, dbMaskLen - saltLen - 1);
+    em[dbMaskLen - saltLen - 1] = 0x01;
+
+    /* Step 9 */
+    dbMask = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (dbMask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, dbMask, dbMaskLen, &em[dbMaskLen], hash->length);
+
+    /* Step 10 */
+    for (i = 0; i < dbMaskLen; i++)
+        em[i] ^= dbMask[i];
+    PORT_Free(dbMask);
+
+    /* Step 11 */
+    em[0] &= 0xff >> (8 * emLen - emBits);
+
+    /* Step 12 */
+    em[emLen - 1] = 0xbc;
+
+    return SECSuccess;
+}
+
+/*
+ * Verify a RSA-PSS signature.
+ * Described in RFC 3447, section 9.1.2.
+ * We use mHash instead of M as input.
+ * emBits from the RFC is just modBits - 1, see section 8.1.2.
+ * We only support MGF1 as the MGF.
+ *
+ * mHash: message digest; hash->length bytes of it are read.
+ * em / emLen: encoded message recovered from the signature.
+ * saltLen: expected salt length.
+ *
+ * Returns SECSuccess when the encoding is consistent with mHash.
+ * Otherwise returns SECFailure with SEC_ERROR_BAD_SIGNATURE,
+ * SEC_ERROR_INVALID_ALGORITHM (no hash object for hashAlg) or
+ * SEC_ERROR_NO_MEMORY set.
+ */
+static SECStatus
+emsa_pss_verify(const unsigned char *mHash,
+                const unsigned char *em,
+                unsigned int emLen,
+                unsigned int emBits,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                unsigned int saltLen)
+{
+    const SECHashObject *hash;
+    void *hash_context;
+    unsigned char *db;
+    unsigned char *H_; /* H' from the RFC */
+    unsigned int i;
+    unsigned int dbMaskLen;
+    unsigned int zeroBits;
+    SECStatus rv;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* Step 3 + 4. The length test avoids adding saltLen so that a very
+     * large saltLen cannot wrap the sum and slip through; it is evaluated
+     * first so em[emLen - 1] is only read for a non-empty em. */
+    if ((emLen < hash->length + 2) ||
+        (saltLen > emLen - hash->length - 2) ||
+        (em[emLen - 1] != 0xbc)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+    dbMaskLen = emLen - hash->length - 1;
+
+    /* Step 6 */
+    zeroBits = 8 * emLen - emBits;
+    if (em[0] >> (8 - zeroBits)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    /* Step 7 */
+    db = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (db == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* &em[dbMaskLen] points to H, used as mgfSeed */
+    MGF1(maskHashAlg, db, dbMaskLen, &em[dbMaskLen], hash->length);
+
+    /* Step 8 */
+    for (i = 0; i < dbMaskLen; i++) {
+        db[i] ^= em[i];
+    }
+
+    /* Step 9 */
+    db[0] &= 0xff >> zeroBits;
+
+    /* Step 10 */
+    for (i = 0; i < (dbMaskLen - saltLen - 1); i++) {
+        if (db[i] != 0) {
+            PORT_Free(db);
+            PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+            return SECFailure;
+        }
+    }
+    if (db[dbMaskLen - saltLen - 1] != 0x01) {
+        PORT_Free(db);
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    /* Step 12 + 13 */
+    H_ = (unsigned char *)PORT_Alloc(hash->length);
+    if (H_ == NULL) {
+        PORT_Free(db);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    hash_context = (*hash->create)();
+    if (hash_context == NULL) {
+        PORT_Free(db);
+        PORT_Free(H_);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hash_context);
+    (*hash->update)(hash_context, eightZeros, 8);
+    (*hash->update)(hash_context, mHash, hash->length);
+    (*hash->update)(hash_context, &db[dbMaskLen - saltLen], saltLen);
+    (*hash->end)(hash_context, H_, &i, hash->length);
+    (*hash->destroy)(hash_context, PR_TRUE);
+
+    PORT_Free(db);
+
+    /* Step 14 */
+    if (PORT_Memcmp(H_, &em[dbMaskLen], hash->length) != 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+    } else {
+        rv = SECSuccess;
+    }
+
+    PORT_Free(H_);
+    return rv;
+}
+
+/*
+ * Produce an RSA-PSS signature over the digest in |input|.
+ *
+ * The digest is encoded with RSA_EMSAEncodePSS and the result is run
+ * through the double-checked private key operation into |output|, which
+ * must hold at least modulusLen bytes. *outputLen is set to modulusLen
+ * once the encoding step has succeeded.
+ */
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg,
+            HASH_HashType maskHashAlg,
+            const unsigned char *salt,
+            unsigned int saltLength,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen)
+{
+    SECStatus status;
+    unsigned int keyBytes = rsa_modulusLen(&key->modulus);
+    unsigned int keyBits = rsa_modulusBits(&key->modulus);
+    unsigned int encodedLen = keyBytes;
+    unsigned char *block;
+    unsigned char *encodeAt;
+
+    if (maxOutputLen < keyBytes) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (hashAlg == HASH_AlgNULL || maskHashAlg == HASH_AlgNULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    block = (unsigned char *)PORT_Alloc(keyBytes);
+    if (block == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    encodeAt = block;
+
+    /* len(em) == ceil((modulusBits - 1) / 8): when the modulus has one bit
+     * more than a whole number of bytes, the encoding is one byte shorter
+     * and is preceded by a zero byte. */
+    if (keyBits % 8 == 1) {
+        *encodeAt++ = 0;
+        encodedLen -= 1;
+    }
+
+    status = RSA_EMSAEncodePSS(encodeAt, encodedLen, keyBits - 1, input,
+                               hashAlg, maskHashAlg, salt, saltLength);
+    if (status == SECSuccess) {
+        // This sets error codes upon failure.
+        status = RSA_PrivateKeyOpDoubleChecked(key, output, block);
+        *outputLen = keyBytes;
+    }
+
+    PORT_Free(block);
+    return status;
+}
+
+/*
+ * Verify an RSA-PSS signature |sig| against the digest |hash|.
+ *
+ * The signature must be exactly modulusLen bytes. It is run through the
+ * public key operation and the result is checked with emsa_pss_verify.
+ * A failing public key operation is reported as SEC_ERROR_BAD_SIGNATURE.
+ */
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg,
+                 HASH_HashType maskHashAlg,
+                 unsigned int saltLength,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    SECStatus status;
+    unsigned int keyBytes = rsa_modulusLen(&key->modulus);
+    unsigned int keyBits = rsa_modulusBits(&key->modulus);
+    unsigned int encodedLen = keyBytes;
+    unsigned char *decoded;
+    unsigned char *encoded;
+
+    if (sigLen != keyBytes) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    if (hashAlg == HASH_AlgNULL || maskHashAlg == HASH_AlgNULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    decoded = (unsigned char *)PORT_Alloc(keyBytes);
+    if (decoded == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    encoded = decoded;
+
+    if (RSA_PublicKeyOp(key, decoded, sig) == SECSuccess) {
+        /* len(em) == ceil((modulusBits - 1) / 8): skip the leading byte
+         * when the encoding is one byte shorter than the modulus. */
+        if (keyBits % 8 == 1) {
+            encoded += 1;
+            encodedLen -= 1;
+        }
+        status = emsa_pss_verify(hash, encoded, encodedLen, keyBits - 1,
+                                 hashAlg, maskHashAlg, saltLength);
+        PORT_Free(decoded);
+    } else {
+        PORT_Free(decoded);
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        status = SECFailure;
+    }
+
+    return status;
+}
+
+/*
+ * Sign |input| using block type RSA_BlockPrivate padding
+ * (rsa_FormatBlock) and the double-checked private key operation.
+ *
+ * output must hold at least modulusLen bytes; *outputLen is set to
+ * modulusLen once the block has been formatted. A formatting failure is
+ * reported with SEC_ERROR_LIBRARY_FAILURE.
+ */
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output,
+         unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *input,
+         unsigned int inputLen)
+{
+    SECStatus status;
+    unsigned int keyBytes = rsa_modulusLen(&key->modulus);
+    SECItem padded = { siBuffer, NULL, 0 };
+    SECItem raw = { siBuffer, (unsigned char *)input, inputLen };
+
+    /* Nothing has been allocated yet, so this can return directly. */
+    if (maxOutputLen < keyBytes) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    status = rsa_FormatBlock(&padded, keyBytes, RSA_BlockPrivate, &raw);
+    if (status == SECSuccess) {
+        // This sets error codes upon failure.
+        status = RSA_PrivateKeyOpDoubleChecked(key, output, padded.data);
+        *outputLen = keyBytes;
+    } else {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    }
+
+    /* The formatted block may exist on either path; wipe and free it. */
+    if (padded.data != NULL) {
+        PORT_ZFree(padded.data, keyBytes);
+    }
+    return status;
+}
+
+/*
+ * Verify that |sig| is a block type RSA_BlockPrivate signature over
+ * exactly |data|.
+ *
+ * sig must be modulusLen bytes. After the public key operation the block
+ * must read 0x00 || BT || Pad || 0x00 || data, with every pad octet equal
+ * to RSA_BLOCK_PRIVATE_PAD_OCTET.
+ *
+ * Returns SECSuccess on a match. Otherwise returns SECFailure, with
+ * SEC_ERROR_BAD_SIGNATURE, SEC_ERROR_BAD_DATA (data cannot fit in a block
+ * for this modulus) or SEC_ERROR_NO_MEMORY set by the checks before the
+ * final comparison.
+ */
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig,
+              unsigned int sigLen,
+              const unsigned char *data,
+              unsigned int dataLen)
+{
+    SECStatus rv = SECFailure;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer = NULL;
+
+    if (sigLen != modulusLen) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto done;
+    }
+
+    /*
+     * 0x00 || BT || Pad || 0x00 || ActualData
+     *
+     * The "3" below is the first octet + the second octet + the 0x00
+     * octet that always comes just before the ActualData.
+     *
+     * The modulus itself is checked first: for a modulus shorter than the
+     * fixed overhead the unsigned subtraction would wrap around and accept
+     * any dataLen, letting the padding scan below run past the buffer.
+     */
+    if (modulusLen < (3 + RSA_BLOCK_MIN_PAD_LEN) ||
+        dataLen > modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto done;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    if (RSA_PublicKeyOp(key, buffer, sig) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto done;
+    }
+
+    /*
+     * check the padding that was used
+     */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPrivate) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto done;
+    }
+    for (i = 2; i < modulusLen - dataLen - 1; i++) {
+        if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) {
+            PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+            goto done;
+        }
+    }
+    /* i now indexes the octet immediately before the data */
+    if (buffer[i] != RSA_BLOCK_AFTER_PAD_OCTET) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto done;
+    }
+
+    /*
+     * make sure we get the same results
+     */
+    if (PORT_Memcmp(buffer + modulusLen - dataLen, data, dataLen) == 0) {
+        rv = SECSuccess;
+    }
+
+done:
+    if (buffer) {
+        PORT_Free(buffer);
+    }
+    return rv;
+}
+
+/*
+ * Verify a block type RSA_BlockPrivate signature and recover the signed
+ * data into |output|.
+ *
+ * sig must be modulusLen bytes. After the public key operation the block
+ * must read 0x00 || BT || Pad || 0x00 || data with at least
+ * RSA_BLOCK_MIN_PAD_LEN pad octets and a non-empty data part. The data is
+ * copied to output and its length stored in *outputLen.
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_BAD_SIGNATURE,
+ * SEC_ERROR_NO_MEMORY or SEC_ERROR_OUTPUT_LEN (data longer than
+ * maxOutputLen) set.
+ */
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output,
+                     unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig,
+                     unsigned int sigLen)
+{
+    SECStatus status = SECFailure;
+    unsigned int keyBytes = rsa_modulusLen(&key->modulus);
+    unsigned int pos;
+    unsigned int padLen;
+    unsigned char *block;
+
+    /* Failures before the allocation have nothing to release. */
+    if (sigLen != keyBytes) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    block = (unsigned char *)PORT_Alloc(keyBytes + 1);
+    if (!block) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    if (RSA_PublicKeyOp(key, block, sig) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup;
+    }
+
+    *outputLen = 0;
+
+    /* The block must start with the first octet and the block type. */
+    if (!(block[0] == RSA_BLOCK_FIRST_OCTET &&
+          block[1] == (unsigned char)RSA_BlockPrivate)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup;
+    }
+
+    /* Walk the padding until the separator octet or the end of the block;
+     * any other octet on the way is a padding error. */
+    pos = 2;
+    while (pos < keyBytes && block[pos] != RSA_BLOCK_AFTER_PAD_OCTET) {
+        if (block[pos] != RSA_BLOCK_PRIVATE_PAD_OCTET) {
+            PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+            goto cleanup;
+        }
+        pos++;
+    }
+    if (pos < keyBytes) {
+        /* separator found: everything after it is the data */
+        *outputLen = keyBytes - pos - 1;
+    }
+
+    /* Too little padding, a missing separator and empty data are all
+     * reported as a bad signature. */
+    padLen = pos - 2;
+    if (padLen < RSA_BLOCK_MIN_PAD_LEN || *outputLen == 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup;
+    }
+    if (*outputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto cleanup;
+    }
+
+    PORT_Memcpy(output, block + keyBytes - *outputLen, *outputLen);
+    status = SECSuccess;
+
+cleanup:
+    PORT_Free(block);
+    return status;
+}
diff --git a/security/nss/lib/freebl/scripts/LICENSE b/security/nss/lib/freebl/scripts/LICENSE
new file mode 100644
index 0000000000..a9335c22f3
--- /dev/null
+++ b/security/nss/lib/freebl/scripts/LICENSE
@@ -0,0 +1,36 @@
+Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ * Redistributions of source code must retain copyright notices,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials
+ provided with the distribution.
+
+ * Neither the name of the CRYPTOGAMS nor the names of its
+ copyright holder and contributors may be used to endorse or
+ promote products derived from this software without specific
+ prior written permission.
+
+ALTERNATIVELY, provided that this notice is retained in full, this
+product may be distributed under the terms of the GNU General Public
+License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+those given above.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/security/nss/lib/freebl/scripts/gen.sh b/security/nss/lib/freebl/scripts/gen.sh
new file mode 100755
index 0000000000..ea415cc949
--- /dev/null
+++ b/security/nss/lib/freebl/scripts/gen.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# Run the scripts in this folder, generating the assembly.
+#
+# Produces ../sha512-p8.s: the output of sha512p8-ppc.pl for the
+# linux64le flavour, prefixed with a short license header.
+#
+
+# Stop at the first failing command. Without this, a failed generation
+# step would still overwrite ../sha512-p8.s with a header-only file.
+set -e
+
+perl sha512p8-ppc.pl linux64le sha512-p8.s
+
+# Add the license mention
+cat > hdr << "EOF"
+# Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+# See the full LICENSE under scripts/.
+
+EOF
+
+cat hdr sha512-p8.s > ../sha512-p8.s
+
+# Cleanup
+rm hdr sha512-p8.s
diff --git a/security/nss/lib/freebl/scripts/ppc-xlate.pl b/security/nss/lib/freebl/scripts/ppc-xlate.pl
new file mode 100644
index 0000000000..a0fcf6a31f
--- /dev/null
+++ b/security/nss/lib/freebl/scripts/ppc-xlate.pl
@@ -0,0 +1,352 @@
+#!/usr/bin/env perl
+
+# PowerPC assembler distiller by \@dot-asm.
+
+################################################################
+# Recognized "flavour"-s are:
+#
+# linux{32|64}[le] GNU assembler and ELF symbol decorations,
+# with little-endian option
+# linux64v2 GNU assembler and big-endian instantiation
+# of latest ELF specification
+# aix{32|64} AIX assembler and symbol decorations
+# osx{32|64} Mac OS X assembler and symbol decorations
+
+# Command line: <flavour> <output file>. All generated text is written to
+# STDOUT, which is redirected to the output file here.
+my $flavour = shift;
+my $output = shift;
+# "or" is required here: with "||" the die would bind to the file name
+# string (always true) and a failed open would go unnoticed.
+open STDOUT,">$output" or die "can't open $output: $!";
+
+# %GLOBALS maps a symbol as written in the source to its decorated name;
+# %TYPES records the symbol type given by .type (or assumed by .globl).
+my %GLOBALS;
+my %TYPES;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+# Handler for the .type directive. Records the symbol's type in %TYPES and,
+# for linux flavours, returns the directive with a leading "." stripped
+# from the symbol name; other flavours get an empty string.
+my $type = sub {
+ my ($dir,$name,$type) = @_;
+
+ $TYPES{$name} = $type;
+ if ($flavour =~ /linux/) {
+ $name =~ s|^\.||;
+ ".type $name,$type";
+ } else {
+ "";
+ }
+};
+# Handler for the .globl directive. Decorates the symbol name as the
+# flavour requires, stores the decorated name in %GLOBALS under the name
+# as written, and returns the directive text to emit.
+#   aix:   symbols of function type (the default) get a "." prefix
+#   osx:   symbols get a "_" prefix
+#   linux 32-bit, 64le and 64v2: plain .globl, plus a .type ...,@function
+#          line when no type was recorded yet
+#   other linux 64: as above, and function symbols additionally get an
+#          ".opd" section entry and a "." prefix
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $type = \$TYPES{$name};
+ my $ret;
+
+ $name =~ s|^\.||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { if (!$$type) {
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $name = ".$name";
+ }
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*(32|64(le|v2))/
+ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ last;
+ };
+ /linux.*64/ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $ret .= "\n.section \".opd\",\"aw\"";
+ $ret .= "\n.align 3";
+ $ret .= "\n$name:";
+ $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
+ $ret .= "\n.previous";
+ $name = ".$name";
+ }
+ last;
+ };
+ }
+
+ # flavours that built no text above (aix, osx) get a plain .globl
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+# Handler for the .text directive: returns the section directive for the
+# flavour, preceded by ".abiversion 2" for linux 64le / 64v2.
+my $text = sub {
+    my $section;
+    if ($flavour =~ /aix/) {
+        $section = ".csect\t.text[PR],7";
+    } else {
+        $section = ".text";
+    }
+    if ($flavour =~ /linux.*64(le|v2)/) {
+        $section = ".abiversion 2\n" . $section;
+    }
+    return $section;
+};
+# Handler for the .machine directive. On osx flavours the quotes are
+# removed from the architecture and "any" is replaced by ppc970-64 or
+# ppc970, depending on whether the flavour is 64-bit.
+my $machine = sub {
+    my (undef, $cpu) = @_;
+    if ($flavour =~ /osx/) {
+        $cpu =~ s/\"//g;
+        if ($cpu eq "any") {
+            $cpu = ($flavour =~ /64/) ? "ppc970-64" : "ppc970";
+        }
+    }
+    return ".machine $cpu";
+};
+# Handler for the .size directive. Only linux flavours emit anything: a
+# .size line for the symbol under its decorated name from %GLOBALS (if
+# one was recorded), plus a second line for the undecorated name when the
+# two differ. Other flavours get an empty string.
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift;
+ my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
+ my $ret = ".size $$real,.-$$real";
+ $name =~ s|^\.||;
+ if ($$real ne $name) {
+ $ret .= "\n.size $name,.-$$real";
+ }
+ $ret;
+ }
+ else
+ { ""; }
+};
+# Handler for the .asciz directive. The arguments are re-joined with ","
+# and, when they form one double-quoted string, emitted as a .byte list
+# of the character codes followed by a terminating 0 and ".align 2".
+# Anything else yields an empty string.
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+# Handler for the .quad directive. Each numeric argument is split into a
+# high and a low 32-bit half and emitted as a .long pair, low half first
+# when the flavour name ends in "le". Arguments that are neither hex nor
+# decimal literals are passed through as ".quad <arg>".
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ # hex literal: the last (up to) 8 digits form the low half
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+# cmplw [crN,]rA,rB. The condition register field is optional and
+# defaults to 0. For linux 32-bit flavours the instruction is emitted as
+# a raw .long word built from the operand numbers; otherwise the mnemonic
+# is emitted with the field made explicit.
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+# bdnz[+-] target, rewritten as "bc BO,0,target". The handler exists only
+# for non-linux flavours; on linux $bdnz stays undefined.
+# The condition is a ternary rather than a trailing "if" because
+# "my $x = ... if COND;" has undefined behaviour in Perl.
+my $bdnz = ($flavour!~/linux/) ? sub {
+    my $f = shift;
+    my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+    " bc $bo,0,".shift;
+} : undef;
+# bltlr[-]: emitted as a raw .long word on linux flavours and as
+# "bclr BO,0" elsewhere. A "-" suffix on the mnemonic adds 2 to BO.
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+# bnelr[-]: emitted as a raw .long word on linux flavours and as
+# "bclr BO,2" elsewhere. A "-" suffix on the mnemonic adds 2 to BO.
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# beqlr[-]: emitted as a raw .long word on linux flavours and as
+# "bclr BO,2" elsewhere. A "-" suffix on the mnemonic adds 2 to BO.
+# Unlike its siblings, this one prints the word in upper-case hex.
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+    # Rewrite "extrdi ra,rs,n,b" as the equivalent rldicl.
+    my ($f, $ra, $rs, $n, $b) = @_;
+    my $rotate = ($b + $n) & 63;    # third rldicl operand
+    my $mask   = 64 - $n;           # fourth rldicl operand
+    return " rldicl $ra,$rs,$rotate,$mask";
+};
+my $vmr = sub {
+    # vmr vx,vy is emitted as "vor" of the source register with itself
+    my (undef, $dst, $src) = @_;
+    return " vor $dst,$src,$src";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /aix|linux64(le|v2)/);
+my $mtspr = sub {
+    my ($f, $idx, $ra) = @_;
+    # A write to register #256 (vrsave) is replaced by "or ra,ra,ra" when
+    # $no_vrsave is set for the flavour.
+    return " or $ra,$ra,$ra" if ($idx == 256 && $no_vrsave);
+    return " mtspr $idx,$ra";
+};
+my $mfspr = sub {
+    my ($f, $rd, $idx) = @_;
+    # A read of register #256 (vrsave) is replaced by loading -1 when
+    # $no_vrsave is set for the flavour.
+    return " li $rd,-1" if ($idx == 256 && $no_vrsave);
+    return " mfspr $rd,$idx";
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+    # Encode an indexed VSX load/store as a raw .long word: primary opcode 31,
+    # three register fields, then the extended opcode doubled with the low
+    # bit set.
+    my ($f, $vrt, $ra, $rb, $op) = @_;
+    my $insn = (31<<26) | ($vrt<<21) | ($ra<<16) | ($rb<<11) | ($op*2+1);
+    return " .long ".sprintf("0x%X", $insn);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+my $lvx_splt = sub { vsxmem_op(@_, 332); }; # lxvdsx
+# VSX instruction[s] masqueraded as made-up AltiVec/VMX
+my $vpermdi = sub { # xxpermdi
+    my ($f, $vrt, $vra, $vrb, $dm) = @_;
+    # a literal starting with 0 is run through oct()
+    my $sel = ($dm =~ /^0/) ? oct($dm) : $dm;
+    my $insn = (60<<26) | ($vrt<<21) | ($vra<<16) | ($vrb<<11) | ($sel<<8) | (10<<3) | 7;
+    return " .long ".sprintf("0x%X", $insn);
+};
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+    # Encode a three-register vector instruction as a raw .long word:
+    # primary opcode 4, three register fields and the extended opcode $op.
+    my ($f, $vrt, $vra, $vrb, $op) = @_;
+    my $insn = (4<<26) | ($vrt<<21) | ($vra<<16) | ($vrb<<11) | $op;
+    return " .long ".sprintf("0x%X", $insn);
+}
+sub vfour {
+    # Encode a four-register instruction as a raw .long word: primary
+    # opcode 4, four register fields and the extended opcode $op.
+    my ($f, $vrt, $vra, $vrb, $vrc, $op) = @_;
+    my $insn = (4<<26) | ($vrt<<21) | ($vra<<16) | ($vrb<<11) | ($vrc<<6) | $op;
+    return " .long ".sprintf("0x%X", $insn);
+}
+# Crypto mnemonics, each hand-encoded through vcrypto_op with its extended
+# opcode as the last argument.  The main loop finds these subs by evaluating
+# a variable named after the mnemonic, so each variable name must spell the
+# mnemonic exactly.
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+# the last two operands (st, six) are folded into the third register field
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+# The instruction is vpmsumh; it used to be registered only under the
+# misspelt name "vpmsubh", so the real mnemonic was never translated.  The
+# old name stays as an alias for any input that used it.
+my $vpmsumh = sub { vcrypto_op(@_, 1096); };
+my $vpmsubh = $vpmsumh;
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+# These are not really crypto, but vcrypto_op template works
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+my $vadduqm = sub { vcrypto_op(@_, 256); };
+my $vmuleuw = sub { vcrypto_op(@_, 648); };
+my $vmulouw = sub { vcrypto_op(@_, 136); };
+my $vrld = sub { vcrypto_op(@_, 196); };
+my $vsld = sub { vcrypto_op(@_, 1476); };
+my $vsrd = sub { vcrypto_op(@_, 1732); };
+my $vsubudm = sub { vcrypto_op(@_, 1216); };
+my $vaddcuq = sub { vcrypto_op(@_, 320); };
+my $vaddeuqm = sub { vfour(@_,60); };
+my $vaddecuq = sub { vfour(@_,61); };
+my $vmrgew = sub { vfour(@_,0,1932); };
+my $vmrgow = sub { vfour(@_,0,1676); };
+
+my $mtsle = sub {
+    # raw encoding: primary opcode 31, one operand field, extended opcode 147
+    my ($f, $arg) = @_;
+    my $insn = (31<<26) | ($arg<<21) | (147*2);
+    return " .long ".sprintf("0x%X", $insn);
+};
+
+# VSX instructions masqueraded as AltiVec/VMX
+my $mtvrd = sub {
+    # raw encoding: primary opcode 31, two register fields, extended
+    # opcode 179 with the low bit set
+    my ($f, $vrt, $ra) = @_;
+    my $insn = (31<<26) | ($vrt<<21) | ($ra<<16) | (179<<1) | 1;
+    return " .long ".sprintf("0x%X", $insn);
+};
+my $mtvrwz = sub {
+    # raw encoding: primary opcode 31, two register fields, extended
+    # opcode 243 with the low bit set
+    my ($f, $vrt, $ra) = @_;
+    my $insn = (31<<26) | ($vrt<<21) | ($ra<<16) | (243<<1) | 1;
+    return " .long ".sprintf("0x%X", $insn);
+};
+my $lvwzx_u = sub { vsxmem_op(@_, 12); }; # lxsiwzx
+my $stvwx_u = sub { vsxmem_op(@_, 140); }; # stxsiwx
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub { vfour(@_,49); };
+my $maddld = sub { vfour(@_,51); };
+my $darn = sub {
+    # raw encoding: primary opcode 31, target register, L field, extended
+    # opcode 755
+    my ($f, $rt, $l) = @_;
+    my $insn = (31<<26) | ($rt<<21) | ($l<<16) | (755<<1);
+    return " .long ".sprintf("0x%X", $insn);
+};
+my $iseleq = sub {
+    # raw encoding: primary opcode 31, three register fields, then the
+    # constant (2<<6)|30
+    my ($f, $rt, $ra, $rb) = @_;
+    my $insn = (31<<26) | ($rt<<21) | ($ra<<16) | ($rb<<11) | (2<<6) | 30;
+    return " .long ".sprintf("0x%X", $insn);
+};
+# VSX instruction[s] masqueraded as made-up AltiVec/VMX
+my $vspltib = sub { # xxspltib
+    my ($f, $vrt, $imm8) = @_;
+    # a literal starting with 0 is run through oct(); only the low eight
+    # bits of the immediate are encoded
+    my $byte = ($imm8 =~ /^0/) ? oct($imm8) : $imm8;
+    $byte &= 0xff;
+    my $insn = (60<<26) | ($vrt<<21) | ($byte<<11) | (360<<1) | 1;
+    return " .long ".sprintf("0x%X", $insn);
+};
+
+# PowerISA 3.0B stuff
+my $addex = sub {
+    # raw encoding: primary opcode 31, three register fields, the CY field
+    # and extended opcode 170; only cy==0 is specified in 3.0B
+    my ($f, $rt, $ra, $rb, $cy) = @_;
+    my $insn = (31<<26) | ($rt<<21) | ($ra<<16) | ($rb<<11) | ($cy<<9) | (170<<1);
+    return " .long ".sprintf("0x%X", $insn);
+};
+my $vmsumudm = sub { vfour(@_,35); };
+
+while($line=<>) { # main loop: translate the input one line at a time
+ # strip comments and surrounding white space first
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+ # rewrite local labels to "Lname", then to ".Lname" when $dotinlocallabels is set
+ {
+ $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+ # peel off a leading "label:" and print it, mapped through %GLOBALS when it has an entry
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ my $xlated = ($GLOBALS{$label} or $label);
+ print "$xlated:";
+ if ($flavour =~ /linux.*64(le|v2)/) { # these flavours also get .localentry for labels typed as function
+ if ($TYPES{$label} =~ /function/) {
+ printf "\n.localentry %s,0\n",$xlated;
+ }
+ }
+ }
+ }
+ # split off the mnemonic; a same-named code reference, if one exists, generates the output line
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq ""); # "." when the word had a leading dot, TAB otherwise
+ my $mnemonic = $2;
+ my $f = $3; # optional one-character suffix: ".", "+" or "-"
+ my $opcode = eval("\$$mnemonic"); # look up a variable named after the mnemonic
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); # reduce register names to bare numbers
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(/,\s*/,$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+ # emit what is left of the line; the newline is printed even when the line is empty
+ print $line if ($line);
+ print "\n";
+}
+
+# Closing flushes the buffered output; a write error (for instance a full
+# disk) must fail the run instead of leaving truncated output behind.
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/security/nss/lib/freebl/scripts/sha512p8-ppc.pl b/security/nss/lib/freebl/scripts/sha512p8-ppc.pl
new file mode 100644
index 0000000000..3bef98be7b
--- /dev/null
+++ b/security/nss/lib/freebl/scripts/sha512p8-ppc.pl
@@ -0,0 +1,413 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for use in the OpenSSL
+# project. The module is dual licensed under OpenSSL and CRYPTOGAMS
+# licenses depending on where you obtain it. For further details see
+# https://github.com/dot-asm/cryptogams/.
+# ====================================================================
+
+# SHA256/512 for PowerISA v2.07.
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This module is
+# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
+# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
+# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
+# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
+# result is the degree of computational resources' utilization. POWER8
+# is a "massively multi-threaded chip" and the difference between single-
+# and maximum multi-process benchmark results tells that utilization is
+# a whopping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and
+# for sha1-ppc.pl - 73%. 100% means that the multi-process result equals
+# the single-process one, given that all threads end up on the same
+# physical core.
+#
+######################################################################
+# Believed-to-be-accurate results in cycles per processed byte [on
+# little-endian system]. Numbers in square brackets are for 64-bit
+# build of sha512-ppc.pl, presented for reference.
+#
+# POWER8 POWER9
+# SHA256 9.7 [15.8] 11.2 [12.5]
+# SHA512 6.1 [10.3] 7.0 [7.9]
+
+$flavour=shift;
+$output =shift;
+
+# Pick the pointer size, the link-register save offset and the store-update,
+# load and store mnemonics for the flavour.  A flavour containing "64" is
+# treated as 64-bit, otherwise one containing "32" as 32-bit; anything else
+# is rejected.
+my $is64 = ($flavour =~ /64/);
+die "nonsense $flavour" unless ($is64 || $flavour =~ /32/);
+$SIZE_T = $is64 ? 8 : 4;
+$LRSAVE = $is64 ? 2*$SIZE_T : $SIZE_T;
+($STU, $POP, $PUSH) = $is64 ? ("stdu", "ld", "std") : ("stwu", "lwz", "stw");
+
+$LENDIAN=($flavour=~/le/);
+
+# Look for ppc-xlate.pl next to this script, then under ../../perlasm/, and
+# pipe everything printed to STDOUT through it.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+# "or", not "||": "||" binds to the command string, which is always true, so
+# a failed open would never reach die.
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
+
+if ($output =~ /512/) {
+ $bits=512;
+ $SZ=8;
+ $sz="d";
+ $rounds=80;
+} else {
+ $bits=256;
+ $SZ=4;
+ $sz="w";
+ $rounds=64;
+}
+
+$func="sha${bits}_block_p8";
+$LOCALS=8*$SIZE_T+8*16;
+$FRAME=$LOCALS+9*16+6*$SIZE_T;
+
+$sp ="r1";
+$toc="r2";
+$ctx="r3";
+$inp="r4";
+$num="r5";
+$Tbl="r6";
+$idx="r7";
+$lrsave="r8";
+$offload="r11";
+$vrsave="r12";
+@I = ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70) = (0,map("r$_",(10,26..31)));
+
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
+@X=map("v$_",(8..19,24..27));
+($Ki,$Func,$Sigma,$lemask)=map("v$_",(28..31));
+
+sub ROUND { # append the assembly text for round $i to $code; the other eight arguments are the registers playing a..h in this round
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)%16; # @X slot that is updated by the $i>=15 fragment below
+my $k=($i+2)%8; # picks the @I offset used to load the next $Ki
+ # each fragment below is appended only when its trailing condition holds
+$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); # last element of a 16-byte input vector: fetch the next vector and advance $inp
+ lvx_u @X[$i+1],0,$inp ; load X[i] in advance
+ addi $inp,$inp,16
+___
+$code.=<<___ if ($i<16 && ($i%(16/$SZ))); # not the first element of a vector: derive X[i] from X[i-1] with vsldoi by $SZ
+ vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ
+___
+$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); # little-endian only: permute a freshly loaded vector through $lemask
+ vperm @X[$i],@X[$i],@X[$i],$lemask
+___
+$code.=<<___ if ($i>=15); # from round 15 on: add two vshasigma terms and X[(j+9)%16] into X[j]
+ vshasigma${sz} $Sigma,@X[($j+1)%16],0,0
+ vaddu${sz}m @X[$j],@X[$j],$Sigma
+ vshasigma${sz} $Sigma,@X[($j+14)%16],0,15
+ vaddu${sz}m @X[$j],@X[$j],$Sigma
+ vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]
+___
+$code.=<<___; # the round itself, emitted for every $i
+ vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i]
+ vsel $Func,$g,$f,$e ; Ch(e,f,g)
+ vaddu${sz}m $g,$g,$Ki ; future h+=K[i]
+ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g)
+ vshasigma${sz} $Sigma,$e,1,15 ; Sigma1(e)
+ vaddu${sz}m $h,$h,$Sigma ; h+=Sigma1(e)
+ vxor $Func,$a,$b
+ vsel $Func,$b,$c,$Func ; Maj(a,b,c)
+ vaddu${sz}m $d,$d,$h ; d+=h
+ vshasigma${sz} $Sigma,$a,1,0 ; Sigma0(a)
+ vaddu${sz}m $Sigma,$Sigma,$Func ; Sigma0(a)+Maj(a,b,c)
+ vaddu${sz}m $h,$h,$Sigma ; h+=Sigma0(a)+Maj(a,b,c)
+ lvx $Ki,@I[$k],$idx ; load next K[i]
+___
+$code.=<<___ if ($k == 7); # the load above used the last of the eight offsets: move $idx on by 0x80
+ addi $idx,$idx,0x80
+___
+}
+
+$code=<<___;
+.machine "any"
+.text
+
+.globl $func
+.align 6
+$func:
+ $STU $sp,-$FRAME($sp)
+ mflr $lrsave
+ li r10,`$LOCALS+15`
+ li r11,`$LOCALS+31`
+ stvx v24,r10,$sp # ABI says so
+ addi r10,r10,32
+ mfspr $vrsave,256
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r11,-4096+255 # 0xfffff0ff
+ stw $vrsave,`$FRAME-6*$SIZE_T-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME-6*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME-5*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME-4*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME-3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME-2*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME-1*$SIZE_T`($sp)
+ li $x70,0x70
+ $PUSH $lrsave,`$FRAME+$LRSAVE`($sp)
+ mtspr 256,r11
+
+ bl LPICmeup
+ addi $offload,$sp,`8*$SIZE_T+15`
+___
+$code.=<<___ if ($LENDIAN);
+ li $idx,8
+ lvsl $lemask,0,$idx
+ vspltisb $Ki,0x0f
+ vxor $lemask,$lemask,$Ki
+___
+$code.=<<___ if ($SZ==4);
+ lvx_4w $A,$x00,$ctx
+ lvx_4w $E,$x10,$ctx
+ vsldoi $B,$A,$A,4 # unpack
+ vsldoi $C,$A,$A,8
+ vsldoi $D,$A,$A,12
+ vsldoi $F,$E,$E,4
+ vsldoi $G,$E,$E,8
+ vsldoi $H,$E,$E,12
+___
+$code.=<<___ if ($SZ==8);
+ lvx_u $A,$x00,$ctx
+ lvx_u $C,$x10,$ctx
+ lvx_u $E,$x20,$ctx
+ vsldoi $B,$A,$A,8 # unpack
+ lvx_u $G,$x30,$ctx
+ vsldoi $D,$C,$C,8
+ vsldoi $F,$E,$E,8
+ vsldoi $H,$G,$G,8
+___
+$code.=<<___;
+ li r0,`($rounds-16)/16` # inner loop counter
+ b Loop
+.align 5
+Loop:
+ lvx $Ki,$x00,$Tbl
+ lvx_u @X[0],0,$inp
+ addi $inp,$inp,16
+ mr $idx,$Tbl # copy $Tbl
+ stvx $A,$x00,$offload # offload $A-$H
+ stvx $B,$x10,$offload
+ stvx $C,$x20,$offload
+ stvx $D,$x30,$offload
+ stvx $E,$x40,$offload
+ stvx $F,$x50,$offload
+ stvx $G,$x60,$offload
+ stvx $H,$x70,$offload
+ vaddu${sz}m $H,$H,$Ki # h+K[i]
+ lvx $Ki,$x10,$Tbl
+___
+for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ mtctr r0
+ b L16_xx
+.align 5
+L16_xx:
+___
+for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ bdnz L16_xx
+
+ lvx @X[2],$x00,$offload
+ subic. $num,$num,1
+ lvx @X[3],$x10,$offload
+ vaddu${sz}m $A,$A,@X[2]
+ lvx @X[4],$x20,$offload
+ vaddu${sz}m $B,$B,@X[3]
+ lvx @X[5],$x30,$offload
+ vaddu${sz}m $C,$C,@X[4]
+ lvx @X[6],$x40,$offload
+ vaddu${sz}m $D,$D,@X[5]
+ lvx @X[7],$x50,$offload
+ vaddu${sz}m $E,$E,@X[6]
+ lvx @X[8],$x60,$offload
+ vaddu${sz}m $F,$F,@X[7]
+ lvx @X[9],$x70,$offload
+ vaddu${sz}m $G,$G,@X[8]
+ vaddu${sz}m $H,$H,@X[9]
+ bne Loop
+___
+$code.=<<___ if ($SZ==4);
+ lvx @X[0],$x20,$idx
+ vperm $A,$A,$B,$Ki # pack the answer
+ lvx @X[1],$x30,$idx
+ vperm $E,$E,$F,$Ki
+ vperm $A,$A,$C,@X[0]
+ vperm $E,$E,$G,@X[0]
+ vperm $A,$A,$D,@X[1]
+ vperm $E,$E,$H,@X[1]
+ stvx_4w $A,$x00,$ctx
+ stvx_4w $E,$x10,$ctx
+___
+$code.=<<___ if ($SZ==8);
+ vperm $A,$A,$B,$Ki # pack the answer
+ vperm $C,$C,$D,$Ki
+ vperm $E,$E,$F,$Ki
+ vperm $G,$G,$H,$Ki
+ stvx_u $A,$x00,$ctx
+ stvx_u $C,$x10,$ctx
+ stvx_u $E,$x20,$ctx
+ stvx_u $G,$x30,$ctx
+___
+$code.=<<___;
+ addi $offload,$sp,`$LOCALS+15`
+ mtlr $lrsave
+ mtspr 256,$vrsave
+ lvx v24,$x00,$offload # ABI says so
+ lvx v25,$x10,$offload
+ lvx v26,$x20,$offload
+ lvx v27,$x30,$offload
+ lvx v28,$x40,$offload
+ lvx v29,$x50,$offload
+ lvx v30,$x60,$offload
+ lvx v31,$x70,$offload
+ $POP r26,`$FRAME-6*$SIZE_T`($sp)
+ $POP r27,`$FRAME-5*$SIZE_T`($sp)
+ $POP r28,`$FRAME-4*$SIZE_T`($sp)
+ $POP r29,`$FRAME-3*$SIZE_T`($sp)
+ $POP r30,`$FRAME-2*$SIZE_T`($sp)
+ $POP r31,`$FRAME-1*$SIZE_T`($sp)
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,6,3,0
+ .long 0
+.size $func,.-$func
+___
+
+# Ugly hack here, because PPC assembler syntax seems to vary too
+# much from platform to platform...
+$code.=<<___;
+.align 6
+LPICmeup:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
+ addi $Tbl,$Tbl,`64-8`
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+ .space `64-9*4`
+___
+
+if ($SZ==8) {
+ local *table = sub {
+ foreach(@_) { $code.=".quad $_,$_\n"; }
+ };
+ table(
+ "0x428a2f98d728ae22","0x7137449123ef65cd",
+ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc",
+ "0x3956c25bf348b538","0x59f111f1b605d019",
+ "0x923f82a4af194f9b","0xab1c5ed5da6d8118",
+ "0xd807aa98a3030242","0x12835b0145706fbe",
+ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2",
+ "0x72be5d74f27b896f","0x80deb1fe3b1696b1",
+ "0x9bdc06a725c71235","0xc19bf174cf692694",
+ "0xe49b69c19ef14ad2","0xefbe4786384f25e3",
+ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65",
+ "0x2de92c6f592b0275","0x4a7484aa6ea6e483",
+ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5",
+ "0x983e5152ee66dfab","0xa831c66d2db43210",
+ "0xb00327c898fb213f","0xbf597fc7beef0ee4",
+ "0xc6e00bf33da88fc2","0xd5a79147930aa725",
+ "0x06ca6351e003826f","0x142929670a0e6e70",
+ "0x27b70a8546d22ffc","0x2e1b21385c26c926",
+ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df",
+ "0x650a73548baf63de","0x766a0abb3c77b2a8",
+ "0x81c2c92e47edaee6","0x92722c851482353b",
+ "0xa2bfe8a14cf10364","0xa81a664bbc423001",
+ "0xc24b8b70d0f89791","0xc76c51a30654be30",
+ "0xd192e819d6ef5218","0xd69906245565a910",
+ "0xf40e35855771202a","0x106aa07032bbd1b8",
+ "0x19a4c116b8d2d0c8","0x1e376c085141ab53",
+ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8",
+ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb",
+ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3",
+ "0x748f82ee5defb2fc","0x78a5636f43172f60",
+ "0x84c87814a1f0ab72","0x8cc702081a6439ec",
+ "0x90befffa23631e28","0xa4506cebde82bde9",
+ "0xbef9a3f7b2c67915","0xc67178f2e372532b",
+ "0xca273eceea26619c","0xd186b8c721c0c207",
+ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178",
+ "0x06f067aa72176fba","0x0a637dc5a2c898a6",
+ "0x113f9804bef90dae","0x1b710b35131c471b",
+ "0x28db77f523047d84","0x32caab7b40c72493",
+ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c",
+ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a",
+ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0");
+$code.=<<___ if (!$LENDIAN);
+.quad 0x0001020304050607,0x1011121314151617
+___
+$code.=<<___ if ($LENDIAN); # quad-swapped
+.quad 0x1011121314151617,0x0001020304050607
+___
+} else {
+ local *table = sub {
+ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; }
+ };
+ table(
+ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5",
+ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5",
+ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3",
+ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174",
+ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc",
+ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da",
+ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7",
+ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967",
+ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13",
+ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85",
+ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3",
+ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070",
+ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5",
+ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3",
+ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208",
+ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0");
+$code.=<<___ if (!$LENDIAN);
+.long 0x00010203,0x10111213,0x10111213,0x10111213
+.long 0x00010203,0x04050607,0x10111213,0x10111213
+.long 0x00010203,0x04050607,0x08090a0b,0x10111213
+___
+$code.=<<___ if ($LENDIAN); # word-swapped
+.long 0x10111213,0x10111213,0x10111213,0x00010203
+.long 0x10111213,0x10111213,0x04050607,0x00010203
+.long 0x10111213,0x08090a0b,0x04050607,0x00010203
+___
+}
+$code.=<<___;
+.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+# Replace every backtick-quoted expression in the generated text with its
+# evaluated result, then emit the text.
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+# STDOUT is a pipe into ppc-xlate.pl.  close returns false on a write error
+# or when the child exits with a non-zero status (in which case $! is 0 and
+# $? holds the status), so check it instead of exiting successfully.
+close STDOUT or die "error closing STDOUT: $! (child status $?)";
diff --git a/security/nss/lib/freebl/secmpi.c b/security/nss/lib/freebl/secmpi.c
new file mode 100644
index 0000000000..7d6ee4405b
--- /dev/null
+++ b/security/nss/lib/freebl/secmpi.c
@@ -0,0 +1,28 @@
+#include "blapi.h"
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/*
+ * Overwrite the MP_USED(a) digits of `a` with bytes from
+ * RNG_GenerateGlobalRandomBytes and mark the value as non-negative.
+ * Returns MP_UNDEF when the RNG call does not report SECSuccess,
+ * MP_OKAY otherwise.
+ */
+mp_err
+mpp_random_secure(mp_int *a)
+{
+    unsigned char *digits = (unsigned char *)MP_DIGITS(a);
+    size_t nbytes = MP_USED(a) * sizeof(mp_digit);
+
+    if (RNG_GenerateGlobalRandomBytes(digits, nbytes) != SECSuccess) {
+        return MP_UNDEF;
+    }
+
+    MP_SIGN(a) = MP_ZPOS;
+    return MP_OKAY;
+}
+
+/*
+ * Run mpp_pprime_ext_random on `a` with `nt` as given, passing
+ * mpp_random_secure as the random-number callback.
+ */
+mp_err
+mpp_pprime_secure(mp_int *a, int nt)
+{
+    mp_err res = mpp_pprime_ext_random(a, nt, mpp_random_secure);
+
+    return res;
+}
+
+/*
+ * Run mpp_make_prime_ext_random with the caller's arguments, passing
+ * mpp_random_secure as the random-number callback.
+ */
+mp_err
+mpp_make_prime_secure(mp_int *start, mp_size nBits, mp_size strong)
+{
+    mp_err res = mpp_make_prime_ext_random(start, nBits, strong, mpp_random_secure);
+
+    return res;
+}
diff --git a/security/nss/lib/freebl/secmpi.h b/security/nss/lib/freebl/secmpi.h
new file mode 100644
index 0000000000..53f9a53c10
--- /dev/null
+++ b/security/nss/lib/freebl/secmpi.h
@@ -0,0 +1,63 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+
+#define CHECK_SEC_OK(func) \
+ if (SECSuccess != (rv = func)) \
+ goto cleanup
+
+#define CHECK_MPI_OK(func) \
+ if (MP_OKAY > (err = func)) \
+ goto cleanup
+
+#define OCTETS_TO_MPINT(oc, mp, len) \
+ CHECK_MPI_OK(mp_read_unsigned_octets((mp), oc, len))
+
+#define SECITEM_TO_MPINT(it, mp) \
+ CHECK_MPI_OK(mp_read_unsigned_octets((mp), (it).data, (it).len))
+
+#define MPINT_TO_SECITEM(mp, it, arena) \
+ do { \
+ int mpintLen = mp_unsigned_octet_size(mp); \
+ if (mpintLen <= 0) { \
+ err = MP_RANGE; \
+ goto cleanup; \
+ } \
+ SECITEM_AllocItem(arena, (it), mpintLen); \
+ if ((it)->data == NULL) { \
+ err = MP_MEM; \
+ goto cleanup; \
+ } \
+ err = mp_to_unsigned_octets(mp, (it)->data, (it)->len); \
+ if (err < 0) \
+ goto cleanup; \
+ else \
+ err = MP_OKAY; \
+ } while (0)
+
+#define MP_TO_SEC_ERROR(err) \
+ switch (err) { \
+ case MP_MEM: \
+ PORT_SetError(SEC_ERROR_NO_MEMORY); \
+ break; \
+ case MP_RANGE: \
+ PORT_SetError(SEC_ERROR_BAD_DATA); \
+ break; \
+ case MP_BADARG: \
+ PORT_SetError(SEC_ERROR_INVALID_ARGS); \
+ break; \
+ default: \
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); \
+ break; \
+ }
+
+/* Fill the `used` digits of an mp_int with random bits */
+mp_err mpp_random_secure(mp_int *a);
+
+/* Pseudo-primality testing using `mpp_random_secure` to choose Miller-Rabin base */
+mp_err mpp_pprime_secure(mp_int *a, int nt);
+
+/* Variant of `mpp_make_prime` using `mpp_random_secure` to choose Miller-Rabin base */
+mp_err mpp_make_prime_secure(mp_int *start, mp_size nBits, mp_size strong);
diff --git a/security/nss/lib/freebl/secrng.h b/security/nss/lib/freebl/secrng.h
new file mode 100644
index 0000000000..19eae48331
--- /dev/null
+++ b/security/nss/lib/freebl/secrng.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SECRNG_H_
+#define _SECRNG_H_
+/*
+ * secrng.h - public data structures and prototypes for the secure random
+ * number generator
+ */
+
+/******************************************/
+/*
+** Random number generation. A cryptographically strong random number
+** generator.
+*/
+
+#include "blapi.h"
+
+/* the number of bytes to read from the system random number generator */
+#define SYSTEM_RNG_SEED_COUNT 1024
+
+SEC_BEGIN_PROTOS
+
+/*
+** The following functions are provided by the security library
+** but are differently implemented for the UNIX, Win, and OS/2
+** versions
+*/
+
+/*
+** Get the "noisiest" information available on the system.
+** The amount of data returned depends on the system implementation.
+** It will not exceed maxbytes, but may be (much) less.
+** Returns number of noise bytes copied into buf, or zero if error.
+*/
+extern size_t RNG_GetNoise(void *buf, size_t maxbytes);
+
+/*
+** RNG_SystemInfoForRNG should be called before any use of SSL. It
+** gathers up the system specific information to help seed the
+** state of the global random number generator.
+*/
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+** Use the contents (and stat) of a file to help seed the
+** global random number generator.
+*/
+extern void RNG_FileForRNG(const char *filename);
+
+/*
+** Get maxbytes bytes of random data from the system random number
+** generator.
+** Returns the number of bytes copied into buf -- maxbytes if success
+** or zero if error.
+** Errors:
+** PR_NOT_IMPLEMENTED_ERROR There is no system RNG on the platform.
+** SEC_ERROR_NEED_RANDOM The system RNG failed.
+*/
+extern size_t RNG_SystemRNG(void *buf, size_t maxbytes);
+
+SEC_END_PROTOS
+
+#endif /* _SECRNG_H_ */
diff --git a/security/nss/lib/freebl/sha-fast-amd64-sun.s b/security/nss/lib/freebl/sha-fast-amd64-sun.s
new file mode 100644
index 0000000000..6430469a4e
--- /dev/null
+++ b/security/nss/lib/freebl/sha-fast-amd64-sun.s
@@ -0,0 +1,2151 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "sha_fast.c"
+ .text
+ .align 16
+.globl SHA1_Begin
+ .type SHA1_Begin, @function
+SHA1_Begin: / stores a zero and five 32-bit constants, each as a 64-bit slot, at offsets 64..104 from %rdi
+.LFB4:
+ movl $4023233417, %ecx / 0xEFCDAB89; movl leaves the upper half of %rcx zero
+ movl $2562383102, %edx / 0x98BADCFE
+ movl $3285377520, %eax / 0xC3D2E1F0
+ movq $0, 64(%rdi) / NOTE(review): presumably the byte counter of the context - confirm against the C structure
+ movq $1732584193, 72(%rdi) / 0x67452301
+ movq %rcx, 80(%rdi)
+ movq %rdx, 88(%rdi)
+ movq $271733878, 96(%rdi) / 0x10325476
+ movq %rax, 104(%rdi)
+ ret
+.LFE4:
+ .size SHA1_Begin, .-SHA1_Begin
+ .align 16
+ .type shaCompress, @function
+shaCompress:
+.LFB7:
+ pushq %r15
+.LCFI0:
+ pushq %r14
+.LCFI1:
+ pushq %r13
+.LCFI2:
+ pushq %r12
+.LCFI3:
+ movq -88(%rdi), %r12
+ movq -80(%rdi), %r10
+ movq -72(%rdi), %r13
+ movq -64(%rdi), %r8
+ pushq %rbx
+.LCFI4:
+ movq -56(%rdi), %rcx
+ movl (%rsi), %eax
+ movl %r12d, %edx
+ movq %r13, %r9
+ roll $5, %edx
+ movl 4(%rsi), %ebx
+ xorq %r8, %r9
+/APP
+ bswap %eax
+/NO_APP
+ andq %r10, %r9
+ mov %eax, %r15d
+ roll $30, %r10d
+ movq %r15, -48(%rdi)
+ xorq %r8, %r9
+ movq -48(%rdi), %r14
+ addq %r9, %rdx
+ movq %r10, %rax
+ movl %r12d, %r15d
+ addq %rcx, %rdx
+ xorq %r13, %rax
+ roll $30, %r15d
+ leaq 1518500249(%rdx,%r14), %rdx
+ andq %r12, %rax
+ movq %r15, %r12
+/APP
+ bswap %ebx
+/NO_APP
+ movl %edx, %ecx
+ mov %ebx, %r11d
+ xorq %r13, %rax
+ movq %r11, -40(%rdi)
+ roll $5, %ecx
+ movq -40(%rdi), %r9
+ addq %rax, %rcx
+ xorq %r10, %r12
+ movl 8(%rsi), %r14d
+ addq %r8, %rcx
+ andq %rdx, %r12
+ movl %edx, %r11d
+ leaq 1518500249(%rcx,%r9), %rcx
+ xorq %r10, %r12
+ roll $30, %r11d
+/APP
+ bswap %r14d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r14d, %ebx
+ movl 12(%rsi), %r9d
+ movq %rbx, -32(%rdi)
+ roll $5, %r8d
+ movq -32(%rdi), %rax
+ addq %r12, %r8
+ movq %r11, %r12
+ movl %ecx, %ebx
+ addq %r13, %r8
+ xorq %r15, %r12
+ roll $30, %ebx
+ leaq 1518500249(%r8,%rax), %r8
+ andq %rcx, %r12
+ movl 16(%rsi), %eax
+/APP
+ bswap %r9d
+/NO_APP
+ movl %r8d, %edx
+ mov %r9d, %r14d
+ xorq %r15, %r12
+ movq %r14, -24(%rdi)
+ roll $5, %edx
+ movq -24(%rdi), %r13
+ addq %r12, %rdx
+ movq %rbx, %r12
+ movl %r8d, %r14d
+ addq %r10, %rdx
+ leaq 1518500249(%rdx,%r13), %rdx
+ movl 20(%rsi), %r13d
+/APP
+ bswap %eax
+/NO_APP
+ movl %edx, %ecx
+ mov %eax, %r9d
+ roll $5, %ecx
+ xorq %r11, %r12
+ movq %r9, -16(%rdi)
+ andq %r8, %r12
+ movq -16(%rdi), %r10
+ roll $30, %r14d
+ xorq %r11, %r12
+ movq %r14, %rax
+ movl %edx, %r9d
+ addq %r12, %rcx
+ xorq %rbx, %rax
+ roll $30, %r9d
+ addq %r15, %rcx
+ andq %rdx, %rax
+ leaq 1518500249(%rcx,%r10), %rcx
+ xorq %rbx, %rax
+ movl 24(%rsi), %r10d
+/APP
+ bswap %r13d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r13d, %r15d
+ movq %r15, -8(%rdi)
+ roll $5, %r8d
+ movq -8(%rdi), %r12
+ addq %rax, %r8
+ movl %ecx, %r15d
+ addq %r11, %r8
+ movq %r9, %r11
+ roll $30, %r15d
+ leaq 1518500249(%r8,%r12), %r8
+ xorq %r14, %r11
+ movl 28(%rsi), %r12d
+/APP
+ bswap %r10d
+/NO_APP
+ andq %rcx, %r11
+ mov %r10d, %r13d
+ movl %r8d, %edx
+ movq %r13, (%rdi)
+ xorq %r14, %r11
+ movq (%rdi), %rax
+ roll $5, %edx
+ movq %r15, %r10
+ movl %r8d, %r13d
+ addq %r11, %rdx
+ xorq %r9, %r10
+ roll $30, %r13d
+ addq %rbx, %rdx
+ andq %r8, %r10
+ leaq 1518500249(%rdx,%rax), %rdx
+ xorq %r9, %r10
+ movl 32(%rsi), %eax
+/APP
+ bswap %r12d
+/NO_APP
+ movl %edx, %ecx
+ mov %r12d, %ebx
+ movq %rbx, 8(%rdi)
+ roll $5, %ecx
+ movq 8(%rdi), %r11
+ addq %r10, %rcx
+ movq %r13, %r10
+ movl %edx, %ebx
+ addq %r14, %rcx
+ leaq 1518500249(%rcx,%r11), %rcx
+/APP
+ bswap %eax
+/NO_APP
+ movl %ecx, %r8d
+ mov %eax, %r12d
+ roll $5, %r8d
+ xorq %r15, %r10
+ movq %r12, 16(%rdi)
+ andq %rdx, %r10
+ movq 16(%rdi), %r14
+ roll $30, %ebx
+ xorq %r15, %r10
+ movq %rbx, %rax
+ movl 36(%rsi), %r11d
+ addq %r10, %r8
+ xorq %r13, %rax
+ movl %ecx, %r12d
+ addq %r9, %r8
+ andq %rcx, %rax
+ roll $30, %r12d
+ leaq 1518500249(%r8,%r14), %r8
+ xorq %r13, %rax
+ movl 40(%rsi), %r14d
+/APP
+ bswap %r11d
+/NO_APP
+ movl %r8d, %edx
+ mov %r11d, %r9d
+ movq %r12, %r11
+ movq %r9, 24(%rdi)
+ roll $5, %edx
+ movq 24(%rdi), %r10
+ addq %rax, %rdx
+ xorq %rbx, %r11
+ movl %r8d, %r9d
+ addq %r15, %rdx
+ andq %r8, %r11
+ roll $30, %r9d
+ leaq 1518500249(%rdx,%r10), %rdx
+ xorq %rbx, %r11
+ movl 44(%rsi), %r10d
+/APP
+ bswap %r14d
+/NO_APP
+ movl %edx, %ecx
+ mov %r14d, %r15d
+ movq %r15, 32(%rdi)
+ roll $5, %ecx
+ movq 32(%rdi), %rax
+ addq %r11, %rcx
+ movq %r9, %r11
+ movl %edx, %r15d
+ addq %r13, %rcx
+ xorq %r12, %r11
+ roll $30, %r15d
+ leaq 1518500249(%rcx,%rax), %rcx
+ andq %rdx, %r11
+ movl 48(%rsi), %eax
+/APP
+ bswap %r10d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r10d, %r14d
+ xorq %r12, %r11
+ movq %r14, 40(%rdi)
+ roll $5, %r8d
+ movq 40(%rdi), %r13
+ addq %r11, %r8
+ movq %r15, %r10
+ movl %ecx, %r14d
+ addq %rbx, %r8
+ xorq %r9, %r10
+ leaq 1518500249(%r8,%r13), %r8
+ movl 52(%rsi), %r13d
+/APP
+ bswap %eax
+/NO_APP
+ movl %r8d, %edx
+ mov %eax, %ebx
+ roll $5, %edx
+ andq %rcx, %r10
+ movq %rbx, 48(%rdi)
+ xorq %r9, %r10
+ movq 48(%rdi), %r11
+ roll $30, %r14d
+ addq %r10, %rdx
+ movq %r14, %rax
+ movl %r8d, %ebx
+ addq %r12, %rdx
+ xorq %r15, %rax
+ roll $30, %ebx
+ leaq 1518500249(%rdx,%r11), %rdx
+ andq %r8, %rax
+ movl 56(%rsi), %r11d
+/APP
+ bswap %r13d
+/NO_APP
+ movl %edx, %ecx
+ mov %r13d, %r12d
+ xorq %r15, %rax
+ movq %r12, 56(%rdi)
+ roll $5, %ecx
+ movq 56(%rdi), %r10
+ addq %rax, %rcx
+ movl %edx, %r12d
+ addq %r9, %rcx
+ movq %rbx, %r9
+ roll $30, %r12d
+ leaq 1518500249(%rcx,%r10), %rcx
+ xorq %r14, %r9
+ movl 60(%rsi), %r10d
+/APP
+ bswap %r11d
+/NO_APP
+ andq %rdx, %r9
+ mov %r11d, %r13d
+ movl %ecx, %r8d
+ movq %r13, 64(%rdi)
+ xorq %r14, %r9
+ movq 64(%rdi), %rax
+ roll $5, %r8d
+ movq %r12, %r11
+ movl %ecx, %r13d
+ addq %r9, %r8
+ xorq %rbx, %r11
+ roll $30, %r13d
+ addq %r15, %r8
+ andq %rcx, %r11
+ leaq 1518500249(%r8,%rax), %r8
+ xorq %rbx, %r11
+/APP
+ bswap %r10d
+/NO_APP
+ movl %r8d, %esi
+ mov %r10d, %r15d
+ movq %r15, 72(%rdi)
+ roll $5, %esi
+ movq 72(%rdi), %r9
+ movq 56(%rdi), %r10
+ movq 16(%rdi), %rcx
+ addq %r11, %rsi
+ movq -32(%rdi), %rdx
+ addq %r14, %rsi
+ movq -48(%rdi), %rax
+ leaq 1518500249(%rsi,%r9), %r14
+ movq %r13, %r11
+ movl %r8d, %r15d
+ xorq %rcx, %r10
+ xorq %rdx, %r10
+ movl %r14d, %ecx
+ xorl %eax, %r10d
+ roll %r10d
+ roll $5, %ecx
+ xorq %r12, %r11
+ andq %r8, %r11
+ movq %r10, -48(%rdi)
+ movq -48(%rdi), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ movl %r14d, %r10d
+ addq %r11, %rcx
+ movq 64(%rdi), %r11
+ movq 24(%rdi), %rdx
+ addq %rbx, %rcx
+ movq -24(%rdi), %rbx
+ movq -40(%rdi), %rax
+ leaq 1518500249(%rcx,%r9), %rcx
+ movq %r15, %r8
+ roll $30, %r10d
+ xorq %rdx, %r11
+ xorq %r13, %r8
+ xorq %rbx, %r11
+ andq %r14, %r8
+ movl %ecx, %r9d
+ xorl %eax, %r11d
+ xorq %r13, %r8
+ roll $5, %r9d
+ roll %r11d
+ addq %r8, %r9
+ movq %r10, %rax
+ movq %r11, -40(%rdi)
+ movq -40(%rdi), %rsi
+ addq %r12, %r9
+ movq 72(%rdi), %rbx
+ movq 32(%rdi), %rdx
+ xorq %r15, %rax
+ movq -16(%rdi), %r14
+ movq -32(%rdi), %r12
+ andq %rcx, %rax
+ leaq 1518500249(%r9,%rsi), %r9
+ xorq %r15, %rax
+ movl %ecx, %r11d
+ xorq %rdx, %rbx
+ roll $30, %r11d
+ xorq %r14, %rbx
+ movl %r9d, %esi
+ xorl %r12d, %ebx
+ roll $5, %esi
+ roll %ebx
+ addq %rax, %rsi
+ movq %rbx, -32(%rdi)
+ movq -32(%rdi), %r8
+ addq %r13, %rsi
+ movq -48(%rdi), %r12
+ movq 40(%rdi), %rdx
+ movq %r11, %r13
+ movq -8(%rdi), %r14
+ movq -24(%rdi), %rcx
+ movl %r9d, %ebx
+ leaq 1518500249(%rsi,%r8), %rsi
+ xorq %rdx, %r12
+ xorq %r14, %r12
+ movl %esi, %r8d
+ xorl %ecx, %r12d
+ roll %r12d
+ roll $5, %r8d
+ xorq %r10, %r13
+ andq %r9, %r13
+ movq %r12, -24(%rdi)
+ movq -24(%rdi), %rax
+ xorq %r10, %r13
+ roll $30, %ebx
+ movl %esi, %r12d
+ addq %r13, %r8
+ xorq %rbx, %rsi
+ roll $30, %r12d
+ addq %r15, %r8
+ movq -40(%rdi), %r15
+ movq 48(%rdi), %rdx
+ movq (%rdi), %r14
+ movq -16(%rdi), %r9
+ leaq 1518500249(%r8,%rax), %r13
+ xorq %r11, %rsi
+ xorq %rdx, %r15
+ movl %r13d, %ecx
+ xorq %r14, %r15
+ roll $5, %ecx
+ xorl %r9d, %r15d
+ addq %rsi, %rcx
+ roll %r15d
+ addq %r10, %rcx
+ movq %r15, -16(%rdi)
+ movq -16(%rdi), %rsi
+ movl %r13d, %r15d
+ movq -32(%rdi), %r14
+ movq 56(%rdi), %rax
+ xorq %r12, %r13
+ movq 8(%rdi), %rdx
+ movq -8(%rdi), %r10
+ xorq %rbx, %r13
+ leaq 1859775393(%rcx,%rsi), %r9
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rdx, %r14
+ movl %r9d, %esi
+ xorl %r10d, %r14d
+ roll $5, %esi
+ roll %r14d
+ addq %r13, %rsi
+ movq %r14, -8(%rdi)
+ movq -8(%rdi), %r8
+ addq %r11, %rsi
+ movq -24(%rdi), %r13
+ movq 64(%rdi), %rax
+ movl %r9d, %r14d
+ movq 16(%rdi), %rdx
+ movq (%rdi), %r11
+ xorq %r15, %r9
+ leaq 1859775393(%rsi,%r8), %r10
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %r8d
+ xorl %r11d, %r13d
+ roll $5, %r8d
+ roll %r13d
+ xorq %r12, %r9
+ roll $30, %r14d
+ addq %r9, %r8
+ movq %r13, (%rdi)
+ movq (%rdi), %rcx
+ addq %rbx, %r8
+ movq -16(%rdi), %rbx
+ movq 72(%rdi), %rax
+ movq 24(%rdi), %rdx
+ movq 8(%rdi), %r9
+ movl %r10d, %r13d
+ leaq 1859775393(%r8,%rcx), %r11
+ xorq %r14, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ xorq %r15, %r10
+ xorq %rdx, %rbx
+ movl %r11d, %ecx
+ xorl %r9d, %ebx
+ roll $5, %ecx
+ roll %ebx
+ addq %r10, %rcx
+ movq %rbx, 8(%rdi)
+ movq 8(%rdi), %rsi
+ addq %r12, %rcx
+ movq -8(%rdi), %r12
+ movq -48(%rdi), %rax
+ movl %r11d, %ebx
+ movq 32(%rdi), %rdx
+ movq 16(%rdi), %r9
+ xorq %r13, %r11
+ leaq 1859775393(%rcx,%rsi), %r10
+ xorq %r14, %r11
+ roll $30, %ebx
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %esi
+ xorl %r9d, %r12d
+ roll $5, %esi
+ roll %r12d
+ addq %r11, %rsi
+ movq %r12, 16(%rdi)
+ addq %r15, %rsi
+ movq 16(%rdi), %r8
+ movq (%rdi), %r15
+ movq -40(%rdi), %rax
+ movl %r10d, %r12d
+ movq 40(%rdi), %rdx
+ movq 24(%rdi), %r9
+ xorq %rbx, %r10
+ leaq 1859775393(%rsi,%r8), %r11
+ xorq %r13, %r10
+ xorq %rax, %r15
+ xorq %rdx, %r15
+ movl %r11d, %r8d
+ xorl %r9d, %r15d
+ roll $5, %r8d
+ roll %r15d
+ addq %r10, %r8
+ movq %r15, 24(%rdi)
+ movq 24(%rdi), %rcx
+ addq %r14, %r8
+ movq 8(%rdi), %r14
+ movq -32(%rdi), %rax
+ roll $30, %r12d
+ movq 48(%rdi), %rdx
+ movq 32(%rdi), %r10
+ movl %r11d, %r15d
+ leaq 1859775393(%r8,%rcx), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rbx, %r11
+ xorq %rdx, %r14
+ movl %r9d, %ecx
+ xorl %r10d, %r14d
+ roll $5, %ecx
+ roll %r14d
+ addq %r11, %rcx
+ movq %r14, 32(%rdi)
+ addq %r13, %rcx
+ movq 32(%rdi), %rsi
+ movq 16(%rdi), %r13
+ movq -24(%rdi), %rax
+ movl %r9d, %r14d
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r11
+ xorq %r15, %r9
+ leaq 1859775393(%rcx,%rsi), %r10
+ xorq %r12, %r9
+ roll $30, %r14d
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %esi
+ xorl %r11d, %r13d
+ roll $5, %esi
+ roll %r13d
+ addq %r9, %rsi
+ movq %r13, 40(%rdi)
+ movq 40(%rdi), %r8
+ addq %rbx, %rsi
+ movq 24(%rdi), %rbx
+ movq -16(%rdi), %rax
+ movl %r10d, %r13d
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %r9
+ xorq %r14, %r10
+ leaq 1859775393(%rsi,%r8), %r11
+ xorq %r15, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ xorq %rdx, %rbx
+ movl %r11d, %r8d
+ xorl %r9d, %ebx
+ roll $5, %r8d
+ roll %ebx
+ addq %r10, %r8
+ movq %rbx, 48(%rdi)
+ addq %r12, %r8
+ movq 48(%rdi), %rcx
+ movq 32(%rdi), %r12
+ movq -8(%rdi), %rax
+ movl %r11d, %ebx
+ movq 72(%rdi), %rdx
+ movq 56(%rdi), %r9
+ leaq 1859775393(%r8,%rcx), %r10
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %ecx
+ xorl %r9d, %r12d
+ xorq %r13, %r11
+ roll $5, %ecx
+ xorq %r14, %r11
+ roll %r12d
+ roll $30, %ebx
+ addq %r11, %rcx
+ movq %r12, 56(%rdi)
+ movq 56(%rdi), %rsi
+ addq %r15, %rcx
+ movq 40(%rdi), %r15
+ movq (%rdi), %rax
+ movq -48(%rdi), %rdx
+ movq 64(%rdi), %r9
+ movl %r10d, %r12d
+ leaq 1859775393(%rcx,%rsi), %r11
+ xorq %rbx, %r10
+ roll $30, %r12d
+ xorq %rax, %r15
+ xorq %r13, %r10
+ xorq %rdx, %r15
+ movl %r11d, %esi
+ xorl %r9d, %r15d
+ roll $5, %esi
+ roll %r15d
+ addq %r10, %rsi
+ movq %r15, 64(%rdi)
+ movq 64(%rdi), %r8
+ addq %r14, %rsi
+ movq 48(%rdi), %r14
+ movq 8(%rdi), %rax
+ movl %r11d, %r15d
+ movq -40(%rdi), %rdx
+ movq 72(%rdi), %r10
+ xorq %r12, %r11
+ leaq 1859775393(%rsi,%r8), %r9
+ xorq %rbx, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rdx, %r14
+ movl %r9d, %r8d
+ xorl %r10d, %r14d
+ roll $5, %r8d
+ roll %r14d
+ addq %r11, %r8
+ movq %r14, 72(%rdi)
+ addq %r13, %r8
+ movq 72(%rdi), %rcx
+ movq 56(%rdi), %r13
+ movq 16(%rdi), %rax
+ movl %r9d, %r14d
+ movq -32(%rdi), %rdx
+ movq -48(%rdi), %r11
+ leaq 1859775393(%r8,%rcx), %r10
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %ecx
+ xorl %r11d, %r13d
+ roll $5, %ecx
+ roll %r13d
+ xorq %r15, %r9
+ roll $30, %r14d
+ xorq %r12, %r9
+ movq %r13, -48(%rdi)
+ movq -48(%rdi), %rsi
+ addq %r9, %rcx
+ movl %r10d, %r13d
+ xorq %r14, %r10
+ addq %rbx, %rcx
+ movq 64(%rdi), %rbx
+ movq 24(%rdi), %rax
+ movq -24(%rdi), %rdx
+ leaq 1859775393(%rcx,%rsi), %r11
+ movq -40(%rdi), %r9
+ xorq %r15, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ movl %r11d, %esi
+ xorq %rdx, %rbx
+ roll $5, %esi
+ xorl %r9d, %ebx
+ addq %r10, %rsi
+ roll %ebx
+ addq %r12, %rsi
+ movq %rbx, -40(%rdi)
+ movq -40(%rdi), %r8
+ movl %r11d, %ebx
+ movq 72(%rdi), %r12
+ movq 32(%rdi), %rax
+ xorq %r13, %r11
+ movq -16(%rdi), %rdx
+ movq -32(%rdi), %r9
+ xorq %r14, %r11
+ leaq 1859775393(%rsi,%r8), %r10
+ roll $30, %ebx
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %r8d
+ xorl %r9d, %r12d
+ roll $5, %r8d
+ roll %r12d
+ addq %r11, %r8
+ movq %r12, -32(%rdi)
+ movq -32(%rdi), %rcx
+ addq %r15, %r8
+ movq -48(%rdi), %r15
+ movq 40(%rdi), %rax
+ movl %r10d, %r12d
+ movq -8(%rdi), %rdx
+ movq -24(%rdi), %r9
+ xorq %rbx, %r10
+ leaq 1859775393(%r8,%rcx), %r11
+ xorq %r13, %r10
+ xorq %rax, %r15
+ xorq %rdx, %r15
+ movl %r11d, %ecx
+ xorl %r9d, %r15d
+ roll $5, %ecx
+ roll %r15d
+ addq %r10, %rcx
+ addq %r14, %rcx
+ movq %r15, -24(%rdi)
+ movq -24(%rdi), %rsi
+ movq -40(%rdi), %r14
+ movq 48(%rdi), %rax
+ roll $30, %r12d
+ movq (%rdi), %rdx
+ movq -16(%rdi), %r10
+ movl %r11d, %r15d
+ leaq 1859775393(%rcx,%rsi), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rbx, %r11
+ xorq %rdx, %r14
+ movl %r9d, %esi
+ xorl %r10d, %r14d
+ roll $5, %esi
+ roll %r14d
+ addq %r11, %rsi
+ movq %r14, -16(%rdi)
+ movq -16(%rdi), %r8
+ addq %r13, %rsi
+ movq -32(%rdi), %r11
+ movq 56(%rdi), %rax
+ movl %r9d, %r14d
+ movq 8(%rdi), %rdx
+ movq -8(%rdi), %r10
+ xorq %r15, %r9
+ leaq 1859775393(%rsi,%r8), %r13
+ xorq %r12, %r9
+ roll $30, %r14d
+ xorq %rax, %r11
+ xorq %rdx, %r11
+ movl %r13d, %r8d
+ xorl %r10d, %r11d
+ roll $5, %r8d
+ movl %r13d, %r10d
+ roll %r11d
+ addq %r9, %r8
+ xorq %r14, %r13
+ movq %r11, -8(%rdi)
+ addq %rbx, %r8
+ movq -8(%rdi), %rbx
+ movq -24(%rdi), %r9
+ movq 64(%rdi), %rax
+ xorq %r15, %r13
+ movq 16(%rdi), %rdx
+ movq (%rdi), %rcx
+ leaq 1859775393(%r8,%rbx), %r11
+ xorq %rax, %r9
+ xorq %rdx, %r9
+ movl %r11d, %ebx
+ xorl %ecx, %r9d
+ roll $5, %ebx
+ roll %r9d
+ addq %r13, %rbx
+ movq %r9, (%rdi)
+ movq (%rdi), %rsi
+ addq %r12, %rbx
+ movq -16(%rdi), %r12
+ movq 72(%rdi), %r13
+ movl %r11d, %r9d
+ leaq 1859775393(%rbx,%rsi), %rcx
+ movl %r10d, %ebx
+ movq 24(%rdi), %r10
+ movq 8(%rdi), %rax
+ xorq %r13, %r12
+ roll $30, %ebx
+ movl %ecx, %esi
+ xorq %r10, %r12
+ xorq %rbx, %r11
+ roll $5, %esi
+ xorl %eax, %r12d
+ xorq %r14, %r11
+ roll $30, %r9d
+ roll %r12d
+ addq %r11, %rsi
+ movq %rcx, %rax
+ movq %r12, 8(%rdi)
+ movq 8(%rdi), %rdx
+ addq %r15, %rsi
+ movq -8(%rdi), %r11
+ movq -48(%rdi), %r13
+ movl %ecx, %r12d
+ movq 32(%rdi), %r10
+ movq 16(%rdi), %r8
+ orq %r9, %rcx
+ leaq 1859775393(%rsi,%rdx), %rsi
+ andq %rbx, %rcx
+ andq %r9, %rax
+ xorq %r13, %r11
+ orq %rcx, %rax
+ roll $30, %r12d
+ xorq %r10, %r11
+ movq %rsi, %r10
+ xorl %r8d, %r11d
+ movl %esi, %r8d
+ andq %r12, %r10
+ roll %r11d
+ roll $5, %r8d
+ movq %r11, 16(%rdi)
+ addq %rax, %r8
+ movq 16(%rdi), %r15
+ movq (%rdi), %r13
+ movq -40(%rdi), %rdx
+ addq %r14, %r8
+ movq 40(%rdi), %r14
+ movq 24(%rdi), %rcx
+ movl %esi, %r11d
+ addq %r15, %r8
+ movl $2400959708, %r15d
+ orq %r12, %rsi
+ xorq %rdx, %r13
+ addq %r15, %r8
+ andq %r9, %rsi
+ xorq %r14, %r13
+ orq %rsi, %r10
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ roll %r13d
+ roll $5, %ecx
+ movq %r13, 24(%rdi)
+ addq %r10, %rcx
+ movq 24(%rdi), %rax
+ movq 8(%rdi), %r14
+ movq -32(%rdi), %rdx
+ addq %rbx, %rcx
+ movq 48(%rdi), %rbx
+ movq 32(%rdi), %rsi
+ roll $30, %r11d
+ addq %rax, %rcx
+ movl %r8d, %r13d
+ movq %r8, %r10
+ xorq %rdx, %r14
+ addq %r15, %rcx
+ orq %r11, %r8
+ xorq %rbx, %r14
+ andq %r12, %r8
+ andq %r11, %r10
+ xorl %esi, %r14d
+ movl %ecx, %esi
+ orq %r8, %r10
+ roll $5, %esi
+ roll %r14d
+ roll $30, %r13d
+ addq %r10, %rsi
+ movq %r14, 32(%rdi)
+ movq 32(%rdi), %rax
+ addq %r9, %rsi
+ movq 16(%rdi), %r9
+ movq -24(%rdi), %rdx
+ movq 56(%rdi), %rbx
+ movq 40(%rdi), %r8
+ movl %ecx, %r14d
+ addq %rax, %rsi
+ movq %rcx, %r10
+ orq %r13, %rcx
+ xorq %rdx, %r9
+ addq %r15, %rsi
+ andq %r11, %rcx
+ xorq %rbx, %r9
+ andq %r13, %r10
+ roll $30, %r14d
+ xorl %r8d, %r9d
+ movl %esi, %r8d
+ orq %rcx, %r10
+ roll %r9d
+ roll $5, %r8d
+ movq %r9, 40(%rdi)
+ addq %r10, %r8
+ movq 40(%rdi), %rax
+ movq 24(%rdi), %r10
+ movq -16(%rdi), %rdx
+ addq %r12, %r8
+ movq 64(%rdi), %rbx
+ movq 48(%rdi), %rcx
+ movl %esi, %r9d
+ addq %rax, %r8
+ movq %rsi, %r12
+ xorq %rdx, %r10
+ addq %r15, %r8
+ xorq %rbx, %r10
+ orq %r14, %rsi
+ andq %r14, %r12
+ andq %r13, %rsi
+ xorl %ecx, %r10d
+ movl %r8d, %ecx
+ orq %rsi, %r12
+ roll %r10d
+ roll $5, %ecx
+ movq %r10, 48(%rdi)
+ addq %r12, %rcx
+ movq 48(%rdi), %rax
+ movq 32(%rdi), %r12
+ movq -8(%rdi), %rdx
+ addq %r11, %rcx
+ movq 72(%rdi), %rbx
+ movq 56(%rdi), %rsi
+ roll $30, %r9d
+ addq %rax, %rcx
+ movl %r8d, %r10d
+ movq %r8, %r11
+ xorq %rdx, %r12
+ addq %r15, %rcx
+ orq %r9, %r8
+ xorq %rbx, %r12
+ andq %r14, %r8
+ andq %r9, %r11
+ xorl %esi, %r12d
+ movl %ecx, %esi
+ orq %r8, %r11
+ roll %r12d
+ roll $5, %esi
+ roll $30, %r10d
+ movq %r12, 56(%rdi)
+ addq %r11, %rsi
+ movq 56(%rdi), %rax
+ movq 40(%rdi), %r11
+ movq (%rdi), %rdx
+ addq %r13, %rsi
+ movq -48(%rdi), %rbx
+ movq 64(%rdi), %r8
+ movq %rcx, %r13
+ addq %rax, %rsi
+ andq %r10, %r13
+ movl %ecx, %r12d
+ xorq %rdx, %r11
+ addq %r15, %rsi
+ xorq %rbx, %r11
+ xorl %r8d, %r11d
+ movl %esi, %r8d
+ roll %r11d
+ roll $5, %r8d
+ orq %r10, %rcx
+ andq %r9, %rcx
+ movq %r11, 64(%rdi)
+ movq 64(%rdi), %rax
+ orq %rcx, %r13
+ roll $30, %r12d
+ movl %esi, %r11d
+ addq %r13, %r8
+ movq 48(%rdi), %r13
+ movq 8(%rdi), %rdx
+ movq -40(%rdi), %rbx
+ addq %r14, %r8
+ movq 72(%rdi), %rcx
+ addq %rax, %r8
+ movq %rsi, %r14
+ orq %r12, %rsi
+ xorq %rdx, %r13
+ addq %r15, %r8
+ andq %r10, %rsi
+ xorq %rbx, %r13
+ andq %r12, %r14
+ roll $30, %r11d
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ orq %rsi, %r14
+ roll %r13d
+ roll $5, %ecx
+ movq %r13, 72(%rdi)
+ addq %r14, %rcx
+ movq 72(%rdi), %rax
+ movq 56(%rdi), %r14
+ movq 16(%rdi), %rdx
+ addq %r9, %rcx
+ movq -32(%rdi), %rbx
+ movq -48(%rdi), %rsi
+ movl %r8d, %r13d
+ addq %rax, %rcx
+ movq %r8, %r9
+ orq %r11, %r8
+ xorq %rdx, %r14
+ addq %r15, %rcx
+ andq %r12, %r8
+ xorq %rbx, %r14
+ andq %r11, %r9
+ xorl %esi, %r14d
+ movl %ecx, %esi
+ orq %r8, %r9
+ roll $5, %esi
+ roll %r14d
+ addq %r9, %rsi
+ movq %r14, -48(%rdi)
+ movq -48(%rdi), %rax
+ addq %r10, %rsi
+ movq 64(%rdi), %r10
+ movq 24(%rdi), %rdx
+ movq -24(%rdi), %rbx
+ movq -40(%rdi), %r8
+ movl %ecx, %r14d
+ addq %rax, %rsi
+ roll $30, %r13d
+ movq %rcx, %r9
+ xorq %rdx, %r10
+ addq %r15, %rsi
+ orq %r13, %rcx
+ xorq %rbx, %r10
+ andq %r11, %rcx
+ andq %r13, %r9
+ xorl %r8d, %r10d
+ movl %esi, %r8d
+ orq %rcx, %r9
+ roll $5, %r8d
+ roll %r10d
+ roll $30, %r14d
+ addq %r9, %r8
+ movq %r10, -40(%rdi)
+ movq -40(%rdi), %rax
+ addq %r12, %r8
+ movq 72(%rdi), %r12
+ movq 32(%rdi), %rdx
+ movq -16(%rdi), %rbx
+ movq -32(%rdi), %rcx
+ movl %esi, %r10d
+ addq %rax, %r8
+ movq %rsi, %r9
+ orq %r14, %rsi
+ xorq %rdx, %r12
+ addq %r15, %r8
+ andq %r13, %rsi
+ xorq %rbx, %r12
+ andq %r14, %r9
+ roll $30, %r10d
+ xorl %ecx, %r12d
+ movl %r8d, %ecx
+ orq %rsi, %r9
+ roll $5, %ecx
+ roll %r12d
+ addq %r9, %rcx
+ movq %r12, -32(%rdi)
+ movq -32(%rdi), %rax
+ addq %r11, %rcx
+ movq -48(%rdi), %r11
+ movq 40(%rdi), %rdx
+ movq -8(%rdi), %rbx
+ movq -24(%rdi), %rsi
+ movl %r8d, %r12d
+ addq %rax, %rcx
+ movq %r8, %r9
+ xorq %rdx, %r11
+ addq %r15, %rcx
+ xorq %rbx, %r11
+ xorl %esi, %r11d
+ orq %r10, %r8
+ andq %r10, %r9
+ andq %r14, %r8
+ movl %ecx, %esi
+ roll %r11d
+ orq %r8, %r9
+ roll $5, %esi
+ movq %r11, -24(%rdi)
+ addq %r9, %rsi
+ movq -24(%rdi), %rax
+ roll $30, %r12d
+ addq %r13, %rsi
+ movq -40(%rdi), %r13
+ movq 48(%rdi), %rdx
+ movq (%rdi), %rbx
+ movq -16(%rdi), %r8
+ movl %ecx, %r11d
+ addq %rax, %rsi
+ movq %rcx, %r9
+ orq %r12, %rcx
+ xorq %rdx, %r13
+ addq %r15, %rsi
+ andq %r10, %rcx
+ xorq %rbx, %r13
+ andq %r12, %r9
+ roll $30, %r11d
+ xorl %r8d, %r13d
+ movl %esi, %r8d
+ orq %rcx, %r9
+ roll %r13d
+ roll $5, %r8d
+ movq %r13, -16(%rdi)
+ addq %r9, %r8
+ movq -16(%rdi), %rax
+ movq -32(%rdi), %r9
+ movq 56(%rdi), %rdx
+ addq %r14, %r8
+ movq 8(%rdi), %rcx
+ movq -8(%rdi), %rbx
+ movl %esi, %r13d
+ addq %rax, %r8
+ movq %rsi, %r14
+ orq %r11, %rsi
+ xorq %rdx, %r9
+ addq %r15, %r8
+ andq %r11, %r14
+ xorq %rcx, %r9
+ xorl %ebx, %r9d
+ movl %r8d, %ebx
+ roll %r9d
+ roll $5, %ebx
+ andq %r12, %rsi
+ orq %rsi, %r14
+ movq %r9, -8(%rdi)
+ movq -8(%rdi), %rax
+ addq %r14, %rbx
+ movq -24(%rdi), %r14
+ movq 64(%rdi), %rdx
+ movq 16(%rdi), %rcx
+ addq %r10, %rbx
+ movq (%rdi), %rsi
+ roll $30, %r13d
+ addq %rax, %rbx
+ movl %r8d, %r9d
+ xorq %rdx, %r14
+ addq %r15, %rbx
+ movq %r8, %r10
+ xorq %rcx, %r14
+ orq %r13, %r8
+ andq %r13, %r10
+ andq %r11, %r8
+ xorl %esi, %r14d
+ movl %ebx, %esi
+ orq %r8, %r10
+ roll $5, %esi
+ roll %r14d
+ addq %r10, %rsi
+ movq %r14, (%rdi)
+ movq (%rdi), %rax
+ addq %r12, %rsi
+ movq -16(%rdi), %r12
+ movq 72(%rdi), %rdx
+ movq 24(%rdi), %rcx
+ movq 8(%rdi), %r8
+ roll $30, %r9d
+ addq %rax, %rsi
+ movl %ebx, %r14d
+ movq %rbx, %r10
+ xorq %rdx, %r12
+ addq %r15, %rsi
+ orq %r9, %rbx
+ xorq %rcx, %r12
+ andq %r13, %rbx
+ andq %r9, %r10
+ xorl %r8d, %r12d
+ movl %esi, %r8d
+ orq %rbx, %r10
+ roll %r12d
+ roll $5, %r8d
+ movq %r12, 8(%rdi)
+ movq 8(%rdi), %rax
+ addq %r10, %r8
+ movq -8(%rdi), %rbx
+ movq -48(%rdi), %rdx
+ addq %r11, %r8
+ movq 32(%rdi), %r11
+ movq 16(%rdi), %rcx
+ movl %esi, %r12d
+ addq %rax, %r8
+ movq %rsi, %r10
+ addq %r15, %r8
+ xorq %rdx, %rbx
+ roll $30, %r14d
+ xorq %r11, %rbx
+ orq %r14, %rsi
+ andq %r14, %r10
+ xorl %ecx, %ebx
+ andq %r9, %rsi
+ movl %r8d, %ecx
+ roll %ebx
+ orq %rsi, %r10
+ roll $5, %ecx
+ movq %rbx, 16(%rdi)
+ movq 16(%rdi), %rsi
+ addq %r10, %rcx
+ movq (%rdi), %r11
+ movq -40(%rdi), %rax
+ addq %r13, %rcx
+ movq 40(%rdi), %rdx
+ movq 24(%rdi), %r13
+ roll $30, %r12d
+ addq %rsi, %rcx
+ movl %r8d, %ebx
+ movq %r8, %r10
+ xorq %rax, %r11
+ addq %r15, %rcx
+ orq %r12, %r8
+ xorq %rdx, %r11
+ andq %r14, %r8
+ andq %r12, %r10
+ xorl %r13d, %r11d
+ movl %ecx, %r13d
+ orq %r8, %r10
+ roll %r11d
+ roll $5, %r13d
+ roll $30, %ebx
+ movq %r11, 24(%rdi)
+ addq %r10, %r13
+ movq 24(%rdi), %rsi
+ movq 8(%rdi), %r10
+ movq -32(%rdi), %rax
+ addq %r9, %r13
+ movq 48(%rdi), %rdx
+ movq 32(%rdi), %r8
+ movl %ecx, %r11d
+ addq %rsi, %r13
+ movq %rcx, %r9
+ xorq %rax, %r10
+ addq %r15, %r13
+ xorq %rdx, %r10
+ xorl %r8d, %r10d
+ movl %r13d, %r8d
+ roll %r10d
+ orq %rbx, %rcx
+ andq %rbx, %r9
+ movq %r10, 32(%rdi)
+ andq %r12, %rcx
+ movl %r13d, %r10d
+ orq %rcx, %r9
+ roll $5, %r10d
+ movq 32(%rdi), %rsi
+ addq %r9, %r10
+ roll $30, %r11d
+ movq %r13, %rcx
+ addq %r14, %r10
+ movq 16(%rdi), %r14
+ movq -24(%rdi), %rax
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r9
+ addq %rsi, %r10
+ addq %r15, %r10
+ orq %r11, %r13
+ andq %r11, %rcx
+ xorq %rax, %r14
+ andq %rbx, %r13
+ xorq %rdx, %r14
+ orq %r13, %rcx
+ xorl %r9d, %r14d
+ movl %r10d, %r9d
+ roll %r14d
+ roll $5, %r9d
+ movq %r14, 40(%rdi)
+ movq 40(%rdi), %rsi
+ addq %rcx, %r9
+ movq 24(%rdi), %r13
+ addq %r12, %r9
+ movq -16(%rdi), %r12
+ movq 64(%rdi), %rax
+ movl %r10d, %r14d
+ addq %rsi, %r9
+ movl %r8d, %esi
+ addq %r15, %r9
+ movq 48(%rdi), %r15
+ xorq %r12, %r13
+ roll $30, %esi
+ xorq %rax, %r13
+ xorq %rsi, %r10
+ xorl %r15d, %r13d
+ movl %r9d, %r15d
+ xorq %r11, %r10
+ roll $5, %r15d
+ roll %r13d
+ addq %r10, %r15
+ movq %r13, 48(%rdi)
+ movq 48(%rdi), %r10
+ addq %rbx, %r15
+ movq 32(%rdi), %rbx
+ movq -8(%rdi), %r8
+ movq 72(%rdi), %rdx
+ movq 56(%rdi), %rcx
+ roll $30, %r14d
+ addq %r10, %r15
+ movl $3395469782, %r10d
+ movl %r9d, %r13d
+ xorq %r8, %rbx
+ addq %r10, %r15
+ xorq %r14, %r9
+ xorq %rdx, %rbx
+ xorq %rsi, %r9
+ roll $30, %r13d
+ xorl %ecx, %ebx
+ movl %r15d, %ecx
+ roll %ebx
+ roll $5, %ecx
+ movq %rbx, 56(%rdi)
+ addq %r9, %rcx
+ movq 56(%rdi), %r12
+ movq 40(%rdi), %r9
+ movq (%rdi), %rax
+ addq %r11, %rcx
+ movq -48(%rdi), %r8
+ movq 64(%rdi), %r11
+ movl %r15d, %ebx
+ addq %r12, %rcx
+ xorq %r13, %r15
+ roll $30, %ebx
+ xorq %rax, %r9
+ addq %r10, %rcx
+ xorq %r14, %r15
+ xorq %r8, %r9
+ xorl %r11d, %r9d
+ movl %ecx, %r11d
+ roll %r9d
+ roll $5, %r11d
+ movq %r9, 64(%rdi)
+ addq %r15, %r11
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %r15
+ movq 8(%rdi), %r12
+ addq %rsi, %r11
+ movq -40(%rdi), %rax
+ movq 72(%rdi), %r8
+ movl %ecx, %r9d
+ addq %rdx, %r11
+ xorq %r12, %r15
+ addq %r10, %r11
+ xorq %rax, %r15
+ xorl %r8d, %r15d
+ movl %r11d, %r8d
+ roll %r15d
+ roll $5, %r8d
+ xorq %rbx, %rcx
+ xorq %r13, %rcx
+ movq %r15, 72(%rdi)
+ movq 72(%rdi), %rsi
+ addq %rcx, %r8
+ movq 56(%rdi), %r12
+ movq 16(%rdi), %rcx
+ movq -32(%rdi), %rdx
+ addq %r14, %r8
+ movq -48(%rdi), %r14
+ addq %rsi, %r8
+ roll $30, %r9d
+ movl %r11d, %r15d
+ xorq %rcx, %r12
+ addq %r10, %r8
+ xorq %r9, %r11
+ xorq %rdx, %r12
+ xorq %rbx, %r11
+ roll $30, %r15d
+ xorl %r14d, %r12d
+ movl %r8d, %r14d
+ roll $5, %r14d
+ roll %r12d
+ addq %r11, %r14
+ movq %r12, -48(%rdi)
+ movq -48(%rdi), %rax
+ addq %r13, %r14
+ movq 64(%rdi), %r13
+ movq 24(%rdi), %rsi
+ movq -24(%rdi), %rcx
+ movq -40(%rdi), %r11
+ movl %r8d, %r12d
+ addq %rax, %r14
+ xorq %r15, %r8
+ roll $30, %r12d
+ xorq %rsi, %r13
+ addq %r10, %r14
+ xorq %r9, %r8
+ xorq %rcx, %r13
+ xorl %r11d, %r13d
+ movl %r14d, %r11d
+ roll $5, %r11d
+ roll %r13d
+ addq %r8, %r11
+ movq %r13, -40(%rdi)
+ movq -40(%rdi), %rdx
+ addq %rbx, %r11
+ movq 72(%rdi), %rbx
+ movq 32(%rdi), %rax
+ movq -16(%rdi), %rsi
+ movq -32(%rdi), %r8
+ movl %r14d, %r13d
+ addq %rdx, %r11
+ xorq %rax, %rbx
+ addq %r10, %r11
+ xorq %rsi, %rbx
+ xorl %r8d, %ebx
+ xorq %r12, %r14
+ movl %r11d, %r8d
+ xorq %r15, %r14
+ roll %ebx
+ roll $5, %r8d
+ movq %rbx, -32(%rdi)
+ addq %r14, %r8
+ movq -32(%rdi), %rcx
+ movq -48(%rdi), %r14
+ movq 40(%rdi), %rdx
+ addq %r9, %r8
+ movq -8(%rdi), %rax
+ movq -24(%rdi), %r9
+ roll $30, %r13d
+ addq %rcx, %r8
+ movl %r11d, %ebx
+ xorq %r13, %r11
+ xorq %rdx, %r14
+ addq %r10, %r8
+ xorq %r12, %r11
+ xorq %rax, %r14
+ roll $30, %ebx
+ xorl %r9d, %r14d
+ movl %r8d, %r9d
+ roll $5, %r9d
+ roll %r14d
+ addq %r11, %r9
+ movq %r14, -24(%rdi)
+ movq -24(%rdi), %rsi
+ addq %r15, %r9
+ movq -40(%rdi), %r15
+ movq 48(%rdi), %rcx
+ movq (%rdi), %rdx
+ movq -16(%rdi), %r11
+ movl %r8d, %r14d
+ addq %rsi, %r9
+ xorq %rbx, %r8
+ xorq %rcx, %r15
+ addq %r10, %r9
+ xorq %r13, %r8
+ xorq %rdx, %r15
+ xorl %r11d, %r15d
+ movl %r9d, %r11d
+ roll %r15d
+ roll $5, %r11d
+ movq %r15, -16(%rdi)
+ addq %r8, %r11
+ movq -16(%rdi), %rax
+ addq %r12, %r11
+ movq -32(%rdi), %r12
+ movq 56(%rdi), %rsi
+ movq 8(%rdi), %rcx
+ movq -8(%rdi), %r8
+ movl %r9d, %r15d
+ addq %rax, %r11
+ addq %r10, %r11
+ roll $30, %r14d
+ xorq %rsi, %r12
+ xorq %rcx, %r12
+ xorq %r14, %r9
+ roll $30, %r15d
+ xorl %r8d, %r12d
+ movl %r11d, %r8d
+ xorq %rbx, %r9
+ roll $5, %r8d
+ roll %r12d
+ addq %r9, %r8
+ movq %r12, -8(%rdi)
+ movq -8(%rdi), %rdx
+ addq %r13, %r8
+ movq -24(%rdi), %r13
+ movq 64(%rdi), %rax
+ movq 16(%rdi), %rsi
+ movq (%rdi), %rcx
+ movl %r11d, %r12d
+ addq %rdx, %r8
+ xorq %r15, %r11
+ roll $30, %r12d
+ xorq %rax, %r13
+ addq %r10, %r8
+ xorq %r14, %r11
+ xorq %rsi, %r13
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ roll $5, %ecx
+ roll %r13d
+ addq %r11, %rcx
+ movq %r13, (%rdi)
+ movq (%rdi), %r9
+ addq %rbx, %rcx
+ movq -16(%rdi), %rbx
+ movq 72(%rdi), %rdx
+ movq 24(%rdi), %rax
+ movq 8(%rdi), %rsi
+ movl %r8d, %r13d
+ addq %r9, %rcx
+ xorq %r12, %r8
+ xorq %rdx, %rbx
+ addq %r10, %rcx
+ xorq %r15, %r8
+ xorq %rax, %rbx
+ xorl %esi, %ebx
+ movl %ecx, %esi
+ roll $5, %esi
+ roll %ebx
+ addq %r8, %rsi
+ movq %rbx, 8(%rdi)
+ movq 8(%rdi), %r11
+ addq %r14, %rsi
+ movq -8(%rdi), %r14
+ movq -48(%rdi), %r9
+ movq 32(%rdi), %rdx
+ movq 16(%rdi), %r8
+ roll $30, %r13d
+ addq %r11, %rsi
+ movl %ecx, %ebx
+ xorq %r13, %rcx
+ xorq %r9, %r14
+ addq %r10, %rsi
+ xorq %r12, %rcx
+ xorq %rdx, %r14
+ roll $30, %ebx
+ xorl %r8d, %r14d
+ movl %esi, %r8d
+ roll $5, %r8d
+ roll %r14d
+ addq %rcx, %r8
+ movq %r14, 16(%rdi)
+ movq 16(%rdi), %rax
+ addq %r15, %r8
+ movq (%rdi), %r15
+ movq -40(%rdi), %r11
+ movq 40(%rdi), %r9
+ movq 24(%rdi), %rcx
+ movl %esi, %r14d
+ addq %rax, %r8
+ xorq %rbx, %rsi
+ roll $30, %r14d
+ xorq %r11, %r15
+ addq %r10, %r8
+ xorq %r13, %rsi
+ xorq %r9, %r15
+ xorl %ecx, %r15d
+ movl %r8d, %ecx
+ roll %r15d
+ roll $5, %ecx
+ movq %r15, 24(%rdi)
+ addq %rsi, %rcx
+ movq 24(%rdi), %rdx
+ movq 8(%rdi), %r11
+ movq -32(%rdi), %rax
+ addq %r12, %rcx
+ movq 48(%rdi), %r12
+ movq 32(%rdi), %rsi
+ movl %r8d, %r15d
+ addq %rdx, %rcx
+ xorq %rax, %r11
+ addq %r10, %rcx
+ xorq %r12, %r11
+ xorl %esi, %r11d
+ movl %ecx, %esi
+ roll %r11d
+ movq %r11, 32(%rdi)
+ movl %ecx, %r11d
+ movq 32(%rdi), %r9
+ roll $5, %r11d
+ xorq %r14, %r8
+ movq 16(%rdi), %r12
+ xorq %rbx, %r8
+ movq -24(%rdi), %rdx
+ movq 56(%rdi), %rax
+ addq %r8, %r11
+ movq 40(%rdi), %r8
+ roll $30, %r15d
+ addq %r13, %r11
+ xorq %r15, %rcx
+ addq %r9, %r11
+ xorq %rdx, %r12
+ xorq %r14, %rcx
+ addq %r10, %r11
+ xorq %rax, %r12
+ xorl %r8d, %r12d
+ movl %r11d, %r8d
+ roll $5, %r8d
+ roll %r12d
+ addq %rcx, %r8
+ movq %r12, 40(%rdi)
+ movq 40(%rdi), %r13
+ addq %rbx, %r8
+ movq 24(%rdi), %rbx
+ movq -16(%rdi), %r9
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %rcx
+ movl %r11d, %r12d
+ addq %r13, %r8
+ movl %esi, %r13d
+ roll $30, %r12d
+ xorq %r9, %rbx
+ addq %r10, %r8
+ roll $30, %r13d
+ xorq %rdx, %rbx
+ xorq %r13, %r11
+ xorl %ecx, %ebx
+ movl %r8d, %ecx
+ xorq %r15, %r11
+ roll %ebx
+ roll $5, %ecx
+ movq %rbx, 48(%rdi)
+ addq %r11, %rcx
+ movq 48(%rdi), %rax
+ movq 32(%rdi), %r11
+ movq -8(%rdi), %rsi
+ addq %r14, %rcx
+ movq 72(%rdi), %r9
+ movq 56(%rdi), %r14
+ movl %r8d, %ebx
+ addq %rax, %rcx
+ xorq %rsi, %r11
+ addq %r10, %rcx
+ xorq %r9, %r11
+ xorl %r14d, %r11d
+ xorq %r12, %r8
+ movl %ecx, %r14d
+ xorq %r13, %r8
+ roll %r11d
+ roll $5, %r14d
+ movq %r11, 56(%rdi)
+ addq %r8, %r14
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r8
+ movq (%rdi), %rax
+ addq %r15, %r14
+ movq -48(%rdi), %r15
+ movq 64(%rdi), %rsi
+ roll $30, %ebx
+ addq %rdx, %r14
+ movl %ecx, %r11d
+ xorq %rbx, %rcx
+ xorq %rax, %r8
+ addq %r10, %r14
+ xorq %r12, %rcx
+ xorq %r15, %r8
+ roll $30, %r11d
+ xorl %esi, %r8d
+ movl %r14d, %esi
+ roll %r8d
+ roll $5, %esi
+ movq %r8, 64(%rdi)
+ movq 64(%rdi), %r9
+ addq %rcx, %rsi
+ movq 48(%rdi), %r15
+ movq 8(%rdi), %rcx
+ addq %r13, %rsi
+ movq -40(%rdi), %rdx
+ movq 72(%rdi), %rax
+ movl %r14d, %r8d
+ addq %r9, %rsi
+ xorq %r11, %r14
+ addq %r10, %rsi
+ xorq %rcx, %r15
+ xorq %rbx, %r14
+ xorq %rdx, %r15
+ movl %esi, %r13d
+ xorl %eax, %r15d
+ roll $5, %r13d
+ roll %r15d
+ addq %r14, %r13
+ movq %r15, 72(%rdi)
+ addq %r12, %r13
+ movq 72(%rdi), %r12
+ addq %r12, %r13
+ addq %r10, %r13
+ movq -88(%rdi), %r10
+ roll $30, %r8d
+ addq %r13, %r10
+ movq %r10, -88(%rdi)
+ movq -80(%rdi), %r9
+ addq %rsi, %r9
+ movq %r9, -80(%rdi)
+ movq -72(%rdi), %rcx
+ addq %r8, %rcx
+ movq %rcx, -72(%rdi)
+ movq -64(%rdi), %rdx
+ addq %r11, %rdx
+ movq %rdx, -64(%rdi)
+ movq -56(%rdi), %rax
+ addq %rbx, %rax
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ movq %rax, -56(%rdi)
+ ret
+.LFE7:
+ .size shaCompress, .-shaCompress
+ .align 16
+/ SHA1_Update -- absorb input bytes into a SHA-1 context.
+/ Registers on entry, as consumed below:
+/   %rdi  context. Bytes 0..63 serve as the partial-block buffer, the
+/         quadword at offset 64 is the running byte count, and the address
+/         context+160 is what is passed to shaCompress.
+/   %rsi  input bytes
+/   %edx  number of input bytes (32-bit)
+.globl SHA1_Update
+ .type SHA1_Update, @function
+SHA1_Update:
+.LFB5:
+ pushq %rbp
+.LCFI5:
+ movq %rsp, %rbp
+.LCFI6:
+/ Callee-saved registers are parked in the frame; the working copies are
+/ %r13d = bytes still to consume, %r15 = context, %r14 = input cursor.
+ movq %r13, -24(%rbp)
+.LCFI7:
+ movq %r14, -16(%rbp)
+.LCFI8:
+ movl %edx, %r13d
+ movq %r15, -8(%rbp)
+.LCFI9:
+ movq %rbx, -40(%rbp)
+.LCFI10:
+ movq %rdi, %r15
+ movq %r12, -32(%rbp)
+.LCFI11:
+ subq $48, %rsp
+.LCFI12:
+/ Zero-length input: nothing to do.
+ testl %edx, %edx
+ movq %rsi, %r14
+ je .L243
+/ Add the length to the byte count. The old count mod 64 is the number of
+/ bytes already sitting in the buffer; it is kept at -44(%rbp) for .L256.
+ movq 64(%rdi), %rdx
+ mov %r13d, %ecx
+ leaq (%rdx,%rcx), %rax
+ movq %rax, 64(%rdi)
+ movl %edx, %eax
+ andl $63, %eax
+ movl %eax, -44(%rbp)
+ jne .L256
+.L245:
+/ Compress every whole 64-byte block straight from the input.
+ cmpl $63, %r13d
+ jbe .L253
+ leaq 160(%r15), %rbx
+ .align 16
+.L250:
+ movq %r14, %rsi
+ subl $64, %r13d
+ movq %rbx, %rdi
+ call shaCompress
+ addq $64, %r14
+ cmpl $63, %r13d
+ ja .L250
+.L253:
+/ Fewer than 64 bytes remain. If any, copy them to the start of the buffer
+/ by restoring the saved registers and tail-calling memcpy.
+ testl %r13d, %r13d
+ je .L243
+ mov %r13d, %edx
+ movq %r14, %rsi
+ movq %r15, %rdi
+ movq -40(%rbp), %rbx
+ movq -32(%rbp), %r12
+ movq -24(%rbp), %r13
+ movq -16(%rbp), %r14
+ movq -8(%rbp), %r15
+ leave
+ jmp memcpy@PLT
+ .align 16
+.L243:
+/ Plain return path: restore the saved registers and leave.
+ movq -40(%rbp), %rbx
+ movq -32(%rbp), %r12
+ movq -24(%rbp), %r13
+ movq -16(%rbp), %r14
+ movq -8(%rbp), %r15
+ leave
+ ret
+.L256:
+/ The buffer is partly full. Copy %ebx = min(64 - buffered, length) bytes to
+/ context + buffered, advance the cursor and shrink the remaining length,
+/ then compress the buffer if (buffered + copied) is a multiple of 64.
+/ Either way control continues at .L245.
+ movl $64, %ebx
+ mov %eax, %edi
+ subl %eax, %ebx
+ cmpl %ebx, %r13d
+ cmovb %r13d, %ebx
+ addq %r15, %rdi
+ mov %ebx, %r12d
+ subl %ebx, %r13d
+ movq %r12, %rdx
+ addq %r12, %r14
+ call memcpy@PLT
+ addl -44(%rbp), %ebx
+ andl $63, %ebx
+ jne .L245
+ leaq 160(%r15), %rdi
+ movq %r15, %rsi
+ call shaCompress
+ jmp .L245
+.LFE5:
+ .size SHA1_Update, .-SHA1_Update
+ .section .rodata
+ .align 32
+ .type bulk_pad.0, @object
+ .size bulk_pad.0, 64
+bulk_pad.0:
+ .byte -128
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .text
+ .align 16
+/ SHA1_End -- pad the message, run the final compression and write the
+/ 20-byte digest.
+/ Registers on entry, as consumed below:
+/   %rdi  context (same layout SHA1_Update uses)
+/   %rsi  output buffer; 20 bytes are written
+/   %rdx  optional pointer to a 32-bit length; receives 20 when non-NULL
+/ %rcx is not read by this routine.
+.globl SHA1_End
+ .type SHA1_End, @function
+SHA1_End:
+.LFB6:
+ pushq %rbp
+.LCFI13:
+ movq %rsp, %rbp
+.LCFI14:
+ movq %r12, -24(%rbp)
+.LCFI15:
+ movq %r13, -16(%rbp)
+.LCFI16:
+ movq %rsi, %r13
+ movq %r14, -8(%rbp)
+.LCFI17:
+ movq %rbx, -32(%rbp)
+.LCFI18:
+ subq $32, %rsp
+.LCFI19:
+/ %rbx = byte count read before padding, then shifted left by 3 (bit count).
+/ %edx = ((119 - buffered) & 63) + 1, a pad length in 1..64 that brings the
+/ buffered byte count to 56 (mod 64); the pad bytes come from bulk_pad.0.
+ movq 64(%rdi), %rbx
+ movq %rdx, %r14
+ movl $119, %edx
+ leaq bulk_pad.0(%rip), %rsi
+ movq %rdi, %r12
+ movl %ebx, %r8d
+ salq $3, %rbx
+ andl $63, %r8d
+ subl %r8d, %edx
+ andl $63, %edx
+ incl %edx
+ call SHA1_Update@PLT
+/ Store the bit count into buffer bytes 56..63, high 32 bits first, each
+/ half byte-swapped, then compress the buffer (context+160 is the state
+/ pointer handed to shaCompress, the context itself is the block).
+ movq %rbx, %rdi
+ movq %r12, %rsi
+ shrq $32, %rdi
+/APP
+ bswap %edi
+/NO_APP
+ movl %edi, 56(%r12)
+ leaq 160(%r12), %rdi
+/APP
+ bswap %ebx
+/NO_APP
+ movl %ebx, 60(%r12)
+ call shaCompress
+/ Read the low 32 bits of the five quadwords at context offsets 72, 80, 88,
+/ 96 and 104, byte-swap each, and store them to the output at 0, 4, 8, 12
+/ and 16. Saved registers are restored in between as they become free.
+ movl 72(%r12), %esi
+ movl 80(%r12), %ebx
+ movl 88(%r12), %ecx
+ movl 96(%r12), %edx
+ movl 104(%r12), %eax
+ movq 8(%rsp), %r12
+/APP
+ bswap %ebx
+ bswap %esi
+/NO_APP
+ movl %ebx, 4(%r13)
+ movl %esi, (%r13)
+/APP
+ bswap %ecx
+ bswap %edx
+/NO_APP
+ movl %ecx, 8(%r13)
+ movl %edx, 12(%r13)
+/APP
+ bswap %eax
+/NO_APP
+ movq (%rsp), %rbx
+ movl %eax, 16(%r13)
+/ Report the digest length only when the caller supplied a pointer.
+ cmpq $0, %r14
+ je .L133
+ movl $20, (%r14)
+.L133:
+ movq 16(%rsp), %r13
+ movq 24(%rsp), %r14
+ leave
+ ret
+.LFE6:
+ .size SHA1_End, .-SHA1_End
+ .align 16
+/ SHA1_NewContext -- tail-calls PORT_Alloc_Util(248); whatever that call
+/ returns is this routine's result. 248 is the same size the other context
+/ routines in this file copy and clear.
+.globl SHA1_NewContext
+ .type SHA1_NewContext, @function
+SHA1_NewContext:
+.LFB8:
+ movl $248, %edi
+ jmp PORT_Alloc_Util@PLT
+.LFE8:
+ .size SHA1_NewContext, .-SHA1_NewContext
+ .align 16
+/ SHA1_DestroyContext -- zero a context and optionally release it.
+/   %rdi  context; its 248 bytes are cleared with memset
+/   %esi  flag; when non-zero the context is also passed to PORT_Free_Util
+/ The pointer is handed to memset without a NULL check.
+.globl SHA1_DestroyContext
+ .type SHA1_DestroyContext, @function
+SHA1_DestroyContext:
+.LFB9:
+ pushq %rbp
+.LCFI20:
+ movl $248, %edx
+ movq %rsp, %rbp
+.LCFI21:
+ movq %rbx, -16(%rbp)
+.LCFI22:
+ movq %r12, -8(%rbp)
+.LCFI23:
+/ Keep the flag in %ebx and the context in %r12 across the memset call.
+ movl %esi, %ebx
+ subq $16, %rsp
+.LCFI24:
+ xorl %esi, %esi
+ movq %rdi, %r12
+ call memset@PLT
+ testl %ebx, %ebx
+ jne .L268
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ ret
+ .align 16
+.L268:
+/ Flag was set: restore the saved registers and tail-call PORT_Free_Util.
+ movq %r12, %rdi
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ jmp PORT_Free_Util@PLT
+.LFE9:
+ .size SHA1_DestroyContext, .-SHA1_DestroyContext
+ .align 16
+/ SHA1_HashBuf -- one-shot hash of a buffer using a context on the stack.
+/   %rdi  output buffer (forwarded to SHA1_End as its second argument)
+/   %rsi  input bytes
+/   %edx  input length in bytes (32-bit)
+/ Always returns 0 in %eax; results of the three calls are not inspected.
+.globl SHA1_HashBuf
+ .type SHA1_HashBuf, @function
+SHA1_HashBuf:
+.LFB10:
+ pushq %rbp
+.LCFI25:
+ movq %rsp, %rbp
+.LCFI26:
+ movq %rbx, -32(%rbp)
+.LCFI27:
+/ %rbx = address of the temporary context at -288(%rbp).
+ leaq -288(%rbp), %rbx
+ movq %r12, -24(%rbp)
+.LCFI28:
+ movq %r13, -16(%rbp)
+.LCFI29:
+ movq %r14, -8(%rbp)
+.LCFI30:
+ movq %rsi, %r13
+ subq $304, %rsp
+.LCFI31:
+ movq %rdi, %r14
+ movl %edx, %r12d
+ movq %rbx, %rdi
+ call SHA1_Begin@PLT
+ movl %r12d, %edx
+ movq %r13, %rsi
+ movq %rbx, %rdi
+ call SHA1_Update@PLT
+/ SHA1_End(context, output, &scratch length at -292(%rbp), 20).
+ leaq -292(%rbp), %rdx
+ movq %r14, %rsi
+ movq %rbx, %rdi
+ movl $20, %ecx
+ call SHA1_End@PLT
+ movq -32(%rbp), %rbx
+ movq -24(%rbp), %r12
+ xorl %eax, %eax
+ movq -16(%rbp), %r13
+ movq -8(%rbp), %r14
+ leave
+ ret
+.LFE10:
+ .size SHA1_HashBuf, .-SHA1_HashBuf
+ .align 16
+/ SHA1_Hash -- hash a NUL-terminated string.
+/   %rdi  output buffer
+/   %rsi  string; its length is taken with strlen
+/ Ends with a tail call to SHA1_HashBuf(output, string, length), so the
+/ result is whatever SHA1_HashBuf returns. Only the low 32 bits of the
+/ strlen result are passed on as the length.
+.globl SHA1_Hash
+ .type SHA1_Hash, @function
+SHA1_Hash:
+.LFB11:
+ pushq %rbp
+.LCFI32:
+ movq %rsp, %rbp
+.LCFI33:
+ movq %rbx, -16(%rbp)
+.LCFI34:
+ movq %r12, -8(%rbp)
+.LCFI35:
+ movq %rsi, %rbx
+ subq $16, %rsp
+.LCFI36:
+ movq %rdi, %r12
+ movq %rsi, %rdi
+ call strlen@PLT
+ movq %rbx, %rsi
+ movq %r12, %rdi
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ movl %eax, %edx
+ jmp SHA1_HashBuf@PLT
+.LFE11:
+ .size SHA1_Hash, .-SHA1_Hash
+ .align 16
+/ SHA1_FlattenSize -- returns the constant 248 in %eax; no argument is
+/ read. 248 matches the byte count SHA1_Flatten copies.
+.globl SHA1_FlattenSize
+ .type SHA1_FlattenSize, @function
+SHA1_FlattenSize:
+.LFB12:
+ movl $248, %eax
+ ret
+.LFE12:
+ .size SHA1_FlattenSize, .-SHA1_FlattenSize
+ .align 16
+/ SHA1_Flatten -- copy a context out to caller-supplied storage.
+/   %rdi  context (copy source)
+/   %rsi  destination; 248 bytes are written with memcpy
+/ Returns 0 in %eax.
+.globl SHA1_Flatten
+ .type SHA1_Flatten, @function
+SHA1_Flatten:
+.LFB13:
+ pushq %rbp
+.LCFI37:
+/ Swap the two incoming arguments into memcpy(dest, src, 248) order.
+ movq %rsi, %rax
+ movl $248, %edx
+ movq %rdi, %rsi
+ movq %rax, %rdi
+ movq %rsp, %rbp
+.LCFI38:
+ call memcpy@PLT
+ leave
+ xorl %eax, %eax
+ ret
+.LFE13:
+ .size SHA1_Flatten, .-SHA1_Flatten
+ .align 16
+/ SHA1_Resurrect -- allocate a new context and fill it from saved bytes.
+/   %rdi  source of 248 bytes to copy into the new context
+/ Returns the new context in %rax, or 0 when SHA1_NewContext returns 0.
+/ No other argument register is read.
+.globl SHA1_Resurrect
+ .type SHA1_Resurrect, @function
+SHA1_Resurrect:
+.LFB14:
+ pushq %rbp
+.LCFI39:
+ movq %rsp, %rbp
+.LCFI40:
+ movq %rbx, -16(%rbp)
+.LCFI41:
+ movq %r12, -8(%rbp)
+.LCFI42:
+ subq $16, %rsp
+.LCFI43:
+ movq %rdi, %r12
+ call SHA1_NewContext@PLT
+/ %rbx = new context; %eax is pre-cleared so the failure path returns 0.
+ movq %rax, %rbx
+ xorl %eax, %eax
+ testq %rbx, %rbx
+ je .L273
+ movl $248, %edx
+ movq %r12, %rsi
+ movq %rbx, %rdi
+ call memcpy@PLT
+ movq %rbx, %rax
+.L273:
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ ret
+.LFE14:
+ .size SHA1_Resurrect, .-SHA1_Resurrect
+ .align 16
+/ SHA1_Clone -- tail-calls memcpy with %rdi and %rsi passed through
+/ unchanged and a length of 248, i.e. memcpy(first arg, second arg, 248).
+.globl SHA1_Clone
+ .type SHA1_Clone, @function
+SHA1_Clone:
+.LFB15:
+ movl $248, %edx
+ jmp memcpy@PLT
+.LFE15:
+ .size SHA1_Clone, .-SHA1_Clone
+ .align 16
+/ SHA1_TraceState -- does no tracing; it only tail-calls
+/ PORT_SetError_Util(-5992) and ignores its own arguments.
+/ NOTE(review): -5992 is presumably NSPR's PR_NOT_IMPLEMENTED_ERROR --
+/ confirm against prerr.h.
+.globl SHA1_TraceState
+ .type SHA1_TraceState, @function
+SHA1_TraceState:
+.LFB16:
+ movl $-5992, %edi
+ jmp PORT_SetError_Util@PLT
+.LFE16:
+ .size SHA1_TraceState, .-SHA1_TraceState
+ .align 16
+/ SHA1_EndRaw -- write out the current state words without padding or
+/ compressing anything.
+/   %rdi  context; the quadwords at offsets 72, 80, 88, 96, 104 are read
+/   %rsi  output; the byte-swapped low 32 bits of each go to 0, 4, ..., 16
+/   %rdx  optional pointer to a 32-bit length; receives 20 when non-NULL
+/ The context is not modified.
+.globl SHA1_EndRaw
+ .type SHA1_EndRaw, @function
+SHA1_EndRaw:
+.LFB50:
+ movq 72(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, (%rsi)
+ movq 80(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 4(%rsi)
+ movq 88(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 8(%rsi)
+ movq 96(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 12(%rsi)
+ movq 104(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+/ The NULL test on %rdx is issued before the last store; movl does not
+/ alter the flags, so the je below still sees its result.
+ testq %rdx, %rdx
+ movl %eax, 16(%rsi)
+ je .L14
+ movl $20, (%rdx)
+.L14:
+/ NOTE(review): "rep; ret" looks like the two-byte return idiom compilers
+/ emit for a ret that is a branch target -- confirm before touching it.
+ rep
+ ret
+.LFE50:
+ .size SHA1_EndRaw, .-SHA1_EndRaw
diff --git a/security/nss/lib/freebl/sha1-armv8.c b/security/nss/lib/freebl/sha1-armv8.c
new file mode 100644
index 0000000000..63e4dad33e
--- /dev/null
+++ b/security/nss/lib/freebl/sha1-armv8.c
@@ -0,0 +1,264 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef USE_HW_SHA1
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error "Compiler option is invalid"
+#endif
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <arm_neon.h>
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11
+#else
+#define H2X 0
+#endif
+
+static void shaCompress(SHA_HW_t *X, const PRUint32 *datain);
+
+/*
+ * SHA: Compress the 64-byte block currently staged in the context buffer
+ * into the running hash state.
+ */
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+    SHA_HW_t *state = ctx->H + H2X;
+
+    shaCompress(state, ctx->u.w);
+}
+
+/*
+ * SHA: Add data to context.
+ *
+ * Appends len bytes from dataIn to the running hash. Bytes are staged in
+ * ctx->u.b until a full 64-byte block is available; whole blocks found in
+ * dataIn are compressed straight from the caller's buffer, and a tail
+ * shorter than one block is kept in ctx->u.b for the next call.
+ * A zero len is a no-op.
+ */
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+    unsigned int lenB;
+    unsigned int togo;
+
+    if (!len) {
+        return;
+    }
+
+    /* Number of bytes already staged in the buffer (byte count mod 64). */
+    lenB = (unsigned int)(ctx->size) & 63U;
+
+    /* Accumulate the byte count. */
+    ctx->size += len;
+
+    /*
+     *  Read the data into W and process blocks as they get full
+     */
+    if (lenB > 0) {
+        /* Top up the partially filled block first. */
+        togo = 64U - lenB;
+        if (len < togo) {
+            togo = len;
+        }
+        memcpy(ctx->u.b + lenB, dataIn, togo);
+        len -= togo;
+        dataIn += togo;
+        lenB = (lenB + togo) & 63U;
+        if (!lenB) {
+            shaCompress(&ctx->H[H2X], ctx->u.w);
+        }
+    }
+
+    while (len >= 64U) {
+        len -= 64U;
+        /*
+         * shaCompress only reads the block, so keep the const qualifier
+         * instead of casting it away.
+         * NOTE(review): dataIn is not guaranteed to be 4-byte aligned here.
+         */
+        shaCompress(&ctx->H[H2X], (const PRUint32 *)dataIn);
+        dataIn += 64U;
+    }
+
+    /* Stash the remaining tail (fewer than 64 bytes) for the next call. */
+    if (len) {
+        memcpy(ctx->u.b, dataIn, len);
+    }
+}
+
+/*
+ * SHA: Compression function, unrolled.
+ *
+ * Runs the 80 SHA-1 rounds over one 64-byte block using the ARMv8 SHA1
+ * intrinsics and adds the result into the five state words.
+ *
+ * X is the state pointer as passed by the callers in this file
+ * (&ctx->H[H2X]); inbuf points at the 16 big-endian message words.
+ */
+static void
+shaCompress(SHA_HW_t *X, const PRUint32 *inbuf)
+{
+/* Callers pass &H[H2X], so XH(n) addresses H[n]. */
+#define XH(n) X[n - H2X]
+
+ /* Round constants, one per group of 20 rounds, splatted across a vector. */
+ const uint32x4_t K0 = vdupq_n_u32(0x5a827999);
+ const uint32x4_t K1 = vdupq_n_u32(0x6ed9eba1);
+ const uint32x4_t K2 = vdupq_n_u32(0x8f1bbcdc);
+ const uint32x4_t K3 = vdupq_n_u32(0xca62c1d6);
+
+ /* Working state: a..d in one vector, e as a scalar. */
+ uint32x4_t abcd = vld1q_u32(&XH(0));
+ PRUint32 e = XH(4);
+
+ /* Keep the incoming state for the final feed-forward addition. */
+ const uint32x4_t origABCD = abcd;
+ const PRUint32 origE = e;
+
+ uint32x4_t w0 = vld1q_u32(inbuf);
+ uint32x4_t w1 = vld1q_u32(inbuf + 4);
+ uint32x4_t w2 = vld1q_u32(inbuf + 8);
+ uint32x4_t w3 = vld1q_u32(inbuf + 12);
+
+ /* Byte-reverse each 32-bit word of the message block. */
+ w0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w0)));
+ w1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w1)));
+ w2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w2)));
+ w3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(w3)));
+
+ /*
+ * t0 and t1 alternate as the "W + K" operand of each four-round group;
+ * every group also prepares the sum consumed two groups later.
+ */
+ uint32x4_t t0 = vaddq_u32(w0, K0);
+ uint32x4_t t1 = vaddq_u32(w1, K0);
+
+ /* e and tmpE alternate as the fifth state word between groups. */
+ PRUint32 tmpE;
+
+ /*
+ * Using the following ARM instructions to accelerate SHA1
+ *
+ * sha1c for rounds 0 - 19
+ * sha1p for rounds 20 - 39
+ * sha1m for rounds 40 - 59
+ * sha1p for rounds 60 - 79
+ * sha1su0 and sha1su1 for message schedule
+ * sha1h for rotate left 30
+ */
+
+ /* Round 0-3 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K0);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 4-7 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K0);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 8-11 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K0);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 12-15 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 16-19 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1cq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K1);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 20-23 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K1);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 24-27 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K1);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 28-31 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K1);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 32-35 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 36-39 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K2);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 40-43 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K2);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 44-47 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K2);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 48-51 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K2);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 52-55 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+ w1 = vsha1su0q_u32(w1, w2, w3);
+
+ /* Round 56-59 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1mq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w0, K3);
+ w1 = vsha1su1q_u32(w1, w0);
+ w2 = vsha1su0q_u32(w2, w3, w0);
+
+ /* Round 60-63 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w1, K3);
+ w2 = vsha1su1q_u32(w2, w1);
+ w3 = vsha1su0q_u32(w3, w0, w1);
+
+ /* Round 64-67 */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+ t0 = vaddq_u32(w2, K3);
+ w3 = vsha1su1q_u32(w3, w2);
+ w0 = vsha1su0q_u32(w0, w1, w2);
+
+ /* Round 68-71 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+ t1 = vaddq_u32(w3, K3);
+ w0 = vsha1su1q_u32(w0, w3);
+
+ /* Round 72-75 (no further message-schedule work is needed) */
+ tmpE = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, e, t0);
+
+ /* Round 76-79 */
+ e = vsha1h_u32(vgetq_lane_u32(abcd, 0));
+ abcd = vsha1pq_u32(abcd, tmpE, t1);
+
+ /* Feed-forward: add the state this block started from. */
+ e += origE;
+ abcd = vaddq_u32(origABCD, abcd);
+
+ vst1q_u32(&XH(0), abcd);
+ XH(4) = e;
+}
+
+#endif /* USE_HW_SHA1 */
diff --git a/security/nss/lib/freebl/sha256-armv8.c b/security/nss/lib/freebl/sha256-armv8.c
new file mode 100644
index 0000000000..17fe126c4c
--- /dev/null
+++ b/security/nss/lib/freebl/sha256-armv8.c
@@ -0,0 +1,203 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef USE_HW_SHA2
+
+#ifndef __ARM_FEATURE_CRYPTO
+#error "Compiler option is invalid"
+#endif
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#include "prtypes.h" /* for PRUintXX */
+#include "prlong.h"
+#include "blapi.h"
+#include "sha256.h"
+
+#include <arm_neon.h>
+
+/*
+ * SHA-256 constants, K256.
+ * The 64 round constants, 16-byte aligned; they are loaded four at a time
+ * into vectors k0..k15 by the functions below.
+ */
+static const PRUint32 __attribute__((aligned(16))) K256[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * ROUND(n, a, b, c, d): one step of the unrolled SHA-256 compression,
+ * covering the four message words held in vector a.
+ *
+ * Relies on names in the expanding scope: k0..k15 (constant vectors,
+ * selected by token-pasting k##n) and w0/w1 (the two halves of the
+ * working state, updated in place).
+ *
+ * For n < 12 the macro also rewrites a with the message-schedule words
+ * needed four steps later, derived from a, b, c and d; the last four
+ * steps skip this because no further words are required. n is always a
+ * literal at the call sites, so the test is a compile-time constant.
+ */
+#define ROUND(n, a, b, c, d) \
+ { \
+ uint32x4_t t = vaddq_u32(a, k##n); \
+ uint32x4_t wt = w0; \
+ w0 = vsha256hq_u32(w0, w1, t); \
+ w1 = vsha256h2q_u32(w1, wt, t); \
+ if (n < 12) { \
+ a = vsha256su0q_u32(a, b); \
+ a = vsha256su1q_u32(a, c, d); \
+ } \
+ }
+
+/*
+ * Compress the 64-byte block staged in ctx->u.b into the eight state
+ * words ctx->h[0..7], using the ARMv8 SHA-256 intrinsics.
+ */
+void
+SHA256_Compress_Native(SHA256Context *ctx)
+{
+ /* Round constants, four per vector; referenced by ROUND as k##n. */
+ const uint32x4_t k0 = vld1q_u32(K256);
+ const uint32x4_t k1 = vld1q_u32(K256 + 4);
+ const uint32x4_t k2 = vld1q_u32(K256 + 8);
+ const uint32x4_t k3 = vld1q_u32(K256 + 12);
+ const uint32x4_t k4 = vld1q_u32(K256 + 16);
+ const uint32x4_t k5 = vld1q_u32(K256 + 20);
+ const uint32x4_t k6 = vld1q_u32(K256 + 24);
+ const uint32x4_t k7 = vld1q_u32(K256 + 28);
+ const uint32x4_t k8 = vld1q_u32(K256 + 32);
+ const uint32x4_t k9 = vld1q_u32(K256 + 36);
+ const uint32x4_t k10 = vld1q_u32(K256 + 40);
+ const uint32x4_t k11 = vld1q_u32(K256 + 44);
+ const uint32x4_t k12 = vld1q_u32(K256 + 48);
+ const uint32x4_t k13 = vld1q_u32(K256 + 52);
+ const uint32x4_t k14 = vld1q_u32(K256 + 56);
+ const uint32x4_t k15 = vld1q_u32(K256 + 60);
+
+ /* Current hash state: h[0..3] and h[4..7]. */
+ uint32x4_t h0 = vld1q_u32(ctx->h);
+ uint32x4_t h1 = vld1q_u32(ctx->h + 4);
+
+ unsigned char *input = ctx->u.b;
+
+ /* Load the 16 message words, byte-reversing each 32-bit word. */
+ uint32x4_t a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input)));
+ uint32x4_t b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16)));
+ uint32x4_t c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32)));
+ uint32x4_t d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48)));
+
+ /* Working copies; h0/h1 are kept for the final feed-forward add. */
+ uint32x4_t w0 = h0;
+ uint32x4_t w1 = h1;
+
+ /* The message vectors rotate through the a/b/c/d roles each step. */
+ ROUND(0, a, b, c, d)
+ ROUND(1, b, c, d, a)
+ ROUND(2, c, d, a, b)
+ ROUND(3, d, a, b, c)
+ ROUND(4, a, b, c, d)
+ ROUND(5, b, c, d, a)
+ ROUND(6, c, d, a, b)
+ ROUND(7, d, a, b, c)
+ ROUND(8, a, b, c, d)
+ ROUND(9, b, c, d, a)
+ ROUND(10, c, d, a, b)
+ ROUND(11, d, a, b, c)
+ ROUND(12, a, b, c, d)
+ ROUND(13, b, c, d, a)
+ ROUND(14, c, d, a, b)
+ ROUND(15, d, a, b, c)
+
+ h0 = vaddq_u32(h0, w0);
+ h1 = vaddq_u32(h1, w1);
+
+ vst1q_u32(ctx->h, h0);
+ vst1q_u32(ctx->h + 4, h1);
+}
+
+/*
+ * Absorb inputLen bytes from input into the SHA-256 context.
+ *
+ * A partially filled ctx->u.b is topped up first and compressed once it
+ * reaches a full block; further whole blocks are compressed directly from
+ * the caller's buffer, and any tail shorter than a block is left in
+ * ctx->u.b. A zero inputLen is a no-op.
+ */
+void
+SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ /* Round constants, four per vector; referenced by ROUND as k##n. */
+ const uint32x4_t k0 = vld1q_u32(K256);
+ const uint32x4_t k1 = vld1q_u32(K256 + 4);
+ const uint32x4_t k2 = vld1q_u32(K256 + 8);
+ const uint32x4_t k3 = vld1q_u32(K256 + 12);
+ const uint32x4_t k4 = vld1q_u32(K256 + 16);
+ const uint32x4_t k5 = vld1q_u32(K256 + 20);
+ const uint32x4_t k6 = vld1q_u32(K256 + 24);
+ const uint32x4_t k7 = vld1q_u32(K256 + 28);
+ const uint32x4_t k8 = vld1q_u32(K256 + 32);
+ const uint32x4_t k9 = vld1q_u32(K256 + 36);
+ const uint32x4_t k10 = vld1q_u32(K256 + 40);
+ const uint32x4_t k11 = vld1q_u32(K256 + 44);
+ const uint32x4_t k12 = vld1q_u32(K256 + 48);
+ const uint32x4_t k13 = vld1q_u32(K256 + 52);
+ const uint32x4_t k14 = vld1q_u32(K256 + 56);
+ const uint32x4_t k15 = vld1q_u32(K256 + 60);
+
+ /* Bytes already buffered (low six bits of the running byte count). */
+ unsigned int inBuf = ctx->sizeLo & 0x3f;
+ if (!inputLen) {
+ return;
+ }
+
+ /* Add inputLen into the count of bytes processed, before processing */
+ if ((ctx->sizeLo += inputLen) < inputLen) {
+ /* sizeLo wrapped around: carry into the high word. */
+ ctx->sizeHi++;
+ }
+
+ /* if data already in buffer, attempt to fill rest of buffer */
+ if (inBuf) {
+ unsigned int todo = SHA256_BLOCK_LENGTH - inBuf;
+ if (inputLen < todo) {
+ todo = inputLen;
+ }
+ memcpy(ctx->u.b + inBuf, input, todo);
+ input += todo;
+ inputLen -= todo;
+ if (inBuf + todo == SHA256_BLOCK_LENGTH) {
+ SHA256_Compress_Native(ctx);
+ }
+ }
+
+ /* Load the state after any buffered block has been folded in above. */
+ uint32x4_t h0 = vld1q_u32(ctx->h);
+ uint32x4_t h1 = vld1q_u32(ctx->h + 4);
+
+ /* if enough data to fill one or more whole buffers, process them. */
+ while (inputLen >= SHA256_BLOCK_LENGTH) {
+ uint32x4_t a, b, c, d;
+ /* Load the 16 message words, byte-reversing each 32-bit word. */
+ a = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input)));
+ b = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 16)));
+ c = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 32)));
+ d = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input + 48)));
+ input += SHA256_BLOCK_LENGTH;
+ inputLen -= SHA256_BLOCK_LENGTH;
+
+ uint32x4_t w0 = h0;
+ uint32x4_t w1 = h1;
+
+ ROUND(0, a, b, c, d)
+ ROUND(1, b, c, d, a)
+ ROUND(2, c, d, a, b)
+ ROUND(3, d, a, b, c)
+ ROUND(4, a, b, c, d)
+ ROUND(5, b, c, d, a)
+ ROUND(6, c, d, a, b)
+ ROUND(7, d, a, b, c)
+ ROUND(8, a, b, c, d)
+ ROUND(9, b, c, d, a)
+ ROUND(10, c, d, a, b)
+ ROUND(11, d, a, b, c)
+ ROUND(12, a, b, c, d)
+ ROUND(13, b, c, d, a)
+ ROUND(14, c, d, a, b)
+ ROUND(15, d, a, b, c)
+
+ /* Feed-forward; the state stays in registers across blocks. */
+ h0 = vaddq_u32(h0, w0);
+ h1 = vaddq_u32(h1, w1);
+ }
+
+ vst1q_u32(ctx->h, h0);
+ vst1q_u32(ctx->h + 4, h1);
+
+ /* if data left over, fill it into buffer */
+ if (inputLen) {
+ memcpy(ctx->u.b, input, inputLen);
+ }
+}
+
+#endif /* USE_HW_SHA2 */
diff --git a/security/nss/lib/freebl/sha256-x86.c b/security/nss/lib/freebl/sha256-x86.c
new file mode 100644
index 0000000000..3aa30e9ccc
--- /dev/null
+++ b/security/nss/lib/freebl/sha256-x86.c
@@ -0,0 +1,236 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef USE_HW_SHA2
+
+#include <immintrin.h>
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "prcpucfg.h"
+#include "prtypes.h" /* for PRUintXX */
+#include "prlong.h"
+#include "blapi.h"
+#include "sha256.h"
+
+/*
+ * SHA-256 constants, K256.
+ * The functions below read this table with aligned 128-bit loads
+ * (_mm_load_si128), so it must be 16-byte aligned; pre_align/post_align
+ * presumably supply that alignment (they come from blapii.h - confirm).
+ */
+pre_align static const PRUint32 K256[64] post_align = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * ROUND(n, a, b, c, d): one step of the unrolled SHA-256 compression,
+ * covering the four message words held in vector a.
+ *
+ * Relies on names in the expanding scope: k0..k15 (constant vectors,
+ * selected by token-pasting k##n) and w0/w1 (the two halves of the
+ * working state, updated in place).
+ *
+ * The sum a + k##n is fed to two _mm_sha256rnds2_epu32 calls: the first
+ * takes it as is, then the shuffle with 0x0e moves its upper two words
+ * into the low half for the second call.
+ *
+ * For n < 12 the macro also rewrites a with the message-schedule words
+ * needed four steps later, derived from a, b, c and d. n is always a
+ * literal at the call sites, so the test is a compile-time constant.
+ */
+#define ROUND(n, a, b, c, d) \
+ { \
+ __m128i t = _mm_add_epi32(a, k##n); \
+ w1 = _mm_sha256rnds2_epu32(w1, w0, t); \
+ t = _mm_shuffle_epi32(t, 0x0e); \
+ w0 = _mm_sha256rnds2_epu32(w0, w1, t); \
+ if (n < 12) { \
+ a = _mm_sha256msg1_epu32(a, b); \
+ a = _mm_add_epi32(a, _mm_alignr_epi8(d, c, 4)); \
+ a = _mm_sha256msg2_epu32(a, d); \
+ } \
+ }
+
+/*
+ * Compress the 64-byte block staged in ctx->u.b into the eight state
+ * words ctx->h[0..7], using the x86 SHA extension intrinsics.
+ */
+void
+SHA256_Compress_Native(SHA256Context *ctx)
+{
+ __m128i h0, h1, th;
+ __m128i a, b, c, d;
+ __m128i w0, w1;
+ /* Byte-shuffle mask that reverses the bytes of each 32-bit word. */
+ const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+
+ /* Round constants, four per vector; referenced by ROUND as k##n. */
+ const __m128i *K = (__m128i *)K256;
+ const __m128i k0 = _mm_load_si128(K);
+ const __m128i k1 = _mm_load_si128(K + 1);
+ const __m128i k2 = _mm_load_si128(K + 2);
+ const __m128i k3 = _mm_load_si128(K + 3);
+ const __m128i k4 = _mm_load_si128(K + 4);
+ const __m128i k5 = _mm_load_si128(K + 5);
+ const __m128i k6 = _mm_load_si128(K + 6);
+ const __m128i k7 = _mm_load_si128(K + 7);
+ const __m128i k8 = _mm_load_si128(K + 8);
+ const __m128i k9 = _mm_load_si128(K + 9);
+ const __m128i k10 = _mm_load_si128(K + 10);
+ const __m128i k11 = _mm_load_si128(K + 11);
+ const __m128i k12 = _mm_load_si128(K + 12);
+ const __m128i k13 = _mm_load_si128(K + 13);
+ const __m128i k14 = _mm_load_si128(K + 14);
+ const __m128i k15 = _mm_load_si128(K + 15);
+
+ const __m128i *input = (__m128i *)ctx->u.b;
+
+ /* Unaligned loads: no alignment is assumed for ctx->h. */
+ h0 = _mm_loadu_si128((__m128i *)(ctx->h));
+ h1 = _mm_loadu_si128((__m128i *)(ctx->h + 4));
+
+ /*
+ * H0123:4567 -> H0145:H2367
+ * Regroup the state so that h0 holds words 0,1,4,5 and h1 holds words
+ * 2,3,6,7, the pairing used by the _mm_sha256rnds2_epu32 calls in ROUND.
+ */
+ th = _mm_shuffle_epi32(h0, 0xb1);
+ h1 = _mm_shuffle_epi32(h1, 0x1b);
+ h0 = _mm_alignr_epi8(th, h1, 8);
+ h1 = _mm_blend_epi16(h1, th, 0xf0);
+
+ /* Load the 16 message words, byte-reversing each 32-bit word. */
+ a = _mm_shuffle_epi8(_mm_loadu_si128(input), shuffle);
+ b = _mm_shuffle_epi8(_mm_loadu_si128(input + 1), shuffle);
+ c = _mm_shuffle_epi8(_mm_loadu_si128(input + 2), shuffle);
+ d = _mm_shuffle_epi8(_mm_loadu_si128(input + 3), shuffle);
+
+ /* Working copies; h0/h1 are kept for the final feed-forward add. */
+ w0 = h0;
+ w1 = h1;
+
+ ROUND(0, a, b, c, d)
+ ROUND(1, b, c, d, a)
+ ROUND(2, c, d, a, b)
+ ROUND(3, d, a, b, c)
+ ROUND(4, a, b, c, d)
+ ROUND(5, b, c, d, a)
+ ROUND(6, c, d, a, b)
+ ROUND(7, d, a, b, c)
+ ROUND(8, a, b, c, d)
+ ROUND(9, b, c, d, a)
+ ROUND(10, c, d, a, b)
+ ROUND(11, d, a, b, c)
+ ROUND(12, a, b, c, d)
+ ROUND(13, b, c, d, a)
+ ROUND(14, c, d, a, b)
+ ROUND(15, d, a, b, c)
+
+ h0 = _mm_add_epi32(h0, w0);
+ h1 = _mm_add_epi32(h1, w1);
+
+ /* H0145:2367 -> H0123:4567 */
+ th = _mm_shuffle_epi32(h0, 0x1b);
+ h1 = _mm_shuffle_epi32(h1, 0xb1);
+ h0 = _mm_blend_epi16(th, h1, 0xf0);
+ h1 = _mm_alignr_epi8(h1, th, 8);
+
+ _mm_storeu_si128((__m128i *)ctx->h, h0);
+ _mm_storeu_si128((__m128i *)(ctx->h + 4), h1);
+}
+
+/*
+ * Absorb inputLen bytes from input into the SHA-256 context.
+ *
+ * A partially filled ctx->u.b is topped up first and compressed once it
+ * reaches a full block; further whole blocks are compressed directly from
+ * the caller's buffer, and any tail shorter than a block is left in
+ * ctx->u.b. A zero inputLen is a no-op.
+ */
+void
+SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ __m128i h0, h1, th;
+ /* Byte-shuffle mask that reverses the bytes of each 32-bit word. */
+ const __m128i shuffle = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+
+ /* Round constants, four per vector; referenced by ROUND as k##n. */
+ const __m128i *K = (__m128i *)K256;
+ const __m128i k0 = _mm_load_si128(K);
+ const __m128i k1 = _mm_load_si128(K + 1);
+ const __m128i k2 = _mm_load_si128(K + 2);
+ const __m128i k3 = _mm_load_si128(K + 3);
+ const __m128i k4 = _mm_load_si128(K + 4);
+ const __m128i k5 = _mm_load_si128(K + 5);
+ const __m128i k6 = _mm_load_si128(K + 6);
+ const __m128i k7 = _mm_load_si128(K + 7);
+ const __m128i k8 = _mm_load_si128(K + 8);
+ const __m128i k9 = _mm_load_si128(K + 9);
+ const __m128i k10 = _mm_load_si128(K + 10);
+ const __m128i k11 = _mm_load_si128(K + 11);
+ const __m128i k12 = _mm_load_si128(K + 12);
+ const __m128i k13 = _mm_load_si128(K + 13);
+ const __m128i k14 = _mm_load_si128(K + 14);
+ const __m128i k15 = _mm_load_si128(K + 15);
+
+ /* Bytes already buffered (low six bits of the running byte count). */
+ unsigned int inBuf = ctx->sizeLo & 0x3f;
+ if (!inputLen) {
+ return;
+ }
+
+ /* Add inputLen into the count of bytes processed, before processing */
+ if ((ctx->sizeLo += inputLen) < inputLen) {
+ /* sizeLo wrapped around: carry into the high word. */
+ ctx->sizeHi++;
+ }
+
+ /* if data already in buffer, attempt to fill rest of buffer */
+ if (inBuf) {
+ unsigned int todo = SHA256_BLOCK_LENGTH - inBuf;
+ if (inputLen < todo) {
+ todo = inputLen;
+ }
+ memcpy(ctx->u.b + inBuf, input, todo);
+ input += todo;
+ inputLen -= todo;
+ if (inBuf + todo == SHA256_BLOCK_LENGTH) {
+ SHA256_Compress_Native(ctx);
+ }
+ }
+
+ /* Load the state after any buffered block has been folded in above. */
+ h0 = _mm_loadu_si128((__m128i *)(ctx->h));
+ h1 = _mm_loadu_si128((__m128i *)(ctx->h + 4));
+
+ /*
+ * H0123:4567 -> H0145:H2367
+ * Regroup the state so that h0 holds words 0,1,4,5 and h1 holds words
+ * 2,3,6,7, the pairing used by the _mm_sha256rnds2_epu32 calls in ROUND.
+ */
+ th = _mm_shuffle_epi32(h0, 0xb1);
+ h1 = _mm_shuffle_epi32(h1, 0x1b);
+ h0 = _mm_alignr_epi8(th, h1, 8);
+ h1 = _mm_blend_epi16(h1, th, 0xf0);
+
+ /* if enough data to fill one or more whole buffers, process them. */
+ while (inputLen >= SHA256_BLOCK_LENGTH) {
+ __m128i a, b, c, d;
+ __m128i w0, w1;
+ /* Load the 16 message words, byte-reversing each 32-bit word. */
+ a = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)input), shuffle);
+ b = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 16)), shuffle);
+ c = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 32)), shuffle);
+ d = _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(input + 48)), shuffle);
+ input += SHA256_BLOCK_LENGTH;
+ inputLen -= SHA256_BLOCK_LENGTH;
+
+ w0 = h0;
+ w1 = h1;
+
+ ROUND(0, a, b, c, d)
+ ROUND(1, b, c, d, a)
+ ROUND(2, c, d, a, b)
+ ROUND(3, d, a, b, c)
+ ROUND(4, a, b, c, d)
+ ROUND(5, b, c, d, a)
+ ROUND(6, c, d, a, b)
+ ROUND(7, d, a, b, c)
+ ROUND(8, a, b, c, d)
+ ROUND(9, b, c, d, a)
+ ROUND(10, c, d, a, b)
+ ROUND(11, d, a, b, c)
+ ROUND(12, a, b, c, d)
+ ROUND(13, b, c, d, a)
+ ROUND(14, c, d, a, b)
+ ROUND(15, d, a, b, c)
+
+ /* Feed-forward; the state stays in the regrouped layout across blocks. */
+ h0 = _mm_add_epi32(h0, w0);
+ h1 = _mm_add_epi32(h1, w1);
+ }
+
+ /* H0145:2367 -> H0123:4567 (undo the regrouping before storing) */
+ th = _mm_shuffle_epi32(h0, 0x1b);
+ h1 = _mm_shuffle_epi32(h1, 0xb1);
+ h0 = _mm_blend_epi16(th, h1, 0xf0);
+ h1 = _mm_alignr_epi8(h1, th, 8);
+
+ _mm_storeu_si128((__m128i *)ctx->h, h0);
+ _mm_storeu_si128((__m128i *)(ctx->h + 4), h1);
+
+ /* if data left over, fill it into buffer */
+ if (inputLen) {
+ memcpy(ctx->u.b, input, inputLen);
+ }
+}
+
+#endif /* USE_HW_SHA2 */
diff --git a/security/nss/lib/freebl/sha256.h b/security/nss/lib/freebl/sha256.h
new file mode 100644
index 0000000000..645118b07e
--- /dev/null
+++ b/security/nss/lib/freebl/sha256.h
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_256_H_
+#define _SHA_256_H_
+
+#include "prtypes.h"
+
+/* Forward declaration so the function-pointer types can name the context. */
+struct SHA256ContextStr;
+
+/* Compresses the block currently buffered in the context. */
+typedef void (*sha256_compress_t)(struct SHA256ContextStr *);
+/* Absorbs a byte buffer of the given length into the context. */
+typedef void (*sha256_update_t)(struct SHA256ContextStr *, const unsigned char *,
+ unsigned int);
+
+/*
+ * SHA-256 hashing context: block buffer, hash state, byte counter and the
+ * compress/update routines to use for this context.
+ */
+struct SHA256ContextStr {
+ union {
+ PRUint32 w[64]; /* message schedule, input buffer, plus 48 words */
+ PRUint8 b[256]; /* byte view of the same storage */
+ } u;
+ PRUint32 h[8]; /* 8 state variables */
+ PRUint32 sizeHi, sizeLo; /* 64-bit count of hashed bytes. */
+ /*
+ * NOTE(review): presumably set at init time to either the generic or
+ * the *_Native implementation - confirm against the code that fills
+ * these in.
+ */
+ sha256_compress_t compress;
+ sha256_update_t update;
+};
+
+#endif /* _SHA_256_H_ */
diff --git a/security/nss/lib/freebl/sha3.c b/security/nss/lib/freebl/sha3.c
new file mode 100644
index 0000000000..25ba169003
--- /dev/null
+++ b/security/nss/lib/freebl/sha3.c
@@ -0,0 +1,288 @@
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prtypes.h" /* for PRUintXX */
+#include "secport.h" /* for PORT_XXX */
+#include "blapi.h"
+#include "blapii.h"
+#include "blapit.h"
+#include "secerr.h"
+#include "Hacl_Hash_SHA3.h"
+
+/*
+ * SHA-3 context: a handle to the streaming Keccak state that the functions
+ * in this file allocate, reset, update and free through ctx->st.
+ * (SHA3_224Context .. SHA3_512Context are presumably all typedefs of this
+ * struct - confirm in blapit.h.)
+ */
+struct SHA3ContextStr {
+ Hacl_Streaming_Keccak_state *st;
+};
+
+/*
+ * Allocate a SHA3-224 context together with its streaming Keccak state.
+ * Returns NULL if the context cannot be allocated, or if the Keccak
+ * allocator hands back NULL, instead of dereferencing a NULL pointer.
+ */
+SHA3_224Context *
+SHA3_224_NewContext(void)
+{
+    SHA3_224Context *ctx = PORT_New(SHA3_224Context);
+    if (!ctx) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_SHA3_224);
+    if (!ctx->st) {
+        PORT_Free(ctx);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Allocate a SHA3-256 context together with its streaming Keccak state.
+ * Returns NULL if the context cannot be allocated, or if the Keccak
+ * allocator hands back NULL, instead of dereferencing a NULL pointer.
+ */
+SHA3_256Context *
+SHA3_256_NewContext(void)
+{
+    SHA3_256Context *ctx = PORT_New(SHA3_256Context);
+    if (!ctx) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_SHA3_256);
+    if (!ctx->st) {
+        PORT_Free(ctx);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Allocate a SHA3-384 context together with its streaming Keccak state.
+ * Returns NULL if the context cannot be allocated, or if the Keccak
+ * allocator hands back NULL, instead of dereferencing a NULL pointer.
+ */
+SHA3_384Context *
+SHA3_384_NewContext(void)
+{
+    SHA3_384Context *ctx = PORT_New(SHA3_384Context);
+    if (!ctx) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_SHA3_384);
+    if (!ctx->st) {
+        PORT_Free(ctx);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Allocate a SHA3-512 context together with its streaming Keccak state.
+ * Returns NULL if the context cannot be allocated, or if the Keccak
+ * allocator hands back NULL, instead of dereferencing a NULL pointer.
+ */
+SHA3_512Context *
+SHA3_512_NewContext(void)
+{
+    SHA3_512Context *ctx = PORT_New(SHA3_512Context);
+    if (!ctx) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_SHA3_512);
+    if (!ctx->st) {
+        PORT_Free(ctx);
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Reset the SHA3-224 streaming state; when freeit is set, also release
+ * the state and the context itself.
+ */
+void
+SHA3_224_DestroyContext(SHA3_224Context *ctx, PRBool freeit)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+    if (!freeit) {
+        return;
+    }
+    Hacl_Streaming_Keccak_free(state);
+    PORT_Free(ctx);
+}
+
+/*
+ * Reset the SHA3-256 streaming state; when freeit is set, also release
+ * the state and the context itself.
+ */
+void
+SHA3_256_DestroyContext(SHA3_256Context *ctx, PRBool freeit)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+    if (!freeit) {
+        return;
+    }
+    Hacl_Streaming_Keccak_free(state);
+    PORT_Free(ctx);
+}
+
+/*
+ * Reset the SHA3-384 streaming state; when freeit is set, also release
+ * the state and the context itself.
+ */
+void
+SHA3_384_DestroyContext(SHA3_384Context *ctx, PRBool freeit)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+    if (!freeit) {
+        return;
+    }
+    Hacl_Streaming_Keccak_free(state);
+    PORT_Free(ctx);
+}
+
+/*
+ * Reset the SHA3-512 streaming state; when freeit is set, also release
+ * the state and the context itself.
+ */
+void
+SHA3_512_DestroyContext(SHA3_512Context *ctx, PRBool freeit)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+    if (!freeit) {
+        return;
+    }
+    Hacl_Streaming_Keccak_free(state);
+    PORT_Free(ctx);
+}
+
+/* Flattened size of a SHA3-224 context: always reported as 0. */
+unsigned int
+SHA3_224_FlattenSize(SHA3_224Context *ctx)
+{
+    (void)ctx; /* the context is not inspected */
+    return 0U;
+}
+
+/* Flattened size of a SHA3-256 context: always reported as 0. */
+unsigned int
+SHA3_256_FlattenSize(SHA3_256Context *ctx)
+{
+    (void)ctx; /* the context is not inspected */
+    return 0U;
+}
+
+/* Flattened size of a SHA3-384 context: always reported as 0. */
+unsigned int
+SHA3_384_FlattenSize(SHA3_384Context *ctx)
+{
+    (void)ctx; /* the context is not inspected */
+    return 0U;
+}
+
+/* Flattened size of a SHA3-512 context: always reported as 0. */
+unsigned int
+SHA3_512_FlattenSize(SHA3_512Context *ctx)
+{
+    (void)ctx; /* the context is not inspected */
+    return 0U;
+}
+
+/* Start a new SHA3-224 computation by resetting the streaming state. */
+void
+SHA3_224_Begin(SHA3_224Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Start a new SHA3-256 computation by resetting the streaming state. */
+void
+SHA3_256_Begin(SHA3_256Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Start a new SHA3-384 computation by resetting the streaming state. */
+void
+SHA3_384_Begin(SHA3_384Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Start a new SHA3-512 computation by resetting the streaming state. */
+void
+SHA3_512_Begin(SHA3_512Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Feed inputLen bytes from input into the SHA3-224 streaming state. */
+void
+SHA3_224_Update(SHA3_224Context *ctx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    /* The Keccak update call takes a non-const byte pointer. */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/* Feed inputLen bytes from input into the SHA3-256 streaming state. */
+void
+SHA3_256_Update(SHA3_256Context *ctx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    /* The Keccak update call takes a non-const byte pointer. */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/* Feed inputLen bytes from input into the SHA3-384 streaming state. */
+void
+SHA3_384_Update(SHA3_384Context *ctx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    /* The Keccak update call takes a non-const byte pointer. */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/* Feed inputLen bytes from input into the SHA3-512 streaming state. */
+void
+SHA3_512_Update(SHA3_512Context *ctx, const unsigned char *input,
+                unsigned int inputLen)
+{
+    /* The Keccak update call takes a non-const byte pointer. */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/*
+ * Finish the SHA3-224 computation. The digest is produced in a local
+ * buffer and at most maxDigestLen bytes of it are copied to digest; the
+ * number of bytes copied is stored in *digestLen when digestLen is given.
+ */
+void
+SHA3_224_End(SHA3_224Context *ctx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    uint8_t full[SHA3_224_LENGTH] = { 0 };
+    unsigned int outLen = maxDigestLen;
+
+    /* Never copy more than the digest actually holds. */
+    if (outLen > SHA3_224_LENGTH) {
+        outLen = SHA3_224_LENGTH;
+    }
+    Hacl_Streaming_Keccak_finish(ctx->st, full);
+    memcpy(digest, full, outLen);
+    if (digestLen != NULL) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * Finish the SHA3-256 computation. The digest is produced in a local
+ * buffer and at most maxDigestLen bytes of it are copied to digest; the
+ * number of bytes copied is stored in *digestLen when digestLen is given.
+ */
+void
+SHA3_256_End(SHA3_256Context *ctx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    uint8_t full[SHA3_256_LENGTH] = { 0 };
+    unsigned int outLen = maxDigestLen;
+
+    /* Never copy more than the digest actually holds. */
+    if (outLen > SHA3_256_LENGTH) {
+        outLen = SHA3_256_LENGTH;
+    }
+    Hacl_Streaming_Keccak_finish(ctx->st, full);
+    memcpy(digest, full, outLen);
+    if (digestLen != NULL) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * Finish the SHA3-384 computation. The digest is produced in a local
+ * buffer and at most maxDigestLen bytes of it are copied to digest; the
+ * number of bytes copied is stored in *digestLen when digestLen is given.
+ */
+void
+SHA3_384_End(SHA3_384Context *ctx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    uint8_t full[SHA3_384_LENGTH] = { 0 };
+    unsigned int outLen = maxDigestLen;
+
+    /* Never copy more than the digest actually holds. */
+    if (outLen > SHA3_384_LENGTH) {
+        outLen = SHA3_384_LENGTH;
+    }
+    Hacl_Streaming_Keccak_finish(ctx->st, full);
+    memcpy(digest, full, outLen);
+    if (digestLen != NULL) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * Finish the SHA3-512 computation. The digest is produced in a local
+ * buffer and at most maxDigestLen bytes of it are copied to digest; the
+ * number of bytes copied is stored in *digestLen when digestLen is given.
+ */
+void
+SHA3_512_End(SHA3_512Context *ctx, unsigned char *digest,
+             unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    uint8_t full[SHA3_512_LENGTH] = { 0 };
+    unsigned int outLen = maxDigestLen;
+
+    /* Never copy more than the digest actually holds. */
+    if (outLen > SHA3_512_LENGTH) {
+        outLen = SHA3_512_LENGTH;
+    }
+    Hacl_Streaming_Keccak_finish(ctx->st, full);
+    memcpy(digest, full, outLen);
+    if (digestLen != NULL) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * One-shot SHA3-224 of src_length bytes at src. dest must have room for
+ * SHA3_224_LENGTH bytes. Returns SECFailure if no context is available,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHA3_224_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    SHA3_224Context *ctx = SHA3_224_NewContext();
+    if (!ctx) {
+        /* Do not hash through a NULL context. */
+        return SECFailure;
+    }
+    SHA3_224_Begin(ctx);
+    SHA3_224_Update(ctx, src, src_length);
+    SHA3_224_End(ctx, dest, NULL, SHA3_224_LENGTH);
+    SHA3_224_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHA3-256 of src_length bytes at src. dest must have room for
+ * SHA3_256_LENGTH bytes. Returns SECFailure if no context is available,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHA3_256_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    SHA3_256Context *ctx = SHA3_256_NewContext();
+    if (!ctx) {
+        /* Do not hash through a NULL context. */
+        return SECFailure;
+    }
+    SHA3_256_Begin(ctx);
+    SHA3_256_Update(ctx, src, src_length);
+    SHA3_256_End(ctx, dest, NULL, SHA3_256_LENGTH);
+    SHA3_256_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHA3-384 of src_length bytes at src. dest must have room for
+ * SHA3_384_LENGTH bytes. Returns SECFailure if no context is available,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHA3_384_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    SHA3_384Context *ctx = SHA3_384_NewContext();
+    if (!ctx) {
+        /* Do not hash through a NULL context. */
+        return SECFailure;
+    }
+    SHA3_384_Begin(ctx);
+    SHA3_384_Update(ctx, src, src_length);
+    SHA3_384_End(ctx, dest, NULL, SHA3_384_LENGTH);
+    SHA3_384_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHA3-512 of src_length bytes at src. dest must have room for
+ * SHA3_512_LENGTH bytes. Returns SECFailure if no context is available,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHA3_512_HashBuf(unsigned char *dest, const unsigned char *src,
+                 PRUint32 src_length)
+{
+    SHA3_512Context *ctx = SHA3_512_NewContext();
+    if (!ctx) {
+        /* Do not hash through a NULL context. */
+        return SECFailure;
+    }
+    SHA3_512_Begin(ctx);
+    SHA3_512_Update(ctx, src, src_length);
+    SHA3_512_End(ctx, dest, NULL, SHA3_512_LENGTH);
+    SHA3_512_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/* SHA3-224 of the NUL-terminated string src (terminator not included). */
+SECStatus
+SHA3_224_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+    PRUint32 numBytes = PORT_Strlen(src);
+
+    return SHA3_224_HashBuf(dest, bytes, numBytes);
+}
+
+/* SHA3-256 of the NUL-terminated string src (terminator not included). */
+SECStatus
+SHA3_256_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+    PRUint32 numBytes = PORT_Strlen(src);
+
+    return SHA3_256_HashBuf(dest, bytes, numBytes);
+}
+
+/* SHA3-384 of the NUL-terminated string src (terminator not included). */
+SECStatus
+SHA3_384_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+    PRUint32 numBytes = PORT_Strlen(src);
+
+    return SHA3_384_HashBuf(dest, bytes, numBytes);
+}
+
+/* SHA3-512 of the NUL-terminated string src (terminator not included). */
+SECStatus
+SHA3_512_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+    PRUint32 numBytes = PORT_Strlen(src);
+
+    return SHA3_512_HashBuf(dest, bytes, numBytes);
+}
diff --git a/security/nss/lib/freebl/sha512-p8.s b/security/nss/lib/freebl/sha512-p8.s
new file mode 100644
index 0000000000..d84ec04780
--- /dev/null
+++ b/security/nss/lib/freebl/sha512-p8.s
@@ -0,0 +1,851 @@
+# Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+# See the full LICENSE under scripts/.
+
+.machine "any"
+.abiversion 2
+.text
+
+.globl sha512_block_p8
+.type sha512_block_p8,@function
+.align 6
+sha512_block_p8:
+.localentry sha512_block_p8,0
+
+ stdu 1,-384(1)
+ mflr 8
+ li 10,207
+ li 11,223
+ stvx 24,10,1
+ addi 10,10,32
+ li 12,-1
+ stvx 25,11,1
+ addi 11,11,32
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ li 11,-4096+255
+ stw 12,332(1)
+ li 10,0x10
+ std 26,336(1)
+ li 26,0x20
+ std 27,344(1)
+ li 27,0x30
+ std 28,352(1)
+ li 28,0x40
+ std 29,360(1)
+ li 29,0x50
+ std 30,368(1)
+ li 30,0x60
+ std 31,376(1)
+ li 31,0x70
+ std 8,400(1)
+ or 11,11,11
+
+ bl .LPICmeup
+ addi 11,1,79
+ li 7,8
+ lvsl 31,0,7
+ vspltisb 28,0x0f
+ vxor 31,31,28
+ .long 0x7C001E99
+ .long 0x7C4A1E99
+ .long 0x7C9A1E99
+ vsldoi 1,0,0,8
+ .long 0x7CDB1E99
+ vsldoi 3,2,2,8
+ vsldoi 5,4,4,8
+ vsldoi 7,6,6,8
+ li 0,4
+ b .Loop
+.align 5
+.Loop:
+ lvx 28,0,6
+ .long 0x7D002699
+ addi 4,4,16
+ mr 7,6
+ stvx 0,0,11
+ stvx 1,10,11
+ stvx 2,26,11
+ stvx 3,27,11
+ stvx 4,28,11
+ stvx 5,29,11
+ stvx 6,30,11
+ stvx 7,31,11
+ .long 0x10E7E0C0
+ lvx 28,10,6
+ vperm 8,8,8,31
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7D402699
+ addi 4,4,16
+ vsldoi 9,8,8,8
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ vperm 10,10,10,31
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7D802699
+ addi 4,4,16
+ vsldoi 11,10,10,8
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ vperm 12,12,12,31
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7DC02699
+ addi 4,4,16
+ vsldoi 13,12,12,8
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ vperm 14,14,14,31
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x7E002699
+ addi 4,4,16
+ vsldoi 15,14,14,8
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ vperm 16,16,16,31
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x7E402699
+ addi 4,4,16
+ vsldoi 17,16,16,8
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ vperm 18,18,18,31
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x7F002699
+ addi 4,4,16
+ vsldoi 19,18,18,8
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ vperm 24,24,24,31
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x7F402699
+ addi 4,4,16
+ vsldoi 25,24,24,8
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ vperm 26,26,26,31
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ vsldoi 27,26,26,8
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ mtctr 0
+ b .L16_xx
+.align 5
+.L16_xx:
+ .long 0x13CA06C2
+ .long 0x1129F0C0
+ .long 0x13DB7EC2
+ .long 0x1129F0C0
+ .long 0x112990C0
+ .long 0x10E740C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13CB06C2
+ .long 0x114AF0C0
+ .long 0x13C87EC2
+ .long 0x114AF0C0
+ .long 0x114A98C0
+ .long 0x10C648C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13CC06C2
+ .long 0x116BF0C0
+ .long 0x13C97EC2
+ .long 0x116BF0C0
+ .long 0x116BC0C0
+ .long 0x10A550C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13CD06C2
+ .long 0x118CF0C0
+ .long 0x13CA7EC2
+ .long 0x118CF0C0
+ .long 0x118CC8C0
+ .long 0x108458C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13CE06C2
+ .long 0x11ADF0C0
+ .long 0x13CB7EC2
+ .long 0x11ADF0C0
+ .long 0x11ADD0C0
+ .long 0x106360C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13CF06C2
+ .long 0x11CEF0C0
+ .long 0x13CC7EC2
+ .long 0x11CEF0C0
+ .long 0x11CED8C0
+ .long 0x104268C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13D006C2
+ .long 0x11EFF0C0
+ .long 0x13CD7EC2
+ .long 0x11EFF0C0
+ .long 0x11EF40C0
+ .long 0x102170C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13D106C2
+ .long 0x1210F0C0
+ .long 0x13CE7EC2
+ .long 0x1210F0C0
+ .long 0x121048C0
+ .long 0x100078C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ .long 0x13D206C2
+ .long 0x1231F0C0
+ .long 0x13CF7EC2
+ .long 0x1231F0C0
+ .long 0x123150C0
+ .long 0x10E780C0
+ vsel 29,6,5,4
+ .long 0x10C6E0C0
+ .long 0x10E7E8C0
+ .long 0x13C4FEC2
+ .long 0x10E7F0C0
+ vxor 29,0,1
+ vsel 29,1,2,29
+ .long 0x106338C0
+ .long 0x13C086C2
+ .long 0x13DEE8C0
+ .long 0x10E7F0C0
+ lvx 28,26,7
+ .long 0x13D306C2
+ .long 0x1252F0C0
+ .long 0x13D07EC2
+ .long 0x1252F0C0
+ .long 0x125258C0
+ .long 0x10C688C0
+ vsel 29,5,4,3
+ .long 0x10A5E0C0
+ .long 0x10C6E8C0
+ .long 0x13C3FEC2
+ .long 0x10C6F0C0
+ vxor 29,7,0
+ vsel 29,0,1,29
+ .long 0x104230C0
+ .long 0x13C786C2
+ .long 0x13DEE8C0
+ .long 0x10C6F0C0
+ lvx 28,27,7
+ .long 0x13D806C2
+ .long 0x1273F0C0
+ .long 0x13D17EC2
+ .long 0x1273F0C0
+ .long 0x127360C0
+ .long 0x10A590C0
+ vsel 29,4,3,2
+ .long 0x1084E0C0
+ .long 0x10A5E8C0
+ .long 0x13C2FEC2
+ .long 0x10A5F0C0
+ vxor 29,6,7
+ vsel 29,7,0,29
+ .long 0x102128C0
+ .long 0x13C686C2
+ .long 0x13DEE8C0
+ .long 0x10A5F0C0
+ lvx 28,28,7
+ .long 0x13D906C2
+ .long 0x1318F0C0
+ .long 0x13D27EC2
+ .long 0x1318F0C0
+ .long 0x131868C0
+ .long 0x108498C0
+ vsel 29,3,2,1
+ .long 0x1063E0C0
+ .long 0x1084E8C0
+ .long 0x13C1FEC2
+ .long 0x1084F0C0
+ vxor 29,5,6
+ vsel 29,6,7,29
+ .long 0x100020C0
+ .long 0x13C586C2
+ .long 0x13DEE8C0
+ .long 0x1084F0C0
+ lvx 28,29,7
+ .long 0x13DA06C2
+ .long 0x1339F0C0
+ .long 0x13D37EC2
+ .long 0x1339F0C0
+ .long 0x133970C0
+ .long 0x1063C0C0
+ vsel 29,2,1,0
+ .long 0x1042E0C0
+ .long 0x1063E8C0
+ .long 0x13C0FEC2
+ .long 0x1063F0C0
+ vxor 29,4,5
+ vsel 29,5,6,29
+ .long 0x10E718C0
+ .long 0x13C486C2
+ .long 0x13DEE8C0
+ .long 0x1063F0C0
+ lvx 28,30,7
+ .long 0x13DB06C2
+ .long 0x135AF0C0
+ .long 0x13D87EC2
+ .long 0x135AF0C0
+ .long 0x135A78C0
+ .long 0x1042C8C0
+ vsel 29,1,0,7
+ .long 0x1021E0C0
+ .long 0x1042E8C0
+ .long 0x13C7FEC2
+ .long 0x1042F0C0
+ vxor 29,3,4
+ vsel 29,4,5,29
+ .long 0x10C610C0
+ .long 0x13C386C2
+ .long 0x13DEE8C0
+ .long 0x1042F0C0
+ lvx 28,31,7
+ addi 7,7,0x80
+ .long 0x13C806C2
+ .long 0x137BF0C0
+ .long 0x13D97EC2
+ .long 0x137BF0C0
+ .long 0x137B80C0
+ .long 0x1021D0C0
+ vsel 29,0,7,6
+ .long 0x1000E0C0
+ .long 0x1021E8C0
+ .long 0x13C6FEC2
+ .long 0x1021F0C0
+ vxor 29,2,3
+ vsel 29,3,4,29
+ .long 0x10A508C0
+ .long 0x13C286C2
+ .long 0x13DEE8C0
+ .long 0x1021F0C0
+ lvx 28,0,7
+ .long 0x13C906C2
+ .long 0x1108F0C0
+ .long 0x13DA7EC2
+ .long 0x1108F0C0
+ .long 0x110888C0
+ .long 0x1000D8C0
+ vsel 29,7,6,5
+ .long 0x10E7E0C0
+ .long 0x1000E8C0
+ .long 0x13C5FEC2
+ .long 0x1000F0C0
+ vxor 29,1,2
+ vsel 29,2,3,29
+ .long 0x108400C0
+ .long 0x13C186C2
+ .long 0x13DEE8C0
+ .long 0x1000F0C0
+ lvx 28,10,7
+ bdnz .L16_xx
+
+ lvx 10,0,11
+ subic. 5,5,1
+ lvx 11,10,11
+ .long 0x100050C0
+ lvx 12,26,11
+ .long 0x102158C0
+ lvx 13,27,11
+ .long 0x104260C0
+ lvx 14,28,11
+ .long 0x106368C0
+ lvx 15,29,11
+ .long 0x108470C0
+ lvx 16,30,11
+ .long 0x10A578C0
+ lvx 17,31,11
+ .long 0x10C680C0
+ .long 0x10E788C0
+ bne .Loop
+ vperm 0,0,1,28
+ vperm 2,2,3,28
+ vperm 4,4,5,28
+ vperm 6,6,7,28
+ .long 0x7C001F99
+ .long 0x7C4A1F99
+ .long 0x7C9A1F99
+ .long 0x7CDB1F99
+ addi 11,1,207
+ mtlr 8
+ or 12,12,12
+ lvx 24,0,11
+ lvx 25,10,11
+ lvx 26,26,11
+ lvx 27,27,11
+ lvx 28,28,11
+ lvx 29,29,11
+ lvx 30,30,11
+ lvx 31,31,11
+ ld 26,336(1)
+ ld 27,344(1)
+ ld 28,352(1)
+ ld 29,360(1)
+ ld 30,368(1)
+ ld 31,376(1)
+ addi 1,1,384
+ blr
+.long 0
+.byte 0,12,4,1,0x80,6,3,0
+.long 0
+.size sha512_block_p8,.-sha512_block_p8
+.align 6
+/*
+ * .LPICmeup: position-independent lookup of the constant table emitted
+ * directly after this stub. On return r6 holds the address that lies 56
+ * bytes past the "mflr 6" instruction, which is the first .long following
+ * the ".space 28" padding. r0 is used as scratch to preserve the caller's
+ * link register.
+ */
+.LPICmeup:
+ mflr 0 /* save the caller's LR in r0 */
+ bcl 20,31,$+4 /* branch-and-link to the next instruction: LR = address of "mflr 6" */
+ mflr 6 /* r6 = address of this instruction */
+ addi 6,6,56 /* skip 16 bytes of code + 12 bytes of trailer + 28 bytes of padding */
+ mtlr 0 /* restore the caller's LR */
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
+.long 0xd728ae22,0x428a2f98
+.long 0xd728ae22,0x428a2f98
+.long 0x23ef65cd,0x71374491
+.long 0x23ef65cd,0x71374491
+.long 0xec4d3b2f,0xb5c0fbcf
+.long 0xec4d3b2f,0xb5c0fbcf
+.long 0x8189dbbc,0xe9b5dba5
+.long 0x8189dbbc,0xe9b5dba5
+.long 0xf348b538,0x3956c25b
+.long 0xf348b538,0x3956c25b
+.long 0xb605d019,0x59f111f1
+.long 0xb605d019,0x59f111f1
+.long 0xaf194f9b,0x923f82a4
+.long 0xaf194f9b,0x923f82a4
+.long 0xda6d8118,0xab1c5ed5
+.long 0xda6d8118,0xab1c5ed5
+.long 0xa3030242,0xd807aa98
+.long 0xa3030242,0xd807aa98
+.long 0x45706fbe,0x12835b01
+.long 0x45706fbe,0x12835b01
+.long 0x4ee4b28c,0x243185be
+.long 0x4ee4b28c,0x243185be
+.long 0xd5ffb4e2,0x550c7dc3
+.long 0xd5ffb4e2,0x550c7dc3
+.long 0xf27b896f,0x72be5d74
+.long 0xf27b896f,0x72be5d74
+.long 0x3b1696b1,0x80deb1fe
+.long 0x3b1696b1,0x80deb1fe
+.long 0x25c71235,0x9bdc06a7
+.long 0x25c71235,0x9bdc06a7
+.long 0xcf692694,0xc19bf174
+.long 0xcf692694,0xc19bf174
+.long 0x9ef14ad2,0xe49b69c1
+.long 0x9ef14ad2,0xe49b69c1
+.long 0x384f25e3,0xefbe4786
+.long 0x384f25e3,0xefbe4786
+.long 0x8b8cd5b5,0x0fc19dc6
+.long 0x8b8cd5b5,0x0fc19dc6
+.long 0x77ac9c65,0x240ca1cc
+.long 0x77ac9c65,0x240ca1cc
+.long 0x592b0275,0x2de92c6f
+.long 0x592b0275,0x2de92c6f
+.long 0x6ea6e483,0x4a7484aa
+.long 0x6ea6e483,0x4a7484aa
+.long 0xbd41fbd4,0x5cb0a9dc
+.long 0xbd41fbd4,0x5cb0a9dc
+.long 0x831153b5,0x76f988da
+.long 0x831153b5,0x76f988da
+.long 0xee66dfab,0x983e5152
+.long 0xee66dfab,0x983e5152
+.long 0x2db43210,0xa831c66d
+.long 0x2db43210,0xa831c66d
+.long 0x98fb213f,0xb00327c8
+.long 0x98fb213f,0xb00327c8
+.long 0xbeef0ee4,0xbf597fc7
+.long 0xbeef0ee4,0xbf597fc7
+.long 0x3da88fc2,0xc6e00bf3
+.long 0x3da88fc2,0xc6e00bf3
+.long 0x930aa725,0xd5a79147
+.long 0x930aa725,0xd5a79147
+.long 0xe003826f,0x06ca6351
+.long 0xe003826f,0x06ca6351
+.long 0x0a0e6e70,0x14292967
+.long 0x0a0e6e70,0x14292967
+.long 0x46d22ffc,0x27b70a85
+.long 0x46d22ffc,0x27b70a85
+.long 0x5c26c926,0x2e1b2138
+.long 0x5c26c926,0x2e1b2138
+.long 0x5ac42aed,0x4d2c6dfc
+.long 0x5ac42aed,0x4d2c6dfc
+.long 0x9d95b3df,0x53380d13
+.long 0x9d95b3df,0x53380d13
+.long 0x8baf63de,0x650a7354
+.long 0x8baf63de,0x650a7354
+.long 0x3c77b2a8,0x766a0abb
+.long 0x3c77b2a8,0x766a0abb
+.long 0x47edaee6,0x81c2c92e
+.long 0x47edaee6,0x81c2c92e
+.long 0x1482353b,0x92722c85
+.long 0x1482353b,0x92722c85
+.long 0x4cf10364,0xa2bfe8a1
+.long 0x4cf10364,0xa2bfe8a1
+.long 0xbc423001,0xa81a664b
+.long 0xbc423001,0xa81a664b
+.long 0xd0f89791,0xc24b8b70
+.long 0xd0f89791,0xc24b8b70
+.long 0x0654be30,0xc76c51a3
+.long 0x0654be30,0xc76c51a3
+.long 0xd6ef5218,0xd192e819
+.long 0xd6ef5218,0xd192e819
+.long 0x5565a910,0xd6990624
+.long 0x5565a910,0xd6990624
+.long 0x5771202a,0xf40e3585
+.long 0x5771202a,0xf40e3585
+.long 0x32bbd1b8,0x106aa070
+.long 0x32bbd1b8,0x106aa070
+.long 0xb8d2d0c8,0x19a4c116
+.long 0xb8d2d0c8,0x19a4c116
+.long 0x5141ab53,0x1e376c08
+.long 0x5141ab53,0x1e376c08
+.long 0xdf8eeb99,0x2748774c
+.long 0xdf8eeb99,0x2748774c
+.long 0xe19b48a8,0x34b0bcb5
+.long 0xe19b48a8,0x34b0bcb5
+.long 0xc5c95a63,0x391c0cb3
+.long 0xc5c95a63,0x391c0cb3
+.long 0xe3418acb,0x4ed8aa4a
+.long 0xe3418acb,0x4ed8aa4a
+.long 0x7763e373,0x5b9cca4f
+.long 0x7763e373,0x5b9cca4f
+.long 0xd6b2b8a3,0x682e6ff3
+.long 0xd6b2b8a3,0x682e6ff3
+.long 0x5defb2fc,0x748f82ee
+.long 0x5defb2fc,0x748f82ee
+.long 0x43172f60,0x78a5636f
+.long 0x43172f60,0x78a5636f
+.long 0xa1f0ab72,0x84c87814
+.long 0xa1f0ab72,0x84c87814
+.long 0x1a6439ec,0x8cc70208
+.long 0x1a6439ec,0x8cc70208
+.long 0x23631e28,0x90befffa
+.long 0x23631e28,0x90befffa
+.long 0xde82bde9,0xa4506ceb
+.long 0xde82bde9,0xa4506ceb
+.long 0xb2c67915,0xbef9a3f7
+.long 0xb2c67915,0xbef9a3f7
+.long 0xe372532b,0xc67178f2
+.long 0xe372532b,0xc67178f2
+.long 0xea26619c,0xca273ece
+.long 0xea26619c,0xca273ece
+.long 0x21c0c207,0xd186b8c7
+.long 0x21c0c207,0xd186b8c7
+.long 0xcde0eb1e,0xeada7dd6
+.long 0xcde0eb1e,0xeada7dd6
+.long 0xee6ed178,0xf57d4f7f
+.long 0xee6ed178,0xf57d4f7f
+.long 0x72176fba,0x06f067aa
+.long 0x72176fba,0x06f067aa
+.long 0xa2c898a6,0x0a637dc5
+.long 0xa2c898a6,0x0a637dc5
+.long 0xbef90dae,0x113f9804
+.long 0xbef90dae,0x113f9804
+.long 0x131c471b,0x1b710b35
+.long 0x131c471b,0x1b710b35
+.long 0x23047d84,0x28db77f5
+.long 0x23047d84,0x28db77f5
+.long 0x40c72493,0x32caab7b
+.long 0x40c72493,0x32caab7b
+.long 0x15c9bebc,0x3c9ebe0a
+.long 0x15c9bebc,0x3c9ebe0a
+.long 0x9c100d4c,0x431d67c4
+.long 0x9c100d4c,0x431d67c4
+.long 0xcb3e42b6,0x4cc5d4be
+.long 0xcb3e42b6,0x4cc5d4be
+.long 0xfc657e2a,0x597f299c
+.long 0xfc657e2a,0x597f299c
+.long 0x3ad6faec,0x5fcb6fab
+.long 0x3ad6faec,0x5fcb6fab
+.long 0x4a475817,0x6c44198c
+.long 0x4a475817,0x6c44198c
+.long 0,0
+.long 0,0
+.long 0x14151617,0x10111213
+.long 0x04050607,0x00010203
+.byte 83,72,65,53,49,50,32,102,111,114,32,80,111,119,101,114,73,83,65,32,50,46,48,55,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2
diff --git a/security/nss/lib/freebl/sha512.c b/security/nss/lib/freebl/sha512.c
new file mode 100644
index 0000000000..14584a8906
--- /dev/null
+++ b/security/nss/lib/freebl/sha512.c
@@ -0,0 +1,1776 @@
+/*
+ * sha512.c - implementation of SHA224, SHA256, SHA384 and SHA512
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#if defined(NSS_X86) || defined(SHA_NO_LONG_LONG)
+#define NOUNROLL512 1
+#undef HAVE_LONG_LONG
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "prlong.h"
+#include "secport.h" /* for PORT_XXX */
+#include "blapi.h"
+#include "blapii.h"
+#include "secerr.h"
+#include "sha256.h" /* for struct SHA256ContextStr */
+#include "crypto_primitives.h"
+#include "ppc-crypto.h" /* for USE_PPC_CRYPTO */
+
+/* ============= Common constants and defines ======================= */
+
+/*
+ * Shorthands for the fields of the hash context. They expand to accesses
+ * through a variable literally named `ctx`, so they can only be used where
+ * such a variable is in scope.
+ */
+#define W ctx->u.w
+#define B ctx->u.b
+#define H ctx->h
+
+/*
+ * Bitwise helpers shared by the hash implementations in this file.
+ * Every argument and every expansion is fully parenthesized so that an
+ * argument containing lower-precedence operators (e.g. `a | b`) is
+ * evaluated as a unit. Arguments may be evaluated more than once, so they
+ * must not have side effects.
+ *
+ * SHR/SHL   - logical shift right / left by n bits
+ * Ch        - "choose": for each bit, y where x is 1, z where x is 0
+ * Maj       - "majority": for each bit, the value held by at least two inputs
+ * SHA_MIN   - the smaller of a and b
+ */
+#define SHR(x, n) ((x) >> (n))
+#define SHL(x, n) ((x) << (n))
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define SHA_MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Padding source shared by every SHA flavor in this file: one 0x80 marker
+ * byte followed by zero bytes.
+ */
+static const PRUint8 pad[240] = {
+    0x80 /* elements 1..239 are implicitly zero-initialized */
+};
+
+/* ============= SHA256 implementation ================================== */
+
+/*
+ * SHA-256 constants, K256: one additive constant per round, indexed by the
+ * round number (0..63). The scalar ROUND macro reads K256[n]; the
+ * USE_PPC_CRYPTO path loads four entries at a time with vec_ld.
+ * pre_align/post_align are defined elsewhere; presumably they request the
+ * alignment that vector load needs -- confirm against their definition.
+ */
+pre_align static const PRUint32 K256[64] post_align = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * SHA-256 initial hash values. SHA256_Begin() copies these eight words into
+ * the context's h[] array before any input is processed.
+ */
+static const PRUint32 H256[8] = {
+ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+#if defined(IS_LITTLE_ENDIAN)
+#if (_MSC_VER >= 1300)
+#include <stdlib.h>
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(_MSC_VER) && defined(NSS_X86_OR_X64)
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif
+#endif
+#define FASTCALL __fastcall
+
+/*
+ * Byte-reverse a 32-bit word using the x86 BSWAP instruction (MSVC inline
+ * assembly variant). There is no return statement: the result is left in
+ * eax, which is presumably what the compiler treats as the return value
+ * for this calling convention -- confirm before changing the signature.
+ */
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd)
+{
+ __asm {
+ mov eax,dwd
+ bswap eax
+ }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && defined(NSS_X86_OR_X64)
+/*
+ * Byte-reverse a 32-bit word using the x86 BSWAP instruction (GNU inline
+ * assembly variant). The "+r" constraint makes `value` both the input and
+ * the output register, so the swap happens in place.
+ */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ __asm__("bswap %0"
+ : "+r"(value));
+ return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && (defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__))))
+/*
+ * Byte-reverse a 32-bit word using the ARM REV instruction. Only compiled
+ * for the ARM architecture/Thumb-2 configurations selected by the
+ * preprocessor condition above. `value` is the input register operand and
+ * `ret` the separate output register operand.
+ */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ PRUint32 ret;
+ __asm__("rev %0, %1"
+ : "=r"(ret)
+ : "r"(value));
+ return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#else
+#define SWAP4MASK 0x00FF00FF
+/*
+ * Portable fallback: reverse the byte order of a 32-bit word without any
+ * compiler intrinsic or inline assembly.
+ */
+static PRUint32
+swap4b(PRUint32 value)
+{
+    /* Step 1: exchange the upper and lower 16-bit halves. */
+    const PRUint32 halvesSwapped = (value << 16) | (value >> 16);
+    /* Step 2: exchange the two bytes inside each 16-bit half. */
+    const PRUint32 movedUp = (halvesSwapped & SWAP4MASK) << 8;
+    const PRUint32 movedDown = (halvesSwapped >> 8) & SWAP4MASK;
+
+    return movedUp | movedDown;
+}
+#define SHA_HTONL(x) swap4b(x)
+#endif
+#define BYTESWAP4(x) x = SHA_HTONL(x)
+#endif /* defined(IS_LITTLE_ENDIAN) */
+
+/*
+ * ROTR32(x, n) / ROTL32(x, n): rotate x right / left by n bits.
+ *
+ * MSVC uses the _lrotr/_lrotl intrinsics. Elsewhere the rotate is built
+ * from two shifts; the width is taken from sizeof(x), so x should be an
+ * unsigned 32-bit value for the name to be accurate. n must be strictly
+ * between 0 and the bit width of x (a shift by the full width is
+ * undefined). Arguments are parenthesized so expression arguments are
+ * handled correctly; x and n are evaluated more than once.
+ */
+#if defined(_MSC_VER)
+#pragma intrinsic(_lrotr, _lrotl)
+#define ROTR32(x, n) _lrotr(x, n)
+#define ROTL32(x, n) _lrotl(x, n)
+#else
+#define ROTR32(x, n) (((x) >> (n)) | ((x) << ((8 * sizeof(x)) - (n))))
+#define ROTL32(x, n) (((x) << (n)) | ((x) >> ((8 * sizeof(x)) - (n))))
+#endif
+
+/*
+ * Capital Sigma (S0, S1) and lower-case sigma (s0, s1) functions for the
+ * 32-bit hashes. S0/S1 are used by the scalar ROUND macro on the working
+ * variables; s0/s1 are used by INITW when expanding the message schedule.
+ * These names are #undef'd after SHA256_Compress_Generic and redefined
+ * later for the 64-bit hashes.
+ */
+#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
+#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
+#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ SHR(x, 10))
+
+/*
+ * "Native" entry points: installed into the context by SHA256_Begin() and
+ * SHA224_Begin() when the run-time hardware check succeeds. When
+ * USE_HW_SHA2 is not defined, this file provides failing stubs for them.
+ */
+void SHA256_Compress_Native(SHA256Context *ctx);
+void SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input, unsigned int inputLen);
+
+/* Portable implementations defined later in this file. */
+static void SHA256_Compress_Generic(SHA256Context *ctx);
+static void SHA256_Update_Generic(SHA256Context *ctx, const unsigned char *input,
+ unsigned int inputLen);
+
+#if !defined(USE_HW_SHA2)
+/*
+ * Stub used when the build has no hardware SHA-2 support. In such builds
+ * the Begin functions never select the native entry points, so reaching
+ * this function indicates a bug: it records SEC_ERROR_LIBRARY_FAILURE and
+ * asserts. No hashing is performed.
+ */
+void
+SHA256_Compress_Native(SHA256Context *ctx)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+
+/*
+ * Stub counterpart of SHA256_Compress_Native for the update entry point;
+ * same behavior: set SEC_ERROR_LIBRARY_FAILURE and assert, consuming none
+ * of the input.
+ */
+void
+SHA256_Update_Native(SHA256Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ PORT_Assert(0);
+}
+#endif
+
+/*
+ * Allocate storage for one SHA-256 context with PORT_New.
+ * Returns whatever PORT_New returns; the context is not initialized here,
+ * so SHA256_Begin() must be called before it is used for hashing.
+ */
+SHA256Context *
+SHA256_NewContext(void)
+{
+    return PORT_New(SHA256Context);
+}
+
+/*
+ * Wipe a SHA-256 context and optionally release its storage.
+ *
+ * ctx:    context to clear. NULL is accepted and ignored.
+ * freeit: when true, the storage is also released with PORT_Free(), so
+ *         ctx must then be heap storage such as SHA256_NewContext()
+ *         returns.
+ */
+void
+SHA256_DestroyContext(SHA256Context *ctx, PRBool freeit)
+{
+    /* Behave like free(NULL): destroying "no context" is a no-op rather
+     * than a NULL dereference inside memset. */
+    if (!ctx) {
+        return;
+    }
+    /* Clear the hash state and any buffered input before the memory is
+     * reused or freed. */
+    memset(ctx, 0, sizeof *ctx);
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+
+/*
+ * Reset ctx for a new SHA-256 computation: zero the whole context, load
+ * the initial hash values, and install the compress/update function
+ * pointers (native when the hardware check passes, generic otherwise).
+ */
+void
+SHA256_Begin(SHA256Context *ctx)
+{
+    PRBool hwAvailable;
+
+    memset(ctx, 0, sizeof *ctx);
+    memcpy(H, H256, sizeof H256);
+
+#if defined(USE_HW_SHA2) && defined(IS_LITTLE_ENDIAN)
+    /* arm's implementation is tested on little endian only */
+    hwAvailable = arm_sha2_support() || (sha_support() && ssse3_support() && sse4_1_support());
+#else
+    hwAvailable = PR_FALSE;
+#endif
+
+    ctx->compress = hwAvailable ? SHA256_Compress_Native : SHA256_Compress_Generic;
+    ctx->update = hwAvailable ? SHA256_Update_Native : SHA256_Update_Generic;
+}
+
+#if defined(USE_PPC_CRYPTO)
+
+/*
+ * One SHA-256 round, vector variant. Expects `s0`, `w[]` and `rshift` to
+ * be in scope at the expansion site. w[n / 4] already has the round
+ * constants added (done by the caller before the rounds start).
+ * The two vshasigmaw calls take the place of S1(e) and S0(a) in the
+ * scalar variant below, and the two vec_sel calls take the place of Ch
+ * and Maj -- presumably equivalent; confirm against the builtin docs.
+ * After every round except the fourth of each group of four, w[n / 4] is
+ * shifted by rshift (4 octets) so that, presumably, the next round's word
+ * moves into the lane whose result is read back at the end.
+ * NOTE: expands to several statements ending in an unbraced `if`; it is
+ * only safe where it is used as a full statement sequence.
+ */
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ s0 = __builtin_crypto_vshasigmaw(e, 1, 0xf); \
+ h += s0 + vec_sel(g, f, e) + w[n / 4]; \
+ d += h; \
+ s0 = __builtin_crypto_vshasigmaw(a, 1, 0); \
+ h += s0 + vec_sel(b, c, vec_xor(a, b)); \
+ if (n % 4 != 3) \
+ w[n / 4] = vec_sro(w[n / 4], rshift);
+
+#else
+
+/*
+ * One SHA-256 round, scalar variant. Uses W (and therefore `ctx`) from
+ * the expansion site.
+ *   h += S1(e) + Ch(e, f, g) + K256[n] + W[n];   first partial sum, kept in h
+ *   d += h;                                      fold it into d
+ *   h += S0(a) + Maj(a, b, c);                   add the second partial sum
+ * The caller rotates the argument list between rounds instead of moving
+ * values between variables.
+ */
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ h += S1(e) + Ch(e, f, g) + K256[n] + W[n]; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
+
+#endif
+
+/*
+ * All 64 SHA-256 rounds, fully unrolled. The eight working variables
+ * a..h must be in scope at the expansion site, together with whatever the
+ * active ROUND variant needs. Each successive ROUND call rotates the
+ * argument list by one position, so after every group of eight rounds the
+ * variables are back in their original roles.
+ */
+#define SHA256_UNROLLED_ROUNDS \
+ ROUND(0, a, b, c, d, e, f, g, h) \
+ ROUND(1, h, a, b, c, d, e, f, g) \
+ ROUND(2, g, h, a, b, c, d, e, f) \
+ ROUND(3, f, g, h, a, b, c, d, e) \
+ ROUND(4, e, f, g, h, a, b, c, d) \
+ ROUND(5, d, e, f, g, h, a, b, c) \
+ ROUND(6, c, d, e, f, g, h, a, b) \
+ ROUND(7, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(8, a, b, c, d, e, f, g, h) \
+ ROUND(9, h, a, b, c, d, e, f, g) \
+ ROUND(10, g, h, a, b, c, d, e, f) \
+ ROUND(11, f, g, h, a, b, c, d, e) \
+ ROUND(12, e, f, g, h, a, b, c, d) \
+ ROUND(13, d, e, f, g, h, a, b, c) \
+ ROUND(14, c, d, e, f, g, h, a, b) \
+ ROUND(15, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(16, a, b, c, d, e, f, g, h) \
+ ROUND(17, h, a, b, c, d, e, f, g) \
+ ROUND(18, g, h, a, b, c, d, e, f) \
+ ROUND(19, f, g, h, a, b, c, d, e) \
+ ROUND(20, e, f, g, h, a, b, c, d) \
+ ROUND(21, d, e, f, g, h, a, b, c) \
+ ROUND(22, c, d, e, f, g, h, a, b) \
+ ROUND(23, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(24, a, b, c, d, e, f, g, h) \
+ ROUND(25, h, a, b, c, d, e, f, g) \
+ ROUND(26, g, h, a, b, c, d, e, f) \
+ ROUND(27, f, g, h, a, b, c, d, e) \
+ ROUND(28, e, f, g, h, a, b, c, d) \
+ ROUND(29, d, e, f, g, h, a, b, c) \
+ ROUND(30, c, d, e, f, g, h, a, b) \
+ ROUND(31, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(32, a, b, c, d, e, f, g, h) \
+ ROUND(33, h, a, b, c, d, e, f, g) \
+ ROUND(34, g, h, a, b, c, d, e, f) \
+ ROUND(35, f, g, h, a, b, c, d, e) \
+ ROUND(36, e, f, g, h, a, b, c, d) \
+ ROUND(37, d, e, f, g, h, a, b, c) \
+ ROUND(38, c, d, e, f, g, h, a, b) \
+ ROUND(39, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(40, a, b, c, d, e, f, g, h) \
+ ROUND(41, h, a, b, c, d, e, f, g) \
+ ROUND(42, g, h, a, b, c, d, e, f) \
+ ROUND(43, f, g, h, a, b, c, d, e) \
+ ROUND(44, e, f, g, h, a, b, c, d) \
+ ROUND(45, d, e, f, g, h, a, b, c) \
+ ROUND(46, c, d, e, f, g, h, a, b) \
+ ROUND(47, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(48, a, b, c, d, e, f, g, h) \
+ ROUND(49, h, a, b, c, d, e, f, g) \
+ ROUND(50, g, h, a, b, c, d, e, f) \
+ ROUND(51, f, g, h, a, b, c, d, e) \
+ ROUND(52, e, f, g, h, a, b, c, d) \
+ ROUND(53, d, e, f, g, h, a, b, c) \
+ ROUND(54, c, d, e, f, g, h, a, b) \
+ ROUND(55, b, c, d, e, f, g, h, a) \
+ \
+ ROUND(56, a, b, c, d, e, f, g, h) \
+ ROUND(57, h, a, b, c, d, e, f, g) \
+ ROUND(58, g, h, a, b, c, d, e, f) \
+ ROUND(59, f, g, h, a, b, c, d, e) \
+ ROUND(60, e, f, g, h, a, b, c, d) \
+ ROUND(61, d, e, f, g, h, a, b, c) \
+ ROUND(62, c, d, e, f, g, h, a, b) \
+ ROUND(63, b, c, d, e, f, g, h, a)
+
+/*
+ * Fold the 64-byte block currently stored in the context buffer (W / B)
+ * into the running hash state H.
+ *
+ * Two compile-time variants:
+ *  - USE_PPC_CRYPTO: vector implementation built on the
+ *    __builtin_crypto_vshasigmaw builtin; it reads W[0..15] and keeps the
+ *    expanded schedule in a local array.
+ *  - otherwise: portable scalar implementation that byte-swaps W[0..15] in
+ *    place on little-endian hosts and then expands the message schedule
+ *    into W[16..63], overwriting the context buffer.
+ * ROUND is #undef'd at the end of whichever variant is compiled.
+ */
+static void
+SHA256_Compress_Generic(SHA256Context *ctx)
+{
+#if defined(USE_PPC_CRYPTO)
+ vec_u32 w[16], s0, s1;
+ /* octet-shift counts are expressed in bits: 4 bytes and 8 bytes */
+ const vec_u8 rshift = (vec_u8)vec_splats(4 << 3);
+ const vec_u8 shifthalf = (vec_u8)vec_splats(8 << 3);
+ /* permutation that reverses the bytes inside each 32-bit element */
+ const vec_u8 bswap4 = (vec_u8){
+ 3, 2, 1, 0, 7, 6, 5, 4, 11,
+ 10, 9, 8, 15, 14, 13, 12
+ };
+ unsigned i;
+
+ /* load the 16 input words, four per vector, and byte-reverse each word */
+ for (i = 0; i < 4; i++) {
+ w[i] = vec_vsx_ld(0, &W[i * 4]);
+ w[i] = vec_perm(w[i], w[i], bswap4);
+ }
+
+ /* prepare the message schedule */
+ for (i = 4; i < 16; i++) {
+ vec_u32 off1 = vec_sld(w[i - 3], w[i - 4], 12);
+ vec_u32 off2 = vec_sld(w[i - 1], w[i - 2], 12);
+ s0 = __builtin_crypto_vshasigmaw(off1, 0, 0);
+ /* first half, s1 depends on two prior ints */
+ s1 = __builtin_crypto_vshasigmaw(w[i - 1], 0, 0xf);
+ s1 = vec_sro(s1, shifthalf);
+ w[i] = w[i - 4] + s0 + off2 + s1;
+
+ /* second half s1 */
+ s1 = __builtin_crypto_vshasigmaw(w[i], 0, 0xf);
+ s1 = vec_slo(s1, shifthalf);
+ w[i] += s1;
+ }
+
+ /* add the round constants up front so ROUND only has to read w[n / 4] */
+ for (i = 0; i < 16; i++) {
+ w[i] += vec_ld(0, &K256[i * 4]);
+ }
+
+ /* each working variable is replicated into every lane of its vector */
+ vec_u32 a, b, c, d, e, f, g, h;
+ a = vec_splats(H[0]);
+ b = vec_splats(H[1]);
+ c = vec_splats(H[2]);
+ d = vec_splats(H[3]);
+ e = vec_splats(H[4]);
+ f = vec_splats(H[5]);
+ g = vec_splats(H[6]);
+ h = vec_splats(H[7]);
+
+ SHA256_UNROLLED_ROUNDS;
+
+ /* only element 0 of each vector is folded back into the hash state */
+ H[0] += a[0];
+ H[1] += b[0];
+ H[2] += c[0];
+ H[3] += d[0];
+ H[4] += e[0];
+ H[5] += f[0];
+ H[6] += g[0];
+ H[7] += h[0];
+
+#undef ROUND
+
+#else /* USE_PPC_CRYPTO*/
+
+ {
+#if defined(IS_LITTLE_ENDIAN)
+ /* byte-reverse the 16 input words in place on little-endian hosts */
+ BYTESWAP4(W[0]);
+ BYTESWAP4(W[1]);
+ BYTESWAP4(W[2]);
+ BYTESWAP4(W[3]);
+ BYTESWAP4(W[4]);
+ BYTESWAP4(W[5]);
+ BYTESWAP4(W[6]);
+ BYTESWAP4(W[7]);
+ BYTESWAP4(W[8]);
+ BYTESWAP4(W[9]);
+ BYTESWAP4(W[10]);
+ BYTESWAP4(W[11]);
+ BYTESWAP4(W[12]);
+ BYTESWAP4(W[13]);
+ BYTESWAP4(W[14]);
+ BYTESWAP4(W[15]);
+#endif
+
+/* schedule word t is derived from words t-2, t-7, t-15 and t-16 */
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+/* prepare the "message schedule" */
+#ifdef NOUNROLL256
+ {
+ int t;
+ for (t = 16; t < 64; ++t) {
+ INITW(t);
+ }
+ }
+#else
+ INITW(16);
+ INITW(17);
+ INITW(18);
+ INITW(19);
+
+ INITW(20);
+ INITW(21);
+ INITW(22);
+ INITW(23);
+ INITW(24);
+ INITW(25);
+ INITW(26);
+ INITW(27);
+ INITW(28);
+ INITW(29);
+
+ INITW(30);
+ INITW(31);
+ INITW(32);
+ INITW(33);
+ INITW(34);
+ INITW(35);
+ INITW(36);
+ INITW(37);
+ INITW(38);
+ INITW(39);
+
+ INITW(40);
+ INITW(41);
+ INITW(42);
+ INITW(43);
+ INITW(44);
+ INITW(45);
+ INITW(46);
+ INITW(47);
+ INITW(48);
+ INITW(49);
+
+ INITW(50);
+ INITW(51);
+ INITW(52);
+ INITW(53);
+ INITW(54);
+ INITW(55);
+ INITW(56);
+ INITW(57);
+ INITW(58);
+ INITW(59);
+
+ INITW(60);
+ INITW(61);
+ INITW(62);
+ INITW(63);
+
+#endif
+#undef INITW
+ }
+ {
+ PRUint32 a, b, c, d, e, f, g, h;
+
+ /* start the working variables from the current hash state */
+ a = H[0];
+ b = H[1];
+ c = H[2];
+ d = H[3];
+ e = H[4];
+ f = H[5];
+ g = H[6];
+ h = H[7];
+
+#ifdef NOUNROLL256
+ {
+ int t;
+ /* eight rounds per iteration; the rotated argument lists return to
+ * their starting order after each group of eight */
+ for (t = 0; t < 64; t += 8) {
+ ROUND(t + 0, a, b, c, d, e, f, g, h)
+ ROUND(t + 1, h, a, b, c, d, e, f, g)
+ ROUND(t + 2, g, h, a, b, c, d, e, f)
+ ROUND(t + 3, f, g, h, a, b, c, d, e)
+ ROUND(t + 4, e, f, g, h, a, b, c, d)
+ ROUND(t + 5, d, e, f, g, h, a, b, c)
+ ROUND(t + 6, c, d, e, f, g, h, a, b)
+ ROUND(t + 7, b, c, d, e, f, g, h, a)
+ }
+ }
+#else
+ SHA256_UNROLLED_ROUNDS;
+#endif
+
+ /* fold the working variables back into the hash state */
+ H[0] += a;
+ H[1] += b;
+ H[2] += c;
+ H[3] += d;
+ H[4] += e;
+ H[5] += f;
+ H[6] += g;
+ H[7] += h;
+ }
+#undef ROUND
+#endif /* !USE_PPC_CRYPTO */
+}
+
+#undef s0
+#undef s1
+#undef S0
+#undef S1
+
+/*
+ * Feed inputLen bytes from input into the hash by dispatching to the
+ * update routine that the Begin function installed in the context.
+ */
+void
+SHA256_Update(SHA256Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    (*ctx->update)(ctx, input, inputLen);
+}
+
+/*
+ * Portable update routine: absorb inputLen bytes of input.
+ *
+ * The low six bits of ctx->sizeLo give the number of bytes already waiting
+ * in the block buffer. The byte counter (sizeLo with carry into sizeHi) is
+ * advanced first; then the input is consumed in three phases: top up a
+ * partially filled buffer, process whole blocks, and stash the remainder.
+ * A zero-length call changes nothing.
+ */
+static void
+SHA256_Update_Generic(SHA256Context *ctx, const unsigned char *input,
+                      unsigned int inputLen)
+{
+    unsigned int buffered = ctx->sizeLo & 0x3f;
+    const unsigned char *src = input;
+    unsigned int remaining = inputLen;
+
+    if (inputLen == 0) {
+        return;
+    }
+
+    /* Account for the new bytes before processing; carry on wrap-around. */
+    ctx->sizeLo += inputLen;
+    if (ctx->sizeLo < inputLen) {
+        ctx->sizeHi++;
+    }
+
+    /* Phase 1: if data is already buffered, attempt to complete the block. */
+    if (buffered != 0) {
+        const unsigned int room = SHA256_BLOCK_LENGTH - buffered;
+        const unsigned int take = (remaining < room) ? remaining : room;
+
+        memcpy(B + buffered, src, take);
+        src += take;
+        remaining -= take;
+        if (take == room) {
+            SHA256_Compress_Generic(ctx);
+        }
+    }
+
+    /* Phase 2: process every complete block that is left. */
+    for (; remaining >= SHA256_BLOCK_LENGTH; remaining -= SHA256_BLOCK_LENGTH) {
+        memcpy(B, src, SHA256_BLOCK_LENGTH);
+        src += SHA256_BLOCK_LENGTH;
+        SHA256_Compress_Generic(ctx);
+    }
+
+    /* Phase 3: keep any tail bytes for the next call. */
+    if (remaining != 0) {
+        memcpy(B, src, remaining);
+    }
+}
+
+/*
+ * Finish the hash: pad the message, append its length in bits, run the
+ * final compression and copy the digest out.
+ *
+ * digest:       receives min(SHA256_LENGTH, maxDigestLen) bytes.
+ * digestLen:    optional; receives the number of bytes written.
+ * maxDigestLen: capacity of digest.
+ *
+ * On little-endian hosts the state words in the context are byte-swapped
+ * in place before being copied out, so the context's hash state is
+ * modified by this call.
+ */
+void
+SHA256_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    const unsigned int buffered = ctx->sizeLo & 0x3f;
+    unsigned int padBytes;
+    unsigned int outLen;
+    PRUint32 bitsHi, bitsLo;
+
+    /* Pad up to byte 56 of the current block, or of the next block when
+     * fewer than eight bytes remain for the length field. */
+    if (buffered < 56) {
+        padBytes = 56 - buffered;
+    } else {
+        padBytes = 56 + 64 - buffered;
+    }
+
+    /* Message length in bits, captured before padding bumps the counter. */
+    bitsHi = (ctx->sizeHi << 3) | (ctx->sizeLo >> 29);
+    bitsLo = (ctx->sizeLo << 3);
+
+    ctx->update(ctx, pad, padBytes);
+
+#if defined(IS_LITTLE_ENDIAN)
+    W[14] = SHA_HTONL(bitsHi);
+    W[15] = SHA_HTONL(bitsLo);
+#else
+    W[14] = bitsHi;
+    W[15] = bitsLo;
+#endif
+    ctx->compress(ctx);
+
+    /* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        unsigned int i;
+        for (i = 0; i < 8; i++) {
+            BYTESWAP4(H[i]);
+        }
+    }
+#endif
+    outLen = PR_MIN(SHA256_LENGTH, maxDigestLen);
+    memcpy(digest, H, outLen);
+    if (digestLen) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * Export the current hash state words without padding or finalization.
+ * The context is left untouched: the state is copied to a local array,
+ * byte-swapped there on little-endian hosts, and then up to
+ * min(SHA256_LENGTH, maxDigestLen) bytes are written to digest.
+ * digestLen is optional and receives the number of bytes written.
+ */
+void
+SHA256_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint32 state[8];
+    unsigned int outLen = PR_MIN(SHA256_LENGTH, maxDigestLen);
+
+    memcpy(state, ctx->h, sizeof(state));
+
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        unsigned int i;
+        for (i = 0; i < 8; i++) {
+            BYTESWAP4(state[i]);
+        }
+    }
+#endif
+
+    memcpy(digest, state, outLen);
+    if (digestLen) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * One-shot SHA-256 of src[0..src_length). dest must have room for
+ * SHA256_LENGTH bytes. The temporary context is cleared before returning.
+ * Always returns SECSuccess.
+ */
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA256Context hashCtx;
+    unsigned int written;
+
+    SHA256_Begin(&hashCtx);
+    SHA256_Update(&hashCtx, src, src_length);
+    SHA256_End(&hashCtx, dest, &written, SHA256_LENGTH);
+
+    /* scrub the intermediate state held on the stack */
+    memset(&hashCtx, 0, sizeof hashCtx);
+
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHA-256 of a NUL-terminated string; the terminator itself is
+ * not hashed. Returns the result of SHA256_HashBuf().
+ */
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHA256_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/* Intentionally empty: this implementation produces no trace output. */
+void
+SHA256_TraceState(SHA256Context *ctx)
+{
+}
+
+/*
+ * Number of bytes SHA256_Flatten() writes: the size of the context
+ * structure itself. The ctx argument is not inspected.
+ */
+unsigned int
+SHA256_FlattenSize(SHA256Context *ctx)
+{
+    return sizeof(SHA256Context);
+}
+
+/*
+ * Serialize ctx by copying the raw structure bytes into space, which must
+ * hold at least SHA256_FlattenSize() bytes. Always returns SECSuccess.
+ * NOTE(review): the copy is byte-for-byte, so it includes the compress and
+ * update function pointers stored in the context.
+ */
+SECStatus
+SHA256_Flatten(SHA256Context *ctx, unsigned char *space)
+{
+    PORT_Memcpy(space, ctx, sizeof(SHA256Context));
+    return SECSuccess;
+}
+
+/*
+ * Rebuild a context from bytes produced by SHA256_Flatten(). A fresh
+ * context is allocated and filled with a raw copy of space; arg is unused.
+ * Returns the new context, or the failed allocation result unchanged.
+ */
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{
+    SHA256Context *restored = SHA256_NewContext();
+
+    if (!restored) {
+        return restored;
+    }
+    PORT_Memcpy(restored, space, sizeof(SHA256Context));
+    return restored;
+}
+
+/* Duplicate src into dest with a raw byte copy of the whole context. */
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+    memcpy(dest, src, sizeof(SHA256Context));
+}
+
+/* ============= SHA224 implementation ================================== */
+
+/* SHA-224 initial hash values */
+/* Copied into the context's h[] array by SHA224_Begin(). */
+static const PRUint32 H224[8] = {
+ 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+ 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
+};
+
+/*
+ * Allocate a SHA-224 context. SHA-224 shares the SHA-256 context type, so
+ * this simply forwards to SHA256_NewContext().
+ */
+SHA224Context *
+SHA224_NewContext(void)
+{
+    SHA224Context *fresh = SHA256_NewContext();
+
+    return fresh;
+}
+
+/*
+ * Clear (and, when freeit is true, free) a SHA-224 context by forwarding
+ * to SHA256_DestroyContext().
+ */
+void
+SHA224_DestroyContext(SHA224Context *ctx, PRBool freeit)
+{
+ SHA256_DestroyContext(ctx, freeit);
+}
+
+/*
+ * Reset ctx for a new SHA-224 computation. Identical to SHA256_Begin()
+ * except that the SHA-224 initial hash values are loaded.
+ */
+void
+SHA224_Begin(SHA224Context *ctx)
+{
+    PRBool hwAvailable = PR_FALSE;
+
+    memset(ctx, 0, sizeof *ctx);
+    memcpy(H, H224, sizeof H224);
+
+#if defined(USE_HW_SHA2) && defined(IS_LITTLE_ENDIAN)
+    /* arm's implementation is tested on little endian only */
+    hwAvailable = arm_sha2_support() || (sha_support() && ssse3_support() && sse4_1_support());
+#endif
+
+    if (!hwAvailable) {
+        ctx->compress = SHA256_Compress_Generic;
+        ctx->update = SHA256_Update_Generic;
+    } else {
+        ctx->compress = SHA256_Compress_Native;
+        ctx->update = SHA256_Update_Native;
+    }
+}
+
+/*
+ * Feed inputLen bytes into the SHA-224 computation via the update routine
+ * installed in the context by SHA224_Begin().
+ */
+void
+SHA224_Update(SHA224Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    (*ctx->update)(ctx, input, inputLen);
+}
+
+/*
+ * Finish a SHA-224 computation. The output limit is clamped to
+ * SHA224_LENGTH and the rest of the work is done by SHA256_End().
+ */
+void
+SHA224_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    unsigned int limit = SHA224_LENGTH;
+
+    if (maxDigestLen < SHA224_LENGTH) {
+        limit = maxDigestLen;
+    }
+    SHA256_End(ctx, digest, digestLen, limit);
+}
+
+/*
+ * Export the raw SHA-224 state without finalization. The output limit is
+ * clamped to SHA224_LENGTH before forwarding to SHA256_EndRaw().
+ */
+void
+SHA224_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    unsigned int limit = SHA224_LENGTH;
+
+    if (maxDigestLen < SHA224_LENGTH) {
+        limit = maxDigestLen;
+    }
+    SHA256_EndRaw(ctx, digest, digestLen, limit);
+}
+
+/*
+ * One-shot SHA-224 of src[0..src_length). dest must have room for
+ * SHA224_LENGTH bytes. Uses the SHA-256 update/end routines on a context
+ * started with SHA224_Begin(), then clears the temporary context.
+ * Always returns SECSuccess.
+ */
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA256Context hashCtx;
+    unsigned int written;
+
+    SHA224_Begin(&hashCtx);
+    SHA256_Update(&hashCtx, src, src_length);
+    SHA256_End(&hashCtx, dest, &written, SHA224_LENGTH);
+
+    /* scrub the intermediate state held on the stack */
+    memset(&hashCtx, 0, sizeof hashCtx);
+
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHA-224 of a NUL-terminated string; the terminator itself is
+ * not hashed. Returns the result of SHA224_HashBuf().
+ */
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHA224_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/* Intentionally empty: this implementation produces no trace output. */
+void
+SHA224_TraceState(SHA224Context *ctx)
+{
+}
+
+/* Size in bytes of a flattened SHA-224 context; same as for SHA-256. */
+unsigned int
+SHA224_FlattenSize(SHA224Context *ctx)
+{
+    unsigned int bytes = SHA256_FlattenSize(ctx);
+
+    return bytes;
+}
+
+/* Serialize a SHA-224 context into space by forwarding to SHA256_Flatten(). */
+SECStatus
+SHA224_Flatten(SHA224Context *ctx, unsigned char *space)
+{
+    SECStatus status = SHA256_Flatten(ctx, space);
+
+    return status;
+}
+
+/*
+ * Rebuild a SHA-224 context from flattened bytes by forwarding to
+ * SHA256_Resurrect(); returns its result unchanged.
+ */
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+    SHA224Context *restored = SHA256_Resurrect(space, arg);
+
+    return restored;
+}
+
+/* Duplicate src into dest by forwarding to SHA256_Clone(). */
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+ SHA256_Clone(dest, src);
+}
+
+/* ======= SHA512 and SHA384 common constants and defines ================= */
+
+/* common #defines for SHA512 and SHA384 */
+#if defined(HAVE_LONG_LONG)
+#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+#define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7))
+#define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SHR(x, 6))
+
+#if PR_BYTES_PER_LONG == 8
+#define ULLC(hi, lo) 0x##hi##lo##UL
+#elif defined(_MSC_VER)
+#define ULLC(hi, lo) 0x##hi##lo##ui64
+#else
+#define ULLC(hi, lo) 0x##hi##lo##ULL
+#endif
+
+#define BYTESWAP8(x) x = FREEBL_HTONLL(x)
+
+#else /* no long long */
+
+#if defined(IS_LITTLE_ENDIAN)
+#define ULLC(hi, lo) \
+ { \
+ 0x##lo##U, 0x##hi##U \
+ }
+#define FREEBL_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), \
+ x.hi ^= x.lo ^= x.hi ^= x.lo, x)
+#define BYTESWAP8(x) \
+ do { \
+ PRUint32 tmp; \
+ BYTESWAP4(x.lo); \
+ BYTESWAP4(x.hi); \
+ tmp = x.lo; \
+ x.lo = x.hi; \
+ x.hi = tmp; \
+ } while (0)
+#else
+#define ULLC(hi, lo) \
+ { \
+ 0x##hi##U, 0x##lo##U \
+ }
+#endif
+
+#endif
+
+#if defined(USE_PPC_CRYPTO)
+void sha512_block_p8(void *ctx, const void *inp, size_t len);
+
+#else /* USE_PPC_CRYPTO */
+
+/* SHA-384 and SHA-512 constants, K512. */
+static const PRUint64 K512[80] = {
+#if PR_BYTES_PER_LONG == 8
+ 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
+ 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
+ 0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
+ 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
+ 0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
+ 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
+ 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
+ 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
+ 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
+ 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
+ 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
+ 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
+ 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
+ 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
+ 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
+ 0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
+ 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
+ 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
+ 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
+ 0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
+ 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
+ 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
+ 0xd192e819d6ef5218UL, 0xd69906245565a910UL,
+ 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
+ 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
+ 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
+ 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
+ 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
+ 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
+ 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
+ 0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
+ 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
+ 0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
+ 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
+ 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
+ 0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
+ 0x28db77f523047d84UL, 0x32caab7b40c72493UL,
+ 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
+ 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
+ 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL
+#else
+ ULLC(428a2f98, d728ae22), ULLC(71374491, 23ef65cd),
+ ULLC(b5c0fbcf, ec4d3b2f), ULLC(e9b5dba5, 8189dbbc),
+ ULLC(3956c25b, f348b538), ULLC(59f111f1, b605d019),
+ ULLC(923f82a4, af194f9b), ULLC(ab1c5ed5, da6d8118),
+ ULLC(d807aa98, a3030242), ULLC(12835b01, 45706fbe),
+ ULLC(243185be, 4ee4b28c), ULLC(550c7dc3, d5ffb4e2),
+ ULLC(72be5d74, f27b896f), ULLC(80deb1fe, 3b1696b1),
+ ULLC(9bdc06a7, 25c71235), ULLC(c19bf174, cf692694),
+ ULLC(e49b69c1, 9ef14ad2), ULLC(efbe4786, 384f25e3),
+ ULLC(0fc19dc6, 8b8cd5b5), ULLC(240ca1cc, 77ac9c65),
+ ULLC(2de92c6f, 592b0275), ULLC(4a7484aa, 6ea6e483),
+ ULLC(5cb0a9dc, bd41fbd4), ULLC(76f988da, 831153b5),
+ ULLC(983e5152, ee66dfab), ULLC(a831c66d, 2db43210),
+ ULLC(b00327c8, 98fb213f), ULLC(bf597fc7, beef0ee4),
+ ULLC(c6e00bf3, 3da88fc2), ULLC(d5a79147, 930aa725),
+ ULLC(06ca6351, e003826f), ULLC(14292967, 0a0e6e70),
+ ULLC(27b70a85, 46d22ffc), ULLC(2e1b2138, 5c26c926),
+ ULLC(4d2c6dfc, 5ac42aed), ULLC(53380d13, 9d95b3df),
+ ULLC(650a7354, 8baf63de), ULLC(766a0abb, 3c77b2a8),
+ ULLC(81c2c92e, 47edaee6), ULLC(92722c85, 1482353b),
+ ULLC(a2bfe8a1, 4cf10364), ULLC(a81a664b, bc423001),
+ ULLC(c24b8b70, d0f89791), ULLC(c76c51a3, 0654be30),
+ ULLC(d192e819, d6ef5218), ULLC(d6990624, 5565a910),
+ ULLC(f40e3585, 5771202a), ULLC(106aa070, 32bbd1b8),
+ ULLC(19a4c116, b8d2d0c8), ULLC(1e376c08, 5141ab53),
+ ULLC(2748774c, df8eeb99), ULLC(34b0bcb5, e19b48a8),
+ ULLC(391c0cb3, c5c95a63), ULLC(4ed8aa4a, e3418acb),
+ ULLC(5b9cca4f, 7763e373), ULLC(682e6ff3, d6b2b8a3),
+ ULLC(748f82ee, 5defb2fc), ULLC(78a5636f, 43172f60),
+ ULLC(84c87814, a1f0ab72), ULLC(8cc70208, 1a6439ec),
+ ULLC(90befffa, 23631e28), ULLC(a4506ceb, de82bde9),
+ ULLC(bef9a3f7, b2c67915), ULLC(c67178f2, e372532b),
+ ULLC(ca273ece, ea26619c), ULLC(d186b8c7, 21c0c207),
+ ULLC(eada7dd6, cde0eb1e), ULLC(f57d4f7f, ee6ed178),
+ ULLC(06f067aa, 72176fba), ULLC(0a637dc5, a2c898a6),
+ ULLC(113f9804, bef90dae), ULLC(1b710b35, 131c471b),
+ ULLC(28db77f5, 23047d84), ULLC(32caab7b, 40c72493),
+ ULLC(3c9ebe0a, 15c9bebc), ULLC(431d67c4, 9c100d4c),
+ ULLC(4cc5d4be, cb3e42b6), ULLC(597f299c, fc657e2a),
+ ULLC(5fcb6fab, 3ad6faec), ULLC(6c44198c, 4a475817)
+#endif
+};
+
+#endif /* !USE_PPC_CRYPTO */
+
+struct SHA512ContextStr {
+ union {
+ PRUint64 w[80]; /* message schedule, input buffer, plus 64 words */
+ PRUint32 l[160]; /* union member: the same storage as w[], typed as PRUint32 */
+ PRUint8 b[640]; /* union member: the same storage as w[], typed as bytes (used for buffering input) */
+ } u;
+ PRUint64 h[8]; /* 8 state variables */
+ PRUint64 sizeLo; /* 64-bit count of hashed bytes. */
+};
+
+/* =========== SHA512 implementation ===================================== */
+
+/* SHA-512 initial hash values */
+static const PRUint64 H512[8] = {
+#if PR_BYTES_PER_LONG == 8
+ 0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL,
+ 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL,
+ 0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL,
+ 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL
+#else
+ ULLC(6a09e667, f3bcc908), ULLC(bb67ae85, 84caa73b),
+ ULLC(3c6ef372, fe94f82b), ULLC(a54ff53a, 5f1d36f1),
+ ULLC(510e527f, ade682d1), ULLC(9b05688c, 2b3e6c1f),
+ ULLC(1f83d9ab, fb41bd6b), ULLC(5be0cd19, 137e2179)
+#endif
+};
+
+SHA512Context *
+SHA512_NewContext(void)
+{
+ SHA512Context *ctx = PORT_New(SHA512Context); /* allocation only; no hash state is set up here */
+ return ctx; /* returned as-is: this function does not check the PORT_New result */
+}
+
+void
+SHA512_DestroyContext(SHA512Context *ctx, PRBool freeit)
+{
+ memset(ctx, 0, sizeof *ctx); /* always wipe the context, freed or not; ctx is written through without a NULL check */
+ if (freeit) {
+ PORT_Free(ctx); /* storage is released only when the caller asks for it */
+ }
+}
+
+void
+SHA512_Begin(SHA512Context *ctx)
+{
+ memset(ctx, 0, sizeof *ctx); /* clear the buffer, the state and the byte count */
+ memcpy(H, H512, sizeof H512); /* load the SHA-512 initial hash values; H is a macro defined outside this view, presumably ctx->h */
+}
+
+#if defined(SHA512_TRACE)
+#if defined(HAVE_LONG_LONG)
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %016lx, %s = %016lx\n", \
+ n, #e, d, #a, h);
+#else
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %08x%08x, %s = %08x%08x\n", \
+ n, #e, d.hi, d.lo, #a, h.hi, h.lo);
+#endif
+#else
+#define DUMP(n, a, d, e, h)
+#endif
+
+#if defined(HAVE_LONG_LONG)
+
+#define ADDTO(x, y) y += x
+
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ h += S1(e) + Ch(e, f, g) + K512[n] + W[n]; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c); \
+ DUMP(n, a, d, e, h)
+
+#else /* use only 32-bit variables, and don't unroll loops */
+
+#undef NOUNROLL512
+#define NOUNROLL512 1
+
+#define ADDTO(x, y) \
+ y.lo += x.lo; \
+ y.hi += x.hi + (x.lo > y.lo)
+
+#define ROTR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+#define ROTR64A(x, n, lo, hi) (x.lo << (64 - n) | x.hi >> (n - 32))
+#define SHR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+
+/* Capital Sigma and lower case sigma functions */
+#define s0lo(x) (ROTR64a(x, 1, lo, hi) ^ ROTR64a(x, 8, lo, hi) ^ SHR64a(x, 7, lo, hi))
+#define s0hi(x) (ROTR64a(x, 1, hi, lo) ^ ROTR64a(x, 8, hi, lo) ^ (x.hi >> 7))
+
+#define s1lo(x) (ROTR64a(x, 19, lo, hi) ^ ROTR64A(x, 61, lo, hi) ^ SHR64a(x, 6, lo, hi))
+#define s1hi(x) (ROTR64a(x, 19, hi, lo) ^ ROTR64A(x, 61, hi, lo) ^ (x.hi >> 6))
+
+#define S0lo(x) (ROTR64a(x, 28, lo, hi) ^ ROTR64A(x, 34, lo, hi) ^ ROTR64A(x, 39, lo, hi))
+#define S0hi(x) (ROTR64a(x, 28, hi, lo) ^ ROTR64A(x, 34, hi, lo) ^ ROTR64A(x, 39, hi, lo))
+
+#define S1lo(x) (ROTR64a(x, 14, lo, hi) ^ ROTR64a(x, 18, lo, hi) ^ ROTR64A(x, 41, lo, hi))
+#define S1hi(x) (ROTR64a(x, 14, hi, lo) ^ ROTR64a(x, 18, hi, lo) ^ ROTR64A(x, 41, hi, lo))
+
+/* 32-bit versions of Ch and Maj */
+#define Chxx(x, y, z, lo) ((x.lo & y.lo) ^ (~x.lo & z.lo))
+#define Majx(x, y, z, lo) ((x.lo & y.lo) ^ (x.lo & z.lo) ^ (y.lo & z.lo))
+
+#define INITW(t) \
+ do { \
+ PRUint32 lo, tm; \
+ PRUint32 cy = 0; \
+ lo = s1lo(W[t - 2]); \
+ lo += (tm = W[t - 7].lo); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = s0lo(W[t - 15])); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = W[t - 16].lo); \
+ if (lo < tm) \
+ cy++; \
+ W[t].lo = lo; \
+ W[t].hi = cy + s1hi(W[t - 2]) + W[t - 7].hi + s0hi(W[t - 15]) + W[t - 16].hi; \
+ } while (0)
+
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ { \
+ PRUint32 lo, tm, cy; \
+ lo = S1lo(e); \
+ lo += (tm = Chxx(e, f, g, lo)); \
+ cy = (lo < tm); \
+ lo += (tm = K512[n].lo); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = W[n].lo); \
+ if (lo < tm) \
+ cy++; \
+ h.lo += lo; \
+ if (h.lo < lo) \
+ cy++; \
+ h.hi += cy + S1hi(e) + Chxx(e, f, g, hi) + K512[n].hi + W[n].hi; \
+ d.lo += h.lo; \
+ d.hi += h.hi + (d.lo < h.lo); \
+ lo = S0lo(a); \
+ lo += (tm = Majx(a, b, c, lo)); \
+ cy = (lo < tm); \
+ h.lo += lo; \
+ if (h.lo < lo) \
+ cy++; \
+ h.hi += cy + S0hi(a) + Majx(a, b, c, hi); \
+ DUMP(n, a, d, e, h) \
+ }
+#endif
+
+static void
+SHA512_Compress(SHA512Context *ctx)
+{
+#if defined(USE_PPC_CRYPTO)
+ sha512_block_p8(&H[0], &W[0], 1);
+#else /* USE_PPC_CRYPTO */
+
+#if defined(IS_LITTLE_ENDIAN)
+ {
+ BYTESWAP8(W[0]);
+ BYTESWAP8(W[1]);
+ BYTESWAP8(W[2]);
+ BYTESWAP8(W[3]);
+ BYTESWAP8(W[4]);
+ BYTESWAP8(W[5]);
+ BYTESWAP8(W[6]);
+ BYTESWAP8(W[7]);
+ BYTESWAP8(W[8]);
+ BYTESWAP8(W[9]);
+ BYTESWAP8(W[10]);
+ BYTESWAP8(W[11]);
+ BYTESWAP8(W[12]);
+ BYTESWAP8(W[13]);
+ BYTESWAP8(W[14]);
+ BYTESWAP8(W[15]);
+ }
+#endif
+
+ {
+#ifdef NOUNROLL512
+ {
+ /* prepare the "message schedule" */
+ int t;
+ for (t = 16; t < 80; ++t) {
+ INITW(t);
+ }
+ }
+#else
+ INITW(16);
+ INITW(17);
+ INITW(18);
+ INITW(19);
+
+ INITW(20);
+ INITW(21);
+ INITW(22);
+ INITW(23);
+ INITW(24);
+ INITW(25);
+ INITW(26);
+ INITW(27);
+ INITW(28);
+ INITW(29);
+
+ INITW(30);
+ INITW(31);
+ INITW(32);
+ INITW(33);
+ INITW(34);
+ INITW(35);
+ INITW(36);
+ INITW(37);
+ INITW(38);
+ INITW(39);
+
+ INITW(40);
+ INITW(41);
+ INITW(42);
+ INITW(43);
+ INITW(44);
+ INITW(45);
+ INITW(46);
+ INITW(47);
+ INITW(48);
+ INITW(49);
+
+ INITW(50);
+ INITW(51);
+ INITW(52);
+ INITW(53);
+ INITW(54);
+ INITW(55);
+ INITW(56);
+ INITW(57);
+ INITW(58);
+ INITW(59);
+
+ INITW(60);
+ INITW(61);
+ INITW(62);
+ INITW(63);
+ INITW(64);
+ INITW(65);
+ INITW(66);
+ INITW(67);
+ INITW(68);
+ INITW(69);
+
+ INITW(70);
+ INITW(71);
+ INITW(72);
+ INITW(73);
+ INITW(74);
+ INITW(75);
+ INITW(76);
+ INITW(77);
+ INITW(78);
+ INITW(79);
+#endif
+ }
+#ifdef SHA512_TRACE
+ {
+ int i;
+ for (i = 0; i < 80; ++i) {
+#ifdef HAVE_LONG_LONG
+ printf("W[%2d] = %016lx\n", i, W[i]);
+#else
+ printf("W[%2d] = %08x%08x\n", i, W[i].hi, W[i].lo);
+#endif
+ }
+ }
+#endif
+ {
+ PRUint64 a, b, c, d, e, f, g, h;
+
+ a = H[0];
+ b = H[1];
+ c = H[2];
+ d = H[3];
+ e = H[4];
+ f = H[5];
+ g = H[6];
+ h = H[7];
+
+#ifdef NOUNROLL512
+ {
+ int t;
+ for (t = 0; t < 80; t += 8) {
+ ROUND(t + 0, a, b, c, d, e, f, g, h)
+ ROUND(t + 1, h, a, b, c, d, e, f, g)
+ ROUND(t + 2, g, h, a, b, c, d, e, f)
+ ROUND(t + 3, f, g, h, a, b, c, d, e)
+ ROUND(t + 4, e, f, g, h, a, b, c, d)
+ ROUND(t + 5, d, e, f, g, h, a, b, c)
+ ROUND(t + 6, c, d, e, f, g, h, a, b)
+ ROUND(t + 7, b, c, d, e, f, g, h, a)
+ }
+ }
+#else
+ ROUND(0, a, b, c, d, e, f, g, h)
+ ROUND(1, h, a, b, c, d, e, f, g)
+ ROUND(2, g, h, a, b, c, d, e, f)
+ ROUND(3, f, g, h, a, b, c, d, e)
+ ROUND(4, e, f, g, h, a, b, c, d)
+ ROUND(5, d, e, f, g, h, a, b, c)
+ ROUND(6, c, d, e, f, g, h, a, b)
+ ROUND(7, b, c, d, e, f, g, h, a)
+
+ ROUND(8, a, b, c, d, e, f, g, h)
+ ROUND(9, h, a, b, c, d, e, f, g)
+ ROUND(10, g, h, a, b, c, d, e, f)
+ ROUND(11, f, g, h, a, b, c, d, e)
+ ROUND(12, e, f, g, h, a, b, c, d)
+ ROUND(13, d, e, f, g, h, a, b, c)
+ ROUND(14, c, d, e, f, g, h, a, b)
+ ROUND(15, b, c, d, e, f, g, h, a)
+
+ ROUND(16, a, b, c, d, e, f, g, h)
+ ROUND(17, h, a, b, c, d, e, f, g)
+ ROUND(18, g, h, a, b, c, d, e, f)
+ ROUND(19, f, g, h, a, b, c, d, e)
+ ROUND(20, e, f, g, h, a, b, c, d)
+ ROUND(21, d, e, f, g, h, a, b, c)
+ ROUND(22, c, d, e, f, g, h, a, b)
+ ROUND(23, b, c, d, e, f, g, h, a)
+
+ ROUND(24, a, b, c, d, e, f, g, h)
+ ROUND(25, h, a, b, c, d, e, f, g)
+ ROUND(26, g, h, a, b, c, d, e, f)
+ ROUND(27, f, g, h, a, b, c, d, e)
+ ROUND(28, e, f, g, h, a, b, c, d)
+ ROUND(29, d, e, f, g, h, a, b, c)
+ ROUND(30, c, d, e, f, g, h, a, b)
+ ROUND(31, b, c, d, e, f, g, h, a)
+
+ ROUND(32, a, b, c, d, e, f, g, h)
+ ROUND(33, h, a, b, c, d, e, f, g)
+ ROUND(34, g, h, a, b, c, d, e, f)
+ ROUND(35, f, g, h, a, b, c, d, e)
+ ROUND(36, e, f, g, h, a, b, c, d)
+ ROUND(37, d, e, f, g, h, a, b, c)
+ ROUND(38, c, d, e, f, g, h, a, b)
+ ROUND(39, b, c, d, e, f, g, h, a)
+
+ ROUND(40, a, b, c, d, e, f, g, h)
+ ROUND(41, h, a, b, c, d, e, f, g)
+ ROUND(42, g, h, a, b, c, d, e, f)
+ ROUND(43, f, g, h, a, b, c, d, e)
+ ROUND(44, e, f, g, h, a, b, c, d)
+ ROUND(45, d, e, f, g, h, a, b, c)
+ ROUND(46, c, d, e, f, g, h, a, b)
+ ROUND(47, b, c, d, e, f, g, h, a)
+
+ ROUND(48, a, b, c, d, e, f, g, h)
+ ROUND(49, h, a, b, c, d, e, f, g)
+ ROUND(50, g, h, a, b, c, d, e, f)
+ ROUND(51, f, g, h, a, b, c, d, e)
+ ROUND(52, e, f, g, h, a, b, c, d)
+ ROUND(53, d, e, f, g, h, a, b, c)
+ ROUND(54, c, d, e, f, g, h, a, b)
+ ROUND(55, b, c, d, e, f, g, h, a)
+
+ ROUND(56, a, b, c, d, e, f, g, h)
+ ROUND(57, h, a, b, c, d, e, f, g)
+ ROUND(58, g, h, a, b, c, d, e, f)
+ ROUND(59, f, g, h, a, b, c, d, e)
+ ROUND(60, e, f, g, h, a, b, c, d)
+ ROUND(61, d, e, f, g, h, a, b, c)
+ ROUND(62, c, d, e, f, g, h, a, b)
+ ROUND(63, b, c, d, e, f, g, h, a)
+
+ ROUND(64, a, b, c, d, e, f, g, h)
+ ROUND(65, h, a, b, c, d, e, f, g)
+ ROUND(66, g, h, a, b, c, d, e, f)
+ ROUND(67, f, g, h, a, b, c, d, e)
+ ROUND(68, e, f, g, h, a, b, c, d)
+ ROUND(69, d, e, f, g, h, a, b, c)
+ ROUND(70, c, d, e, f, g, h, a, b)
+ ROUND(71, b, c, d, e, f, g, h, a)
+
+ ROUND(72, a, b, c, d, e, f, g, h)
+ ROUND(73, h, a, b, c, d, e, f, g)
+ ROUND(74, g, h, a, b, c, d, e, f)
+ ROUND(75, f, g, h, a, b, c, d, e)
+ ROUND(76, e, f, g, h, a, b, c, d)
+ ROUND(77, d, e, f, g, h, a, b, c)
+ ROUND(78, c, d, e, f, g, h, a, b)
+ ROUND(79, b, c, d, e, f, g, h, a)
+#endif
+
+ ADDTO(a, H[0]);
+ ADDTO(b, H[1]);
+ ADDTO(c, H[2]);
+ ADDTO(d, H[3]);
+ ADDTO(e, H[4]);
+ ADDTO(f, H[5]);
+ ADDTO(g, H[6]);
+ ADDTO(h, H[7]);
+ }
+
+#endif /* !USE_PPC_CRYPTO */
+}
+
+void
+SHA512_Update(SHA512Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ unsigned int inBuf; /* number of bytes already waiting in the block buffer */
+ if (!inputLen)
+ return; /* nothing to hash; the byte count is left untouched */
+
+#if defined(HAVE_LONG_LONG)
+ inBuf = (unsigned int)ctx->sizeLo & 0x7f; /* byte count modulo 128, taken before the count is updated */
+ /* Add inputLen into the count of bytes processed, before processing */
+ ctx->sizeLo += inputLen;
+#else
+ inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f; /* byte count modulo 128, taken before the count is updated */
+ ctx->sizeLo.lo += inputLen;
+ if (ctx->sizeLo.lo < inputLen) /* low word wrapped around: carry into the high word */
+ ctx->sizeLo.hi++;
+#endif
+
+ /* if data already in buffer, attempt to fill rest of buffer */
+ if (inBuf) {
+ unsigned int todo = SHA512_BLOCK_LENGTH - inBuf; /* room left in the partially filled block */
+ if (inputLen < todo)
+ todo = inputLen; /* not enough input to complete the block */
+ memcpy(B + inBuf, input, todo);
+ input += todo;
+ inputLen -= todo;
+ if (inBuf + todo == SHA512_BLOCK_LENGTH) /* the buffered block is now complete */
+ SHA512_Compress(ctx);
+ }
+
+ /* if enough data to fill one or more whole buffers, process them. */
+ while (inputLen >= SHA512_BLOCK_LENGTH) {
+ memcpy(B, input, SHA512_BLOCK_LENGTH); /* each block is copied into the context buffer before compression */
+ input += SHA512_BLOCK_LENGTH;
+ inputLen -= SHA512_BLOCK_LENGTH;
+ SHA512_Compress(ctx);
+ }
+ /* if data left over, fill it into buffer */
+ if (inputLen)
+ memcpy(B, input, inputLen); /* tail stays buffered until a later call completes the block */
+}
+
+void
+SHA512_End(SHA512Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#if defined(HAVE_LONG_LONG)
+ unsigned int inBuf = (unsigned int)ctx->sizeLo & 0x7f;
+#else
+ unsigned int inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f;
+#endif
+ unsigned int padLen = (inBuf < 112) ? (112 - inBuf) : (112 + 128 - inBuf); /* 1..128 bytes, so the buffered data ends at offset 112 and 16 bytes remain for the length */
+ PRUint64 lo;
+ LL_SHL(lo, ctx->sizeLo, 3); /* presumably lo = sizeLo << 3 (bytes to bits); taken before the padding update below changes sizeLo */
+
+ SHA512_Update(ctx, pad, padLen); /* pad is defined outside this function */
+
+#if defined(HAVE_LONG_LONG)
+ W[14] = 0; /* first of the two trailing length words is always written as zero */
+#else
+ W[14].lo = 0;
+ W[14].hi = 0;
+#endif
+
+ W[15] = lo; /* bit length goes into the last word of the block */
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP8(W[15]); /* SHA512_Compress byte-swaps W[0..15] on little-endian, so pre-swap to cancel that out */
+#endif
+ SHA512_Compress(ctx);
+
+/* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP8(H[0]); /* the state is swapped in place, so the context is not reusable without a new Begin */
+ BYTESWAP8(H[1]);
+ BYTESWAP8(H[2]);
+ BYTESWAP8(H[3]);
+ BYTESWAP8(H[4]);
+ BYTESWAP8(H[5]);
+ BYTESWAP8(H[6]);
+ BYTESWAP8(H[7]);
+#endif
+ padLen = PR_MIN(SHA512_LENGTH, maxDigestLen); /* padLen is reused as the number of digest bytes to copy */
+ memcpy(digest, H, padLen);
+ if (digestLen) /* digestLen is optional */
+ *digestLen = padLen;
+}
+
+void
+SHA512_EndRaw(SHA512Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ PRUint64 h[8]; /* local copy of the state; ctx itself is not modified by this function */
+ unsigned int len;
+
+ memcpy(h, ctx->h, sizeof(h)); /* no padding and no final compression: this is the state as it currently stands */
+
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP8(h[0]); /* swap only the local copy */
+ BYTESWAP8(h[1]);
+ BYTESWAP8(h[2]);
+ BYTESWAP8(h[3]);
+ BYTESWAP8(h[4]);
+ BYTESWAP8(h[5]);
+ BYTESWAP8(h[6]);
+ BYTESWAP8(h[7]);
+#endif
+ len = PR_MIN(SHA512_LENGTH, maxDigestLen); /* never copy more than the caller's limit */
+ memcpy(digest, h, len);
+ if (digestLen) /* digestLen is optional */
+ *digestLen = len;
+}
+
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length)
+{
+ SHA512Context ctx; /* one-shot context on the stack */
+ unsigned int outLen; /* receives the length reported by SHA512_End; not read afterwards */
+
+ SHA512_Begin(&ctx);
+ SHA512_Update(&ctx, src, src_length);
+ SHA512_End(&ctx, dest, &outLen, SHA512_LENGTH); /* up to SHA512_LENGTH bytes are written to dest */
+ memset(&ctx, 0, sizeof ctx); /* wipe the local context before returning */
+
+ return SECSuccess; /* this function has no failure path */
+}
+
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{
+ return SHA512_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+void
+SHA512_TraceState(SHA512Context *ctx)
+{
+}
+
+unsigned int
+SHA512_FlattenSize(SHA512Context *ctx)
+{
+ return sizeof *ctx;
+}
+
+SECStatus
+SHA512_Flatten(SHA512Context *ctx, unsigned char *space)
+{
+ PORT_Memcpy(space, ctx, sizeof *ctx);
+ return SECSuccess;
+}
+
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{
+ SHA512Context *ctx = SHA512_NewContext(); /* arg is not used by this function */
+ if (ctx) /* skip the copy when no context was obtained */
+ PORT_Memcpy(ctx, space, sizeof *ctx); /* reads sizeof *ctx bytes from space, the same size SHA512_Flatten writes */
+ return ctx; /* whatever SHA512_NewContext returned, possibly NULL */
+}
+
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+ memcpy(dest, src, sizeof *dest);
+}
+
+/* ======================================================================= */
+/* SHA384 uses a SHA512Context as the real context.
+** The only differences between SHA384 and SHA512 are:
+** a) the initialization values for the context, and
+** b) the number of bytes of data produced as output.
+*/
+
+/* SHA-384 initial hash values */
+static const PRUint64 H384[8] = {
+#if PR_BYTES_PER_LONG == 8
+ 0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL,
+ 0x9159015a3070dd17UL, 0x152fecd8f70e5939UL,
+ 0x67332667ffc00b31UL, 0x8eb44a8768581511UL,
+ 0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL
+#else
+ ULLC(cbbb9d5d, c1059ed8), ULLC(629a292a, 367cd507),
+ ULLC(9159015a, 3070dd17), ULLC(152fecd8, f70e5939),
+ ULLC(67332667, ffc00b31), ULLC(8eb44a87, 68581511),
+ ULLC(db0c2e0d, 64f98fa7), ULLC(47b5481d, befa4fa4)
+#endif
+};
+
+SHA384Context *
+SHA384_NewContext(void)
+{
+ return SHA512_NewContext();
+}
+
+void
+SHA384_DestroyContext(SHA384Context *ctx, PRBool freeit)
+{
+ SHA512_DestroyContext(ctx, freeit);
+}
+
+void
+SHA384_Begin(SHA384Context *ctx)
+{
+ memset(ctx, 0, sizeof *ctx); /* clear the buffer, the state and the byte count */
+ memcpy(H, H384, sizeof H384); /* load the SHA-384 initial hash values; H is a macro defined outside this view, presumably ctx->h */
+}
+
+void
+SHA384_Update(SHA384Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ SHA512_Update(ctx, input, inputLen);
+}
+
+void
+SHA384_End(SHA384Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH); /* limit handed on: SHA_MIN of the caller's limit and SHA384_LENGTH */
+ SHA512_End(ctx, digest, digestLen, maxLen); /* finalization itself is delegated to the SHA-512 routine */
+}
+
+void
+SHA384_EndRaw(SHA384Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH);
+ SHA512_EndRaw(ctx, digest, digestLen, maxLen);
+}
+
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length)
+{
+ SHA512Context ctx; /* one-shot context on the stack; SHA-384 runs on a SHA512Context */
+ unsigned int outLen; /* receives the length reported by SHA512_End; not read afterwards */
+
+ SHA384_Begin(&ctx);
+ SHA512_Update(&ctx, src, src_length);
+ SHA512_End(&ctx, dest, &outLen, SHA384_LENGTH); /* output is limited to SHA384_LENGTH bytes */
+ memset(&ctx, 0, sizeof ctx); /* wipe the local context before returning */
+
+ return SECSuccess; /* this function has no failure path */
+}
+
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+ return SHA384_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+void
+SHA384_TraceState(SHA384Context *ctx)
+{
+}
+
+unsigned int
+SHA384_FlattenSize(SHA384Context *ctx)
+{
+ return sizeof(SHA384Context);
+}
+
+SECStatus
+SHA384_Flatten(SHA384Context *ctx, unsigned char *space)
+{
+ return SHA512_Flatten(ctx, space);
+}
+
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+ return SHA512_Resurrect(space, arg);
+}
+
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+ memcpy(dest, src, sizeof *dest);
+}
+
+/* ======================================================================= */
+#ifdef SELFTEST
+#include <stdio.h>
+
+static const char abc[] = { "abc" };
+static const char abcdbc[] = {
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+};
+static const char abcdef[] = {
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
+};
+
+void
+dumpHash32(const unsigned char *buf, unsigned int bufLen)
+{
+ unsigned int i;
+ for (i = 0; i < bufLen; i += 4) { /* one group of four bytes per pass; reads buf[i..i+3], so bufLen should be a multiple of 4 */
+ printf(" %02x%02x%02x%02x", buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
+ }
+ printf("\n");
+}
+
+void
+test256(void)
+{
+ unsigned char outBuf[SHA256_LENGTH];
+
+ printf("SHA256, input = %s\n", abc);
+ SHA256_Hash(outBuf, abc);
+ dumpHash32(outBuf, sizeof outBuf);
+
+ printf("SHA256, input = %s\n", abcdbc);
+ SHA256_Hash(outBuf, abcdbc);
+ dumpHash32(outBuf, sizeof outBuf);
+}
+
+void
+test224(void)
+{
+ SHA224Context ctx;
+ unsigned char a1000times[1000];
+ unsigned int outLen;
+ unsigned char outBuf[SHA224_LENGTH];
+ int i;
+
+ /* Test Vector 1 */
+ printf("SHA224, input = %s\n", abc);
+ SHA224_Hash(outBuf, abc);
+ dumpHash32(outBuf, sizeof outBuf);
+
+ /* Test Vector 2 */
+ printf("SHA224, input = %s\n", abcdbc);
+ SHA224_Hash(outBuf, abcdbc);
+ dumpHash32(outBuf, sizeof outBuf);
+
+ /* Test Vector 3 */
+
+ /* to hash one million 'a's perform 1000
+ * sha224 updates on a buffer with 1000 'a's
+ */
+ memset(a1000times, 'a', 1000);
+ printf("SHA224, input = %s\n", "a one million times");
+ SHA224_Begin(&ctx);
+ for (i = 0; i < 1000; i++)
+ SHA224_Update(&ctx, a1000times, 1000);
+ SHA224_End(&ctx, outBuf, &outLen, SHA224_LENGTH);
+ dumpHash32(outBuf, sizeof outBuf);
+}
+
+void
+dumpHash64(const unsigned char *buf, unsigned int bufLen)
+{
+ unsigned int i;
+ for (i = 0; i < bufLen; i += 8) { /* one group of eight bytes per pass; reads buf[i..i+7], so bufLen should be a multiple of 8 */
+ if (i % 32 == 0) /* start a new output line before every 32 bytes, including the first */
+ printf("\n");
+ printf(" %02x%02x%02x%02x%02x%02x%02x%02x",
+ buf[i], buf[i + 1], buf[i + 2], buf[i + 3],
+ buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+ }
+ printf("\n");
+}
+
+void
+test512(void)
+{
+ unsigned char outBuf[SHA512_LENGTH];
+
+ printf("SHA512, input = %s\n", abc);
+ SHA512_Hash(outBuf, abc);
+ dumpHash64(outBuf, sizeof outBuf);
+
+ printf("SHA512, input = %s\n", abcdef);
+ SHA512_Hash(outBuf, abcdef);
+ dumpHash64(outBuf, sizeof outBuf);
+}
+
+void
+time512(void)
+{
+ unsigned char outBuf[SHA512_LENGTH];
+
+ SHA512_Hash(outBuf, abc);
+ SHA512_Hash(outBuf, abcdef);
+}
+
+void
+test384(void)
+{
+ unsigned char outBuf[SHA384_LENGTH];
+
+ printf("SHA384, input = %s\n", abc);
+ SHA384_Hash(outBuf, abc);
+ dumpHash64(outBuf, sizeof outBuf);
+
+ printf("SHA384, input = %s\n", abcdef);
+ SHA384_Hash(outBuf, abcdef);
+ dumpHash64(outBuf, sizeof outBuf);
+}
+
+int
+main(int argc, char *argv[], char *envp[])
+{
+ int i = 1; /* iteration count; the default selects the test-vector branch */
+ if (argc > 1) {
+ i = atoi(argv[1]); /* NOTE(review): this SELFTEST section includes only <stdio.h>; atoi presumably comes from a header pulled in earlier - confirm */
+ }
+ if (i < 2) { /* counts below 2: print the test vectors once */
+ test224();
+ test256();
+ test384();
+ test512();
+ } else { /* larger counts: run the SHA-512 timing body that many times */
+ while (i-- > 0) {
+ time512();
+ }
+ printf("done\n");
+ }
+ return 0;
+}
+
+void *
+PORT_Alloc(size_t len)
+{
+ return malloc(len);
+}
+void
+PORT_Free(void *ptr)
+{
+ free(ptr);
+}
+void
+PORT_ZFree(void *ptr, size_t len)
+{
+ memset(ptr, 0, len);
+ free(ptr);
+}
+#endif
diff --git a/security/nss/lib/freebl/sha_fast.c b/security/nss/lib/freebl/sha_fast.c
new file mode 100644
index 0000000000..2a8ac576c1
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.c
@@ -0,0 +1,592 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+#include "prerror.h"
+#include "secerr.h"
+
+#ifdef TRACING_SSL
+#include "ssl.h"
+#include "ssltrace.h"
+#endif
+
+static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
+
+#define W u.w
+#define B u.b
+
+#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
+#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
+#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
+#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
+
+#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
+
+void SHA1_Compress_Native(SHA1Context *ctx);
+void SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+static void SHA1_Compress_Generic(SHA1Context *ctx);
+static void SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len);
+
+#ifndef USE_HW_SHA1
+void
+SHA1_Compress_Native(SHA1Context *ctx)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); /* stub compiled only when USE_HW_SHA1 is not defined; ctx is not touched */
+ PORT_Assert(0); /* reaching this stub is treated as a programming error */
+}
+
+void
+SHA1_Update_Native(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); /* stub compiled only when USE_HW_SHA1 is not defined; no input is consumed */
+ PORT_Assert(0); /* reaching this stub is treated as a programming error */
+}
+#endif
+
+/*
+ * SHA: initialize context
+ */
+void
+SHA1_Begin(SHA1Context *ctx)
+{
+ ctx->size = 0; /* no bytes hashed yet */
+ /*
+ * Initialize H with constants from FIPS180-1.
+ */
+ ctx->H[0] = 0x67452301L;
+ ctx->H[1] = 0xefcdab89L;
+ ctx->H[2] = 0x98badcfeL;
+ ctx->H[3] = 0x10325476L;
+ ctx->H[4] = 0xc3d2e1f0L;
+
+#if defined(USE_HW_SHA1) && defined(IS_LITTLE_ENDIAN)
+ /* arm's implementation is tested on little endian only */
+ if (arm_sha1_support()) {
+ ctx->compress = SHA1_Compress_Native;
+ ctx->update = SHA1_Update_Native;
+ } else /* the block below is this else-branch here, and unconditional when the #if is false */
+#endif
+ {
+ ctx->compress = SHA1_Compress_Generic; /* generic fallback, declared static in this file */
+ ctx->update = SHA1_Update_Generic;
+ }
+}
+
+/* Explanation of H array and index values:
+ * The context's H array is actually the concatenation of two arrays
+ * defined by SHA1, the H array of state variables (5 elements),
+ * and the W array of intermediate values, of which there are 16 elements.
+ * The W array starts at H[5], that is W[0] is H[5].
+ * Although these values are defined as 32-bit values, we use 64-bit
+ * variables to hold them because the AMD64 stores 64 bit values in
+ * memory MUCH faster than it stores any smaller values.
+ *
+ * Rather than passing the context structure to shaCompress, we pass
+ * this combined array of H and W values. We do not pass the address
+ * of the first element of this array, but rather pass the address of an
+ * element in the middle of the array, element X. Presently X[0] is H[11].
+ * So we pass the address of H[11] as the address of array X to shaCompress.
+ * Then shaCompress accesses the members of the array using positive AND
+ * negative indexes.
+ *
+ * Pictorially: (each element is 8 bytes)
+ * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
+ * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
+ *
+ * The byte offset from X[0] to any member of H and W is always
+ * representable in a signed 8-bit value, which will be encoded
+ * as a single byte offset in the X86-64 instruction set.
+ * If we didn't pass the address of H[11], and instead passed the
+ * address of H[0], the offsets to elements H[16] and above would be
+ * greater than 127, not representable in a signed 8-bit value, and the
+ * x86-64 instruction set would encode every such offset as a 32-bit
+ * signed number in each instruction that accessed element H[16] or
+ * higher. This results in much bigger and slower code.
+ */
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
+#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */
+#else
+#define H2X 0
+#endif
+
+/*
+ * SHA: Add data to context.
+ */
+void
+SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ ctx->update(ctx, dataIn, len); /* dispatch to the implementation that SHA1_Begin stored in the context */
+}
+
+static void
+SHA1_Update_Generic(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ register unsigned int lenB; /* bytes currently held in the partial block */
+ register unsigned int togo; /* bytes to copy to complete the partial block */
+
+ if (!len)
+ return; /* nothing to hash; the byte count is left untouched */
+
+ /* accumulate the byte count. */
+ lenB = (unsigned int)(ctx->size) & 63U; /* size modulo 64, taken before the count is updated */
+
+ ctx->size += len;
+
+ /*
+ * Read the data into W and process blocks as they get full
+ */
+ if (lenB > 0) {
+ togo = 64U - lenB;
+ if (len < togo)
+ togo = len; /* not enough input to complete the block */
+ memcpy(ctx->B + lenB, dataIn, togo);
+ len -= togo;
+ dataIn += togo;
+ lenB = (lenB + togo) & 63U;
+ if (!lenB) { /* wrapped to zero: the buffer now holds a full 64-byte block */
+ shaCompress(&ctx->H[H2X], ctx->W);
+ }
+ }
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ if ((ptrdiff_t)dataIn % sizeof(PRUint32)) { /* input is not 32-bit aligned: stage every block in the context buffer */
+ while (len >= 64U) {
+ memcpy(ctx->B, dataIn, 64);
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], ctx->W);
+ dataIn += 64U;
+ }
+ } else
+#endif
+ {
+ while (len >= 64U) {
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn); /* compress straight from the caller's buffer, no copy */
+ dataIn += 64U;
+ }
+ }
+ if (len) {
+ memcpy(ctx->B, dataIn, len); /* fewer than 64 bytes remain; keep them for a later call */
+ }
+}
+
+/*
+ * SHA: Generate hash value from context
+ */
+void NO_SANITIZE_ALIGNMENT
+SHA1_End(SHA1Context *ctx, unsigned char *hashout,
+ unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+ register PRUint64 size; /* byte count captured before padding, later converted to bits */
+ register PRUint32 lenB; /* buffered byte count; also aliased as 'tmp' by the #define below */
+
+ static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+#define tmp lenB
+
+ PORT_Assert(maxDigestLen >= SHA1_LENGTH); /* only asserted; maxDigestLen is not referenced again in the visible code */
+
+ /*
+ * Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits
+ */
+ size = ctx->size;
+
+ lenB = (PRUint32)size & 63;
+ SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1); /* 1..64 pad bytes, chosen so the buffered count becomes 56 (checked by the assert below) */
+ PORT_Assert(((PRUint32)ctx->size & 63) == 56);
+ /* Convert size from bytes to bits. */
+ size <<= 3;
+ ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32)); /* high 32 bits of the bit length */
+ ctx->W[15] = SHA_HTONL((PRUint32)size); /* low 32 bits of the bit length */
+ ctx->compress(ctx); /* final block goes through the implementation selected in SHA1_Begin */
+
+ /*
+ * Output hash
+ */
+ SHA_STORE_RESULT; /* macro defined outside this view; presumably writes the digest to hashout and may use 'tmp' - confirm */
+ if (pDigestLen) { /* pDigestLen is optional */
+ *pDigestLen = SHA1_LENGTH;
+ }
+#undef tmp
+}
+
+void
+SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout,
+ unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+#if defined(SHA_NEED_TMP_VARIABLE)
+ register PRUint32 tmp; /* scratch variable, presumably used by SHA_STORE_RESULT on such builds */
+#endif
+ PORT_Assert(maxDigestLen >= SHA1_LENGTH); /* only asserted; maxDigestLen is not referenced again in the visible code */
+
+ SHA_STORE_RESULT; /* no padding and no compress call precede this, unlike SHA1_End */
+ if (pDigestLen) /* pDigestLen is optional */
+ *pDigestLen = SHA1_LENGTH;
+}
+
+#undef B
+/*
+ * SHA: Compression function, unrolled.
+ *
+ * Some operations in shaCompress are done as 5 groups of 16 operations.
+ * Others are done as 4 groups of 20 operations.
+ * The code below shows that structure.
+ *
+ * The functions that compute the new values of the 5 state variables
+ * A-E are done in 4 groups of 20 operations (or you may also think
+ * of them as being done in 16 groups of 5 operations). They are
+ * done by the SHA_RNDx macros below, in the right column.
+ *
+ * The functions that set the 16 values of the W array are done in
+ * 5 groups of 16 operations. The first group is done by the
+ * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
+ * in the left column.
+ *
+ * gcc's optimizer observes that each member of the W array is assigned
+ * a value 5 times in this code. It reduces the number of store
+ * operations done to the W array in the context (that is, in the X array)
+ * by creating a W array on the stack, and storing the W values there for
+ * the first 4 groups of operations on W, and storing the values in the
+ * context's W array only in the fifth group. This is undesirable.
+ * It is MUCH bigger code than simply using the context's W array, because
+ * all the offsets to the W array in the stack are 32-bit signed offsets,
+ * and it is no faster than storing the values in the context's W array.
+ *
+ * The original code for sha_fast.c prevented this creation of a separate
+ * W array in the stack by creating a W array of 80 members, each of
+ * whose elements is assigned only once. It also separated the computations
+ * of the W array values and the computations of the values for the 5
+ * state variables into two separate passes, W's, then A-E's so that the
+ * second pass could be done all in registers (except for accessing the W
+ * array) on machines with fewer registers. The method is suboptimal
+ * for machines with enough registers to do it all in one pass, and it
+ * necessitates using many instructions with 32-bit offsets.
+ *
+ * This code eliminates the separate W array on the stack by a completely
+ * different means: by declaring the X array volatile. This prevents
+ * the optimizer from trying to reduce the use of the X array by the
+ * creation of a MORE expensive W array on the stack. The result is
+ * that all instructions use signed 8-bit offsets and not 32-bit offsets.
+ *
+ * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
+ * results in code that is 3 times faster than the previous NSS sha_fast
+ * code on AMD64.
+ */
+static void NO_SANITIZE_ALIGNMENT
+shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
+{
+ register SHA_HW_t A, B, C, D, E;
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+ register PRUint32 tmp;
+#endif
+ /* XH(n) addresses chaining-state word n, XW(n) addresses schedule word n. */
+#if !defined(SHA_PUT_W_IN_STACK)
+#define XH(n) X[n - H2X]
+#define XW(n) X[n - W2X]
+#else
+ SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7,
+ w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+#define XW(n) w_##n
+#define XH(n) X[n]
+#endif
+ /* Additive constants: K0..K3 are used by SHA_RND1..SHA_RND4 respectively. */
+#define K0 0x5a827999L
+#define K1 0x6ed9eba1L
+#define K2 0x8f1bbcdcL
+#define K3 0xca62c1d6L
+ /* SHA_RNDx(a, b, c, d, e, n): fold schedule word n into 'a', then rotate 'c' left by 30. */
+#define SHA_RND1(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND2(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND3(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND4(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
+ c = SHA_ROTL(c, 30)
+ /* LOAD(n): schedule word n = SHA_HTONL of input word n. */
+#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n])
+ /* Start from the current chaining value. */
+ A = XH(0);
+ B = XH(1);
+ C = XH(2);
+ D = XH(3);
+ E = XH(4);
+ /* Rounds 0-15: schedule words are loaded straight from the input block. */
+ LOAD(0);
+ SHA_RND1(E, A, B, C, D, 0);
+ LOAD(1);
+ SHA_RND1(D, E, A, B, C, 1);
+ LOAD(2);
+ SHA_RND1(C, D, E, A, B, 2);
+ LOAD(3);
+ SHA_RND1(B, C, D, E, A, 3);
+ LOAD(4);
+ SHA_RND1(A, B, C, D, E, 4);
+ LOAD(5);
+ SHA_RND1(E, A, B, C, D, 5);
+ LOAD(6);
+ SHA_RND1(D, E, A, B, C, 6);
+ LOAD(7);
+ SHA_RND1(C, D, E, A, B, 7);
+ LOAD(8);
+ SHA_RND1(B, C, D, E, A, 8);
+ LOAD(9);
+ SHA_RND1(A, B, C, D, E, 9);
+ LOAD(10);
+ SHA_RND1(E, A, B, C, D, 10);
+ LOAD(11);
+ SHA_RND1(D, E, A, B, C, 11);
+ LOAD(12);
+ SHA_RND1(C, D, E, A, B, 12);
+ LOAD(13);
+ SHA_RND1(B, C, D, E, A, 13);
+ LOAD(14);
+ SHA_RND1(A, B, C, D, E, 14);
+ LOAD(15);
+ SHA_RND1(E, A, B, C, D, 15);
+ /* Rounds 16-19: still SHA_RND1, but schedule words now come from SHA_MIX (defined elsewhere). */
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND1(D, E, A, B, C, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND1(C, D, E, A, B, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND1(B, C, D, E, A, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND1(A, B, C, D, E, 3);
+ /* Rounds 20-39: SHA_RND2 (SHA_F2, K1). */
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND2(E, A, B, C, D, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND2(D, E, A, B, C, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND2(C, D, E, A, B, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND2(B, C, D, E, A, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND2(A, B, C, D, E, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND2(E, A, B, C, D, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND2(D, E, A, B, C, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND2(C, D, E, A, B, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND2(B, C, D, E, A, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND2(A, B, C, D, E, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND2(E, A, B, C, D, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND2(D, E, A, B, C, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND2(C, D, E, A, B, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND2(B, C, D, E, A, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND2(A, B, C, D, E, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND2(E, A, B, C, D, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND2(D, E, A, B, C, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND2(C, D, E, A, B, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND2(B, C, D, E, A, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND2(A, B, C, D, E, 7);
+ /* Rounds 40-59: SHA_RND3 (SHA_F3, K2). */
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND3(E, A, B, C, D, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND3(D, E, A, B, C, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND3(C, D, E, A, B, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND3(B, C, D, E, A, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND3(A, B, C, D, E, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND3(E, A, B, C, D, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND3(D, E, A, B, C, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND3(C, D, E, A, B, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND3(B, C, D, E, A, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND3(A, B, C, D, E, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND3(E, A, B, C, D, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND3(D, E, A, B, C, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND3(C, D, E, A, B, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND3(B, C, D, E, A, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND3(A, B, C, D, E, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND3(E, A, B, C, D, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND3(D, E, A, B, C, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND3(C, D, E, A, B, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND3(B, C, D, E, A, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND3(A, B, C, D, E, 11);
+ /* Rounds 60-79: SHA_RND4 (SHA_F4, K3). */
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND4(E, A, B, C, D, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND4(D, E, A, B, C, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND4(C, D, E, A, B, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND4(B, C, D, E, A, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND4(A, B, C, D, E, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND4(E, A, B, C, D, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND4(D, E, A, B, C, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND4(C, D, E, A, B, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND4(B, C, D, E, A, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND4(A, B, C, D, E, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND4(E, A, B, C, D, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND4(D, E, A, B, C, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND4(C, D, E, A, B, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND4(B, C, D, E, A, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND4(A, B, C, D, E, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND4(E, A, B, C, D, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND4(D, E, A, B, C, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND4(C, D, E, A, B, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND4(B, C, D, E, A, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND4(A, B, C, D, E, 15);
+ /* Add the working variables back into the chaining state. */
+ XH(0) += A;
+ XH(1) += B;
+ XH(2) += C;
+ XH(3) += D;
+ XH(4) += E;
+}
+
+/* Portable-C compression hook: run shaCompress over the buffered input block. */
+static void
+SHA1_Compress_Generic(SHA1Context *ctx)
+{
+    const PRUint32 *block = ctx->u.w;
+
+    shaCompress(&ctx->H[H2X], block);
+}
+
+/*************************************************************************
+** Code below this line added to make SHA code support BLAPI interface
+*/
+
+/*
+ * Allocate a SHA-1 context. The memory is returned uninitialized;
+ * the result of PORT_New is passed straight back to the caller.
+ */
+SHA1Context *
+SHA1_NewContext(void)
+{
+    /* PORT_New rather than a zeroing allocator: SHA1_Begin will init the context */
+    return PORT_New(SHA1Context);
+}
+
+/*
+ * Zero the context and, when freeit is true, release its memory.
+ * A NULL context is accepted and ignored, so callers can use this
+ * unconditionally on cleanup paths.
+ */
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+    /* scrub hash state and buffered input before the memory is reused */
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * One-shot SHA-1: hash src_length bytes at src and write the SHA1_LENGTH-byte
+ * digest to dest. Always returns SECSuccess.
+ */
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    SHA1Context hashCtx;
+    unsigned int digestLen;
+
+    SHA1_Begin(&hashCtx);
+    /* go through the context's update hook, as selected by SHA1_Begin */
+    hashCtx.update(&hashCtx, src, src_length);
+    SHA1_End(&hashCtx, dest, &digestLen, SHA1_LENGTH);
+
+    /* wipe the on-stack context before returning */
+    memset(&hashCtx, 0, sizeof(hashCtx));
+    return SECSuccess;
+}
+
+/* Hash a null-terminated character string (the terminator is not hashed). */
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHA1_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/*
+ * need to support save/restore state in pkcs11. Stores all the info necessary
+ * for a structure into just a stream of bytes.
+ */
+/* Number of bytes SHA1_Flatten writes: the size of the whole context struct. */
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+    (void)cx; /* size does not depend on the context's contents */
+    return sizeof(SHA1Context);
+}
+
+/*
+ * Copy the raw context bytes into space, which must hold at least
+ * sizeof(SHA1Context) bytes. Always returns SECSuccess.
+ */
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+    PORT_Memcpy(space, cx, sizeof(*cx));
+    return SECSuccess;
+}
+
+/*
+ * Rebuild a context from bytes previously produced by SHA1_Flatten.
+ * Returns a newly allocated context, or NULL if allocation fails.
+ * The arg parameter is not used.
+ */
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+    SHA1Context *restored = SHA1_NewContext();
+
+    if (restored != NULL) {
+        PORT_Memcpy(restored, space, sizeof(*restored));
+    }
+    return restored;
+}
+
+/* Byte-for-byte copy of the whole context from src into dest. */
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+    memcpy(dest, src, sizeof(SHA1Context));
+}
+
+/* Not implemented: only records PR_NOT_IMPLEMENTED_ERROR; ctx is not examined. */
+void
+SHA1_TraceState(SHA1Context *ctx)
+{
+    (void)ctx;
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
diff --git a/security/nss/lib/freebl/sha_fast.h b/security/nss/lib/freebl/sha_fast.h
new file mode 100644
index 0000000000..c03c0637a3
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.h
@@ -0,0 +1,186 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_FAST_H_
+#define _SHA_FAST_H_
+
+#include "prlong.h"
+#include "blapii.h"
+
+#define SHA1_INPUT_LEN 64
+
+#if defined(IS_64) && !defined(__sparc) && !defined(__aarch64__)
+typedef PRUint64 SHA_HW_t;
+#define SHA1_USING_64_BIT 1
+#else
+typedef PRUint32 SHA_HW_t;
+#endif
+
+struct SHA1ContextStr;
+
+typedef void (*sha1_compress_t)(struct SHA1ContextStr *);
+typedef void (*sha1_update_t)(struct SHA1ContextStr *, const unsigned char *,
+ unsigned int);
+
+struct SHA1ContextStr {
+ union {
+ PRUint32 w[16]; /* input buffer, viewed as sixteen 32-bit words */
+ PRUint8 b[64]; /* the same 64 bytes, viewed as bytes */
+ } u;
+ PRUint64 size; /* count of hashed bytes. */
+ SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */
+ sha1_compress_t compress; /* block-compression hook, invoked as ctx->compress(ctx) */
+ sha1_update_t update; /* data-input hook, invoked as ctx.update(&ctx, src, len) */
+};
+
+#if defined(_MSC_VER)
+#include <stdlib.h>
+#if defined(IS_LITTLE_ENDIAN)
+#if (_MSC_VER >= 1300)
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(NSS_X86_OR_X64)
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif /* _MSC_VER */
+#endif /* !defined FORCEINLINE */
+#define FASTCALL __fastcall
+
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd) /* MSVC inline-asm variant: byte-swaps dwd with the x86 bswap instruction */
+{
+ __asm {
+ mov eax,dwd
+ bswap eax
+ }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+#endif /* NSS_X86_OR_X64 */
+#endif /* IS_LITTLE_ENDIAN */
+
+#pragma intrinsic(_lrotr, _lrotl)
+#define SHA_ROTL(x, n) _lrotl(x, n)
+#define SHA_ROTL_IS_DEFINED 1
+#endif /* _MSC_VER */
+
+#if defined(__GNUC__)
+/* __x86_64__ and __x86_64 are defined by GCC on x86_64 CPUs */
+#if defined(SHA1_USING_64_BIT)
+/*
+ * Rotate the low 32 bits of x left by n bits; the upper 32 bits of x are
+ * discarded and the result is returned zero-extended to 64 bits.
+ */
+static __inline__ PRUint64
+SHA_ROTL(PRUint64 x, PRUint32 n)
+{
+    const PRUint32 low32 = (PRUint32)x;
+    const PRUint32 rotated = (low32 << n) | (low32 >> (32 - n));
+
+    return rotated;
+}
+#else
+/* Rotate the 32-bit value t left by n bits. */
+static __inline__ PRUint32
+SHA_ROTL(PRUint32 t, PRUint32 n)
+{
+    const PRUint32 upper = t << n;
+    const PRUint32 lower = t >> (32 - n);
+
+    return upper | lower;
+}
+#endif
+#define SHA_ROTL_IS_DEFINED 1
+
+#if defined(NSS_X86_OR_X64)
+static __inline__ PRUint32
+swap4b(PRUint32 value) /* GCC x86 variant: byte-swap value using the bswap instruction */
+{
+ __asm__("bswap %0"
+ : "+r"(value)); /* "+r": value is both input and output of the instruction */
+ return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__)))
+#if defined(IS_LITTLE_ENDIAN)
+static __inline__ PRUint32
+swap4b(PRUint32 value) /* GCC ARM variant: byte-swap value using the rev instruction */
+{
+ PRUint32 ret;
+ __asm__("rev %0, %1"
+ : "=r"(ret) /* output operand */
+ : "r"(value)); /* input operand */
+ return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+#endif
+
+#endif /* x86 family */
+
+#endif /* __GNUC__ */
+
+#if !defined(SHA_ROTL_IS_DEFINED)
+#define SHA_NEED_TMP_VARIABLE 1
+#define SHA_ROTL(X, n) (tmp = (X), ((tmp) << (n)) | ((tmp) >> (32 - (n))))
+#endif
+
+#if !defined(SHA_HTONL)
+#define SHA_MASK 0x00FF00FF
+#if defined(IS_LITTLE_ENDIAN)
+#undef SHA_NEED_TMP_VARIABLE
+#define SHA_NEED_TMP_VARIABLE 1
+#define SHA_HTONL(x) (tmp = (x), tmp = (tmp << 16) | (tmp >> 16), \
+ ((tmp & SHA_MASK) << 8) | ((tmp >> 8) & SHA_MASK))
+#else
+#define SHA_HTONL(x) (x)
+#endif
+#endif
+
+#define SHA_BYTESWAP(x) x = SHA_HTONL(x)
+
+#define SHA_STORE(n) ((PRUint32 *)hashout)[n] = SHA_HTONL(ctx->H[n])
+#if defined(HAVE_UNALIGNED_ACCESS)
+#define SHA_STORE_RESULT \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4);
+
+#elif defined(IS_LITTLE_ENDIAN) || defined(SHA1_USING_64_BIT)
+#define SHA_STORE_RESULT \
+ if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4); \
+ } else { \
+ PRUint32 tmpbuf[5]; \
+ tmpbuf[0] = SHA_HTONL(ctx->H[0]); \
+ tmpbuf[1] = SHA_HTONL(ctx->H[1]); \
+ tmpbuf[2] = SHA_HTONL(ctx->H[2]); \
+ tmpbuf[3] = SHA_HTONL(ctx->H[3]); \
+ tmpbuf[4] = SHA_HTONL(ctx->H[4]); \
+ memcpy(hashout, tmpbuf, SHA1_LENGTH); \
+ }
+
+#else
+#define SHA_STORE_RESULT \
+ if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4); \
+ } else { \
+ memcpy(hashout, ctx->H, SHA1_LENGTH); \
+ }
+#endif
+
+#endif /* _SHA_FAST_H_ */
diff --git a/security/nss/lib/freebl/shake.c b/security/nss/lib/freebl/shake.c
new file mode 100644
index 0000000000..fe20cf6f2a
--- /dev/null
+++ b/security/nss/lib/freebl/shake.c
@@ -0,0 +1,128 @@
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prtypes.h" /* for PRUintXX */
+#include "secport.h" /* for PORT_XXX */
+#include "blapi.h"
+#include "blapii.h"
+#include "blapit.h"
+#include "secerr.h"
+#include "Hacl_Hash_SHA3.h"
+
+struct SHAKEContextStr {
+ Hacl_Streaming_Keccak_state *st; /* HACL streaming state: allocated in *_NewContext, freed in *_DestroyContext */
+};
+
+/*
+ * Allocate a SHAKE-128 context together with its HACL streaming state.
+ * Returns NULL if either allocation fails; nothing is leaked in that case.
+ */
+SHAKE_128Context *
+SHAKE_128_NewContext()
+{
+    SHAKE_128Context *ctx = PORT_New(SHAKE_128Context);
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_Shake128);
+    if (ctx->st == NULL) {
+        /* do not hand out a half-built context */
+        PORT_Free(ctx);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Allocate a SHAKE-256 context together with its HACL streaming state.
+ * Returns NULL if either allocation fails; nothing is leaked in that case.
+ */
+SHAKE_256Context *
+SHAKE_256_NewContext()
+{
+    SHAKE_256Context *ctx = PORT_New(SHAKE_256Context);
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    ctx->st = Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_Shake256);
+    if (ctx->st == NULL) {
+        /* do not hand out a half-built context */
+        PORT_Free(ctx);
+        return NULL;
+    }
+    return ctx;
+}
+
+/*
+ * Reset the streaming state and, when freeit is true, free both the state
+ * and the context. A NULL context (or a context without a state) is
+ * tolerated so this can be called unconditionally on cleanup paths.
+ */
+void
+SHAKE_128_DestroyContext(SHAKE_128Context *ctx, PRBool freeit)
+{
+    if (ctx == NULL) {
+        return;
+    }
+    if (ctx->st != NULL) {
+        Hacl_Streaming_Keccak_reset(ctx->st);
+    }
+    if (freeit) {
+        if (ctx->st != NULL) {
+            Hacl_Streaming_Keccak_free(ctx->st);
+        }
+        PORT_Free(ctx);
+    }
+}
+
+/*
+ * Reset the streaming state and, when freeit is true, free both the state
+ * and the context. A NULL context (or a context without a state) is
+ * tolerated so this can be called unconditionally on cleanup paths.
+ */
+void
+SHAKE_256_DestroyContext(SHAKE_256Context *ctx, PRBool freeit)
+{
+    if (ctx == NULL) {
+        return;
+    }
+    if (ctx->st != NULL) {
+        Hacl_Streaming_Keccak_reset(ctx->st);
+    }
+    if (freeit) {
+        if (ctx->st != NULL) {
+            Hacl_Streaming_Keccak_free(ctx->st);
+        }
+        PORT_Free(ctx);
+    }
+}
+
+/* Start a new computation by resetting the context's HACL streaming state. */
+void
+SHAKE_128_Begin(SHAKE_128Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Start a new computation by resetting the context's HACL streaming state. */
+void
+SHAKE_256_Begin(SHAKE_256Context *ctx)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_reset(state);
+}
+
+/* Feed inputLen bytes from input into the streaming state. */
+void
+SHAKE_128_Absorb(SHAKE_128Context *ctx, const unsigned char *input,
+                 unsigned int inputLen)
+{
+    /* the HACL update entry point takes a non-const pointer */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/* Feed inputLen bytes from input into the streaming state. */
+void
+SHAKE_256_Absorb(SHAKE_256Context *ctx, const unsigned char *input,
+                 unsigned int inputLen)
+{
+    /* the HACL update entry point takes a non-const pointer */
+    uint8_t *data = (uint8_t *)input;
+
+    Hacl_Streaming_Keccak_update(ctx->st, data, inputLen);
+}
+
+/* Produce digestLen bytes of output into digest from the streaming state. */
+void
+SHAKE_128_SqueezeEnd(SHAKE_128Context *ctx, unsigned char *digest,
+                     unsigned int digestLen)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_squeeze(state, digest, digestLen);
+}
+
+/* Produce digestLen bytes of output into digest from the streaming state. */
+void
+SHAKE_256_SqueezeEnd(SHAKE_256Context *ctx, unsigned char *digest,
+                     unsigned int digestLen)
+{
+    Hacl_Streaming_Keccak_state *state = ctx->st;
+
+    Hacl_Streaming_Keccak_squeeze(state, digest, digestLen);
+}
+
+/*
+ * One-shot SHAKE-128: absorb src_length bytes from src and write
+ * dest_length bytes of output to dest.
+ * Returns SECFailure if the temporary context cannot be allocated,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHAKE_128_HashBuf(unsigned char *dest, PRUint32 dest_length,
+                  const unsigned char *src, PRUint32 src_length)
+{
+    SHAKE_128Context *ctx = SHAKE_128_NewContext();
+
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+    SHAKE_128_Begin(ctx);
+    SHAKE_128_Absorb(ctx, src, src_length);
+    SHAKE_128_SqueezeEnd(ctx, dest, dest_length);
+    /* freeit is a PRBool, so use the NSPR constant */
+    SHAKE_128_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * One-shot SHAKE-256: absorb src_length bytes from src and write
+ * dest_length bytes of output to dest.
+ * Returns SECFailure if the temporary context cannot be allocated,
+ * SECSuccess otherwise.
+ */
+SECStatus
+SHAKE_256_HashBuf(unsigned char *dest, PRUint32 dest_length,
+                  const unsigned char *src, PRUint32 src_length)
+{
+    SHAKE_256Context *ctx = SHAKE_256_NewContext();
+
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+    SHAKE_256_Begin(ctx);
+    SHAKE_256_Absorb(ctx, src, src_length);
+    SHAKE_256_SqueezeEnd(ctx, dest, dest_length);
+    /* freeit is a PRBool, so use the NSPR constant */
+    SHAKE_256_DestroyContext(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/* SHAKE-128 of a null-terminated string (the terminator is not hashed). */
+SECStatus
+SHAKE_128_Hash(unsigned char *dest, unsigned int dest_length, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHAKE_128_HashBuf(dest, dest_length, bytes, PORT_Strlen(src));
+}
+
+/* SHAKE-256 of a null-terminated string (the terminator is not hashed). */
+SECStatus
+SHAKE_256_Hash(unsigned char *dest, unsigned int dest_length, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHAKE_256_HashBuf(dest, dest_length, bytes, PORT_Strlen(src));
+}
diff --git a/security/nss/lib/freebl/shsign.h b/security/nss/lib/freebl/shsign.h
new file mode 100644
index 0000000000..d1a595a391
--- /dev/null
+++ b/security/nss/lib/freebl/shsign.h
@@ -0,0 +1,26 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHSIGN_H_
+#define _SHSIGN_H_
+
+#define SGN_SUFFIX ".chk"
+#define NSS_SIGN_CHK_MAGIC1 0xf1
+#define NSS_SIGN_CHK_MAGIC2 0xc5
+/* new hmac based signatures */
+#define NSS_SIGN_CHK_MAJOR_VERSION 0x02
+#define NSS_SIGN_CHK_MINOR_VERSION 0x01
+#define NSS_SIGN_CHK_TYPE_FLAGS 0xff000000
+#define NSS_SIGN_CHK_FLAG_HMAC 0x80000000
+
+typedef struct NSSSignChkHeaderStr NSSSignChkHeader;
+struct NSSSignChkHeaderStr {
+ unsigned char magic1; /* compared against NSS_SIGN_CHK_MAGIC1 */
+ unsigned char magic2; /* compared against NSS_SIGN_CHK_MAGIC2 */
+ unsigned char majorVersion; /* compared against NSS_SIGN_CHK_MAJOR_VERSION */
+ unsigned char minorVersion; /* compared against NSS_SIGN_CHK_MINOR_VERSION */
+ unsigned char offset[4]; /* 4-byte big-endian file offset the reader seeks to after the header */
+ unsigned char type[4]; /* 4-byte big-endian type word: CKK_DSA, or NSS_SIGN_CHK_FLAG_HMAC combined with a hash type */
+};
+#endif /* _SHSIGN_H_ */
diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c
new file mode 100644
index 0000000000..15fde72b56
--- /dev/null
+++ b/security/nss/lib/freebl/shvfy.c
@@ -0,0 +1,664 @@
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "shsign.h"
+#include "prlink.h"
+#include "prio.h"
+#include "blapi.h"
+#include "seccomon.h"
+#include "secerr.h"
+#include "stdio.h"
+#include "prmem.h"
+#include "hasht.h"
+#include "pqg.h"
+#include "blapii.h"
+#include "secitem.h"
+#include "pkcs11t.h"
+
+#ifndef NSS_FIPS_DISABLED
+
+/*
+ * Most modern versions of Linux support a speed optimization scheme where an
+ * application called prelink modifies programs and shared libraries to quickly
+ * load if they fit into an already designed address space. In short, prelink
+ * scans the list of programs and libraries on your system, assigns them a
+ * predefined space in the address space, then provides the fixups to the
+ * library.
+ *
+ * The modification of the shared library is correctly detected by the freebl
+ * FIPS checksum scheme where we check a signed hash of the library against the
+ * library itself.
+ *
+ * The prelink command itself can reverse the process of modification and
+ * output the pristine shared library as it was before prelink made its
+ * changes. If FREEBL_USE_PRELINK is set Freebl uses prelink to output the
+ * original copy of the shared library before prelink modified it.
+ */
+#ifdef FREEBL_USE_PRELINK
+/* Default prelink invocation; a build may override it by predefining the macro. */
+#ifndef FREEBL_PRELINK_COMMAND
+#define FREEBL_PRELINK_COMMAND "/usr/sbin/prelink -u -o -"
+#endif
+#include "private/pprio.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+
+/*
+ * This function returns an NSPR PRFileDesc * which the caller can read to
+ * obtain the pristine value of the shared library, before any OS related
+ * changes to it (usually address fixups).
+ *
+ * If prelink is installed, this
+ * file descriptor is a pipe connecting the output of
+ * /usr/sbin/prelink -u -o - {Library}
+ * and *pid returns the process id of the prelink child.
+ *
+ * If prelink is not installed, it returns a normal readonly handle to the
+ * library itself and *pid is set to '0'.
+ */
+PRFileDesc *
+bl_OpenUnPrelink(const char *shName, int *pid)
+{
+    char *command = strdup(FREEBL_PRELINK_COMMAND);
+    char *argString = NULL;
+    char **argv = NULL;
+    char *shNameArg = NULL;
+    char *cp;
+    pid_t child;
+    int argc = 0, argNext = 0;
+    struct stat statBuf;
+    int pipefd[2] = { -1, -1 };
+    int ret;
+
+    *pid = 0;
+
+    /* strdup can fail; the scan below would otherwise dereference NULL */
+    if (command == NULL) {
+        goto loser;
+    }
+
+    /* make sure the prelink command exists first. If not, fall back to
+     * just reading the file. Split the program path from its arguments
+     * at the first space. */
+    for (cp = command; *cp; cp++) {
+        if (*cp == ' ') {
+            *cp++ = 0;
+            argString = cp;
+            break;
+        }
+    }
+    memset(&statBuf, 0, sizeof(statBuf));
+    /* stat the file, follow the link */
+    ret = stat(command, &statBuf);
+    if (ret < 0) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+    /* file exists, make sure it's an executable */
+    if (!S_ISREG(statBuf.st_mode) ||
+        ((statBuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+
+    /* OK, the prelink command exists and looks correct, use it */
+    /* build the arglist while we can still malloc */
+    /* count the args if any */
+    if (argString && *argString) {
+        /* argString may have leading spaces, strip them off */
+        for (cp = argString; *cp && *cp == ' '; cp++)
+            ;
+        argString = cp;
+        if (*cp) {
+            /* there is at least one arg.. */
+            argc = 1;
+        }
+
+        /* count the rest: Note there is no provision for escaped
+         * spaces here. The pointer is only advanced while it is on a
+         * non-NUL character, so trailing spaces cannot push it past the
+         * end of the string. */
+        for (cp = argString; *cp;) {
+            if (*cp == ' ') {
+                while (*cp == ' ') {
+                    cp++;
+                }
+                if (*cp) {
+                    argc++;
+                }
+            } else {
+                cp++;
+            }
+        }
+    }
+
+    /* add the additional args: argv[0] (command), shName, NULL */
+    argc += 3;
+    argv = PORT_NewArray(char *, argc);
+    if (argv == NULL) {
+        goto loser;
+    }
+
+    /* fill in the arglist */
+    argv[argNext++] = command;
+    if (argString && *argString) {
+        argv[argNext++] = argString;
+        /* same walk as the counting pass, now terminating each argument
+         * in place and recording where the next one starts */
+        for (cp = argString; *cp;) {
+            if (*cp == ' ') {
+                *cp++ = 0;
+                while (*cp == ' ') {
+                    cp++;
+                }
+                if (*cp) {
+                    argv[argNext++] = cp;
+                }
+            } else {
+                cp++;
+            }
+        }
+    }
+    /* exec doesn't advertise taking const char **argv, do the paranoid
+     * copy */
+    shNameArg = strdup(shName);
+    if (shNameArg == NULL) {
+        goto loser;
+    }
+    argv[argNext++] = shNameArg;
+    argv[argNext++] = 0;
+
+    ret = pipe(pipefd);
+    if (ret < 0) {
+        goto loser;
+    }
+
+    /* use vfork() so we don't trigger the pthread_at_fork() handlers */
+    child = vfork();
+    if (child < 0)
+        goto loser;
+    if (child == 0) {
+        /* set up the file descriptors */
+        /* if we need to support BSD, this will need to be an open of
+         * /dev/null and dup2(nullFD, 0) */
+        close(0);
+        /* associate pipefd[1] with stdout */
+        if (pipefd[1] != 1)
+            dup2(pipefd[1], 1);
+        close(2);
+        close(pipefd[0]);
+        /* should probably close the other file descriptors? */
+
+        execv(command, argv);
+        /* avoid at_exit() handlers */
+        _exit(1); /* shouldn't reach here except on an error */
+    }
+    close(pipefd[1]);
+    pipefd[1] = -1;
+
+    /* this is safe because either vfork() has full fork() semantics, and thus
+     * already has its own address space, or because vfork() has paused
+     * the parent until the exec or exit */
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    *pid = child;
+
+    return PR_ImportPipe(pipefd[0]);
+
+loser:
+    if (pipefd[0] != -1) {
+        close(pipefd[0]);
+    }
+    if (pipefd[1] != -1) {
+        close(pipefd[1]);
+    }
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    return NULL;
+}
+
+/*
+ * bl_CloseUnPrelink -
+ *
+ * This closes the file descriptor and reaps any children opened and created
+ * by bl_OpenUnPrelink. Its primary difference from a plain close is
+ * that it calls wait on the pid if one is supplied, preventing zombie children
+ * from hanging around.
+ */
+void
+bl_CloseUnPrelink(PRFileDesc *file, int pid)
+{
+    /* close our end first ... */
+    PR_Close(file);
+
+    /* ... then collect the child, if one was started (pid is 0 otherwise) */
+    if (pid != 0) {
+        waitpid(pid, NULL, 0);
+    }
+}
+#endif
+
+/* #define DEBUG_SHVERIFY 1 */
+
+/*
+ * Build the name of the check file for libName: a trailing
+ * "." SHLIB_SUFFIX is removed when present, then SGN_SUFFIX is appended.
+ * Returns a PORT_Alloc'ed string, or NULL (with SEC_ERROR_NO_MEMORY set)
+ * if allocation fails.
+ */
+static char *
+mkCheckFileName(const char *libName)
+{
+    int baseLen = PORT_Strlen(libName);
+    /* position where the shared-library extension would start */
+    int extStart = baseLen + 1 - sizeof("." SHLIB_SUFFIX);
+    char *checkName = PORT_Alloc(baseLen + sizeof(SGN_SUFFIX));
+
+    if (checkName == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    /* the comparison length includes the terminating NUL, so the
+     * extension must sit at the very end of libName */
+    if (extStart > 0 &&
+        PORT_Strncmp(libName + extStart, "." SHLIB_SUFFIX,
+                     sizeof("." SHLIB_SUFFIX)) == 0) {
+        baseLen = extStart;
+    }
+    PORT_Memcpy(checkName, libName, baseLen);
+    /* sizeof copies the suffix together with its terminator */
+    PORT_Memcpy(checkName + baseLen, SGN_SUFFIX, sizeof(SGN_SUFFIX));
+    return checkName;
+}
+
+/*
+ * Decode the 4 bytes at buf as a big-endian integer.
+ *
+ * The bytes are combined as unsigned values: shifting a promoted
+ * (signed) int left by 24 would be undefined behaviour whenever
+ * buf[0] >= 0x80. Inputs with the top bit set still come back as
+ * negative ints, as before.
+ */
+static int
+decodeInt(unsigned char *buf)
+{
+    unsigned int value = ((unsigned int)buf[0] << 24) |
+                         ((unsigned int)buf[1] << 16) |
+                         ((unsigned int)buf[2] << 8) |
+                         (unsigned int)buf[3];
+
+    return (int)value;
+}
+
+/*
+ * Read one length-prefixed item from fd: a 4-byte big-endian length
+ * followed by that many bytes of data.
+ *
+ * On success item->data is a PORT_Alloc'ed buffer of item->len bytes and
+ * SECSuccess is returned. On any failure SECFailure is returned and no
+ * buffer is left attached to item.
+ *
+ * A length with the top bit set is rejected up front. Previously it was
+ * stored as a huge unsigned length, and a failed PR_Read (-1) could then
+ * compare equal to a length of 0xFFFFFFFF and be reported as success.
+ */
+static SECStatus
+readItem(PRFileDesc *fd, SECItem *item)
+{
+    unsigned char buf[4];
+    int bytesRead;
+    int len;
+
+    bytesRead = PR_Read(fd, buf, 4);
+    if (bytesRead != 4) {
+        return SECFailure;
+    }
+    len = decodeInt(buf);
+    if (len < 0) {
+        /* corrupt or hostile length field */
+        return SECFailure;
+    }
+    item->len = len;
+
+    item->data = PORT_Alloc(item->len);
+    if (item->data == NULL) {
+        item->len = 0;
+        return SECFailure;
+    }
+    bytesRead = PR_Read(fd, item->data, len);
+    if (bytesRead != len) {
+        /* short read or error: drop the partially filled buffer */
+        PORT_Free(item->data);
+        item->data = NULL;
+        item->len = 0;
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+static PRBool blapi_SHVerifyFile(const char *shName, PRBool self, PRBool rerun);
+
+/*
+ * Resolve the on-disk path of the library identified by name/addr and
+ * verify that file. Returns PR_FALSE when the path cannot be determined.
+ */
+static PRBool
+blapi_SHVerify(const char *name, PRFuncPtr addr, PRBool self, PRBool rerun)
+{
+    PRBool verified;
+    /* find our shared library name */
+    char *libPath = PR_GetLibraryFilePathname(name, addr);
+
+    if (libPath == NULL) {
+        /* if anything goes wrong, the signature does not verify */
+        return PR_FALSE;
+    }
+
+    verified = blapi_SHVerifyFile(libPath, self, rerun);
+    PR_Free(libPath);
+    return verified;
+}
+
+/*
+ * Public entry point: verify the library identified by name/addr.
+ * A leading BLAPI_FIPS_RERUN_FLAG character in name is stripped and
+ * passed on as the rerun flag.
+ */
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+    PRBool rerun = PR_FALSE;
+
+    if (name != NULL && *name == BLAPI_FIPS_RERUN_FLAG) {
+        rerun = PR_TRUE;
+        name += 1; /* skip the flag character */
+    }
+    return blapi_SHVerify(name, addr, PR_FALSE, rerun);
+}
+
+/*
+ * Public entry point: verify the library file at path shName.
+ * A leading BLAPI_FIPS_RERUN_FLAG character in shName is stripped and
+ * passed on as the rerun flag.
+ */
+PRBool
+BLAPI_SHVerifyFile(const char *shName)
+{
+    PRBool rerun = PR_FALSE;
+
+    if (shName != NULL && *shName == BLAPI_FIPS_RERUN_FLAG) {
+        rerun = PR_TRUE;
+        shName += 1; /* skip the flag character */
+    }
+    return blapi_SHVerifyFile(shName, PR_FALSE, rerun);
+}
+
+#ifndef NSS_STRICT_INTEGRITY
+/* This allows checks with old shlibsign .chk files. If NSS_STRICT_INTEGRITY
+ * is set, we don't accept DSA */
+/*
+ * Hash everything readable from shFD with hashObj and check the DSA
+ * signature over that digest with key.
+ *
+ * Returns PR_TRUE only if the whole file was read and the signature
+ * verifies. A read error is treated as a verification failure rather
+ * than as end-of-file, so a truncated read can never be accepted.
+ */
+static PRBool
+blapi_SHVerifyDSACheck(PRFileDesc *shFD, const SECHashObject *hashObj,
+                       DSAPublicKey *key, const SECItem *signature)
+{
+    void *hashcx = NULL;
+    SECItem hash;
+    int bytesRead;
+    unsigned char hashBuf[HASH_LENGTH_MAX];
+    unsigned char buf[4096];
+    SECStatus rv = SECFailure;
+
+    hash.type = siBuffer;
+    hash.data = hashBuf;
+    hash.len = sizeof(hashBuf);
+
+    /* hash our library file */
+    hashcx = hashObj->create();
+    if (hashcx == NULL) {
+        return PR_FALSE;
+    }
+    hashObj->begin(hashcx);
+
+    while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) {
+        hashObj->update(hashcx, buf, bytesRead);
+    }
+    hashObj->end(hashcx, hash.data, &hash.len, hash.len);
+    hashObj->destroy(hashcx, PR_TRUE);
+
+    /* bytesRead is 0 at end of file and negative on a read error */
+    if (bytesRead == 0) {
+        /* verify the hash against the check file */
+        rv = DSA_VerifyDigest(key, signature, &hash);
+    }
+    PORT_Memset(hashBuf, 0, sizeof hashBuf);
+    return (rv == SECSuccess) ? PR_TRUE : PR_FALSE;
+}
+#endif
+
+#ifdef NSS_STRICT_INTEGRITY
+/* don't allow MD2, MD5, SHA1 or SHA224 as your integrity hash */
+/*
+ * Strict-integrity policy: only SHA-256, SHA-384 and SHA-512 are accepted
+ * as the integrity hash. The object is only inspected, so it is taken as
+ * const, matching the const pointer the caller holds.
+ */
+static PRBool
+blapi_HashAllowed(const SECHashObject *hashObj)
+{
+    switch (hashObj->type) {
+        case HASH_AlgSHA256:
+        case HASH_AlgSHA384:
+        case HASH_AlgSHA512:
+            return PR_TRUE;
+        default:
+            break;
+    }
+    return PR_FALSE;
+}
+#endif
+
+/*
+ * Compute the HMAC (hashObj, key) of everything readable from shFD and
+ * compare it with signature.
+ *
+ * Returns PR_TRUE only if the whole file was read, the HMAC was computed
+ * and it equals signature. A read error is treated as a verification
+ * failure rather than as end-of-file.
+ */
+static PRBool
+blapi_SHVerifyHMACCheck(PRFileDesc *shFD, const SECHashObject *hashObj,
+                        const SECItem *key, const SECItem *signature)
+{
+    HMACContext *hmaccx = NULL;
+    SECItem hash;
+    int bytesRead;
+    unsigned char hashBuf[HASH_LENGTH_MAX];
+    unsigned char buf[4096];
+    SECStatus rv;
+    PRBool result = PR_FALSE;
+
+#ifdef NSS_STRICT_INTEGRITY
+    if (!blapi_HashAllowed(hashObj)) {
+        return PR_FALSE;
+    }
+#endif
+
+    hash.type = siBuffer;
+    hash.data = hashBuf;
+    hash.len = hashObj->length;
+
+    /* create an hmac for the library file */
+    hmaccx = HMAC_Create(hashObj, key->data, key->len, PR_TRUE);
+    if (hmaccx == NULL) {
+        return PR_FALSE;
+    }
+    HMAC_Begin(hmaccx);
+
+    while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) {
+        HMAC_Update(hmaccx, buf, bytesRead);
+    }
+    rv = HMAC_Finish(hmaccx, hash.data, &hash.len, hash.len);
+
+    HMAC_Destroy(hmaccx, PR_TRUE);
+
+    /* verify the hmac against the check file; bytesRead is 0 at end of
+     * file and negative on a read error, which must not verify */
+    if (rv == SECSuccess && bytesRead == 0) {
+        result = SECITEM_ItemsAreEqual(signature, &hash);
+    }
+    PORT_Memset(hashBuf, 0, sizeof hashBuf);
+    return result;
+}
+
+static PRBool
+blapi_SHVerifyFile(const char *shName, PRBool self, PRBool rerun)
+{
+ char *checkName = NULL;
+ PRFileDesc *checkFD = NULL;
+ PRFileDesc *shFD = NULL;
+ const SECHashObject *hashObj = NULL;
+ SECItem signature = { 0, NULL, 0 };
+ int bytesRead, offset, type;
+ SECStatus rv;
+ SECItem hmacKey = { 0, NULL, 0 };
+#ifdef FREEBL_USE_PRELINK
+ int pid = 0;
+#endif
+ PRBool result = PR_FALSE; /* if anything goes wrong,
+ * the signature does not verify */
+ NSSSignChkHeader header;
+#ifndef NSS_STRICT_INTEGRITY
+ DSAPublicKey key;
+
+ PORT_Memset(&key, 0, sizeof(key));
+#endif
+
+ /* If our integrity check was never run or failed, fail any other
+ * integrity checks to prevent any token going into FIPS mode. */
+ if (!self && (BL_FIPSEntryOK(PR_FALSE, rerun) != SECSuccess)) {
+ return PR_FALSE;
+ }
+
+ if (!shName) {
+ goto loser;
+ }
+
+ /* figure out the name of our check file */
+ checkName = mkCheckFileName(shName);
+ if (!checkName) {
+ goto loser;
+ }
+
+ /* open the check File */
+ checkFD = PR_Open(checkName, PR_RDONLY, 0);
+ if (checkFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+ fprintf(stderr, "Failed to open the check file %s: (%d, %d)\n",
+ checkName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+ goto loser;
+ }
+
+ /* read and verify the header */
+ bytesRead = PR_Read(checkFD, &header, sizeof(header));
+ if (bytesRead != sizeof(header)) {
+ goto loser;
+ }
+ if ((header.magic1 != NSS_SIGN_CHK_MAGIC1) ||
+ (header.magic2 != NSS_SIGN_CHK_MAGIC2)) {
+ goto loser;
+ }
+ /* we've bumped the version number so that newly signed .check
+ * files will fail nicely on old versions of nss */
+ if (header.majorVersion > NSS_SIGN_CHK_MAJOR_VERSION) {
+ goto loser;
+ }
+ if (header.minorVersion < NSS_SIGN_CHK_MINOR_VERSION) {
+ goto loser;
+ }
+ type = decodeInt(header.type);
+
+ /* seek past any future header extensions */
+ offset = decodeInt(header.offset);
+ if (PR_Seek(checkFD, offset, PR_SEEK_SET) < 0) {
+ goto loser;
+ }
+
+ switch (type) {
+ case CKK_DSA:
+#ifdef NSS_STRICT_INTEGRITY
+ goto loser;
+#else
+ /* accept old dsa check files if NSS_STRICT_INTEGRITY is not set*/
+ /* read the key */
+ rv = readItem(checkFD, &key.params.prime);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ rv = readItem(checkFD, &key.params.subPrime);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ rv = readItem(checkFD, &key.params.base);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ rv = readItem(checkFD, &key.publicValue);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ /* read the signature */
+ rv = readItem(checkFD, &signature);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ hashObj = HASH_GetRawHashObject(PQG_GetHashType(&key.params));
+ break;
+#endif
+ default:
+ if ((type & NSS_SIGN_CHK_TYPE_FLAGS) != NSS_SIGN_CHK_FLAG_HMAC) {
+ goto loser;
+ }
+ /* read the HMAC Key */
+ rv = readItem(checkFD, &hmacKey);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ /* read the signature */
+ rv = readItem(checkFD, &signature);
+ if (rv != SECSuccess) {
+ goto loser;
+ }
+ hashObj = HASH_GetRawHashObject(type & ~NSS_SIGN_CHK_TYPE_FLAGS);
+ }
+
+ /* done with the check file */
+ PR_Close(checkFD);
+ checkFD = NULL;
+
+ if (hashObj == NULL) {
+ goto loser;
+ }
+
+/* open our library file */
+#ifdef FREEBL_USE_PRELINK
+ shFD = bl_OpenUnPrelink(shName, &pid);
+#else
+ shFD = PR_Open(shName, PR_RDONLY, 0);
+#endif
+ if (shFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+ fprintf(stderr, "Failed to open the library file %s: (%d, %d)\n",
+ shName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+ goto loser;
+ }
+
+ switch (type) {
+ case CKK_DSA:
+#ifndef NSS_STRICT_INTEGRITY
+ result = blapi_SHVerifyDSACheck(shFD, hashObj, &key, &signature);
+#endif
+ break;
+ default:
+ if ((type & NSS_SIGN_CHK_TYPE_FLAGS) != NSS_SIGN_CHK_FLAG_HMAC) {
+ break;
+ }
+ result = blapi_SHVerifyHMACCheck(shFD, hashObj, &hmacKey, &signature);
+ break;
+ }
+
+#ifdef FREEBL_USE_PRELINK
+ bl_CloseUnPrelink(shFD, pid);
+#else
+ PR_Close(shFD);
+#endif
+ shFD = NULL;
+
+loser:
+ PORT_Memset(&header, 0, sizeof header);
+ if (checkName != NULL) {
+ PORT_Free(checkName);
+ }
+ if (checkFD != NULL) {
+ PR_Close(checkFD);
+ }
+ if (shFD != NULL) {
+ PR_Close(shFD);
+ }
+ if (hmacKey.data != NULL) {
+ SECITEM_ZfreeItem(&hmacKey, PR_FALSE);
+ }
+ if (signature.data != NULL) {
+ SECITEM_ZfreeItem(&signature, PR_FALSE);
+ }
+#ifndef NSS_STRICT_INTEGRITY
+ if (key.params.prime.data != NULL) {
+ SECITEM_ZfreeItem(&key.params.prime, PR_FALSE);
+ }
+ if (key.params.subPrime.data != NULL) {
+ SECITEM_ZfreeItem(&key.params.subPrime, PR_FALSE);
+ }
+ if (key.params.base.data != NULL) {
+ SECITEM_ZfreeItem(&key.params.base, PR_FALSE);
+ }
+ if (key.publicValue.data != NULL) {
+ SECITEM_ZfreeItem(&key.publicValue, PR_FALSE);
+ }
+#endif
+ return result;
+}
+
+/*
+ * Verify the library that contains this code. The address of decodeInt
+ * is handed to blapi_SHVerify as the address used to locate the library.
+ */
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    if (name != NULL) {
+        return blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE, PR_FALSE);
+    }
+
+    /*
+     * If name is NULL, freebl is statically linked into softoken.
+     * softoken will call BLAPI_SHVerify next to verify itself.
+     */
+    return PR_TRUE;
+}
+
+#else /* NSS_FIPS_DISABLED */
+
+PRBool
+BLAPI_SHVerifyFile(const char *shName)
+{
+ return PR_FALSE; /* NSS_FIPS_DISABLED build: no check is performed, always reports failure */
+}
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+ return PR_FALSE; /* NSS_FIPS_DISABLED build: no check is performed, always reports failure */
+}
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+ return PR_FALSE; /* NSS_FIPS_DISABLED build: no check is performed, always reports failure */
+}
+
+#endif /* NSS_FIPS_DISABLED */
diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c
new file mode 100644
index 0000000000..a79cf69a23
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.c
@@ -0,0 +1,879 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+#define _GNU_SOURCE 1
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <dlfcn.h>
+#include <prio.h>
+#include <prlink.h>
+#include <prlog.h>
+#include <prthread.h>
+#include <plstr.h>
+#include <prinit.h>
+#include <prlock.h>
+#include <prmem.h>
+#include <prerror.h>
+#include <prmon.h>
+#include <pratom.h>
+#include <prsystem.h>
+#include <prinrval.h>
+#include <prtime.h>
+#include <prcvar.h>
+#include <secasn1.h>
+#include <secdig.h>
+#include <secport.h>
+#include <secitem.h>
+#include <blapi.h>
+#include <assert.h>
+#include <private/pprio.h>
+
+/* Android API < 21 doesn't define RTLD_NOLOAD */
+/* Falling back to 0 turns the flag into a no-op in the dlopen() flag mask. */
+#ifndef RTLD_NOLOAD
+#define RTLD_NOLOAD 0
+#endif
+
+/*
+ * Defined unconditionally, so every "#ifdef FREEBL_NO_WEAK" below selects
+ * the function-pointer implementation rather than the weak-symbol one.
+ */
+#define FREEBL_NO_WEAK 1
+
+/* Attribute used by the weak-symbol variant of STUB_DECLARE. */
+#define WEAK __attribute__((weak))
+
+#ifdef FREEBL_NO_WEAK
+
+/*
+ * This uses function pointers.
+ *
+ * CONS: A separate function is needed to
+ *       fill in the function pointers.
+ *
+ * PROS: it works on all platforms.
+ *       it allows for dynamically finding nspr and libutil, even once
+ *       softoken is loaded and running. (NOTE: this may be a problem if
+ *       we switch between the stubs and real NSPR on the fly. NSPR will
+ *       do bad things if passed an _FakeArena to free or allocate from).
+ */
+
+/* Declare the pointer type and the (initially NULL) hook for one function. */
+#define STUB_DECLARE(ret, fn, args) \
+    typedef ret(*type_##fn) args;   \
+    static type_##fn ptr_##fn = NULL
+
+/*
+ * STUB_SAFE_CALLn(fn, a1..an): if the hook for fn has been resolved, return
+ * the real function's result from the *enclosing* function; otherwise fall
+ * through to the statements that follow the macro.
+ */
+#define STUB_SAFE_CALL0(fn) \
+    if (ptr_##fn) {         \
+        return ptr_##fn();  \
+    }
+#define STUB_SAFE_CALL1(fn, a1) \
+    if (ptr_##fn) {             \
+        return ptr_##fn(a1);    \
+    }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+    if (ptr_##fn) {                 \
+        return ptr_##fn(a1, a2);    \
+    }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+    if (ptr_##fn) {                     \
+        return ptr_##fn(a1, a2, a3);    \
+    }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+    if (ptr_##fn) {                         \
+        return ptr_##fn(a1, a2, a3, a4);    \
+    }
+#define STUB_SAFE_CALL5(fn, a1, a2, a3, a4, a5) \
+    if (ptr_##fn) {                             \
+        return ptr_##fn(a1, a2, a3, a4, a5);    \
+    }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+    if (ptr_##fn) {                                 \
+        return ptr_##fn(a1, a2, a3, a4, a5, a6);    \
+    }
+
+/*
+ * Resolve fn from the dlopen() handle named `lib` in the enclosing scope;
+ * makes the enclosing function return SECFailure if the symbol is missing.
+ */
+#define STUB_FETCH_FUNCTION(fn)            \
+    ptr_##fn = (type_##fn)dlsym(lib, #fn); \
+    if (ptr_##fn == NULL) {                \
+        return SECFailure;                 \
+    }
+
+#else
+/*
+ * this uses the loader weak attribute. it works automatically, but once
+ * freebl is loaded, the symbols are 'fixed' (later loading of NSPR or
+ * libutil will not resolve these symbols).
+ */
+
+#define STUB_DECLARE(ret, fn, args) \
+    WEAK extern ret fn args
+
+#define STUB_SAFE_CALL0(fn) \
+    if (fn) {               \
+        return fn();        \
+    }
+#define STUB_SAFE_CALL1(fn, a1) \
+    if (fn) {                   \
+        return fn(a1);          \
+    }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+    if (fn) {                       \
+        return fn(a1, a2);          \
+    }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+    if (fn) {                           \
+        return fn(a1, a2, a3);          \
+    }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+    if (fn) {                               \
+        return fn(a1, a2, a3, a4);          \
+    }
+/* Kept in step with the function-pointer branch: a 5-argument stub exists. */
+#define STUB_SAFE_CALL5(fn, a1, a2, a3, a4, a5) \
+    if (fn) {                                   \
+        return fn(a1, a2, a3, a4, a5);          \
+    }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+    if (fn) {                                       \
+        return fn(a1, a2, a3, a4, a5, a6);          \
+    }
+#endif
+
+/*
+ * One STUB_DECLARE per libutil / NSPR entry point that a *_stub function
+ * below may forward to. In the function-pointer configuration each line
+ * produces a type_<fn> typedef plus a static ptr_<fn> hook that starts NULL.
+ */
+STUB_DECLARE(void *, PORT_Alloc_Util, (size_t len));
+STUB_DECLARE(void *, PORT_ArenaAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void *, PORT_ArenaZAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void, PORT_Free_Util, (void *ptr));
+STUB_DECLARE(void, PORT_FreeArena_Util, (PLArenaPool * arena, PRBool zero));
+STUB_DECLARE(int, PORT_GetError_Util, (void));
+STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize));
+STUB_DECLARE(void, PORT_SetError_Util, (int value));
+STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len));
+STUB_DECLARE(void *, PORT_ZAllocAligned_Util, (size_t bytes, size_t alignment, void **mem));
+STUB_DECLARE(void *, PORT_ZAllocAlignedOffset_Util, (size_t bytes, size_t alignment, size_t offset));
+STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len));
+
+/* NSPR entry points */
+STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln));
+STUB_DECLARE(PRStatus, PR_Access, (const char *name, PRAccessHow how));
+STUB_DECLARE(PRStatus, PR_CallOnce, (PRCallOnceType * once, PRCallOnceFN func));
+STUB_DECLARE(PRStatus, PR_Close, (PRFileDesc * fd));
+STUB_DECLARE(void, PR_DestroyLock, (PRLock * lock));
+STUB_DECLARE(void, PR_DestroyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(void, PR_Free, (void *ptr));
+STUB_DECLARE(char *, PR_GetLibraryFilePathname, (const char *name, PRFuncPtr addr));
+STUB_DECLARE(PRFileDesc *, PR_ImportPipe, (PROsfd osfd));
+STUB_DECLARE(void, PR_Lock, (PRLock * lock));
+STUB_DECLARE(PRCondVar *, PR_NewCondVar, (PRLock * lock));
+STUB_DECLARE(PRLock *, PR_NewLock, (void));
+STUB_DECLARE(PRStatus, PR_NotifyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRStatus, PR_NotifyAllCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRFileDesc *, PR_Open, (const char *name, PRIntn flags, PRIntn mode));
+STUB_DECLARE(PRInt32, PR_Read, (PRFileDesc * fd, void *buf, PRInt32 amount));
+STUB_DECLARE(PROffset32, PR_Seek, (PRFileDesc * fd, PROffset32 offset, PRSeekWhence whence));
+STUB_DECLARE(PRStatus, PR_Sleep, (PRIntervalTime ticks));
+STUB_DECLARE(PRStatus, PR_Unlock, (PRLock * lock));
+STUB_DECLARE(PRStatus, PR_WaitCondVar, (PRCondVar * cvar, PRIntervalTime timeout));
+STUB_DECLARE(char *, PR_GetEnvSecure, (const char *));
+
+/* SECItem / OID / constant-time helpers */
+STUB_DECLARE(SECItem *, SECITEM_AllocItem_Util, (PLArenaPool * arena, SECItem *item, unsigned int len));
+STUB_DECLARE(SECComparison, SECITEM_CompareItem_Util, (const SECItem *a, const SECItem *b));
+STUB_DECLARE(PRBool, SECITEM_ItemsAreEqual_Util, (const SECItem *a, const SECItem *b));
+STUB_DECLARE(SECStatus, SECITEM_CopyItem_Util, (PLArenaPool * arena, SECItem *to, const SECItem *from));
+STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid));
+STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n));
+STUB_DECLARE(unsigned int, NSS_SecureMemcmpZero, (const void *mem, size_t n));
+STUB_DECLARE(void, NSS_SecureSelect, (void *dest, const void *src0, const void *src1, size_t n, unsigned char b));
+#ifndef NSS_FIPS_DISABLED
+STUB_DECLARE(PRBool, NSS_GetSystemFIPSEnabled, (void));
+#endif
+
+/*
+ * Typed allocation helpers layered on the stub allocators below.
+ * PORT_ZNewAligned_stub passes offsetof(type, mem) so that the allocator can
+ * record the raw allocation pointer in the object's `mem` member.
+ */
+#define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type))
+#define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type))
+#define PORT_ZNewArray_stub(type, num) \
+ (type *)PORT_ZAlloc_stub(sizeof(type) * (num))
+#define PORT_ZNewAligned_stub(type, alignment, mem) \
+ (type *)PORT_ZAllocAlignedOffset_stub(sizeof(type), alignment, offsetof(type, mem))
+
+/*
+ * NOTE: in order to support hashing only the memory allocation stubs,
+ * the get library name stubs, and the file io stubs are needed (the latter
+ * two are for the library verification). The remaining stubs are simply to
+ * compile. Attempts to use the library for other operations without NSPR
+ * will most likely fail.
+ */
+
+/* memory */
+/*
+ * Allocate len bytes: via PORT_Alloc_Util when it has been resolved,
+ * otherwise straight from malloc(). Returns NULL on failure.
+ */
+extern void *
+PORT_Alloc_stub(size_t len)
+{
+    void *block;
+
+    STUB_SAFE_CALL1(PORT_Alloc_Util, len);
+
+    block = malloc(len);
+    return block;
+}
+
+/*
+ * Release ptr: via PORT_Free_Util when it has been resolved, otherwise
+ * with free().
+ */
+extern void
+PORT_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PORT_Free_Util, ptr);
+
+    free(ptr);
+}
+
+/*
+ * Allocate len zero-filled bytes: via PORT_ZAlloc_Util when it has been
+ * resolved, otherwise from the C allocator. Returns NULL on failure.
+ */
+extern void *
+PORT_ZAlloc_stub(size_t len)
+{
+    STUB_SAFE_CALL1(PORT_ZAlloc_Util, len);
+
+    /* calloc hands back memory that is already cleared */
+    return calloc(1, len);
+}
+
+/*
+ * aligned_alloc is C11. This is an alternative to get aligned memory.
+ *
+ * Allocates a zero-filled buffer with room for at least `bytes` bytes
+ * starting at an address that is a multiple of `alignment`.
+ *
+ * bytes:     payload size; 0 is treated as 1 so something is always allocated.
+ * alignment: must be a non-zero power of two.
+ * mem:       out parameter receiving the raw malloc() pointer; that pointer,
+ *            not the return value, is what must eventually be freed.
+ *
+ * Returns the aligned address, or NULL on bad arguments, size overflow, or
+ * allocation failure.
+ */
+extern void *
+PORT_ZAllocAligned_stub(size_t bytes, size_t alignment, void **mem)
+{
+    STUB_SAFE_CALL3(PORT_ZAllocAligned_Util, bytes, alignment, mem);
+
+    /* This only works if alignment is a power of 2. */
+    if ((alignment == 0) || (alignment & (alignment - 1))) {
+        return NULL;
+    }
+
+    if (!mem) {
+        return NULL;
+    }
+
+    size_t x = alignment - 1;
+    /* Always allocate a non-zero amount of bytes */
+    size_t payload = bytes ? bytes : 1;
+
+    /* payload + x must not wrap around, or the buffer would be too small */
+    if (payload > (size_t)-1 - x) {
+        *mem = NULL;
+        return NULL;
+    }
+    size_t len = payload + x;
+
+    *mem = malloc(len);
+    if (!*mem) {
+        return NULL;
+    }
+
+    memset(*mem, 0, len);
+
+    /* We're pretty sure this is non-zero, but let's assure scan-build too. */
+    void *ret = (void *)(((uintptr_t)*mem + x) & ~(uintptr_t)x);
+    assert(ret);
+
+    return ret;
+}
+
+/*
+ * Allocate a zero-filled, aligned object of `size` bytes and store the raw
+ * allocation pointer inside it, `offset` bytes from its start, so that the
+ * owner can later free the real allocation.
+ *
+ * size:      object size in bytes.
+ * alignment: forwarded to PORT_ZAllocAligned_stub (power of two).
+ * offset:    byte offset of the void * member that receives the raw pointer.
+ *
+ * Returns the aligned object, or NULL if the pointer slot would not fit
+ * inside the object or if allocation fails.
+ */
+extern void *
+PORT_ZAllocAlignedOffset_stub(size_t size, size_t alignment, size_t offset)
+{
+    STUB_SAFE_CALL3(PORT_ZAllocAlignedOffset_Util, size, alignment, offset);
+
+    /* The whole void * slot at `offset` has to lie within the object. */
+    if (size < sizeof(void *) || offset > size - sizeof(void *)) {
+        return NULL;
+    }
+
+    void *mem = NULL;
+    void *v = PORT_ZAllocAligned_stub(size, alignment, &mem);
+    if (!v) {
+        return NULL;
+    }
+
+    *((void **)((uintptr_t)v + offset)) = mem;
+    return v;
+}
+
+/*
+ * Clear len bytes at ptr and release the buffer: via PORT_ZFree_Util when
+ * it has been resolved, otherwise with memset() + free().
+ * A NULL ptr is accepted and ignored by the fallback path.
+ */
+extern void
+PORT_ZFree_stub(void *ptr, size_t len)
+{
+    STUB_SAFE_CALL2(PORT_ZFree_Util, ptr, len);
+
+    /* memset() on a NULL pointer is undefined, so bail out first */
+    if (ptr == NULL) {
+        return;
+    }
+    memset(ptr, 0, len);
+    free(ptr);
+}
+
+/*
+ * Release ptr: via the real PR_Free when it has been resolved, otherwise
+ * with free().
+ */
+extern void
+PR_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PR_Free, ptr);
+
+    free(ptr);
+}
+
+/* we have defensive returns after abort(), which is marked noreturn on some
+ * platforms, making the compiler legitimately complain. */
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunreachable-code-return"
+#endif
+
+/*
+ * arenas
+ *
+ */
+/*
+ * Forwards to PORT_NewArena_Util once it has been resolved. Arenas have no
+ * stand-alone fallback, so the call aborts otherwise.
+ */
+extern PLArenaPool *
+PORT_NewArena_stub(unsigned long chunksize)
+{
+ STUB_SAFE_CALL1(PORT_NewArena_Util, chunksize);
+ abort();
+ return NULL;
+}
+
+/*
+ * Forwards to PORT_ArenaAlloc_Util once it has been resolved. Arenas have
+ * no stand-alone fallback, so the call aborts otherwise.
+ */
+extern void *
+PORT_ArenaAlloc_stub(PLArenaPool *arena, size_t size)
+{
+    /* forward to the non-zeroing allocator this stub stands in for */
+    STUB_SAFE_CALL2(PORT_ArenaAlloc_Util, arena, size);
+    abort();
+    return NULL;
+}
+
+/*
+ * Forwards to PORT_ArenaZAlloc_Util once it has been resolved. Arenas have
+ * no stand-alone fallback, so the call aborts otherwise.
+ */
+extern void *
+PORT_ArenaZAlloc_stub(PLArenaPool *arena, size_t size)
+{
+
+ STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size);
+ abort();
+ return NULL;
+}
+
+/*
+ * Forwards to PORT_FreeArena_Util once it has been resolved. Arenas have
+ * no stand-alone fallback, so the call aborts otherwise.
+ */
+extern void
+PORT_FreeArena_stub(PLArenaPool *arena, PRBool zero)
+{
+
+ STUB_SAFE_CALL2(PORT_FreeArena_Util, arena, zero);
+ abort();
+}
+
+/* io */
+/*
+ * Open a file: via the real PR_Open when it has been resolved, otherwise
+ * with open(2).
+ *
+ * name:  path to open.
+ * flags: PR_* open flags; access mode plus PR_CREATE_FILE, PR_EXCL,
+ *        PR_APPEND and PR_TRUNCATE are translated to their O_* equivalents.
+ * mode:  permission bits passed through to open().
+ *
+ * In the fallback path the returned "PRFileDesc *" is really a heap
+ * allocated int holding the OS descriptor; it is only meaningful to the
+ * other *_stub I/O functions in this file. Returns NULL on failure.
+ */
+extern PRFileDesc *
+PR_Open_stub(const char *name, PRIntn flags, PRIntn mode)
+{
+    int *lfd = NULL;
+    int fd;
+    int lflags = 0;
+
+    STUB_SAFE_CALL3(PR_Open, name, flags, mode);
+
+    if (flags & PR_RDWR) {
+        lflags = O_RDWR;
+    } else if (flags & PR_WRONLY) {
+        lflags = O_WRONLY;
+    } else {
+        lflags = O_RDONLY;
+    }
+
+    /* without O_CREAT a create request fails and O_EXCL is unspecified */
+    if (flags & PR_CREATE_FILE) {
+        lflags |= O_CREAT;
+    }
+    if (flags & PR_EXCL) {
+        lflags |= O_EXCL;
+    }
+    if (flags & PR_APPEND) {
+        lflags |= O_APPEND;
+    }
+    if (flags & PR_TRUNCATE) {
+        lflags |= O_TRUNC;
+    }
+
+    fd = open(name, lflags, mode);
+    if (fd >= 0) {
+        lfd = PORT_New_stub(int);
+        if (lfd != NULL) {
+            *lfd = fd;
+        } else {
+            /* no wrapper to hand back: don't leak the descriptor */
+            close(fd);
+        }
+    }
+    return (PRFileDesc *)lfd;
+}
+
+/*
+ * Wrap an existing OS descriptor: via the real PR_ImportPipe when it has
+ * been resolved, otherwise in a heap-allocated int that the other *_stub
+ * I/O functions understand. Returns NULL if the wrapper cannot be allocated.
+ */
+extern PRFileDesc *
+PR_ImportPipe_stub(PROsfd fd)
+{
+    int *wrapper;
+
+    STUB_SAFE_CALL1(PR_ImportPipe, fd);
+
+    wrapper = PORT_New_stub(int);
+    if (wrapper == NULL) {
+        return NULL;
+    }
+    *wrapper = fd;
+    return (PRFileDesc *)wrapper;
+}
+
+/*
+ * Close a descriptor: via the real PR_Close when it has been resolved.
+ * Otherwise fd is taken to be the int wrapper produced by the stub
+ * open/import functions: the OS descriptor is closed, the wrapper freed,
+ * and PR_SUCCESS is returned unconditionally.
+ */
+extern PRStatus
+PR_Close_stub(PRFileDesc *fd)
+{
+    int *wrapper = (int *)fd;
+
+    STUB_SAFE_CALL1(PR_Close, fd);
+
+    close(*wrapper);
+    PORT_Free_stub(wrapper);
+
+    return PR_SUCCESS;
+}
+
+/*
+ * Read up to amount bytes into buf: via the real PR_Read when it has been
+ * resolved, otherwise with read(2) on the OS descriptor stored in the stub
+ * wrapper. Returns whatever the underlying call returns.
+ */
+extern PRInt32
+PR_Read_stub(PRFileDesc *fd, void *buf, PRInt32 amount)
+{
+    int *wrapper = (int *)fd;
+
+    STUB_SAFE_CALL3(PR_Read, fd, buf, amount);
+
+    return read(*wrapper, buf, amount);
+}
+
+/*
+ * Reposition a descriptor: via the real PR_Seek when it has been resolved,
+ * otherwise with lseek(2) on the OS descriptor stored in the stub wrapper.
+ * Any whence other than PR_SEEK_CUR / PR_SEEK_END is treated as SEEK_SET.
+ */
+extern PROffset32
+PR_Seek_stub(PRFileDesc *fd, PROffset32 offset, PRSeekWhence whence)
+{
+    int *wrapper = (int *)fd;
+    int origin;
+
+    STUB_SAFE_CALL3(PR_Seek, fd, offset, whence);
+
+    if (whence == PR_SEEK_CUR) {
+        origin = SEEK_CUR;
+    } else if (whence == PR_SEEK_END) {
+        origin = SEEK_END;
+    } else {
+        origin = SEEK_SET;
+    }
+
+    return lseek(*wrapper, offset, origin);
+}
+
+/*
+ * Check accessibility of a path: via the real PR_Access when it has been
+ * resolved, otherwise with access(2). Write and read checks map to W_OK
+ * and R_OK; every other request is an existence check (F_OK).
+ * Returns PR_SUCCESS when access() reports 0, PR_FAILURE otherwise.
+ */
+PRStatus
+PR_Access_stub(const char *name, PRAccessHow how)
+{
+    int probe;
+
+    STUB_SAFE_CALL2(PR_Access, name, how);
+
+    if (how == PR_ACCESS_WRITE_OK) {
+        probe = W_OK;
+    } else if (how == PR_ACCESS_READ_OK) {
+        probe = R_OK;
+    } else {
+        /* assume F_OK for all others */
+        probe = F_OK;
+    }
+
+    return (access(name, probe) == 0) ? PR_SUCCESS : PR_FAILURE;
+}
+
+/*
+ * library
+ */
+/*
+ * Find the file that contains addr: via the real PR_GetLibraryFilePathname
+ * when it has been resolved, otherwise with dladdr(). The fallback ignores
+ * name and returns a copy of the path obtained from PORT_Alloc_stub, or
+ * NULL if the lookup or the allocation fails.
+ */
+extern char *
+PR_GetLibraryFilePathname_stub(const char *name, PRFuncPtr addr)
+{
+    Dl_info info;
+    size_t pathSize;
+    char *copy;
+
+    STUB_SAFE_CALL2(PR_GetLibraryFilePathname, name, addr);
+
+    if (dladdr((void *)addr, &info) == 0) {
+        return NULL;
+    }
+
+    /* include the terminating NUL in the copy */
+    pathSize = strlen(info.dli_fname) + 1;
+    copy = PORT_Alloc_stub(pathSize);
+    if (copy == NULL) {
+        return NULL;
+    }
+    memcpy(copy, info.dli_fname, pathSize);
+    return copy;
+}
+
+#include <errno.h>
+
+/* errors */
+/*
+ * Returns the current error code: from PORT_GetError_Util once it has been
+ * resolved, otherwise the C library's errno.
+ */
+extern int
+PORT_GetError_stub(void)
+{
+ STUB_SAFE_CALL0(PORT_GetError_Util);
+ return errno;
+}
+
+/*
+ * Records an error code: through PORT_SetError_Util once it has been
+ * resolved, otherwise by storing it in errno.
+ */
+extern void
+PORT_SetError_stub(int value)
+{
+ STUB_SAFE_CALL1(PORT_SetError_Util, value);
+ errno = value;
+}
+
+/* misc */
+/*
+ * Assertion failure handler: forwards to the real PR_Assert once it has
+ * been resolved; otherwise prints "<file> line <ln>: <s>" to stderr and
+ * aborts.
+ */
+extern void
+PR_Assert_stub(const char *s, const char *file, PRIntn ln)
+{
+ STUB_SAFE_CALL3(PR_Assert, s, file, ln);
+ fprintf(stderr, "%s line %d: %s\n", file, ln, s);
+ abort();
+}
+
+/* time */
+/*
+ * Sleep: via the real PR_Sleep once it has been resolved, otherwise with
+ * usleep(ticks * 1000), i.e. one tick is treated as one millisecond.
+ * NOTE(review): confirm that matches the PRIntervalTime resolution callers
+ * assume. The usleep() result is ignored; PR_SUCCESS is always returned.
+ */
+extern PRStatus
+PR_Sleep_stub(PRIntervalTime ticks)
+{
+ STUB_SAFE_CALL1(PR_Sleep, ticks);
+ usleep(ticks * 1000);
+ return PR_SUCCESS;
+}
+
+/* locking */
+/*
+ * Forwards to the real PR_NewLock once it has been resolved; there is no
+ * stand-alone lock implementation, so the call aborts otherwise.
+ */
+extern PRLock *
+PR_NewLock_stub(void)
+{
+ STUB_SAFE_CALL0(PR_NewLock);
+ abort();
+ return NULL;
+}
+
+/*
+ * Forwards to the real PR_Unlock once it has been resolved; aborts
+ * otherwise (no stand-alone lock implementation).
+ */
+extern PRStatus
+PR_Unlock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_Unlock, lock);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * Forwards to the real PR_Lock once it has been resolved; aborts
+ * otherwise (no stand-alone lock implementation).
+ */
+extern void
+PR_Lock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_Lock, lock);
+ abort();
+ return;
+}
+
+/*
+ * Forwards to the real PR_DestroyLock once it has been resolved; aborts
+ * otherwise (no stand-alone lock implementation).
+ */
+extern void
+PR_DestroyLock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_DestroyLock, lock);
+ abort();
+ return;
+}
+
+/*
+ * Forwards to the real PR_NewCondVar once it has been resolved; aborts
+ * otherwise (no stand-alone condition-variable implementation).
+ */
+extern PRCondVar *
+PR_NewCondVar_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_NewCondVar, lock);
+ abort();
+ return NULL;
+}
+
+/*
+ * Forwards to the real PR_NotifyCondVar once it has been resolved; aborts
+ * otherwise (no stand-alone condition-variable implementation).
+ */
+extern PRStatus
+PR_NotifyCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_NotifyCondVar, cvar);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * Forwards to the real PR_NotifyAllCondVar once it has been resolved;
+ * aborts otherwise (no stand-alone condition-variable implementation).
+ */
+extern PRStatus
+PR_NotifyAllCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_NotifyAllCondVar, cvar);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * Forwards to the real PR_WaitCondVar once it has been resolved; aborts
+ * otherwise (no stand-alone condition-variable implementation).
+ */
+extern PRStatus
+PR_WaitCondVar_stub(PRCondVar *cvar, PRIntervalTime timeout)
+{
+ STUB_SAFE_CALL2(PR_WaitCondVar, cvar, timeout);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * Look up an environment variable: via the real PR_GetEnvSecure once it
+ * has been resolved; otherwise with secure_getenv() when __USE_GNU is
+ * defined and plain getenv() when it is not.
+ * NOTE(review): __USE_GNU is presumably set by the C library in response
+ * to the _GNU_SOURCE define at the top of this file; confirm.
+ */
+extern char *
+PR_GetEnvSecure_stub(const char *var)
+{
+ STUB_SAFE_CALL1(PR_GetEnvSecure, var);
+#ifdef __USE_GNU
+ return secure_getenv(var);
+#else
+ return getenv(var);
+#endif
+}
+
+/*
+ * Forwards to the real PR_DestroyCondVar once it has been resolved; aborts
+ * otherwise (no stand-alone condition-variable implementation).
+ */
+extern void
+PR_DestroyCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_DestroyCondVar, cvar);
+ abort();
+ return;
+}
+
+/*
+ * NOTE: this presupposes GCC 4.1
+ *
+ * Forwards to the real PR_CallOnce once it has been resolved; there is no
+ * stand-alone implementation, so the call aborts otherwise.
+ */
+extern PRStatus
+PR_CallOnce_stub(PRCallOnceType *once, PRCallOnceFN func)
+{
+ STUB_SAFE_CALL2(PR_CallOnce, once, func);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * SECITEMS implement Item Utilities
+ */
+/*
+ * Forwards to SECITEM_FreeItem_Util once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern void
+SECITEM_FreeItem_stub(SECItem *zap, PRBool freeit)
+{
+ STUB_SAFE_CALL2(SECITEM_FreeItem_Util, zap, freeit);
+ abort();
+}
+
+/*
+ * Forwards to SECITEM_AllocItem_Util once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern SECItem *
+SECITEM_AllocItem_stub(PLArenaPool *arena, SECItem *item, unsigned int len)
+{
+ STUB_SAFE_CALL3(SECITEM_AllocItem_Util, arena, item, len);
+ abort();
+ return NULL;
+}
+
+/*
+ * Forwards to SECITEM_CompareItem_Util once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern SECComparison
+SECITEM_CompareItem_stub(const SECItem *a, const SECItem *b)
+{
+ STUB_SAFE_CALL2(SECITEM_CompareItem_Util, a, b);
+ abort();
+ return SECEqual;
+}
+
+/*
+ * Compare two SECItems for equality: via SECITEM_ItemsAreEqual_Util when
+ * it has been resolved, otherwise locally.
+ *
+ * Two NULL items are equal; a NULL and a non-NULL item are not. Otherwise
+ * the items are equal when their lengths match and their data compares
+ * equal byte for byte.
+ */
+extern PRBool
+SECITEM_ItemsAreEqual_stub(const SECItem *a, const SECItem *b)
+{
+    STUB_SAFE_CALL2(SECITEM_ItemsAreEqual_Util, a, b);
+
+    if (a == NULL || b == NULL) {
+        /* equal only when both are missing */
+        return (a == NULL && b == NULL) ? PR_TRUE : PR_FALSE;
+    }
+
+    /* both items exist: lengths must agree before the data is compared */
+    if (a->len != b->len) {
+        return PR_FALSE;
+    }
+
+    return (PORT_Memcmp(a->data, b->data, b->len) == 0) ? PR_TRUE : PR_FALSE;
+}
+
+/*
+ * Forwards to SECITEM_CopyItem_Util once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern SECStatus
+SECITEM_CopyItem_stub(PLArenaPool *arena, SECItem *to, const SECItem *from)
+{
+ STUB_SAFE_CALL3(SECITEM_CopyItem_Util, arena, to, from);
+ abort();
+ return SECFailure;
+}
+
+/*
+ * Forwards to SECOID_FindOIDTag_Util once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern SECOidTag
+SECOID_FindOIDTag_stub(const SECItem *oid)
+{
+ STUB_SAFE_CALL1(SECOID_FindOIDTag_Util, oid);
+ abort();
+ return SEC_OID_UNKNOWN;
+}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+/*
+ * Wipe and release a SECItem: via SECITEM_ZfreeItem_Util when it has been
+ * resolved, otherwise locally.
+ *
+ * zap:    item to clear; NULL is a no-op.
+ * freeit: when true the SECItem structure itself is freed as well.
+ *
+ * The data buffer is zeroed before it is freed, and the SECItem fields are
+ * zeroed afterwards.
+ */
+extern void
+SECITEM_ZfreeItem_stub(SECItem *zap, PRBool freeit)
+{
+    STUB_SAFE_CALL2(SECITEM_ZfreeItem_Util, zap, freeit);
+
+    if (zap == NULL) {
+        return;
+    }
+
+    if (zap->data != NULL) {
+        PORT_Memset(zap->data, 0, zap->len);
+        PORT_Free_stub(zap->data);
+    }
+
+    PORT_Memset(zap, 0, sizeof(SECItem));
+
+    if (freeit) {
+        PORT_Free_stub(zap);
+    }
+}
+
+/*
+ * Forwards to the real NSS_SecureMemcmp once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern int
+NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n)
+{
+ STUB_SAFE_CALL3(NSS_SecureMemcmp, a, b, n);
+ abort();
+}
+
+/*
+ * Forwards to the real NSS_SecureMemcmpZero once it has been resolved;
+ * aborts otherwise (no stand-alone implementation).
+ */
+extern unsigned int
+NSS_SecureMemcmpZero_stub(const void *mem, size_t n)
+{
+ STUB_SAFE_CALL2(NSS_SecureMemcmpZero, mem, n);
+ abort();
+}
+
+/*
+ * Forwards to the real NSS_SecureSelect once it has been resolved; aborts
+ * otherwise (no stand-alone implementation).
+ */
+extern void
+NSS_SecureSelect_stub(void *dest, const void *src0, const void *src1, size_t n, unsigned char b)
+{
+ STUB_SAFE_CALL5(NSS_SecureSelect, dest, src0, src1, n, b);
+ abort();
+}
+
+#ifndef NSS_FIPS_DISABLED
+/*
+ * Report whether FIPS mode is requested: via the real
+ * NSS_GetSystemFIPSEnabled when it has been resolved, otherwise locally.
+ *
+ * The local check returns PR_TRUE when the NSS_FIPS environment variable
+ * asks for FIPS mode or, on LINUX builds, when the first byte of
+ * /proc/sys/crypto/fips_enabled is '1'. Everything else is PR_FALSE.
+ */
+PRBool
+NSS_GetSystemFIPSEnabled_stub(void)
+{
+    STUB_SAFE_CALL0(NSS_GetSystemFIPSEnabled);
+
+    /* The environment variable is active for all platforms */
+    const char *setting = PR_GetEnvSecure_stub("NSS_FIPS");
+
+    /* we generally accept y, Y, 1, FIPS, TRUE, and ON as turning on FIPS
+     * mode. Anything else is considered 'off' */
+    if (setting != NULL) {
+        switch (*setting) {
+            case 'y':
+            case 'Y':
+            case '1':
+                return PR_TRUE;
+            default:
+                break;
+        }
+        if (strcasecmp(setting, "fips") == 0 ||
+            strcasecmp(setting, "true") == 0 ||
+            strcasecmp(setting, "on") == 0) {
+            return PR_TRUE;
+        }
+    }
+
+/* currently only Linux has a system FIPS indicator. Add others here
+ * as they become available/known */
+#ifdef LINUX
+    {
+        char flag = 0;
+        size_t got;
+        FILE *indicator = fopen("/proc/sys/crypto/fips_enabled", "r");
+
+        if (indicator == NULL) {
+            return PR_FALSE;
+        }
+        got = fread(&flag, 1, 1, indicator);
+        fclose(indicator);
+        if (got == 1 && flag == '1') {
+            return PR_TRUE;
+        }
+    }
+#endif /* LINUX */
+    return PR_FALSE;
+}
+#endif /* NSS_FIPS_DISABLED = 0 */
+
+#ifdef FREEBL_NO_WEAK
+
+/* File names of the real libraries, built from the platform's SHLIB_PREFIX
+ * and SHLIB_SUFFIX; these are what FREEBL_InitStubs hands to dlopen(). */
+static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX;
+static const char *nssutilLibName = SHLIB_PREFIX "nssutil3." SHLIB_SUFFIX;
+
+/*
+ * Resolve every NSPR entry point the stubs forward to from the dlopen()
+ * handle `lib`.
+ *
+ * Returns SECSuccess when all symbols were found. Returns SECFailure at the
+ * first symbol dlsym() cannot find; hooks resolved before that point keep
+ * their values.
+ */
+static SECStatus
+freebl_InitNSPR(void *lib)
+{
+ STUB_FETCH_FUNCTION(PR_Free);
+ STUB_FETCH_FUNCTION(PR_Open);
+ STUB_FETCH_FUNCTION(PR_ImportPipe);
+ STUB_FETCH_FUNCTION(PR_Close);
+ STUB_FETCH_FUNCTION(PR_Read);
+ STUB_FETCH_FUNCTION(PR_Seek);
+ STUB_FETCH_FUNCTION(PR_GetLibraryFilePathname);
+ STUB_FETCH_FUNCTION(PR_Assert);
+ STUB_FETCH_FUNCTION(PR_Access);
+ STUB_FETCH_FUNCTION(PR_Sleep);
+ STUB_FETCH_FUNCTION(PR_CallOnce);
+ STUB_FETCH_FUNCTION(PR_NewCondVar);
+ STUB_FETCH_FUNCTION(PR_NotifyCondVar);
+ STUB_FETCH_FUNCTION(PR_NotifyAllCondVar);
+ STUB_FETCH_FUNCTION(PR_WaitCondVar);
+ STUB_FETCH_FUNCTION(PR_DestroyCondVar);
+ STUB_FETCH_FUNCTION(PR_NewLock);
+ STUB_FETCH_FUNCTION(PR_Unlock);
+ STUB_FETCH_FUNCTION(PR_Lock);
+ STUB_FETCH_FUNCTION(PR_DestroyLock);
+ STUB_FETCH_FUNCTION(PR_GetEnvSecure);
+ return SECSuccess;
+}
+
+/*
+ * Resolve the libnssutil entry points the stubs forward to from the
+ * dlopen() handle `lib`.
+ *
+ * Returns SECSuccess when all symbols were found. Returns SECFailure at the
+ * first symbol dlsym() cannot find; hooks resolved before that point keep
+ * their values.
+ *
+ * PORT_ZAllocAligned_Util, PORT_ZAllocAlignedOffset_Util,
+ * SECITEM_ItemsAreEqual_Util and NSS_GetSystemFIPSEnabled are declared
+ * above but not fetched here, so their stubs keep using the local fallback.
+ */
+static SECStatus
+freebl_InitNSSUtil(void *lib)
+{
+ STUB_FETCH_FUNCTION(PORT_Alloc_Util);
+ STUB_FETCH_FUNCTION(PORT_Free_Util);
+ STUB_FETCH_FUNCTION(PORT_ZAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_ZFree_Util);
+ STUB_FETCH_FUNCTION(PORT_NewArena_Util);
+ STUB_FETCH_FUNCTION(PORT_ArenaAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_ArenaZAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_FreeArena_Util);
+ STUB_FETCH_FUNCTION(PORT_GetError_Util);
+ STUB_FETCH_FUNCTION(PORT_SetError_Util);
+ STUB_FETCH_FUNCTION(SECITEM_FreeItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_AllocItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_CompareItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_CopyItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util);
+ STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util);
+ STUB_FETCH_FUNCTION(NSS_SecureMemcmp);
+ STUB_FETCH_FUNCTION(NSS_SecureMemcmpZero);
+ STUB_FETCH_FUNCTION(NSS_SecureSelect);
+ return SECSuccess;
+}
+
+/*
+ * fetch the library if it's loaded. For NSS it should already be loaded
+ *
+ * RTLD_NOLOAD asks dlopen() for a handle only; where the flag is missing
+ * it is defined to 0 above and has no effect on the call.
+ */
+#define freebl_getLibrary(libName) \
+    dlopen((libName), RTLD_LAZY | RTLD_NOLOAD)
+
+/*
+ * Drop a handle obtained with freebl_getLibrary; NULL is ignored.
+ * Wrapped in do/while(0) so the macro behaves as a single statement, even
+ * as the body of an unbraced if/else. `lib` is evaluated twice.
+ */
+#define freebl_releaseLibrary(lib) \
+    do {                           \
+        if (lib) {                 \
+            dlclose(lib);          \
+        }                          \
+    } while (0)
+
+/* Handles adopted by FREEBL_InitStubs; NULL until the library is attached. */
+static void *FREEBLnsprGlobalLib = NULL;
+static void *FREEBLnssutilGlobalLib = NULL;
+
+/*
+ * Destructor-attributed hook: releases whichever library handles
+ * FREEBL_InitStubs adopted.
+ */
+void __attribute((destructor)) FREEBL_unload()
+{
+ freebl_releaseLibrary(FREEBLnsprGlobalLib);
+ freebl_releaseLibrary(FREEBLnssutilGlobalLib);
+}
+#endif
+
+/*
+ * Attach the stubs to the real NSPR and libnssutil, if they are available.
+ *
+ * In the function-pointer configuration each library is looked up with
+ * freebl_getLibrary() and its entry points are resolved; a library that
+ * was attached by an earlier call is skipped. NSPR is handled first, then
+ * libnssutil.
+ *
+ * Returns SECSuccess when both libraries are attached (or when the build
+ * uses weak symbols and there is nothing to do). Returns SECFailure when a
+ * library handle cannot be obtained, or the status from the symbol lookup
+ * when that fails; the handle is released again in that case.
+ */
+extern SECStatus
+FREEBL_InitStubs()
+{
+#ifdef FREEBL_NO_WEAK
+    /* NSPR should be first */
+    if (FREEBLnsprGlobalLib == NULL) {
+        void *nsprHandle = freebl_getLibrary(nsprLibName);
+        SECStatus nsprStatus;
+
+        if (nsprHandle == NULL) {
+            return SECFailure;
+        }
+        nsprStatus = freebl_InitNSPR(nsprHandle);
+        if (nsprStatus != SECSuccess) {
+            freebl_releaseLibrary(nsprHandle);
+            return nsprStatus;
+        }
+        FREEBLnsprGlobalLib = nsprHandle; /* adopt */
+    }
+
+    /* now load NSSUTIL */
+    if (FREEBLnssutilGlobalLib == NULL) {
+        void *utilHandle = freebl_getLibrary(nssutilLibName);
+        SECStatus utilStatus;
+
+        if (utilHandle == NULL) {
+            return SECFailure;
+        }
+        utilStatus = freebl_InitNSSUtil(utilHandle);
+        if (utilStatus != SECSuccess) {
+            freebl_releaseLibrary(utilHandle);
+            return utilStatus;
+        }
+        FREEBLnssutilGlobalLib = utilHandle; /* adopt */
+    }
+#endif
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h
new file mode 100644
index 0000000000..58cb9d085c
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.h
@@ -0,0 +1,72 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+
+/* Include guard: the tested name must match the name defined below. */
+#ifndef _STUBS_H_
+#define _STUBS_H_ 1
+
+#ifdef _LIBUTIL_H_
+/* must be included before util */
+/*#error stubs.h included too late */
+#define MP_DIGITES(x) "stubs included too late"
+#endif
+
+/* hide libutil rename */
+#define _LIBUTIL_H_ 1
+
+/* Route the libutil memory/error API to the stub implementations. */
+#define PORT_Alloc PORT_Alloc_stub
+#define PORT_ArenaAlloc PORT_ArenaAlloc_stub
+#define PORT_ArenaZAlloc PORT_ArenaZAlloc_stub
+#define PORT_Free PORT_Free_stub
+#define PORT_FreeArena PORT_FreeArena_stub
+#define PORT_GetError PORT_GetError_stub
+#define PORT_NewArena PORT_NewArena_stub
+#define PORT_SetError PORT_SetError_stub
+#define PORT_ZAlloc PORT_ZAlloc_stub
+#define PORT_ZFree PORT_ZFree_stub
+#define PORT_ZAllocAligned PORT_ZAllocAligned_stub
+#define PORT_ZAllocAlignedOffset PORT_ZAllocAlignedOffset_stub
+
+/* SECItem, OID and constant-time helpers. */
+#define SECITEM_AllocItem SECITEM_AllocItem_stub
+#define SECITEM_CompareItem SECITEM_CompareItem_stub
+#define SECITEM_ItemsAreEqual SECITEM_ItemsAreEqual_stub
+#define SECITEM_CopyItem SECITEM_CopyItem_stub
+#define SECITEM_FreeItem SECITEM_FreeItem_stub
+#define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub
+#define SECOID_FindOIDTag SECOID_FindOIDTag_stub
+#define NSS_SecureMemcmp NSS_SecureMemcmp_stub
+#define NSS_SecureMemcmpZero NSS_SecureMemcmpZero_stub
+#define NSS_SecureSelect NSS_SecureSelect_stub
+#define NSS_GetSystemFIPSEnabled NSS_GetSystemFIPSEnabled_stub
+
+/* NSPR entry points. */
+#define PR_Assert PR_Assert_stub
+#define PR_Access PR_Access_stub
+#define PR_CallOnce PR_CallOnce_stub
+#define PR_Close PR_Close_stub
+#define PR_DestroyCondVar PR_DestroyCondVar_stub
+#define PR_DestroyLock PR_DestroyLock_stub
+#define PR_Free PR_Free_stub
+#define PR_GetLibraryFilePathname PR_GetLibraryFilePathname_stub
+#define PR_ImportPipe PR_ImportPipe_stub
+#define PR_Lock PR_Lock_stub
+#define PR_NewCondVar PR_NewCondVar_stub
+#define PR_NewLock PR_NewLock_stub
+#define PR_NotifyCondVar PR_NotifyCondVar_stub
+#define PR_NotifyAllCondVar PR_NotifyAllCondVar_stub
+#define PR_Open PR_Open_stub
+#define PR_Read PR_Read_stub
+#define PR_Seek PR_Seek_stub
+#define PR_Sleep PR_Sleep_stub
+#define PR_Unlock PR_Unlock_stub
+#define PR_WaitCondVar PR_WaitCondVar_stub
+#define PR_GetEnvSecure PR_GetEnvSecure_stub
+
+/* Attach the stubs to the real NSPR / libnssutil when they are loaded. */
+extern int FREEBL_InitStubs(void);
+
+#endif /* _STUBS_H_ */
diff --git a/security/nss/lib/freebl/sysrand.c b/security/nss/lib/freebl/sysrand.c
new file mode 100644
index 0000000000..814dd6e515
--- /dev/null
+++ b/security/nss/lib/freebl/sysrand.c
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "seccomon.h"
+
+#if defined(XP_UNIX) && defined(SEED_ONLY_DEV_URANDOM)
+#include "unix_urandom.c"
+#elif defined(XP_UNIX)
+#include "unix_rand.c"
+#endif
+#ifdef XP_WIN
+#include "win_rand.c"
+#endif
diff --git a/security/nss/lib/freebl/tlsprfalg.c b/security/nss/lib/freebl/tlsprfalg.c
new file mode 100644
index 0000000000..1e5e67886c
--- /dev/null
+++ b/security/nss/lib/freebl/tlsprfalg.c
@@ -0,0 +1,134 @@
+/* tlsprfalg.c - TLS Pseudo Random Function (PRF) implementation
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+#define PHASH_STATE_MAX_LEN HASH_LENGTH_MAX
+
+/*
+ * TLS P_hash function
+ *
+ * Fills result->data with result->len bytes of keyed output derived from
+ * secret, label and seed using HMAC over the hash selected by hashType.
+ *
+ * hashType: hash to run HMAC with.
+ * secret:   HMAC key; data may be NULL only when len is 0.
+ * label:    optional NUL-terminated label, hashed in front of the seed;
+ *           NULL means "no label".
+ * seed:     seed bytes (data must be non-NULL).
+ * result:   caller-provided output buffer; len says how much to produce.
+ * isFIPS:   forwarded to HMAC_Create.
+ *
+ * Returns SECSuccess on success, SECFailure if the HMAC context cannot be
+ * created or an HMAC_Finish call fails. The intermediate state kept on the
+ * stack is cleared before returning.
+ */
+SECStatus
+TLS_P_hash(HASH_HashType hashType, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    unsigned char state[PHASH_STATE_MAX_LEN];
+    unsigned char outbuf[PHASH_STATE_MAX_LEN];
+    unsigned int state_len = 0, label_len = 0, outbuf_len = 0, chunk_size;
+    unsigned int remaining;
+    unsigned char *res;
+    SECStatus status;
+    HMACContext *cx;
+    SECStatus rv = SECFailure;
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashType);
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    remaining = result->len;
+    res = result->data;
+
+    if (label != NULL)
+        label_len = PORT_Strlen(label);
+
+    cx = HMAC_Create(hashObj, secret->data, secret->len, isFIPS);
+    if (cx == NULL)
+        goto loser;
+
+    /* initialize the state = A(1) = HMAC_hash(secret, label + seed) */
+    HMAC_Begin(cx);
+    /* skip the label when there is none, as the loop below does, so a NULL
+     * pointer is never handed to HMAC_Update */
+    if (label_len)
+        HMAC_Update(cx, (unsigned char *)label, label_len);
+    HMAC_Update(cx, seed->data, seed->len);
+    status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+    if (status != SECSuccess)
+        goto loser;
+
+    /* generate a block at a time until we're done */
+    while (remaining > 0) {
+
+        /* output block = HMAC_hash(secret, A(i) + label + seed) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        if (label_len)
+            HMAC_Update(cx, (unsigned char *)label, label_len);
+        HMAC_Update(cx, seed->data, seed->len);
+        status = HMAC_Finish(cx, outbuf, &outbuf_len, sizeof(outbuf));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* Update the state = A(i) = HMAC_hash(secret, A(i-1)) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* the last block may be only partially consumed */
+        chunk_size = PR_MIN(outbuf_len, remaining);
+        PORT_Memcpy(res, outbuf, chunk_size);
+        res += chunk_size;
+        remaining -= chunk_size;
+    }
+
+    rv = SECSuccess;
+
+loser:
+    /* clear out state so it's not left on the stack */
+    if (cx)
+        HMAC_Destroy(cx, PR_TRUE);
+    PORT_Memset(state, 0, sizeof(state));
+    PORT_Memset(outbuf, 0, sizeof(outbuf));
+    return rv;
+}
+
+/*
+ * Combined MD5/SHA-1 PRF built on TLS_P_hash.
+ *
+ * The secret is split into two halves of equal length (they share the
+ * middle byte when the secret length is odd). The first half keys an MD5
+ * P_hash written straight into result; the second half keys a SHA-1
+ * P_hash written into a scratch buffer; the two outputs are then XORed
+ * together in result.
+ *
+ * Returns SECSuccess on success, SECFailure if the scratch buffer cannot
+ * be allocated or either P_hash run fails. The scratch buffer is zeroed
+ * and freed before returning.
+ */
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+        SECItem *result, PRBool isFIPS)
+{
+    SECStatus rv = SECFailure;
+    SECItem sha1Out = { siBuffer, NULL, 0 };
+    SECItem md5Secret;
+    SECItem sha1Secret;
+    unsigned int halfLen;
+    unsigned int idx;
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    /* half the secret, rounded up */
+    halfLen = (secret->len / 2) + (secret->len & 1);
+
+    /* leading half keys the MD5 pass */
+    md5Secret.type = siBuffer;
+    md5Secret.data = secret->data;
+    md5Secret.len = halfLen;
+
+    /* trailing half keys the SHA-1 pass */
+    sha1Secret.type = siBuffer;
+    sha1Secret.data = secret->data + (secret->len - halfLen);
+    sha1Secret.len = halfLen;
+
+    sha1Out.data = (unsigned char *)PORT_Alloc(result->len);
+    if (sha1Out.data == NULL) {
+        goto loser;
+    }
+    sha1Out.len = result->len;
+
+    if (TLS_P_hash(HASH_AlgMD5, &md5Secret, label, seed, result, isFIPS) != SECSuccess) {
+        goto loser;
+    }
+
+    if (TLS_P_hash(HASH_AlgSHA1, &sha1Secret, label, seed, &sha1Out, isFIPS) != SECSuccess) {
+        goto loser;
+    }
+
+    for (idx = 0; idx < result->len; idx++) {
+        result->data[idx] ^= sha1Out.data[idx];
+    }
+
+    rv = SECSuccess;
+
+loser:
+    if (sha1Out.data != NULL) {
+        PORT_ZFree(sha1Out.data, sha1Out.len);
+    }
+    return rv;
+}
diff --git a/security/nss/lib/freebl/unix_rand.c b/security/nss/lib/freebl/unix_rand.c
new file mode 100644
index 0000000000..e46bac6233
--- /dev/null
+++ b/security/nss/lib/freebl/unix_rand.c
@@ -0,0 +1,811 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include "secrng.h"
+#include "secerr.h"
+#include "prerror.h"
+#include "prthread.h"
+#include "prprf.h"
+#include "prenv.h"
+
+size_t RNG_FileUpdate(const char *fileName, size_t limit);
+
+/*
+ * When copying data to the buffer we want the least significant bytes
+ * from the input since those bits are changing the fastest. The address
+ * of the least significant byte depends upon whether we are running on
+ * a big-endian or little-endian machine.
+ *
+ * Does this mean the least significant bytes are the most significant
+ * to us? :-)
+ */
+
+/*
+ * Copy at most |dstlen| bytes of the |srclen|-byte object at |src| into
+ * |dst| and return the number of bytes copied.  When the source does not
+ * fit, the bytes kept are the trailing ones on a big-endian host and the
+ * leading ones on a little-endian host.
+ */
+static size_t
+CopyLowBits(void *dst, size_t dstlen, void *src, size_t srclen)
+{
+    union {
+        PRInt32 word;
+        char bytes[4];
+    } probe;
+    const char *from = (const char *)src;
+    size_t count = srclen;
+
+    if (srclen > dstlen) {
+        count = dstlen;
+        /* Detect byte order at run time by inspecting a known constant. */
+        probe.word = 0x01020304;
+        if (probe.bytes[0] == 0x01) {
+            /* big-endian: skip the leading bytes */
+            from += srclen - dstlen;
+        }
+    }
+    memcpy(dst, from, count);
+    return count;
+}
+
+#ifdef SOLARIS
+
+#include <kstat.h>
+
+static const PRUint32 entropy_buf_len = 4096; /* buffer up to 4 KB */
+
+/*
+ * Append |inlen| bytes from |inbuf| to the staging buffer |entropy_buf|,
+ * whose current fill level is kept in *entropy_buffered.  Each time the
+ * staging buffer is full (entropy_buf_len bytes) it is handed to
+ * RNG_RandomUpdate and restarted from empty.
+ *
+ * *total_fed is advanced by every byte copied into the staging buffer.
+ * Returns SECSuccess, or the failing status of RNG_RandomUpdate, in which
+ * case the remaining input is not consumed.
+ */
+static SECStatus
+BufferEntropy(char *inbuf, PRUint32 inlen,
+              char *entropy_buf, PRUint32 *entropy_buffered,
+              PRUint32 *total_fed)
+{
+    SECStatus rv = SECSuccess;
+
+    while (inlen != 0) {
+        PRUint32 room = entropy_buf_len - *entropy_buffered;
+        PRUint32 chunk;
+
+        if (room == 0) {
+            /* Staging buffer is full: flush it to the RNG. */
+            rv = RNG_RandomUpdate(entropy_buf, entropy_buf_len);
+            if (rv != SECSuccess) {
+                return rv;
+            }
+            *entropy_buffered = 0;
+            room = entropy_buf_len;
+        }
+        chunk = PR_MIN(room, inlen);
+        memcpy(entropy_buf + *entropy_buffered, inbuf, chunk);
+        *entropy_buffered += chunk;
+        *total_fed += chunk;
+        inbuf += chunk;
+        inlen -= chunk;
+    }
+    return rv;
+}
+
+/* Feed kernel statistics structures and ks_data field to the RNG.
+ * Returns status as well as the number of bytes successfully fed to the RNG.
+ *
+ * Walks the kstat chain obtained from kstat_open(); for every entry that
+ * kstat_read() can read, the kstat_t header and, when present, its ks_data
+ * payload are pushed through BufferEntropy().  Whatever is left in the
+ * staging buffer afterwards is passed to RNG_RandomUpdate().
+ *
+ * *fed is reset to 0 on entry and is advanced by BufferEntropy() as bytes
+ * are staged.  Returns SECFailure when fed is NULL, when kstat_open() or
+ * the buffer allocation fails, when feeding the RNG fails, or when
+ * kstat_close() reports an error.
+ */
+static SECStatus
+RNG_kstat(PRUint32 *fed)
+{
+ kstat_ctl_t *kc = NULL;
+ kstat_t *ksp = NULL;
+ PRUint32 entropy_buffered = 0;
+ char *entropy_buf = NULL;
+ SECStatus rv = SECSuccess;
+
+ PORT_Assert(fed);
+ if (!fed) {
+ return SECFailure;
+ }
+ *fed = 0;
+
+ kc = kstat_open();
+ PORT_Assert(kc);
+ if (!kc) {
+ return SECFailure;
+ }
+ entropy_buf = (char *)PORT_Alloc(entropy_buf_len);
+ PORT_Assert(entropy_buf);
+ if (entropy_buf) {
+ for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+ if (-1 == kstat_read(kc, ksp, NULL)) {
+ /* missing data from a single kstat shouldn't be fatal */
+ continue;
+ }
+ /* stage the raw kstat_t header itself */
+ rv = BufferEntropy((char *)ksp, sizeof(kstat_t),
+ entropy_buf, &entropy_buffered,
+ fed);
+ if (SECSuccess != rv) {
+ break;
+ }
+
+ /* stage the data payload only when the entry actually carries one */
+ if (ksp->ks_data && ksp->ks_data_size > 0 && ksp->ks_ndata > 0) {
+ rv = BufferEntropy((char *)ksp->ks_data, ksp->ks_data_size,
+ entropy_buf, &entropy_buffered,
+ fed);
+ if (SECSuccess != rv) {
+ break;
+ }
+ }
+ }
+ if (SECSuccess == rv && entropy_buffered) {
+ /* Buffer is not empty, time to feed it to the RNG */
+ rv = RNG_RandomUpdate(entropy_buf, entropy_buffered);
+ }
+ PORT_Free(entropy_buf);
+ } else {
+ rv = SECFailure;
+ }
+ /* the kstat handle is closed on every path that opened it */
+ if (kstat_close(kc)) {
+ PORT_Assert(0);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+#endif
+
+#if defined(SCO) || defined(UNIXWARE) || defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(DARWIN) || defined(OPENBSD) || defined(NTO) || defined(__riscos__) || defined(__GNU__) || defined(__FreeBSD_kernel__) || defined(__NetBSD_kernel__)
+#include <sys/times.h>
+
+/*
+ * Write the low-order bytes of the value returned by times() into |buf|
+ * (at most |maxbytes| of them) and return the number of bytes written.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    struct tms cpuTimes;
+    int tickCount = times(&cpuTimes);
+
+    return CopyLowBits(buf, maxbytes, &tickCount, sizeof(tickCount));
+}
+
+/*
+ * Mix the sysconf() values for _SC_CHILD_MAX, _SC_STREAM_MAX and
+ * _SC_OPEN_MAX, in that order, into the RNG.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int names[] = { _SC_CHILD_MAX, _SC_STREAM_MAX, _SC_OPEN_MAX };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(names) / sizeof(names[0]); idx++) {
+        long value = sysconf(names[idx]);
+        RNG_RandomUpdate(&value, sizeof(value));
+    }
+}
+#endif
+
+#if defined(__sun)
+#if defined(__svr4) || defined(SVR4)
+#include <sys/systeminfo.h>
+
+/*
+ * Mix the strings that sysinfo() reports for SI_MACHINE, SI_RELEASE and
+ * SI_HW_SERIAL, in that order, into the RNG.  A query whose return value
+ * is not positive contributes nothing.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char text[2000];
+    size_t q;
+
+    for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
+        int len = sysinfo(queries[q], text, sizeof(text));
+        if (len > 0) {
+            RNG_RandomUpdate(text, len);
+        }
+    }
+}
+
+/*
+ * Write the low-order bytes of gethrtime() into |buf| (at most |maxbytes|)
+ * and return the number of bytes written; a zero reading contributes
+ * nothing and yields 0.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    hrtime_t now = gethrtime();
+
+    if (!now) {
+        return 0;
+    }
+    return CopyLowBits(buf, maxbytes, &now, sizeof(now));
+}
+#else /* SunOS (Sun, but not SVR4) */
+
+extern long sysconf(int name);
+
+/* Stub for this platform: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/* Mix the sysconf(_SC_CHILD_MAX) value into the RNG. */
+static void
+GiveSystemInfo(void)
+{
+    long childMax = sysconf(_SC_CHILD_MAX);
+
+    /* A single configuration value: not very good input. */
+    RNG_RandomUpdate(&childMax, sizeof(childMax));
+}
+#endif
+#endif /* Sun */
+
+#if defined(__hpux)
+#include <sys/unistd.h>
+
+#if defined(__ia64)
+#include <ia64/sys/inline.h>
+
+/* Read application register _AREG44 through the _Asm_mov_from_ar
+ * intrinsic and copy its low-order bytes (at most |maxbytes|) into |buf|.
+ * Returns the number of bytes written.
+ * NOTE(review): _AREG44 is presumably the interval time counter - confirm. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ PRUint64 t;
+
+ t = _Asm_mov_from_ar(_AREG44);
+ return CopyLowBits(buf, maxbytes, &t, sizeof(t));
+}
+#else
+/* Copy the low-order bytes (at most |maxbytes|) of the value returned by
+ * the external helper ret_cr16() into |buf|.  Returns the number of bytes
+ * written.  ret_cr16() is defined outside this file. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ extern int ret_cr16();
+ int cr16val;
+
+ cr16val = ret_cr16();
+ return CopyLowBits(buf, maxbytes, &cr16val, sizeof(cr16val));
+}
+#endif
+
+/*
+ * Mix the sysconf() values for _AES_OS_VERSION and _SC_CPU_VERSION, in
+ * that order, into the RNG.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int names[] = { _AES_OS_VERSION, _SC_CPU_VERSION };
+    size_t idx;
+
+    /* Only two configuration values: not very good input. */
+    for (idx = 0; idx < sizeof(names) / sizeof(names[0]); idx++) {
+        long value = sysconf(names[idx]);
+        RNG_RandomUpdate(&value, sizeof(value));
+    }
+}
+#endif /* HPUX */
+
+#if defined(_IBMR2)
+/* Stub for _IBMR2 builds: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/* Intentionally empty for _IBMR2 builds: no system information is mixed
+ * into the RNG here. */
+static void
+GiveSystemInfo(void)
+{
+ /* XXX haven't found any yet! */
+}
+#endif /* IBM R2 */
+
+#if defined(LINUX)
+#include <sys/sysinfo.h>
+
+/* Stub for LINUX builds: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/*
+ * Mix the raw struct sysinfo snapshot into the RNG when sysinfo()
+ * succeeds.  Compiles to an empty function when NO_SYSINFO is defined.
+ */
+static void
+GiveSystemInfo(void)
+{
+#ifndef NO_SYSINFO
+    struct sysinfo snapshot;
+
+    if (sysinfo(&snapshot) != 0) {
+        return;
+    }
+    RNG_RandomUpdate(&snapshot, sizeof(snapshot));
+#endif
+}
+#endif /* LINUX */
+
+#if defined(NCR)
+
+#include <sys/utsname.h>
+#include <sys/systeminfo.h>
+
+/* Stub for NCR builds: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/*
+ * Mix the strings that sysinfo() reports for SI_MACHINE, SI_RELEASE and
+ * SI_HW_SERIAL, in that order, into the RNG.  A query whose return value
+ * is not positive contributes nothing.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char text[2000];
+    size_t q;
+
+    for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
+        int len = sysinfo(queries[q], text, sizeof(text));
+        if (len > 0) {
+            RNG_RandomUpdate(text, len);
+        }
+    }
+}
+
+#endif /* NCR */
+
+#if defined(sgi)
+#include <fcntl.h>
+#undef PRIVATE
+#include <sys/mman.h>
+#include <sys/syssgi.h>
+#include <sys/immu.h>
+#include <sys/systeminfo.h>
+#include <sys/utsname.h>
+#include <wait.h>
+
+/* Mix several pieces of SGI system information into the RNG: the results
+ * of the syssgi() SGI_SYSID, SGI_RDUBLK (when defined) and SGI_INVENT
+ * requests, followed by the sysinfo() strings for SI_MACHINE, SI_RELEASE
+ * and SI_HW_SERIAL.  Each item is only used when its call returns a
+ * positive value. */
+static void
+GiveSystemInfo(void)
+{
+ int rv;
+ char buf[4096];
+
+ rv = syssgi(SGI_SYSID, &buf[0]);
+ if (rv > 0) {
+ /* a fixed MAXSYSIDSIZE bytes are used, not the return value */
+ RNG_RandomUpdate(buf, MAXSYSIDSIZE);
+ }
+#ifdef SGI_RDUBLK
+ rv = syssgi(SGI_RDUBLK, getpid(), &buf[0], sizeof(buf));
+ if (rv > 0) {
+ /* the whole buffer is fed regardless of how much was filled */
+ RNG_RandomUpdate(buf, sizeof(buf));
+ }
+#endif /* SGI_RDUBLK */
+ rv = syssgi(SGI_INVENT, SGI_INV_READ, buf, sizeof(buf));
+ if (rv > 0) {
+ /* the whole buffer is fed regardless of how much was filled */
+ RNG_RandomUpdate(buf, sizeof(buf));
+ }
+ /* for the sysinfo() strings the return value is used as the length */
+ rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv);
+ }
+ rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv);
+ }
+ rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv);
+ }
+}
+
+/*
+ * Sample the SGI hardware cycle counter and copy its low-order bytes (at
+ * most |maxbuf|) into |buf|.  Returns the number of bytes written, or 0
+ * when no counter is available.
+ *
+ * On first use the counter's physical address is queried with
+ * syssgi(SGI_QUERY_CYCLECNTR), the containing page of /dev/mmem is mapped
+ * read-only, and the counter width is determined.  The mapping and width
+ * are cached in function-static variables; setup is attempted on at most
+ * three calls before giving up for good.
+ *
+ * Fixes relative to the previous version:
+ *  - a zero physical address (no cycle counter) used to fall through and
+ *    dereference the NULL iotimer_addr; it now returns 0;
+ *  - an unconditional memcpy of cntr_size bytes into |buf| ignored
+ *    |maxbuf| and could overrun the caller's buffer; CopyLowBits alone
+ *    now performs the bounded copy;
+ *  - the sample words are zero-initialised and an unused local was removed.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbuf)
+{
+    unsigned phys_addr, raddr, cycleval;
+    static volatile unsigned *iotimer_addr = NULL;
+    static int tries = 0;
+    static int cntr_size;
+    int mfd;
+    long s0[2] = { 0, 0 };
+
+#ifndef SGI_CYCLECNTR_SIZE
+#define SGI_CYCLECNTR_SIZE 165 /* Size user needs to use to read CC */
+#endif
+
+    if (iotimer_addr == NULL) {
+        int pgsz;
+        int pgoffmask;
+
+        if (tries++ > 1) {
+            /* Don't keep trying if it didn't work */
+            return 0;
+        }
+
+        /*
+        ** For SGI machines we can use the cycle counter, if it has one,
+        ** to generate some truly random numbers
+        */
+        phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval);
+        if (!phys_addr) {
+            /* No cycle counter: nothing to map, nothing to read. */
+            return 0;
+        }
+
+        pgsz = getpagesize();
+        pgoffmask = pgsz - 1;
+
+        raddr = phys_addr & ~pgoffmask;
+        mfd = open("/dev/mmem", O_RDONLY);
+        if (mfd < 0) {
+            return 0;
+        }
+        iotimer_addr = (unsigned *)
+            mmap(0, pgoffmask, PROT_READ, MAP_PRIVATE, mfd, (int)raddr);
+        if (iotimer_addr == (void *)-1) {
+            close(mfd);
+            iotimer_addr = NULL;
+            return 0;
+        }
+        /* Re-apply the in-page offset of the counter to the mapping. */
+        iotimer_addr = (unsigned *)((__psint_t)iotimer_addr | (phys_addr & pgoffmask));
+        /*
+         * The file 'mfd' is purposefully not closed.
+         */
+        cntr_size = syssgi(SGI_CYCLECNTR_SIZE);
+        if (cntr_size < 0) {
+            struct utsname utsinfo;
+
+            /*
+             * We must be executing on a 6.0 or earlier system, since the
+             * SGI_CYCLECNTR_SIZE call is not supported.
+             *
+             * The only pre-6.1 platforms with 64-bit counters are
+             * IP19 and IP21 (Challenge, PowerChallenge, Onyx).
+             */
+            uname(&utsinfo);
+            if (!strncmp(utsinfo.machine, "IP19", 4) ||
+                !strncmp(utsinfo.machine, "IP21", 4))
+                cntr_size = 64;
+            else
+                cntr_size = 32;
+        }
+        cntr_size /= 8; /* Convert from bits to bytes */
+    }
+
+    s0[0] = *iotimer_addr;
+    if (cntr_size > 4)
+        s0[1] = *(iotimer_addr + 1);
+    /* CopyLowBits bounds the copy by maxbuf. */
+    return CopyLowBits(buf, maxbuf, &s0, cntr_size);
+}
+#endif
+
+#if defined(sony)
+#include <sys/systeminfo.h>
+
+/* Stub for sony builds: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/*
+ * Mix the strings that sysinfo() reports for SI_MACHINE, SI_RELEASE and
+ * SI_HW_SERIAL, in that order, into the RNG.  A query whose return value
+ * is not positive contributes nothing.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char text[2000];
+    size_t q;
+
+    for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
+        int len = sysinfo(queries[q], text, sizeof(text));
+        if (len > 0) {
+            RNG_RandomUpdate(text, len);
+        }
+    }
+}
+#endif /* sony */
+
+#if defined(sinix)
+#include <sys/systeminfo.h>
+#include <sys/times.h>
+
+int gettimeofday(struct timeval *, struct timezone *);
+int gethostname(char *, int);
+
+/*
+ * Write the low-order bytes of the value returned by times() into |buf|
+ * (at most |maxbytes| of them) and return the number of bytes written.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    struct tms cpuTimes;
+    int tickCount = times(&cpuTimes);
+
+    return CopyLowBits(buf, maxbytes, &tickCount, sizeof(tickCount));
+}
+
+/*
+ * Mix the strings that sysinfo() reports for SI_MACHINE, SI_RELEASE and
+ * SI_HW_SERIAL, in that order, into the RNG.  A query whose return value
+ * is not positive contributes nothing.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char text[2000];
+    size_t q;
+
+    for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
+        int len = sysinfo(queries[q], text, sizeof(text));
+        if (len > 0) {
+            RNG_RandomUpdate(text, len);
+        }
+    }
+}
+#endif /* sinix */
+
+#if defined(nec_ews)
+#include <sys/systeminfo.h>
+
+/* Stub for nec_ews builds: no clock data is written to |buf|; always
+ * returns 0 bytes. */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0;
+}
+
+/*
+ * Mix the strings that sysinfo() reports for SI_MACHINE, SI_RELEASE and
+ * SI_HW_SERIAL, in that order, into the RNG.  A query whose return value
+ * is not positive contributes nothing.
+ */
+static void
+GiveSystemInfo(void)
+{
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char text[2000];
+    size_t q;
+
+    for (q = 0; q < sizeof(queries) / sizeof(queries[0]); q++) {
+        int len = sysinfo(queries[q], text, sizeof(text));
+        if (len > 0) {
+            RNG_RandomUpdate(text, len);
+        }
+    }
+}
+#endif /* nec_ews */
+
+/*
+ * Fill |buf| with up to |maxbytes| of timing noise and return the number
+ * of bytes written.  The output is, in order: whatever GetHighResClock()
+ * provides, then the low-order bytes of the current microsecond field,
+ * then the low-order bytes of the current seconds field, each truncated
+ * to the space that is still free.
+ */
+size_t
+RNG_GetNoise(void *buf, size_t maxbytes)
+{
+    struct timeval now;
+    char *out = (char *)buf;
+    int total;
+    int got;
+
+    total = GetHighResClock(buf, maxbytes);
+    maxbytes -= total;
+
+    (void)gettimeofday(&now, 0);
+
+    got = CopyLowBits(out + total, maxbytes, &now.tv_usec, sizeof(now.tv_usec));
+    total += got;
+    maxbytes -= got;
+
+    got = CopyLowBits(out + total, maxbytes, &now.tv_sec, sizeof(now.tv_sec));
+    total += got;
+
+    return total;
+}
+
+#ifdef DARWIN
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE
+#include <crt_externs.h>
+#endif
+#endif
+
+/*
+ * Seed the RNG from a collection of system sources, in this order:
+ * platform information (GiveSystemInfo), clock noise, every environment
+ * string plus the environment pointer array, the host name,
+ * SYSTEM_RNG_SEED_COUNT bytes of /dev/urandom, the file named by
+ * $NSRANDFILE (limited to $NSRANDCOUNT bytes when that is a positive
+ * number) and a fixed list of system files and directories.
+ *
+ * SEC_ERROR_NEED_RANDOM is set when nothing could be read from
+ * /dev/urandom.  On Solaris builds, kernel statistics are used as a
+ * substitute in that case.
+ *
+ * Fixes relative to the previous version:
+ *  - the host name buffer is explicitly NUL-terminated before strlen(),
+ *    because gethostname() need not terminate a truncated name;
+ *  - a negative NSRANDCOUNT is no longer passed on as a byte limit, where
+ *    it was implicitly converted to a huge size_t; it is treated like an
+ *    unset count.
+ */
+void
+RNG_SystemInfoForRNG(void)
+{
+    char buf[BUFSIZ];
+    size_t bytes;
+    const char *const *cp;
+    char *randfile;
+#ifdef DARWIN
+#if TARGET_OS_IPHONE
+    /* iOS does not expose a way to access environ. */
+    char **environ = NULL;
+#else
+    char **environ = *_NSGetEnviron();
+#endif
+#else
+    extern char **environ;
+#endif
+    static const char *const files[] = {
+        "/etc/passwd",
+        "/etc/utmp",
+        "/tmp",
+        "/var/tmp",
+        "/usr/tmp",
+        0
+    };
+
+    GiveSystemInfo();
+
+    bytes = RNG_GetNoise(buf, sizeof(buf));
+    RNG_RandomUpdate(buf, bytes);
+
+    /*
+     * Pass the C environment and the addresses of the pointers to the
+     * hash function. This makes the random number function depend on the
+     * execution environment of the user and on the platform the program
+     * is running on.
+     */
+    if (environ != NULL) {
+        cp = (const char *const *)environ;
+        while (*cp) {
+            RNG_RandomUpdate(*cp, strlen(*cp));
+            cp++;
+        }
+        RNG_RandomUpdate(environ, (char *)cp - (char *)environ);
+    }
+
+    /* Give in system information */
+    if (gethostname(buf, sizeof(buf)) == 0) {
+        /* A truncated name is not guaranteed to be NUL-terminated. */
+        buf[sizeof(buf) - 1] = '\0';
+        RNG_RandomUpdate(buf, strlen(buf));
+    }
+
+    /* grab some data from system's PRNG before any other files. */
+    bytes = RNG_FileUpdate("/dev/urandom", SYSTEM_RNG_SEED_COUNT);
+    if (!bytes) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+    }
+
+    /* If the user points us to a random file, pass it through the rng */
+    randfile = PR_GetEnvSecure("NSRANDFILE");
+    if ((randfile != NULL) && (randfile[0] != '\0')) {
+        char *randCountString = PR_GetEnvSecure("NSRANDCOUNT");
+        int randCount = randCountString ? atoi(randCountString) : 0;
+        if (randCount > 0) {
+            RNG_FileUpdate(randfile, (size_t)randCount);
+        } else {
+            /* unset, zero, unparsable or negative: use the default limit */
+            RNG_FileForRNG(randfile);
+        }
+    }
+
+    /* pass other files through */
+    for (cp = files; *cp; cp++)
+        RNG_FileForRNG(*cp);
+
+#if defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD) || defined(DARWIN) || defined(LINUX) || defined(HPUX)
+    if (bytes)
+        return;
+#endif
+
+#ifdef SOLARIS
+    if (!bytes) {
+        /* On Solaris 8, /dev/urandom isn't available, so we use libkstat. */
+        PRUint32 kstat_bytes = 0;
+        if (SECSuccess != RNG_kstat(&kstat_bytes)) {
+            PORT_Assert(0);
+        }
+        bytes += kstat_bytes;
+        PORT_Assert(bytes);
+    }
+#endif
+}
+
+#define TOTAL_FILE_LIMIT 1000000 /* one million */
+
+/*
+ * Mix a file into the RNG: first its struct stat, then up to |limit|
+ * bytes of its contents, then a fresh sample of clock noise.
+ *
+ * Returns the number of content bytes read from the file; 0 when the
+ * file cannot be stat'ed or opened.  A running total across all calls is
+ * kept; once it exceeds TOTAL_FILE_LIMIT each later call stops after its
+ * first buffer.
+ *
+ * Fixes relative to the previous version:
+ *  - a read() interrupted by a signal (EINTR) is retried instead of being
+ *    treated as end of data, which could make a healthy /dev/urandom look
+ *    empty;
+ *  - the stack buffer, which may hold bytes read from /dev/urandom, is
+ *    cleared before returning.
+ */
+size_t
+RNG_FileUpdate(const char *fileName, size_t limit)
+{
+    FILE *file;
+    int fd;
+    int bytes;
+    size_t fileBytes = 0;
+    struct stat stat_buf;
+    unsigned char buffer[BUFSIZ];
+    static size_t totalFileBytes = 0;
+
+    /* suppress valgrind warnings due to holes in struct stat */
+    memset(&stat_buf, 0, sizeof(stat_buf));
+
+    if (stat((char *)fileName, &stat_buf) < 0)
+        return fileBytes;
+    RNG_RandomUpdate(&stat_buf, sizeof(stat_buf));
+
+    file = fopen(fileName, "r");
+    if (file != NULL) {
+        /* Read from the underlying file descriptor directly to bypass stdio
+         * buffering and avoid reading more bytes than we need from
+         * /dev/urandom. NOTE: we can't use fread with unbuffered I/O because
+         * fread may return EOF in unbuffered I/O mode on Android.
+         *
+         * Moreover, we read into a buffer of size BUFSIZ, so buffered I/O
+         * has no performance advantage. */
+        fd = fileno(file);
+        /* 'file' was just opened, so this should not fail. */
+        PORT_Assert(fd != -1);
+        while (limit > fileBytes && fd != -1) {
+            bytes = PR_MIN(sizeof buffer, limit - fileBytes);
+            bytes = read(fd, buffer, bytes);
+            if (bytes < 0 && errno == EINTR)
+                continue; /* interrupted before any data arrived: retry */
+            if (bytes <= 0)
+                break;
+            RNG_RandomUpdate(buffer, bytes);
+            fileBytes += bytes;
+            totalFileBytes += bytes;
+            /* after TOTAL_FILE_LIMIT has been reached, only read in first
+            ** buffer of data from each subsequent file.
+            */
+            if (totalFileBytes > TOTAL_FILE_LIMIT)
+                break;
+        }
+        fclose(file);
+    }
+    /*
+     * Pass yet another snapshot of our highest resolution clock into
+     * the hash function.
+     */
+    bytes = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, bytes);
+    /* don't leave seed material behind on the stack */
+    memset(buffer, 0, sizeof(buffer));
+    return fileBytes;
+}
+
+/*
+ * Mix |fileName| into the RNG using TOTAL_FILE_LIMIT as the byte limit.
+ * The byte count reported by RNG_FileUpdate is deliberately discarded.
+ */
+void
+RNG_FileForRNG(const char *fileName)
+{
+    (void)RNG_FileUpdate(fileName, TOTAL_FILE_LIMIT);
+}
+
+#define _POSIX_PTHREAD_SEMANTICS
+#include <dirent.h>
+
+/*
+ * Report whether "<dir>/<file>" names an existing regular file.
+ *
+ * Returns PR_FALSE when the combined path does not fit in PATH_MAX bytes,
+ * when it cannot be stat'ed, or when it is not a regular file.
+ *
+ * Fix relative to the previous version: snprintf() reports truncation by
+ * returning a value >= the buffer size, not a value <= 0, so over-long
+ * names were silently truncated and the wrong path was examined.
+ */
+PRBool
+ReadFileOK(char *dir, char *file)
+{
+    struct stat stat_buf;
+    char filename[PATH_MAX];
+    int count = snprintf(filename, sizeof filename, "%s/%s", dir, file);
+
+    if (count <= 0 || (size_t)count >= sizeof filename) {
+        return PR_FALSE; /* name too long, can't read it anyway */
+    }
+
+    if (stat(filename, &stat_buf) < 0)
+        return PR_FALSE; /* can't stat, probably can't read it then as well */
+    return S_ISREG(stat_buf.st_mode) ? PR_TRUE : PR_FALSE;
+}
+
+/*
+ * Fill |dest| with exactly |maxLen| bytes read from /dev/urandom.
+ *
+ * Returns maxLen on success.  Returns 0 and sets SEC_ERROR_NEED_RANDOM
+ * when the device cannot be opened or fewer than maxLen bytes could be
+ * read.
+ *
+ * Fixes relative to the previous version:
+ *  - the request size and read() result are no longer squeezed through an
+ *    int, which mangled requests larger than INT_MAX;
+ *  - a read() interrupted by a signal (EINTR) is retried rather than
+ *    reported as a system RNG failure.
+ */
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    FILE *file;
+    int fd;
+    ssize_t got;
+    size_t fileBytes = 0;
+    unsigned char *buffer = dest;
+
+    file = fopen("/dev/urandom", "r");
+    if (file == NULL) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return 0;
+    }
+    /* Read from the underlying file descriptor directly to bypass stdio
+     * buffering and avoid reading more bytes than we need from /dev/urandom.
+     * NOTE: we can't use fread with unbuffered I/O because fread may return
+     * EOF in unbuffered I/O mode on Android.
+     */
+    fd = fileno(file);
+    /* 'file' was just opened, so this should not fail. */
+    PORT_Assert(fd != -1);
+    while (maxLen > fileBytes && fd != -1) {
+        got = read(fd, buffer, maxLen - fileBytes);
+        if (got < 0 && errno == EINTR)
+            continue; /* interrupted before any data arrived: retry */
+        if (got <= 0)
+            break;
+        fileBytes += (size_t)got;
+        buffer += got;
+    }
+    fclose(file);
+    if (fileBytes != maxLen) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM); /* system RNG failed */
+        fileBytes = 0;
+    }
+    return fileBytes;
+}
diff --git a/security/nss/lib/freebl/unix_urandom.c b/security/nss/lib/freebl/unix_urandom.c
new file mode 100644
index 0000000000..73006cdbb4
--- /dev/null
+++ b/security/nss/lib/freebl/unix_urandom.c
@@ -0,0 +1,84 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include "secerr.h"
+#include "secrng.h"
+#include "prprf.h"
+
+/* syscall getentropy() is limited to retrieving 256 bytes */
+#define GETENTROPY_MAX_BYTES 256
+
+/*
+ * Seed the RNG with SYSTEM_RNG_SEED_COUNT bytes obtained from
+ * RNG_SystemRNG().  When that call yields nothing the function does
+ * nothing further (RNG_SystemRNG has already set the error code).  After
+ * a successful update the local copy of the seed is zeroed.
+ */
+void
+RNG_SystemInfoForRNG(void)
+{
+    PRUint8 seed[SYSTEM_RNG_SEED_COUNT];
+    size_t got = RNG_SystemRNG(seed, SYSTEM_RNG_SEED_COUNT);
+
+    if (got != 0) {
+        RNG_RandomUpdate(seed, got);
+        PORT_Memset(seed, 0, sizeof seed);
+    }
+}
+
+/*
+ * Fill |dest| with exactly |maxLen| bytes from the operating system's
+ * random source.
+ *
+ * Where the build condition below holds, getentropy() is used first, in
+ * chunks of at most GETENTROPY_MAX_BYTES.  If it fails with ENOSYS, or on
+ * builds without it, the bytes are read from /dev/urandom instead.
+ *
+ * Returns maxLen on success.  Returns 0 and sets SEC_ERROR_NEED_RANDOM
+ * when getentropy() fails for a reason other than ENOSYS, when
+ * /dev/urandom cannot be opened, or when fewer than maxLen bytes could be
+ * read.
+ *
+ * Fixes relative to the previous version:
+ *  - the /dev/urandom fallback reset the byte count but not the write
+ *    pointer; both now restart at |dest| so the fallback can never write
+ *    past the end of the caller's buffer;
+ *  - a read() interrupted by a signal (EINTR) is retried rather than
+ *    reported as a failure.
+ */
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    int fd;
+    int bytes;
+    size_t fileBytes = 0;
+    unsigned char *buffer = dest;
+
+#if defined(__OpenBSD__) || (defined(__FreeBSD__) && __FreeBSD_version >= 1200000) || (defined(LINUX) && defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 25))))
+    int result;
+
+    while (fileBytes < maxLen) {
+        size_t getBytes = maxLen - fileBytes;
+        if (getBytes > GETENTROPY_MAX_BYTES) {
+            getBytes = GETENTROPY_MAX_BYTES;
+        }
+        result = getentropy(buffer, getBytes);
+        if (result == 0) { /* success */
+            fileBytes += getBytes;
+            buffer += getBytes;
+        } else {
+            break;
+        }
+    }
+    if (fileBytes == maxLen) { /* success */
+        return maxLen;
+    }
+    /* If we failed with an error other than ENOSYS, it means the destination
+     * buffer is not writeable. We don't need to try writing to it again. */
+    if (errno != ENOSYS) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return 0;
+    }
+    /* ENOSYS means the kernel doesn't support getentropy()/getrandom().
+     * Reset the number of bytes to get, rewind the write position to the
+     * start of the destination and fall back to /dev/urandom. */
+    fileBytes = 0;
+    buffer = dest;
+#endif
+    fd = open("/dev/urandom", O_RDONLY);
+    if (fd < 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return 0;
+    }
+    while (fileBytes < maxLen) {
+        bytes = read(fd, buffer, maxLen - fileBytes);
+        if (bytes < 0 && errno == EINTR) {
+            continue; /* interrupted before any data arrived: retry */
+        }
+        if (bytes <= 0) {
+            break;
+        }
+        fileBytes += bytes;
+        buffer += bytes;
+    }
+    (void)close(fd);
+    if (fileBytes != maxLen) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return 0;
+    }
+    return fileBytes;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.c b/security/nss/lib/freebl/verified/Hacl_Chacha20.c
new file mode 100644
index 0000000000..ade205ce60
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.c
@@ -0,0 +1,227 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Chacha20.h"
+
+/* Exported constant words; read as little-endian bytes they spell the
+ * ASCII string "expand 32-byte k". */
+const uint32_t
+ Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U };
+
+/* Apply one quarter round, in place, to the four words of |st| selected by
+ * the indices a, b, c and d.  It consists of four add / xor / rotate-left
+ * steps with rotation amounts 16, 12, 8 and 7; each step stores its two
+ * results back into |st| before the next step reloads them. */
+static inline void
+quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ /* step 1: a += b; d ^= a; d = rotl(d, 16) */
+ uint32_t sta = st[a];
+ uint32_t stb0 = st[b];
+ uint32_t std0 = st[d];
+ uint32_t sta10 = sta + stb0;
+ uint32_t std10 = std0 ^ sta10;
+ uint32_t std2 = std10 << (uint32_t)16U | std10 >> (uint32_t)16U;
+ st[a] = sta10;
+ st[d] = std2;
+ /* step 2: c += d; b ^= c; b = rotl(b, 12) */
+ uint32_t sta0 = st[c];
+ uint32_t stb1 = st[d];
+ uint32_t std3 = st[b];
+ uint32_t sta11 = sta0 + stb1;
+ uint32_t std11 = std3 ^ sta11;
+ uint32_t std20 = std11 << (uint32_t)12U | std11 >> (uint32_t)20U;
+ st[c] = sta11;
+ st[b] = std20;
+ /* step 3: a += b; d ^= a; d = rotl(d, 8) */
+ uint32_t sta2 = st[a];
+ uint32_t stb2 = st[b];
+ uint32_t std4 = st[d];
+ uint32_t sta12 = sta2 + stb2;
+ uint32_t std12 = std4 ^ sta12;
+ uint32_t std21 = std12 << (uint32_t)8U | std12 >> (uint32_t)24U;
+ st[a] = sta12;
+ st[d] = std21;
+ /* step 4: c += d; b ^= c; b = rotl(b, 7) */
+ uint32_t sta3 = st[c];
+ uint32_t stb = st[d];
+ uint32_t std = st[b];
+ uint32_t sta1 = sta3 + stb;
+ uint32_t std1 = std ^ sta1;
+ uint32_t std22 = std1 << (uint32_t)7U | std1 >> (uint32_t)25U;
+ st[c] = sta1;
+ st[b] = std22;
+}
+
+/* Apply eight quarter rounds to the 16-word state |st|: first to the four
+ * columns (0,4,8,12) ... (3,7,11,15), then to the four diagonals
+ * (0,5,10,15) ... (3,4,9,14). */
+static inline void
+double_round(uint32_t *st)
+{
+ /* column rounds */
+ quarter_round(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U);
+ quarter_round(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U);
+ quarter_round(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U);
+ quarter_round(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U);
+ /* diagonal rounds */
+ quarter_round(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U);
+ quarter_round(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U);
+ quarter_round(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U);
+ quarter_round(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U);
+}
+
+/* Run ten double rounds (twenty rounds in total) over the 16-word state
+ * |st|, in place.  The calls are written out rather than looped. */
+static inline void
+rounds(uint32_t *st)
+{
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+ double_round(st);
+}
+
+/* Produce one 16-word keystream block in |k| from the state |ctx| for the
+ * block offset |ctr|.  |ctx| itself is not modified.
+ *
+ * The state is copied into |k| and |ctr| is added to word 12 before the
+ * rounds run.  Afterwards the original |ctx| words are added back word by
+ * word; since |ctx| still holds the unoffset word 12, |ctr| is added to
+ * word 12 once more at the end.
+ *
+ * KRML_MAYBE_FOR16 is defined outside this file; presumably it executes
+ * its body for i = 0 .. 15 - confirm against krml/internal/target.h. */
+static inline void
+chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr)
+{
+ memcpy(k, ctx, (uint32_t)16U * sizeof(uint32_t));
+ uint32_t ctr_u32 = ctr;
+ k[12U] = k[12U] + ctr_u32;
+ rounds(k);
+ /* feed-forward: k[i] += ctx[i] */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ uint32_t *os = k;
+ uint32_t x = k[i] + ctx[i];
+ os[i] = x;);
+ k[12U] = k[12U] + ctr_u32;
+}
+
+/* File-local copy of the four constant words placed in state words 0..3 by
+ * Hacl_Impl_Chacha20_chacha20_init; read as little-endian bytes they spell
+ * the ASCII string "expand 32-byte k". */
+static const uint32_t
+ chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U };
+
+/* Initialise the 16-word state |ctx|:
+ *   words 0..3   - the chacha20_constants table,
+ *   words 4..11  - eight words loaded with load32_le from |k| (32 bytes read),
+ *   word  12     - the counter |ctr|,
+ *   words 13..15 - three words loaded with load32_le from |n| (12 bytes read).
+ *
+ * The KRML_MAYBE_FORn macros are defined outside this file; presumably
+ * each executes its body for i = 0 .. n-1 - confirm against
+ * krml/internal/target.h. */
+void
+Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
+{
+ /* constants */
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint32_t *os = ctx;
+ uint32_t x = chacha20_constants[i];
+ os[i] = x;);
+ /* key words */
+ KRML_MAYBE_FOR8(i,
+ (uint32_t)0U,
+ (uint32_t)8U,
+ (uint32_t)1U,
+ uint32_t *os = ctx + (uint32_t)4U;
+ uint8_t *bj = k + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;);
+ /* block counter */
+ ctx[12U] = ctr;
+ /* nonce words */
+ KRML_MAYBE_FOR3(i,
+ (uint32_t)0U,
+ (uint32_t)3U,
+ (uint32_t)1U,
+ uint32_t *os = ctx + (uint32_t)13U;
+ uint8_t *bj = n + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;);
+}
+
+/* Process one full 64-byte block: generate the keystream block for offset
+ * |incr| with chacha20_core, load the 64 bytes at |text| as sixteen
+ * little-endian words, XOR them with the keystream and store the result
+ * as little-endian words at |out|.
+ *
+ * All of |text| is loaded before anything is stored, and
+ * chacha20_encrypt_last relies on that by passing the same buffer for
+ * |out| and |text|. */
+static void
+chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, uint8_t *text)
+{
+ uint32_t k[16U] = { 0U };
+ chacha20_core(k, ctx, incr);
+ uint32_t bl[16U] = { 0U };
+ /* load input words */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ uint32_t *os = bl;
+ uint8_t *bj = text + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;);
+ /* XOR with keystream */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ uint32_t *os = bl;
+ uint32_t x = bl[i] ^ k[i];
+ os[i] = x;);
+ /* store output words */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ store32_le(out + i * (uint32_t)4U, bl[i]););
+}
+
+/* Process a final partial block of |len| bytes.  The input is copied into
+ * a zero-filled 64-byte scratch buffer, processed there as a full block
+ * for offset |incr|, and only the first |len| result bytes are copied to
+ * |out|.  The caller in this file passes len < 64; a larger value would
+ * overrun the scratch buffer. */
+static inline void
+chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, uint8_t *text)
+{
+ uint8_t plain[64U] = { 0U };
+ memcpy(plain, text, len * sizeof(uint8_t));
+ chacha20_encrypt_block(ctx, plain, incr, plain);
+ memcpy(out, plain, len * sizeof(uint8_t));
+}
+
+/* Process |len| bytes from |text| into |out| using the state |ctx|.
+ * The input is handled as len / 64 full blocks followed by one partial
+ * block when len is not a multiple of 64.  Block number i is processed
+ * with offset i relative to the counter stored in |ctx|; the partial block
+ * uses the offset after the last full block.  |ctx| is not modified by the
+ * helpers called here. */
+void
+Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text)
+{
+ /* rem and rem1 hold the same value (len % 64) */
+ uint32_t rem = len % (uint32_t)64U;
+ uint32_t nb = len / (uint32_t)64U;
+ uint32_t rem1 = len % (uint32_t)64U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ chacha20_encrypt_block(ctx, out + i * (uint32_t)64U, i, text + i * (uint32_t)64U);
+ }
+ if (rem1 > (uint32_t)0U) {
+ chacha20_encrypt_last(ctx, rem, out + nb * (uint32_t)64U, nb, text + nb * (uint32_t)64U);
+ }
+}
+
+/* Encrypt |len| bytes from |text| into |out|.  A fresh state is built from
+ * |key|, the nonce |n| and the initial block counter |ctr| with
+ * Hacl_Impl_Chacha20_chacha20_init, then the data is processed with
+ * Hacl_Impl_Chacha20_chacha20_update. */
+void
+Hacl_Chacha20_chacha20_encrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ uint32_t ctx[16U] = { 0U };
+ Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr);
+ Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, text);
+}
+
+/* Decrypt |len| bytes from |cipher| into |out|.  The body performs exactly
+ * the same init + update sequence as Hacl_Chacha20_chacha20_encrypt; only
+ * the name of the input parameter differs. */
+void
+Hacl_Chacha20_chacha20_decrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ uint32_t ctx[16U] = { 0U };
+ Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr);
+ Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, cipher);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/Hacl_Chacha20.h
new file mode 100644
index 0000000000..3da477d0ce
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.h
@@ -0,0 +1,60 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20_H
+#define __Hacl_Chacha20_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+/* Encrypt |len| bytes from |text| into |out|.
+ *   key - key material (the implementation reads 32 bytes)
+ *   n   - nonce (the implementation reads 12 bytes)
+ *   ctr - block counter used for the first 64-byte block */
+void
+Hacl_Chacha20_chacha20_encrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+/* Decrypt |len| bytes from |cipher| into |out|.
+ *   key - key material (the implementation reads 32 bytes)
+ *   n   - nonce (the implementation reads 12 bytes)
+ *   ctr - block counter used for the first 64-byte block */
+void
+Hacl_Chacha20_chacha20_decrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c
new file mode 100644
index 0000000000..3d0f27131d
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c
@@ -0,0 +1,1216 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_128.h"
+
+#include "internal/Hacl_Poly1305_128.h"
+#include "internal/Hacl_Krmllib.h"
+#include "libintvector.h"
+
+static inline void
+poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 *pre0 = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc0 = ctx;
+ uint32_t sz_block = (uint32_t)32U;
+ uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
+ uint8_t *t00 = blocks;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)32U;
+ uint8_t *text0 = t00;
+ Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc0, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t00 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block);
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f25 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f25;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f0;
+ e[1U] = f1;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *rn = pre0 + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec128 *rn5 = pre0 + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec128 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 f110 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 f120 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 f130 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 f140 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a11 =
+ Lib_IntVector_Intrinsics_vec128_add64(a1,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a21 =
+ Lib_IntVector_Intrinsics_vec128_add64(a2,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a31 =
+ Lib_IntVector_Intrinsics_vec128_add64(a3,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a41 =
+ Lib_IntVector_Intrinsics_vec128_add64(a4,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec128 t01 = a04;
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o00 = x02;
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42;
+ acc0[0U] = o00;
+ acc0[1U] = o10;
+ acc0[2U] = o20;
+ acc0[3U] = o30;
+ acc0[4U] = o40;
+ Lib_IntVector_Intrinsics_vec128 f100 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24);
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc0, pre0);
+ }
+ uint32_t len1 = n * (uint32_t)16U - len0;
+ uint8_t *t10 = blocks + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem1 = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t10 + i * (uint32_t)16U;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = t10 + nb * (uint32_t)16U;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(uint8_t));
+ if (r > (uint32_t)0U) {
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06;
+ Lib_IntVector_Intrinsics_vec128 t1 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+static inline void
+poly1305_do_128(
+ uint8_t *k,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *out)
+{ /* Poly1305 driver shared by the AEAD encrypt and decrypt entry points.
+ *
+ * Steps, in order:
+ *   1. Hacl_Poly1305_128_poly1305_init(ctx, k) prepares the context.
+ *   2. aad, then m, are absorbed through poly1305_padded_128(); either
+ *      call is skipped when the corresponding length is 0.
+ *   3. One last 16-byte block, le64(aadlen) || le64(mlen), is added to the
+ *      accumulator, multiplied by r and carry-reduced (the inlined code
+ *      below).
+ *   4. Hacl_Poly1305_128_poly1305_finish(out, k, ctx) produces the output.
+ *
+ * k       key bytes handed to init and finish; both callers in this file
+ *         pass the ChaCha20 keystream block generated with counter 0.
+ * aadlen  length of aad in bytes.
+ * aad     associated data.
+ * mlen    length of m in bytes.
+ * m       second absorbed buffer; both callers pass the ciphertext.
+ * out     output buffer for finish; callers provide 16 bytes.
+ *
+ * NOTE(review): init, finish and the vec128 intrinsics are defined outside
+ * this file section; remarks about them below are assumptions to confirm.
+ */
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U };
+ uint8_t block[16U] = { 0U };
+ Hacl_Poly1305_128_poly1305_init(ctx, k);
+ if (aadlen != (uint32_t)0U) {
+ poly1305_padded_128(ctx, aadlen, aad);
+ }
+ if (mlen != (uint32_t)0U) {
+ poly1305_padded_128(ctx, mlen, m);
+ }
+ store64_le(block, (uint64_t)aadlen); /* length block, bytes 0..7: aadlen */
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen); /* length block, bytes 8..15: mlen */
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; /* ctx[5..]: values prepared by init (presumably key-derived) */
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* ctx[0..4]: the five accumulator limbs */
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: block bits 0..25 */
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: block bits 26..51 */
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo joined with low 14 bits of hi (bits 52..77) */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: block bits 78..103 */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); /* limb 4: block bits 104..127 */
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 1 << 24; limb 4 starts at bit 104, so this is bit 128 of the block */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); /* set the 2^128 marker bit on the full 16-byte block */
+ Lib_IntVector_Intrinsics_vec128 *r = pre; /* pre[0..4] */
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* pre[5..9]; NOTE(review): presumably 5 * r[i], filled by init - confirm */
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; /* r5[0] is not read by this function */
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); /* a01..a41 = accumulator + block, limb by limb */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); /* start of the 5x5 limb product; partial sums build up in a0x..a4x */
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06; /* t0 = r0*a01 + r54*a11 + r53*a21 + r52*a31 + r51*a41 */
+ Lib_IntVector_Intrinsics_vec128 t1 = a16; /* t1 = r1*a01 + r0*a11 + r54*a21 + r53*a31 + r52*a41 */
+ Lib_IntVector_Intrinsics_vec128 t2 = a26; /* t2 = r2*a01 + r1*a11 + r0*a21 + r54*a31 + r53*a41 */
+ Lib_IntVector_Intrinsics_vec128 t3 = a36; /* t3 = r3*a01 + r2*a11 + r1*a21 + r0*a31 + r54*a41 */
+ Lib_IntVector_Intrinsics_vec128 t4 = a46; /* t4 = r4*a01 + r3*a11 + r2*a21 + r1*a31 + r0*a41 */
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 (weight 2^130) */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11 */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); /* top carry wraps into limb 0 times 5, as in reduction mod 2^130 - 5 */
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); /* limb 1 is not masked again after this add */
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); /* limb 4 is not masked again after this add */
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0; /* write the updated accumulator back into ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_128_poly1305_finish(out, k, ctx); /* finalisation is defined elsewhere; it receives the same k as init */
+}
+
+/**
+ChaCha20-Poly1305 AEAD encryption, 128-bit vector variant.
+
+The payload `m` is encrypted into `cipher` with ChaCha20 under key `k` and
+nonce `n`, using an initial block counter of 1. The 64-byte ChaCha20 block
+for counter 0 is then generated and handed to Poly1305 as its key, and the
+tag over `aad` and the ciphertext is written to `mac`.
+
+`k`, `n`, `aadlen` and `aad` must be the same values later given to
+decryption. Encryption may run in place: `m` and `cipher` are allowed to
+point to the same memory. All lengths are in bytes.
+
+@param k Pointer to the 32-byte AEAD key (read only).
+@param n Pointer to the 12-byte AEAD nonce (read only).
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of associated data (read only).
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of plaintext (read only).
+@param cipher Pointer to `mlen` bytes that receive the ciphertext.
+@param mac Pointer to 16 bytes that receive the authentication tag.
+*/
+void
+Hacl_Chacha20Poly1305_128_aead_encrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    /* Zero input block; encrypting it in place yields raw keystream. */
+    uint8_t poly_key_block[64U] = { 0U };
+
+    /* Payload first, with block counter 1. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, cipher, m, k, n, (uint32_t)1U);
+
+    /* Counter-0 keystream block becomes the Poly1305 key material. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U,
+                                              poly_key_block,
+                                              poly_key_block,
+                                              k,
+                                              n,
+                                              (uint32_t)0U);
+
+    /* Authenticate aad followed by the freshly written ciphertext. */
+    poly1305_do_128(poly_key_block, aadlen, aad, mlen, cipher, mac);
+}
+
+/**
+ChaCha20-Poly1305 AEAD decryption, 128-bit vector variant.
+
+The Poly1305 key is taken from the ChaCha20 block for counter 0, the tag over
+`aad` and `cipher` is recomputed, and all 16 tag bytes are compared against
+`mac` by ANDing per-byte equality masks (no early exit). Only when every byte
+matches is `cipher` decrypted into `m`, using an initial block counter of 1.
+
+`k`, `n`, `aadlen` and `aad` must be the same values that were given to
+encryption. Decryption may run in place: `m` and `cipher` are allowed to
+point to the same memory. All lengths are in bytes.
+
+On success the plaintext is stored in `m` and 0 is returned.
+On failure `m` is left untouched and 1 is returned.
+
+@param k Pointer to the 32-byte AEAD key (read only).
+@param n Pointer to the 12-byte AEAD nonce (read only).
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of associated data (read only).
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes that receive the plaintext.
+@param cipher Pointer to `mlen` bytes of ciphertext (read only).
+@param mac Pointer to the 16-byte tag to verify (read only).
+
+@returns 0 on success; 1 on failure (tag mismatch).
+*/
+uint32_t
+Hacl_Chacha20Poly1305_128_aead_decrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    uint8_t expected_tag[16U] = { 0U };
+    uint8_t poly_key_block[64U] = { 0U };
+    /* Stays 0xff only while every compared byte pair is equal. */
+    uint8_t all_equal = (uint8_t)255U;
+
+    /* Counter-0 keystream block becomes the Poly1305 key material. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U,
+                                              poly_key_block,
+                                              poly_key_block,
+                                              k,
+                                              n,
+                                              (uint32_t)0U);
+    poly1305_do_128(poly_key_block, aadlen, aad, mlen, cipher, expected_tag);
+
+    /* Fold the 16 per-byte equality masks together; every byte is visited. */
+    KRML_MAYBE_FOR16(i,
+                     (uint32_t)0U,
+                     (uint32_t)16U,
+                     (uint32_t)1U,
+                     all_equal = FStar_UInt8_eq_mask(expected_tag[i], mac[i]) & all_equal;);
+
+    if (all_equal != (uint8_t)255U) {
+        /* Tag mismatch: report failure without writing to m. */
+        return (uint32_t)1U;
+    }
+
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n, (uint32_t)1U);
+    return (uint32_t)0U;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h
new file mode 100644
index 0000000000..031b5717ca
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h
@@ -0,0 +1,104 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20Poly1305_128_H
+#define __Hacl_Chacha20Poly1305_128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Poly1305_128.h"
+#include "Hacl_Chacha20_Vec128.h"
+
+/**
+Encrypt a message `m` with key `k` and write the authentication tag to `mac`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption and decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+All lengths are in bytes. The function does not return a value.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of memory where the message is read from.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
+@param mac Pointer to 16 bytes of memory where the mac is written to.
+*/
+void
+Hacl_Chacha20Poly1305_128_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+/**
+Verify the tag `mac` and, if it is valid, decrypt the ciphertext `cipher` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption and decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+All lengths are in bytes.
+
+If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
+If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes of memory where the message is written to.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
+@param mac Pointer to 16 bytes of memory where the mac is read from.
+
+@returns 0 on success; 1 on failure (the supplied mac did not match).
+*/
+uint32_t
+Hacl_Chacha20Poly1305_128_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20Poly1305_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
new file mode 100644
index 0000000000..2bd071245e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
@@ -0,0 +1,1218 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_256.h"
+
+#include "internal/Hacl_Poly1305_256.h"
+#include "internal/Hacl_Krmllib.h"
+#include "libintvector.h"
+
+static inline void
+poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
+ uint32_t sz_block = (uint32_t)64U;
+ uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
+ uint8_t *t00 = blocks;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)64U;
+ uint8_t *text0 = t00;
+ Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t00 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 o00 = o5;
+ Lib_IntVector_Intrinsics_vec256 o11 = o10;
+ Lib_IntVector_Intrinsics_vec256 o21 = o20;
+ Lib_IntVector_Intrinsics_vec256 o31 = o30;
+ Lib_IntVector_Intrinsics_vec256 o41 = o40;
+ e[0U] = o00;
+ e[1U] = o11;
+ e[2U] = o21;
+ e[3U] = o31;
+ e[4U] = o41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 =
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a21 =
+ Lib_IntVector_Intrinsics_vec256_add64(a2,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a31 =
+ Lib_IntVector_Intrinsics_vec256_add64(a3,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a41 =
+ Lib_IntVector_Intrinsics_vec256_add64(a4,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t01 = a04;
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o01 = x02;
+ Lib_IntVector_Intrinsics_vec256 o12 = x12;
+ Lib_IntVector_Intrinsics_vec256 o22 = x21;
+ Lib_IntVector_Intrinsics_vec256 o32 = x32;
+ Lib_IntVector_Intrinsics_vec256 o42 = x42;
+ acc0[0U] = o01;
+ acc0[1U] = o12;
+ acc0[2U] = o22;
+ acc0[3U] = o32;
+ acc0[4U] = o42;
+ Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
+ }
+ uint32_t len1 = n * (uint32_t)16U - len0;
+ uint8_t *t10 = blocks + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem1 = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t10 + i * (uint32_t)16U;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = t10 + nb * (uint32_t)16U;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(uint8_t));
+ if (r > (uint32_t)0U) {
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06;
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+static inline void
+poly1305_do_256(
+ uint8_t *k, /* key handed to poly1305_init and poly1305_finish */
+ uint32_t aadlen, /* byte length of aad */
+ uint8_t *aad, /* associated data; only read when aadlen != 0 */
+ uint32_t mlen, /* byte length of m */
+ uint8_t *m, /* message bytes to authenticate; only read when mlen != 0 */
+ uint8_t *out) /* destination passed to poly1305_finish for the tag */
+{ /* MAC driver: init with k, absorb aad then m via poly1305_padded_256, absorb one 16-byte block holding both lengths, then finish into out. */
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; /* ctx[0..4] is used as the accumulator, ctx[5..] as precomputed key material (see acc/pre below) */
+ uint8_t block[16U] = { 0U }; /* holds the trailing length block */
+ Hacl_Poly1305_256_poly1305_init(ctx, k);
+ if (aadlen != (uint32_t)0U) {
+ poly1305_padded_256(ctx, aadlen, aad);
+ }
+ if (mlen != (uint32_t)0U) {
+ poly1305_padded_256(ctx, mlen, m);
+ }
+ store64_le(block, (uint64_t)aadlen); /* bytes 0..7: aadlen as 64-bit little-endian */
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen); /* bytes 8..15: mlen as 64-bit little-endian */
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; /* the length block split into five limbs */
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); /* limb 4: bits 40..63 of hi */
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 inside limb 4 (which starts at bit 104), i.e. bit 128 of the block value */
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); /* mark the block as a full 16-byte block by setting bit 128 */
+ Lib_IntVector_Intrinsics_vec256 *r = pre; /* pre[0..4] */
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; /* pre[5..9]; NOTE(review): presumably 5*r as prepared by poly1305_init - confirm */
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); /* a01..a41: accumulator plus block, limb by limb */
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); /* multiply by r: first the partial products of limb a01 ... */
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); /* ... then limb a11; an r5x operand replaces r_x wherever the product would land past limb 4 */
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); /* contributions of limb a21 */
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); /* contributions of limb a31 */
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); /* contributions of limb a41 */
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06; /* t0..t4: unreduced product limbs */
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* carry pass: keep 26 bits per limb, push (limb >> 26) into the next limb */
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); /* carry limb 0 -> limb 1 */
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); /* carry limb 3 -> limb 4 */
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11: overflow out of limb 4 is folded back into limb 0 times 5 */
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); /* carry limb 1 -> limb 2 */
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); /* wrap-around carry limb 4 -> limb 0 */
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); /* carry limb 2 -> limb 3 */
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); /* second carry limb 0 -> limb 1 (not masked again) */
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); /* second carry limb 3 -> limb 4 (not masked again) */
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0; /* store the carried limbs back into the accumulator ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_256_poly1305_finish(out, k, ctx); /* produce the tag into out from the final accumulator state */
+}
+
+/**
+Encrypt a message `m` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of memory where the message is read from.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
+@param mac Pointer to 16 bytes of memory where the mac is written to.
+*/
+void
+Hacl_Chacha20Poly1305_256_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac)
+{ /* Encrypt-then-MAC: the tag is computed over aad and the ciphertext, not the plaintext. */
+ Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U); /* m -> cipher; last argument 1 is presumably the starting block counter - confirm against Hacl_Chacha20_Vec256.h */
+ uint8_t tmp[64U] = { 0U };
+ Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* encrypting 64 zero bytes in place with last argument 0 leaves raw keystream in tmp */
+ uint8_t *key = tmp; /* that keystream is used as the MAC key; NOTE(review): tmp is not cleared before return - confirm whether zeroization is wanted */
+ poly1305_do_256(key, aadlen, aad, mlen, cipher, mac); /* writes the tag to mac */
+}
+
+/**
+Decrypt a ciphertext `cipher` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
+If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes of memory where the message is written to.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
+@param mac Pointer to 16 bytes of memory where the mac is read from.
+
+@returns 0 on success; 1 on failure.
+*/
+uint32_t
+Hacl_Chacha20Poly1305_256_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac)
+{ /* Verify-then-decrypt: recompute the tag over aad and cipher, and only write plaintext to m when it matches mac. */
+ uint8_t computed_mac[16U] = { 0U };
+ uint8_t tmp[64U] = { 0U };
+ Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* encrypting 64 zero bytes in place with last argument 0 leaves raw keystream in tmp */
+ uint8_t *key = tmp; /* that keystream is used as the MAC key; NOTE(review): tmp is not cleared before return - confirm whether zeroization is wanted */
+ poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac);
+ uint8_t res = (uint8_t)255U; /* all-ones accumulator; each byte comparison below is ANDed into it, so it stays 255 only if every eq_mask result is 255 */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
+ res = uu____0 & res;);
+ uint8_t z = res;
+ if (z == (uint8_t)255U) { /* tags matched on all 16 bytes */
+ Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U); /* cipher -> m, same last argument (1) as the encrypt path */
+ return (uint32_t)0U;
+ }
+ return (uint32_t)1U; /* mismatch: m was never written */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h
new file mode 100644
index 0000000000..dc2de098d5
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h
@@ -0,0 +1,104 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20Poly1305_256_H
+#define __Hacl_Chacha20Poly1305_256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Poly1305_256.h"
+#include "Hacl_Chacha20_Vec256.h"
+
+/**
+Encrypt a message `m` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of memory where the message is read from.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
+@param mac Pointer to 16 bytes of memory where the mac is written to.
+*/
+void
+Hacl_Chacha20Poly1305_256_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+/**
+Decrypt a ciphertext `cipher` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
+If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes of memory where the message is written to.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
+@param mac Pointer to 16 bytes of memory where the mac is read from.
+
+@returns 0 on success; 1 on failure.
+*/
+uint32_t
+Hacl_Chacha20Poly1305_256_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20Poly1305_256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c
new file mode 100644
index 0000000000..e83a93bed5
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c
@@ -0,0 +1,630 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_32.h"
+
+#include "internal/Hacl_Krmllib.h"
+
+static inline void /* Absorbs `len` bytes of `text` into the Poly1305 state `ctx`; a trailing partial block is zero-padded to 16 bytes and absorbed as a full block. */
+poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U; /* number of whole 16-byte blocks in text */
+ uint32_t r = len % (uint32_t)16U; /* leftover bytes after the whole blocks (0..15) */
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U; /* start of the leftover bytes */
+ uint64_t *pre0 = ctx + (uint32_t)5U; /* ctx[5..9] is read below as r, ctx[10..14] as r5 */
+ uint64_t *acc0 = ctx; /* ctx[0..4] is the five-limb accumulator */
+ uint32_t nb = n * (uint32_t)16U / (uint32_t)16U; /* equals n */
+ uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U; /* always 0, because n * 16 is a multiple of 16 */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* one accumulator update per whole block */
+ uint8_t *block = blocks + i * (uint32_t)16U;
+ uint64_t e[5U] = { 0U }; /* the block as five 26-bit limbs */
+ uint64_t u0 = load64_le(block); /* low 8 bytes of the block */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U); /* high 8 bytes of the block */
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77: top 12 bits of f0 plus low 14 bits of f1 */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 inside limb 4 (weight 2^104), i.e. bit 128 of the block value */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask; /* append the 2^128 bit to the 16-byte block */
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U; /* presumably r5[i] == 5 * r[i], precomputed by Hacl_Poly1305_32_poly1305_init - confirm there */
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U]; /* r5[0] is not read */
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10; /* limb-wise acc + block, no carry yet */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* partial products of limb 0 of (acc + block) with r */
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11; /* r5 limbs appear where a product would land past limb 4 */
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06; /* t0..t4: unreduced product limbs */
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU; /* low 26 bits */
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry out of limb 0 */
+ uint64_t z1 = t3 >> (uint32_t)26U; /* carry out of limb 3 */
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U; /* carry out of limb 1 */
+ uint64_t z11 = x4 >> (uint32_t)26U; /* carry out of limb 4 (weight 2^130) */
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* 5 * z11: the limb-4 carry wraps into limb 0 times 5 (reduction mod 2^130 - 5) */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U; /* carry out of limb 2 */
+ uint64_t z13 = x01 >> (uint32_t)26U; /* second carry out of limb 0 */
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13; /* not masked again after this add */
+ uint64_t z03 = x31 >> (uint32_t)26U; /* second carry out of limb 3 */
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03; /* not masked again after this add */
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0; /* write the carried result back into the accumulator */
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) { /* never taken: rem1 is always 0 (see its definition); leftover bytes are handled after this branch */
+ uint8_t *last = blocks + nb * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; /* parses as 1 << ((rem1 * 8) % 26): bit rem1 * 8 expressed within its limb */
+ uint64_t mask = b;
+ uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; /* limb that contains bit rem1 * 8 */
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask;
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10; /* from here on: same add, multiply and carry sequence as in the loop body */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U }; /* zero-filled buffer for the trailing partial block */
+ memcpy(tmp, rem, r * sizeof(uint8_t)); /* copies the r leftover bytes; tmp[r..15] stay zero (the padding) */
+ if (r > (uint32_t)0U) { /* only absorb the padded block when there were leftover bytes */
+ uint64_t *pre = ctx + (uint32_t)5U;
+ uint64_t *acc = ctx;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* same 26-bit limb split as in the loop body */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 128, as for a whole block: the padded block is treated as a full 16-byte block */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r1 = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* from here on: same add, multiply and carry sequence as in the loop body */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return; /* redundant: nothing follows the if in this function */
+ }
+}
+
+static inline void /* Computes the AEAD tag into `out`: Poly1305 under key `k` over padded `aad`, padded `m`, then one block holding both lengths. */
+poly1305_do_32(
+ uint8_t *k,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *out)
+{
+ uint64_t ctx[25U] = { 0U }; /* Poly1305 state: ctx[0..4] is used as the accumulator, ctx[5..] as key-derived limbs */
+ uint8_t block[16U] = { 0U }; /* final block carrying the two lengths */
+ Hacl_Poly1305_32_poly1305_init(ctx, k);
+ if (aadlen != (uint32_t)0U) { /* empty associated data contributes no blocks */
+ poly1305_padded_32(ctx, aadlen, aad);
+ }
+ if (mlen != (uint32_t)0U) { /* empty message contributes no blocks */
+ poly1305_padded_32(ctx, mlen, m);
+ }
+ store64_le(block, (uint64_t)aadlen); /* bytes 0..7: aadlen widened to 64 bits */
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen); /* bytes 8..15: mlen widened to 64 bits */
+ uint64_t *pre = ctx + (uint32_t)5U; /* ctx[5..9] is read below as r, ctx[10..14] as r5 */
+ uint64_t *acc = ctx;
+ uint64_t e[5U] = { 0U }; /* the length block as five 26-bit limbs */
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77 */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 inside limb 4 (weight 2^104), i.e. bit 128 of the block value */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask; /* append the 2^128 bit to the 16-byte block */
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U; /* presumably r5[i] == 5 * r[i], precomputed by Hacl_Poly1305_32_poly1305_init - confirm there */
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U]; /* r5[0] is not read */
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* limb-wise acc + block, no carry yet */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* partial products of limb 0 of (acc + block) with r */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11; /* r5 limbs appear where a product would land past limb 4 */
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06; /* t0..t4: unreduced product limbs */
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU; /* low 26 bits */
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry out of limb 0 */
+ uint64_t z1 = t3 >> (uint32_t)26U; /* carry out of limb 3 */
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U; /* carry out of limb 1 */
+ uint64_t z11 = x4 >> (uint32_t)26U; /* carry out of limb 4 (weight 2^130) */
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* 5 * z11: the limb-4 carry wraps into limb 0 times 5 (reduction mod 2^130 - 5) */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U; /* carry out of limb 2 */
+ uint64_t z13 = x01 >> (uint32_t)26U; /* second carry out of limb 0 */
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13; /* not masked again after this add */
+ uint64_t z03 = x31 >> (uint32_t)26U; /* second carry out of limb 3 */
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03; /* not masked again after this add */
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0; /* write the carried result back into the accumulator */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_32_poly1305_finish(out, k, ctx); /* presumably finalizes the accumulator with k and writes the 16-byte tag to out - confirm in Hacl_Poly1305_32 */
+}
+
+/**
+Encrypt a message `m` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of memory where the message is read from.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
+@param mac Pointer to 16 bytes of memory where the mac is written to.
+*/
+void
+Hacl_Chacha20Poly1305_32_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac)
+{
+ Hacl_Chacha20_chacha20_encrypt(mlen, cipher, m, k, n, (uint32_t)1U); /* encrypt m into cipher; the last argument is presumably the initial block counter (1) - confirm in Hacl_Chacha20 */
+ uint8_t tmp[64U] = { 0U }; /* 64 zero bytes, encrypted in place below */
+ Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* same key and nonce with final argument 0; presumably leaves the counter-0 keystream block in tmp */
+ uint8_t *key = tmp; /* one-time Poly1305 key taken from the start of tmp */
+ poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); /* the tag covers aad and the ciphertext, not the plaintext */
+} /* NOTE(review): tmp, which holds the one-time MAC key, is not wiped before returning */
+
+/**
+Decrypt a ciphertext `cipher` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
+If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes of memory where the message is written to.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
+@param mac Pointer to 16 bytes of memory where the mac is read from.
+
+@returns 0 on success; 1 on failure.
+*/
+uint32_t
+Hacl_Chacha20Poly1305_32_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac)
+{
+ uint8_t computed_mac[16U] = { 0U }; /* tag recomputed locally from aad and cipher */
+ uint8_t tmp[64U] = { 0U }; /* 64 zero bytes, encrypted in place below */
+ Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* final argument 0; presumably leaves the counter-0 keystream block in tmp - confirm in Hacl_Chacha20 */
+ uint8_t *key = tmp; /* one-time Poly1305 key taken from the start of tmp */
+ poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); /* authenticate before any plaintext is produced */
+ uint8_t res = (uint8_t)255U; /* all-ones; each byte comparison below is ANDed into it */
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
+ res = uu____0 & res;);
+ uint8_t z = res; /* stays 255 only if every per-byte mask was 255; presumably eq_mask yields 255 on equal bytes and 0 otherwise */
+ if (z == (uint8_t)255U) { /* tags match */
+ Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n, (uint32_t)1U); /* decrypt cipher into m with the same final argument (1) as encryption */
+ return (uint32_t)0U; /* success */
+ }
+ return (uint32_t)1U; /* tag mismatch: m has not been written */
+} /* NOTE(review): tmp, which holds the one-time MAC key, is not wiped before returning */
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h
new file mode 100644
index 0000000000..492c18f0b8
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h
@@ -0,0 +1,104 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20Poly1305_32_H
+#define __Hacl_Chacha20Poly1305_32_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Poly1305_32.h"
+#include "Hacl_Chacha20.h"
+
+/**
+Encrypt a message `m` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the message.
+@param m Pointer to `mlen` bytes of memory where the message is read from.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is written to.
+@param mac Pointer to 16 bytes of memory where the mac is written to.
+*/
+void
+Hacl_Chacha20Poly1305_32_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+/**
+Decrypt a ciphertext `cipher` with key `k`.
+
+The arguments `k`, `n`, `aadlen`, and `aad` are the same in encryption/decryption.
+Note: Encryption and decryption can be executed in-place, i.e., `m` and `cipher` can point to the same memory.
+
+If decryption succeeds, the resulting plaintext is stored in `m` and the function returns the success code 0.
+If decryption fails, the array `m` remains unchanged and the function returns the error code 1.
+
+@param k Pointer to 32 bytes of memory where the AEAD key is read from.
+@param n Pointer to 12 bytes of memory where the AEAD nonce is read from.
+@param aadlen Length of the associated data.
+@param aad Pointer to `aadlen` bytes of memory where the associated data is read from.
+
+@param mlen Length of the ciphertext.
+@param m Pointer to `mlen` bytes of memory where the message is written to.
+@param cipher Pointer to `mlen` bytes of memory where the ciphertext is read from.
+@param mac Pointer to 16 bytes of memory where the mac is read from.
+
+@returns 0 on success; 1 on failure.
+*/
+uint32_t
+Hacl_Chacha20Poly1305_32_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20Poly1305_32_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c
new file mode 100644
index 0000000000..a15820273c
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c
@@ -0,0 +1,821 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20_Vec128.h"
+
+#include "internal/Hacl_Chacha20.h"
+#include "libintvector.h"
+
+static inline void /* Updates the 16 state vectors st[0..15] in place with eight add/xor/rotate groups ("quarter rounds" in RFC 8439 terms): four on indices (i, i+4, i+8, i+12), then four on the diagonal index sets noted below. */
+double_round_128(Lib_IntVector_Intrinsics_vec128 *st)
+{
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); /* group (0, 4, 8, 12): a += b; d = rotl(d ^ a, 16); c += d; b = rotl(b ^ c, 12); then the same with 8 and 7 */
+ Lib_IntVector_Intrinsics_vec128 std = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std, (uint32_t)16U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std0 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std0, (uint32_t)12U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]);
+ Lib_IntVector_Intrinsics_vec128 std1 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std1, (uint32_t)8U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std2 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std2, (uint32_t)7U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); /* group (1, 5, 9, 13) */
+ Lib_IntVector_Intrinsics_vec128 std3 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std3, (uint32_t)16U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std4 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std4, (uint32_t)12U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]);
+ Lib_IntVector_Intrinsics_vec128 std5 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std5, (uint32_t)8U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std6 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std6, (uint32_t)7U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); /* group (2, 6, 10, 14) */
+ Lib_IntVector_Intrinsics_vec128 std7 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std7, (uint32_t)16U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std8 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std8, (uint32_t)12U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]);
+ Lib_IntVector_Intrinsics_vec128 std9 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std9, (uint32_t)8U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std10 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std10, (uint32_t)7U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); /* group (3, 7, 11, 15) */
+ Lib_IntVector_Intrinsics_vec128 std11 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std11, (uint32_t)16U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std12 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std12, (uint32_t)12U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]);
+ Lib_IntVector_Intrinsics_vec128 std13 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std13, (uint32_t)8U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std14 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std14, (uint32_t)7U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); /* second half starts here: diagonal group (0, 5, 10, 15) */
+ Lib_IntVector_Intrinsics_vec128 std15 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std15, (uint32_t)16U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std16 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std16, (uint32_t)12U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]);
+ Lib_IntVector_Intrinsics_vec128 std17 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std17, (uint32_t)8U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std18 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std18, (uint32_t)7U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); /* diagonal group (1, 6, 11, 12) */
+ Lib_IntVector_Intrinsics_vec128 std19 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std19, (uint32_t)16U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std20 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std20, (uint32_t)12U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]);
+ Lib_IntVector_Intrinsics_vec128 std21 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std21, (uint32_t)8U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std22 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std22, (uint32_t)7U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); /* diagonal group (2, 7, 8, 13) */
+ Lib_IntVector_Intrinsics_vec128 std23 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std23, (uint32_t)16U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std24 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std24, (uint32_t)12U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]);
+ Lib_IntVector_Intrinsics_vec128 std25 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std25, (uint32_t)8U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std26 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std26, (uint32_t)7U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); /* diagonal group (3, 4, 9, 14) */
+ Lib_IntVector_Intrinsics_vec128 std27 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std27, (uint32_t)16U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std28 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std28, (uint32_t)12U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]);
+ Lib_IntVector_Intrinsics_vec128 std29 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std29, (uint32_t)8U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std30 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std30, (uint32_t)7U);
+}
+
+static inline void /* Writes k[0..15]: a copy of ctx with k[12] offset by 4*ctr, run through ten double_round_128 calls, with ctx and the same offset added back afterwards. ctx is only read. */
+chacha20_core_128(
+ Lib_IntVector_Intrinsics_vec128 *k,
+ Lib_IntVector_Intrinsics_vec128 *ctx,
+ uint32_t ctr)
+{
+ memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec128)); /* start from a copy of the 16-vector context */
+ uint32_t ctr_u32 = (uint32_t)4U * ctr; /* counter offset advances by 4 per call; the callers in this file consume 256 bytes per call */
+ Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); /* presumably ctr_u32 replicated into every 32-bit lane — confirm in libintvector.h */
+ k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); /* apply the offset to state word 12 before the rounds */
+ double_round_128(k); /* ten double rounds in total */
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ KRML_MAYBE_FOR16(i, /* per the macro arguments: i from 0 up to (not including) 16 in steps of 1; body does k[i] = k[i] + ctx[i] */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128 *os = k;
+ Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]);
+ os[i] = x;);
+ k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); /* ctx[12] does not contain cv, so the offset is added once more here */
+}
+
+static inline void /* Fills ctx[0..15] from a scalar 16-word state built from the constants table, 32 bytes of k, ctr, and 12 bytes of n; then adds (0, 1, 2, 3) to ctx[12]. */
+chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
+{
+ uint32_t ctx1[16U] = { 0U }; /* scalar staging copy of the 16-word state */
+ KRML_MAYBE_FOR4(i, /* words 0..3: entries of Hacl_Impl_Chacha20_Vec_chacha20_constants */
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint32_t *os = ctx1;
+ uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i];
+ os[i] = x;);
+ KRML_MAYBE_FOR8(i, /* words 4..11: eight load32_le reads from k at byte offsets 0, 4, ..., 28 */
+ (uint32_t)0U,
+ (uint32_t)8U,
+ (uint32_t)1U,
+ uint32_t *os = ctx1 + (uint32_t)4U;
+ uint8_t *bj = k + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;);
+ ctx1[12U] = ctr; /* word 12: caller-supplied starting counter */
+ KRML_MAYBE_FOR3(i, /* words 13..15: three load32_le reads from n at byte offsets 0, 4, 8 */
+ (uint32_t)0U,
+ (uint32_t)3U,
+ (uint32_t)1U,
+ uint32_t *os = ctx1 + (uint32_t)13U;
+ uint8_t *bj = n + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;);
+ KRML_MAYBE_FOR16(i, /* ctx[i] = vec128_load32(ctx1[i]); presumably the word replicated into every lane — confirm in libintvector.h */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128 *os = ctx;
+ uint32_t x = ctx1[i];
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x);
+ os[i] = x0;);
+ Lib_IntVector_Intrinsics_vec128
+ ctr1 =
+ Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)0U, /* vector built from the four values 0, 1, 2, 3 */
+ (uint32_t)1U,
+ (uint32_t)2U,
+ (uint32_t)3U);
+ Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U];
+ ctx[12U] = Lib_IntVector_Intrinsics_vec128_add32(c12, ctr1); /* presumably gives each lane its own consecutive counter (ctr + lane) — confirm lane order of load32s */
+}
+
+void /* XORs `len` bytes of `text` with the output of chacha20_core_128 for state (key, n, ctr) and writes the result to `out`; 256 bytes are handled per core call, with a zero-padded staging buffer for a final partial chunk. */
+Hacl_Chacha20_Vec128_chacha20_encrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ KRML_PRE_ALIGN(16) /* alignment macros are defined elsewhere; presumably request 16-byte alignment for the array below */
+ Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_init_128(ctx, key, n, ctr); /* build the 16-vector state from key, nonce and starting counter */
+ uint32_t rem = len % (uint32_t)256U; /* bytes left over after the full 256-byte chunks */
+ uint32_t nb = len / (uint32_t)256U; /* number of full 256-byte chunks */
+ uint32_t rem1 = len % (uint32_t)256U; /* same value as rem; only used for the tail test below */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* full chunks: read directly from text, write directly to out */
+ uint8_t *uu____0 = out + i * (uint32_t)256U; /* destination of chunk i */
+ uint8_t *uu____1 = text + i * (uint32_t)256U; /* source of chunk i */
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_core_128(k, ctx, i); /* k now holds the core output for chunk index i */
+ Lib_IntVector_Intrinsics_vec128 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec128 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec128 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec128 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec128 /* rearrangement, group 0: st0..st3 -> v0..v3 via 32-bit then 64-bit interleaves (presumably a 4x4 transpose of 32-bit lanes — confirm in libintvector.h) */
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0__0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v2__0 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v1__0 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v3__0 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__0;
+ Lib_IntVector_Intrinsics_vec128 /* group 1: st4..st7 -> v4..v7, same interleave sequence */
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__2;
+ Lib_IntVector_Intrinsics_vec128 /* group 2: st8..st11 -> v8..v11 */
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3;
+ Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3;
+ Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3;
+ Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3;
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__4;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__4;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__4;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__4;
+ Lib_IntVector_Intrinsics_vec128 /* group 3: st12..st15 -> v12..v15 */
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5;
+ Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5;
+ Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5;
+ Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5;
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__6;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__6;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__6;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__6;
+ k[0U] = v0; /* write back with groups interleaved: k[4*r + g] = v[4*g + r] for g, r in 0..3 */
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i0, /* for each of the 16 vectors: load 16 input bytes, XOR with k[i0], store 16 output bytes */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]);
+ Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y););
+ }
+ if (rem1 > (uint32_t)0U) { /* tail: between 1 and 255 bytes remain after the full chunks */
+ uint8_t *uu____2 = out + nb * (uint32_t)256U; /* destination of the tail */
+ uint8_t *uu____3 = text + nb * (uint32_t)256U; /* source of the tail */
+ uint8_t plain[256U] = { 0U }; /* zero-filled staging buffer so the full 16-vector path can be reused */
+ memcpy(plain, uu____3, rem * sizeof(uint8_t)); /* only the rem real input bytes are copied in */
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_core_128(k, ctx, nb); /* chunk index continues right after the last full chunk */
+ Lib_IntVector_Intrinsics_vec128 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec128 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec128 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec128 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec128 /* same four-group rearrangement as in the loop body; group 0: st0..st3 -> v0..v3 */
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0__0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v2__0 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v1__0 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v3__0 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__0;
+ Lib_IntVector_Intrinsics_vec128 /* group 1: st4..st7 -> v4..v7 */
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__2;
+ Lib_IntVector_Intrinsics_vec128 /* group 2: st8..st11 -> v8..v11 */
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3;
+ Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3;
+ Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3;
+ Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3;
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__4;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__4;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__4;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__4;
+ Lib_IntVector_Intrinsics_vec128 /* group 3: st12..st15 -> v12..v15 */
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5;
+ Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5;
+ Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5;
+ Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5;
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__6;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__6;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__6;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__6;
+ k[0U] = v0; /* write back with groups interleaved: k[4*r + g] = v[4*g + r] for g, r in 0..3 */
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i, /* XOR all 256 staging bytes with k in place, 16 bytes per step */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]);
+ Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y););
+ memcpy(uu____2, plain, rem * sizeof(uint8_t)); /* emit only the rem bytes that correspond to real input */
+ }
+}
+
+void
+Hacl_Chacha20_Vec128_chacha20_decrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_init_128(ctx, key, n, ctr);
+ uint32_t rem = len % (uint32_t)256U;
+ uint32_t nb = len / (uint32_t)256U;
+ uint32_t rem1 = len % (uint32_t)256U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *uu____0 = out + i * (uint32_t)256U;
+ uint8_t *uu____1 = cipher + i * (uint32_t)256U;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_core_128(k, ctx, i);
+ Lib_IntVector_Intrinsics_vec128 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec128 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec128 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec128 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0__0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v2__0 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v1__0 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v3__0 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__0;
+ Lib_IntVector_Intrinsics_vec128
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__2;
+ Lib_IntVector_Intrinsics_vec128
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3;
+ Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3;
+ Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3;
+ Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3;
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__4;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__4;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__4;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__4;
+ Lib_IntVector_Intrinsics_vec128
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5;
+ Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5;
+ Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5;
+ Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5;
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__6;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__6;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__6;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__6;
+ k[0U] = v0;
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i0,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]);
+ Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y););
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *uu____2 = out + nb * (uint32_t)256U;
+ uint8_t *uu____3 = cipher + nb * (uint32_t)256U;
+ uint8_t plain[256U] = { 0U };
+ memcpy(plain, uu____3, rem * sizeof(uint8_t));
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U };
+ chacha20_core_128(k, ctx, nb);
+ Lib_IntVector_Intrinsics_vec128 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec128 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec128 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec128 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0__0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v2__0 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v1__0 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v3__0 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__0;
+ Lib_IntVector_Intrinsics_vec128
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__2;
+ Lib_IntVector_Intrinsics_vec128
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11);
+ Lib_IntVector_Intrinsics_vec128
+ v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3;
+ Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3;
+ Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3;
+ Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3;
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__4;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__4;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__4;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__4;
+ Lib_IntVector_Intrinsics_vec128
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15);
+ Lib_IntVector_Intrinsics_vec128
+ v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5;
+ Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5;
+ Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5;
+ Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5;
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__6;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__6;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__6;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__6;
+ k[0U] = v0;
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]);
+ Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y););
+ memcpy(uu____2, plain, rem * sizeof(uint8_t));
+ }
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h
new file mode 100644
index 0000000000..192fec9294
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h
@@ -0,0 +1,60 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20_Vec128_H
+#define __Hacl_Chacha20_Vec128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+/*
+ * Encryption entry point of the Vec128 ChaCha20 implementation.
+ * Only the declaration is given here; the definition is in the matching .c file.
+ *
+ * NOTE(review): the parameter descriptions below are inferred from the
+ * parameter names and from the shape of the sibling Vec256 code; confirm them
+ * against the definition before relying on them.
+ *   len  - presumably the number of bytes to process
+ *   out  - presumably the destination buffer, at least `len` bytes
+ *   text - presumably the plaintext input, `len` bytes
+ *   key  - presumably the 32-byte ChaCha20 key
+ *   n    - presumably the 12-byte nonce
+ *   ctr  - presumably the initial 32-bit block counter
+ */
+void
+Hacl_Chacha20_Vec128_chacha20_encrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+/*
+ * Decryption entry point of the Vec128 ChaCha20 implementation.
+ * Takes the same parameter list as the encrypt function above, with the input
+ * buffer named `cipher` instead of `text`.
+ *
+ * NOTE(review): the parameter descriptions below are inferred from the
+ * parameter names; confirm them against the definition.
+ *   len    - presumably the number of bytes to process
+ *   out    - presumably the destination buffer, at least `len` bytes
+ *   cipher - presumably the ciphertext input, `len` bytes
+ *   key    - presumably the 32-byte ChaCha20 key
+ *   n      - presumably the 12-byte nonce
+ *   ctr    - presumably the initial 32-bit block counter
+ */
+void
+Hacl_Chacha20_Vec128_chacha20_decrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20_Vec128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c
new file mode 100644
index 0000000000..e184598e4a
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c
@@ -0,0 +1,1209 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20_Vec256.h"
+
+#include "internal/Hacl_Chacha20.h"
+#include "libintvector.h"
+
+/*
+ * ChaCha quarter round over the state rows st[a], st[b], st[c], st[d].
+ * The four indices must be pairwise distinct. Each step is add32, then xor,
+ * then rotate_left32 with the constants 16, 12, 8 and 7 in that order.
+ */
+static inline void
+quarter_round_256(
+    Lib_IntVector_Intrinsics_vec256 *st,
+    uint32_t a,
+    uint32_t b,
+    uint32_t c,
+    uint32_t d)
+{
+    Lib_IntVector_Intrinsics_vec256 mix;
+
+    st[a] = Lib_IntVector_Intrinsics_vec256_add32(st[a], st[b]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[d], st[a]);
+    st[d] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)16U);
+
+    st[c] = Lib_IntVector_Intrinsics_vec256_add32(st[c], st[d]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[b], st[c]);
+    st[b] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)12U);
+
+    st[a] = Lib_IntVector_Intrinsics_vec256_add32(st[a], st[b]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[d], st[a]);
+    st[d] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)8U);
+
+    st[c] = Lib_IntVector_Intrinsics_vec256_add32(st[c], st[d]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[b], st[c]);
+    st[b] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)7U);
+}
+
+/*
+ * One ChaCha double round on the 16-row vector state `st`, updated in place:
+ * four quarter rounds down the columns followed by four along the diagonals.
+ * The order of the eight quarter rounds is significant and must be kept.
+ */
+static inline void
+double_round_256(Lib_IntVector_Intrinsics_vec256 *st)
+{
+    /* Column round. */
+    quarter_round_256(st, 0U, 4U, 8U, 12U);
+    quarter_round_256(st, 1U, 5U, 9U, 13U);
+    quarter_round_256(st, 2U, 6U, 10U, 14U);
+    quarter_round_256(st, 3U, 7U, 11U, 15U);
+
+    /* Diagonal round. */
+    quarter_round_256(st, 0U, 5U, 10U, 15U);
+    quarter_round_256(st, 1U, 6U, 11U, 12U);
+    quarter_round_256(st, 2U, 7U, 8U, 13U);
+    quarter_round_256(st, 3U, 4U, 9U, 14U);
+}
+
+/*
+ * Computes one batch of ChaCha20 key-stream state into `k` from the prepared
+ * context `ctx`, which is not modified.
+ *
+ *   k   - output, 16 vec256 rows; overwritten
+ *   ctx - input state, 16 vec256 rows
+ *   ctr - batch index; row 12 is advanced by 8 * ctr before the rounds and
+ *         again after the feed-forward addition
+ */
+static inline void
+chacha20_core_256(
+    Lib_IntVector_Intrinsics_vec256 *k,
+    Lib_IntVector_Intrinsics_vec256 *ctx,
+    uint32_t ctr)
+{
+    /* Vector form of the counter offset for this batch. */
+    Lib_IntVector_Intrinsics_vec256
+        batch_offset = Lib_IntVector_Intrinsics_vec256_load32((uint32_t)8U * ctr);
+
+    /* Work on a private copy of the context with row 12 moved to this batch. */
+    memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec256));
+    k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], batch_offset);
+
+    /* Ten double rounds. */
+    for (uint32_t round = (uint32_t)0U; round < (uint32_t)10U; round++) {
+        double_round_256(k);
+    }
+
+    /* Feed-forward: add the original context back, row by row. */
+    for (uint32_t row = (uint32_t)0U; row < (uint32_t)16U; row++) {
+        k[row] = Lib_IntVector_Intrinsics_vec256_add32(k[row], ctx[row]);
+    }
+
+    /* ctx[12] lacks the batch offset, so it is added once more here. */
+    k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], batch_offset);
+}
+
+/*
+ * Builds the 16-row vector context `ctx` from a key, nonce and counter.
+ *
+ * The scalar state is laid out as:
+ *   words 0..3   - Hacl_Impl_Chacha20_Vec_chacha20_constants
+ *   words 4..11  - eight little-endian 32-bit words read from `k`
+ *   word  12     - `ctr`
+ *   words 13..15 - three little-endian 32-bit words read from `n`
+ * Each word is then turned into a vec256 with vec256_load32, and the vector
+ * (0, 1, ..., 7) is added to row 12.
+ */
+static inline void
+chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
+{
+    uint32_t state[16U] = { 0U };
+
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)4U; w++) {
+        state[w] = Hacl_Impl_Chacha20_Vec_chacha20_constants[w];
+    }
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)8U; w++) {
+        uint8_t *key_bytes = k + w * (uint32_t)4U;
+        state[(uint32_t)4U + w] = load32_le(key_bytes);
+    }
+    state[12U] = ctr;
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)3U; w++) {
+        uint8_t *nonce_bytes = n + w * (uint32_t)4U;
+        state[(uint32_t)13U + w] = load32_le(nonce_bytes);
+    }
+
+    /* Convert every scalar state word into its vec256 row. */
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)16U; w++) {
+        ctx[w] = Lib_IntVector_Intrinsics_vec256_load32(state[w]);
+    }
+
+    /* Add the per-lane values 0..7 to the counter row. */
+    Lib_IntVector_Intrinsics_vec256
+        lane_offsets =
+            Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U,
+                                                    (uint32_t)1U,
+                                                    (uint32_t)2U,
+                                                    (uint32_t)3U,
+                                                    (uint32_t)4U,
+                                                    (uint32_t)5U,
+                                                    (uint32_t)6U,
+                                                    (uint32_t)7U);
+    ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(ctx[12U], lane_offsets);
+}
+
+void
+Hacl_Chacha20_Vec256_chacha20_encrypt_256(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U };
+ chacha20_init_256(ctx, key, n, ctr);
+ uint32_t rem = len % (uint32_t)512U;
+ uint32_t nb = len / (uint32_t)512U;
+ uint32_t rem1 = len % (uint32_t)512U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *uu____0 = out + i * (uint32_t)512U;
+ uint8_t *uu____1 = text + i * (uint32_t)512U;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U };
+ chacha20_core_256(k, ctx, i);
+ Lib_IntVector_Intrinsics_vec256 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec256 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec256 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec256 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec256 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec256 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec256 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec256 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec256 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec256 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec256 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec256 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec256 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec256 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec256 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec256 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec256 v00 = st0;
+ Lib_IntVector_Intrinsics_vec256 v16 = st1;
+ Lib_IntVector_Intrinsics_vec256 v20 = st2;
+ Lib_IntVector_Intrinsics_vec256 v30 = st3;
+ Lib_IntVector_Intrinsics_vec256 v40 = st4;
+ Lib_IntVector_Intrinsics_vec256 v50 = st5;
+ Lib_IntVector_Intrinsics_vec256 v60 = st6;
+ Lib_IntVector_Intrinsics_vec256 v70 = st7;
+ Lib_IntVector_Intrinsics_vec256
+ v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256
+ v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256 v0_0 = v0_;
+ Lib_IntVector_Intrinsics_vec256 v1_0 = v1_;
+ Lib_IntVector_Intrinsics_vec256 v2_0 = v2_;
+ Lib_IntVector_Intrinsics_vec256 v3_0 = v3_;
+ Lib_IntVector_Intrinsics_vec256 v4_0 = v4_;
+ Lib_IntVector_Intrinsics_vec256 v5_0 = v5_;
+ Lib_IntVector_Intrinsics_vec256 v6_0 = v6_;
+ Lib_IntVector_Intrinsics_vec256 v7_0 = v7_;
+ Lib_IntVector_Intrinsics_vec256
+ v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256
+ v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1;
+ Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1;
+ Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1;
+ Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1;
+ Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1;
+ Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1;
+ Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1;
+ Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1;
+ Lib_IntVector_Intrinsics_vec256
+ v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256
+ v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2;
+ Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2;
+ Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2;
+ Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2;
+ Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2;
+ Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2;
+ Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2;
+ Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2;
+ Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20;
+ Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20;
+ Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20;
+ Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20;
+ Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20;
+ Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20;
+ Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20;
+ Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20;
+ Lib_IntVector_Intrinsics_vec256 v0 = v0_3;
+ Lib_IntVector_Intrinsics_vec256 v1 = v2_3;
+ Lib_IntVector_Intrinsics_vec256 v2 = v1_3;
+ Lib_IntVector_Intrinsics_vec256 v3 = v3_3;
+ Lib_IntVector_Intrinsics_vec256 v4 = v4_3;
+ Lib_IntVector_Intrinsics_vec256 v5 = v6_3;
+ Lib_IntVector_Intrinsics_vec256 v6 = v5_3;
+ Lib_IntVector_Intrinsics_vec256 v7 = v7_3;
+ Lib_IntVector_Intrinsics_vec256 v01 = st8;
+ Lib_IntVector_Intrinsics_vec256 v110 = st9;
+ Lib_IntVector_Intrinsics_vec256 v21 = st10;
+ Lib_IntVector_Intrinsics_vec256 v31 = st11;
+ Lib_IntVector_Intrinsics_vec256 v41 = st12;
+ Lib_IntVector_Intrinsics_vec256 v51 = st13;
+ Lib_IntVector_Intrinsics_vec256 v61 = st14;
+ Lib_IntVector_Intrinsics_vec256 v71 = st15;
+ Lib_IntVector_Intrinsics_vec256
+ v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256
+ v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4;
+ Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4;
+ Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4;
+ Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4;
+ Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4;
+ Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4;
+ Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4;
+ Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4;
+ Lib_IntVector_Intrinsics_vec256
+ v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256
+ v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11;
+ Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11;
+ Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11;
+ Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11;
+ Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11;
+ Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11;
+ Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11;
+ Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11;
+ Lib_IntVector_Intrinsics_vec256
+ v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256
+ v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21;
+ Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21;
+ Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21;
+ Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21;
+ Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21;
+ Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21;
+ Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21;
+ Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21;
+ Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22;
+ Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22;
+ Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22;
+ Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22;
+ Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22;
+ Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22;
+ Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22;
+ Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22;
+ Lib_IntVector_Intrinsics_vec256 v8 = v0_6;
+ Lib_IntVector_Intrinsics_vec256 v9 = v2_6;
+ Lib_IntVector_Intrinsics_vec256 v10 = v1_6;
+ Lib_IntVector_Intrinsics_vec256 v11 = v3_6;
+ Lib_IntVector_Intrinsics_vec256 v12 = v4_6;
+ Lib_IntVector_Intrinsics_vec256 v13 = v6_6;
+ Lib_IntVector_Intrinsics_vec256 v14 = v5_6;
+ Lib_IntVector_Intrinsics_vec256 v15 = v7_6;
+ k[0U] = v0;
+ k[1U] = v8;
+ k[2U] = v1;
+ k[3U] = v9;
+ k[4U] = v2;
+ k[5U] = v10;
+ k[6U] = v3;
+ k[7U] = v11;
+ k[8U] = v4;
+ k[9U] = v12;
+ k[10U] = v5;
+ k[11U] = v13;
+ k[12U] = v6;
+ k[13U] = v14;
+ k[14U] = v7;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i0,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec256
+ x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]);
+ Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y););
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *uu____2 = out + nb * (uint32_t)512U;
+ uint8_t *uu____3 = text + nb * (uint32_t)512U;
+ uint8_t plain[512U] = { 0U };
+ memcpy(plain, uu____3, rem * sizeof(uint8_t));
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U };
+ chacha20_core_256(k, ctx, nb);
+ Lib_IntVector_Intrinsics_vec256 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec256 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec256 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec256 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec256 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec256 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec256 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec256 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec256 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec256 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec256 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec256 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec256 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec256 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec256 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec256 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec256 v00 = st0;
+ Lib_IntVector_Intrinsics_vec256 v16 = st1;
+ Lib_IntVector_Intrinsics_vec256 v20 = st2;
+ Lib_IntVector_Intrinsics_vec256 v30 = st3;
+ Lib_IntVector_Intrinsics_vec256 v40 = st4;
+ Lib_IntVector_Intrinsics_vec256 v50 = st5;
+ Lib_IntVector_Intrinsics_vec256 v60 = st6;
+ Lib_IntVector_Intrinsics_vec256 v70 = st7;
+ Lib_IntVector_Intrinsics_vec256
+ v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256
+ v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256 v0_0 = v0_;
+ Lib_IntVector_Intrinsics_vec256 v1_0 = v1_;
+ Lib_IntVector_Intrinsics_vec256 v2_0 = v2_;
+ Lib_IntVector_Intrinsics_vec256 v3_0 = v3_;
+ Lib_IntVector_Intrinsics_vec256 v4_0 = v4_;
+ Lib_IntVector_Intrinsics_vec256 v5_0 = v5_;
+ Lib_IntVector_Intrinsics_vec256 v6_0 = v6_;
+ Lib_IntVector_Intrinsics_vec256 v7_0 = v7_;
+ Lib_IntVector_Intrinsics_vec256
+ v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256
+ v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1;
+ Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1;
+ Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1;
+ Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1;
+ Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1;
+ Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1;
+ Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1;
+ Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1;
+ Lib_IntVector_Intrinsics_vec256
+ v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256
+ v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2;
+ Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2;
+ Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2;
+ Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2;
+ Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2;
+ Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2;
+ Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2;
+ Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2;
+ Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20;
+ Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20;
+ Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20;
+ Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20;
+ Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20;
+ Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20;
+ Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20;
+ Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20;
+ Lib_IntVector_Intrinsics_vec256 v0 = v0_3;
+ Lib_IntVector_Intrinsics_vec256 v1 = v2_3;
+ Lib_IntVector_Intrinsics_vec256 v2 = v1_3;
+ Lib_IntVector_Intrinsics_vec256 v3 = v3_3;
+ Lib_IntVector_Intrinsics_vec256 v4 = v4_3;
+ Lib_IntVector_Intrinsics_vec256 v5 = v6_3;
+ Lib_IntVector_Intrinsics_vec256 v6 = v5_3;
+ Lib_IntVector_Intrinsics_vec256 v7 = v7_3;
+ Lib_IntVector_Intrinsics_vec256 v01 = st8;
+ Lib_IntVector_Intrinsics_vec256 v110 = st9;
+ Lib_IntVector_Intrinsics_vec256 v21 = st10;
+ Lib_IntVector_Intrinsics_vec256 v31 = st11;
+ Lib_IntVector_Intrinsics_vec256 v41 = st12;
+ Lib_IntVector_Intrinsics_vec256 v51 = st13;
+ Lib_IntVector_Intrinsics_vec256 v61 = st14;
+ Lib_IntVector_Intrinsics_vec256 v71 = st15;
+ Lib_IntVector_Intrinsics_vec256
+ v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256
+ v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4;
+ Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4;
+ Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4;
+ Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4;
+ Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4;
+ Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4;
+ Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4;
+ Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4;
+ Lib_IntVector_Intrinsics_vec256
+ v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256
+ v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11;
+ Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11;
+ Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11;
+ Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11;
+ Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11;
+ Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11;
+ Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11;
+ Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11;
+ Lib_IntVector_Intrinsics_vec256
+ v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256
+ v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21;
+ Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21;
+ Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21;
+ Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21;
+ Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21;
+ Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21;
+ Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21;
+ Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21;
+ Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22;
+ Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22;
+ Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22;
+ Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22;
+ Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22;
+ Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22;
+ Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22;
+ Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22;
+ Lib_IntVector_Intrinsics_vec256 v8 = v0_6;
+ Lib_IntVector_Intrinsics_vec256 v9 = v2_6;
+ Lib_IntVector_Intrinsics_vec256 v10 = v1_6;
+ Lib_IntVector_Intrinsics_vec256 v11 = v3_6;
+ Lib_IntVector_Intrinsics_vec256 v12 = v4_6;
+ Lib_IntVector_Intrinsics_vec256 v13 = v6_6;
+ Lib_IntVector_Intrinsics_vec256 v14 = v5_6;
+ Lib_IntVector_Intrinsics_vec256 v15 = v7_6;
+ k[0U] = v0;
+ k[1U] = v8;
+ k[2U] = v1;
+ k[3U] = v9;
+ k[4U] = v2;
+ k[5U] = v10;
+ k[6U] = v3;
+ k[7U] = v11;
+ k[8U] = v4;
+ k[9U] = v12;
+ k[10U] = v5;
+ k[11U] = v13;
+ k[12U] = v6;
+ k[13U] = v14;
+ k[14U] = v7;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i,
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec256
+ x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]);
+ Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y););
+ memcpy(uu____2, plain, rem * sizeof(uint8_t));
+ }
+}
+
+void
+Hacl_Chacha20_Vec256_chacha20_decrypt_256( /* XORs len bytes of cipher with blocks produced by chacha20_core_256 and writes the result to out, 512 bytes at a time. */
+ uint32_t len, /* number of bytes read from cipher and written to out */
+ uint8_t *out, /* destination buffer; receives len bytes */
+ uint8_t *cipher, /* input buffer; len bytes are read */
+ uint8_t *key, /* passed unchanged to chacha20_init_256 (defined outside this view) */
+ uint8_t *n, /* passed unchanged to chacha20_init_256; presumably the nonce -- TODO confirm */
+ uint32_t ctr) /* passed unchanged to chacha20_init_256; presumably the initial block counter -- TODO confirm */
+{
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; /* 16-vector state, filled in by chacha20_init_256 on the next line */
+ chacha20_init_256(ctx, key, n, ctr);
+ uint32_t rem = len % (uint32_t)512U; /* byte count of the trailing partial chunk (0 when len is a multiple of 512) */
+ uint32_t nb = len / (uint32_t)512U; /* number of complete 512-byte chunks */
+ uint32_t rem1 = len % (uint32_t)512U; /* same value as rem; only used for the tail test after the loop */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* full chunks: chunk i covers bytes [512*i, 512*i + 512) */
+ uint8_t *uu____0 = out + i * (uint32_t)512U; /* where this chunk's output goes */
+ uint8_t *uu____1 = cipher + i * (uint32_t)512U; /* where this chunk's input comes from */
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U };
+ chacha20_core_256(k, ctx, i); /* fills k from ctx for chunk index i (helper defined outside this view) */
+ Lib_IntVector_Intrinsics_vec256 st0 = k[0U];
+ Lib_IntVector_Intrinsics_vec256 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec256 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec256 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec256 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec256 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec256 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec256 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec256 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec256 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec256 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec256 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec256 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec256 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec256 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec256 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec256 v00 = st0; /* first half: rearrange k[0..7] with 32-bit, then 64-bit, then 128-bit interleaves */
+ Lib_IntVector_Intrinsics_vec256 v16 = st1;
+ Lib_IntVector_Intrinsics_vec256 v20 = st2;
+ Lib_IntVector_Intrinsics_vec256 v30 = st3;
+ Lib_IntVector_Intrinsics_vec256 v40 = st4;
+ Lib_IntVector_Intrinsics_vec256 v50 = st5;
+ Lib_IntVector_Intrinsics_vec256 v60 = st6;
+ Lib_IntVector_Intrinsics_vec256 v70 = st7;
+ Lib_IntVector_Intrinsics_vec256
+ v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256
+ v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256 v0_0 = v0_;
+ Lib_IntVector_Intrinsics_vec256 v1_0 = v1_;
+ Lib_IntVector_Intrinsics_vec256 v2_0 = v2_;
+ Lib_IntVector_Intrinsics_vec256 v3_0 = v3_;
+ Lib_IntVector_Intrinsics_vec256 v4_0 = v4_;
+ Lib_IntVector_Intrinsics_vec256 v5_0 = v5_;
+ Lib_IntVector_Intrinsics_vec256 v6_0 = v6_;
+ Lib_IntVector_Intrinsics_vec256 v7_0 = v7_;
+ Lib_IntVector_Intrinsics_vec256
+ v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256
+ v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1;
+ Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1;
+ Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1;
+ Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1;
+ Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1;
+ Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1;
+ Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1;
+ Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1;
+ Lib_IntVector_Intrinsics_vec256
+ v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256
+ v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2;
+ Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2;
+ Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2;
+ Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2;
+ Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2;
+ Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2;
+ Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2;
+ Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2;
+ Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20;
+ Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20;
+ Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20;
+ Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20;
+ Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20;
+ Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20;
+ Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20;
+ Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20;
+ Lib_IntVector_Intrinsics_vec256 v0 = v0_3; /* final order deliberately swaps two pairs: v1 takes v2_3, v2 takes v1_3, v5 takes v6_3, v6 takes v5_3 */
+ Lib_IntVector_Intrinsics_vec256 v1 = v2_3;
+ Lib_IntVector_Intrinsics_vec256 v2 = v1_3;
+ Lib_IntVector_Intrinsics_vec256 v3 = v3_3;
+ Lib_IntVector_Intrinsics_vec256 v4 = v4_3;
+ Lib_IntVector_Intrinsics_vec256 v5 = v6_3;
+ Lib_IntVector_Intrinsics_vec256 v6 = v5_3;
+ Lib_IntVector_Intrinsics_vec256 v7 = v7_3;
+ Lib_IntVector_Intrinsics_vec256 v01 = st8; /* second half: the same rearrangement applied to k[8..15] */
+ Lib_IntVector_Intrinsics_vec256 v110 = st9;
+ Lib_IntVector_Intrinsics_vec256 v21 = st10;
+ Lib_IntVector_Intrinsics_vec256 v31 = st11;
+ Lib_IntVector_Intrinsics_vec256 v41 = st12;
+ Lib_IntVector_Intrinsics_vec256 v51 = st13;
+ Lib_IntVector_Intrinsics_vec256 v61 = st14;
+ Lib_IntVector_Intrinsics_vec256 v71 = st15;
+ Lib_IntVector_Intrinsics_vec256
+ v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256
+ v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4;
+ Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4;
+ Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4;
+ Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4;
+ Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4;
+ Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4;
+ Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4;
+ Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4;
+ Lib_IntVector_Intrinsics_vec256
+ v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256
+ v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11;
+ Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11;
+ Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11;
+ Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11;
+ Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11;
+ Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11;
+ Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11;
+ Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11;
+ Lib_IntVector_Intrinsics_vec256
+ v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256
+ v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21;
+ Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21;
+ Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21;
+ Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21;
+ Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21;
+ Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21;
+ Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21;
+ Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21;
+ Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22;
+ Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22;
+ Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22;
+ Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22;
+ Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22;
+ Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22;
+ Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22;
+ Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22;
+ Lib_IntVector_Intrinsics_vec256 v8 = v0_6; /* same pair swap as the first half: v9 takes v2_6, v10 takes v1_6, v13 takes v6_6, v14 takes v5_6 */
+ Lib_IntVector_Intrinsics_vec256 v9 = v2_6;
+ Lib_IntVector_Intrinsics_vec256 v10 = v1_6;
+ Lib_IntVector_Intrinsics_vec256 v11 = v3_6;
+ Lib_IntVector_Intrinsics_vec256 v12 = v4_6;
+ Lib_IntVector_Intrinsics_vec256 v13 = v6_6;
+ Lib_IntVector_Intrinsics_vec256 v14 = v5_6;
+ Lib_IntVector_Intrinsics_vec256 v15 = v7_6;
+ k[0U] = v0; /* write back to k, alternating first-half (v0..v7) and second-half (v8..v15) results */
+ k[1U] = v8;
+ k[2U] = v1;
+ k[3U] = v9;
+ k[4U] = v2;
+ k[5U] = v10;
+ k[6U] = v3;
+ k[7U] = v11;
+ k[8U] = v4;
+ k[9U] = v12;
+ k[10U] = v5;
+ k[11U] = v13;
+ k[12U] = v6;
+ k[13U] = v14;
+ k[14U] = v7;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i0, /* macro defined elsewhere; its arguments read as i0 from 0 up to 16 in steps of 1: XOR each 32-byte slice of the input chunk with k[i0] and store it to the output chunk */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec256
+ x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]);
+ Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y););
+ }
+ if (rem1 > (uint32_t)0U) { /* tail: between 1 and 511 bytes remain after the full chunks */
+ uint8_t *uu____2 = out + nb * (uint32_t)512U;
+ uint8_t *uu____3 = cipher + nb * (uint32_t)512U;
+ uint8_t plain[512U] = { 0U }; /* zero-filled scratch block; holds the input tail first, then the XORed result */
+ memcpy(plain, uu____3, rem * sizeof(uint8_t)); /* only rem bytes are real input; the rest of plain stays zero */
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U };
+ chacha20_core_256(k, ctx, nb); /* chunk index nb is the one right after the last full chunk */
+ Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; /* from here to the k[15U] write-back: same rearrangement of k as in the loop body above */
+ Lib_IntVector_Intrinsics_vec256 st1 = k[1U];
+ Lib_IntVector_Intrinsics_vec256 st2 = k[2U];
+ Lib_IntVector_Intrinsics_vec256 st3 = k[3U];
+ Lib_IntVector_Intrinsics_vec256 st4 = k[4U];
+ Lib_IntVector_Intrinsics_vec256 st5 = k[5U];
+ Lib_IntVector_Intrinsics_vec256 st6 = k[6U];
+ Lib_IntVector_Intrinsics_vec256 st7 = k[7U];
+ Lib_IntVector_Intrinsics_vec256 st8 = k[8U];
+ Lib_IntVector_Intrinsics_vec256 st9 = k[9U];
+ Lib_IntVector_Intrinsics_vec256 st10 = k[10U];
+ Lib_IntVector_Intrinsics_vec256 st11 = k[11U];
+ Lib_IntVector_Intrinsics_vec256 st12 = k[12U];
+ Lib_IntVector_Intrinsics_vec256 st13 = k[13U];
+ Lib_IntVector_Intrinsics_vec256 st14 = k[14U];
+ Lib_IntVector_Intrinsics_vec256 st15 = k[15U];
+ Lib_IntVector_Intrinsics_vec256 v00 = st0;
+ Lib_IntVector_Intrinsics_vec256 v16 = st1;
+ Lib_IntVector_Intrinsics_vec256 v20 = st2;
+ Lib_IntVector_Intrinsics_vec256 v30 = st3;
+ Lib_IntVector_Intrinsics_vec256 v40 = st4;
+ Lib_IntVector_Intrinsics_vec256 v50 = st5;
+ Lib_IntVector_Intrinsics_vec256 v60 = st6;
+ Lib_IntVector_Intrinsics_vec256 v70 = st7;
+ Lib_IntVector_Intrinsics_vec256
+ v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec256
+ v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec256
+ v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
+ Lib_IntVector_Intrinsics_vec256
+ v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256
+ v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
+ Lib_IntVector_Intrinsics_vec256 v0_0 = v0_;
+ Lib_IntVector_Intrinsics_vec256 v1_0 = v1_;
+ Lib_IntVector_Intrinsics_vec256 v2_0 = v2_;
+ Lib_IntVector_Intrinsics_vec256 v3_0 = v3_;
+ Lib_IntVector_Intrinsics_vec256 v4_0 = v4_;
+ Lib_IntVector_Intrinsics_vec256 v5_0 = v5_;
+ Lib_IntVector_Intrinsics_vec256 v6_0 = v6_;
+ Lib_IntVector_Intrinsics_vec256 v7_0 = v7_;
+ Lib_IntVector_Intrinsics_vec256
+ v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec256
+ v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec256
+ v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
+ Lib_IntVector_Intrinsics_vec256
+ v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256
+ v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
+ Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1;
+ Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1;
+ Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1;
+ Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1;
+ Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1;
+ Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1;
+ Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1;
+ Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1;
+ Lib_IntVector_Intrinsics_vec256
+ v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10);
+ Lib_IntVector_Intrinsics_vec256
+ v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10);
+ Lib_IntVector_Intrinsics_vec256
+ v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10);
+ Lib_IntVector_Intrinsics_vec256
+ v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256
+ v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10);
+ Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2;
+ Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2;
+ Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2;
+ Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2;
+ Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2;
+ Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2;
+ Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2;
+ Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2;
+ Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20;
+ Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20;
+ Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20;
+ Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20;
+ Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20;
+ Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20;
+ Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20;
+ Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20;
+ Lib_IntVector_Intrinsics_vec256 v0 = v0_3;
+ Lib_IntVector_Intrinsics_vec256 v1 = v2_3;
+ Lib_IntVector_Intrinsics_vec256 v2 = v1_3;
+ Lib_IntVector_Intrinsics_vec256 v3 = v3_3;
+ Lib_IntVector_Intrinsics_vec256 v4 = v4_3;
+ Lib_IntVector_Intrinsics_vec256 v5 = v6_3;
+ Lib_IntVector_Intrinsics_vec256 v6 = v5_3;
+ Lib_IntVector_Intrinsics_vec256 v7 = v7_3;
+ Lib_IntVector_Intrinsics_vec256 v01 = st8;
+ Lib_IntVector_Intrinsics_vec256 v110 = st9;
+ Lib_IntVector_Intrinsics_vec256 v21 = st10;
+ Lib_IntVector_Intrinsics_vec256 v31 = st11;
+ Lib_IntVector_Intrinsics_vec256 v41 = st12;
+ Lib_IntVector_Intrinsics_vec256 v51 = st13;
+ Lib_IntVector_Intrinsics_vec256 v61 = st14;
+ Lib_IntVector_Intrinsics_vec256 v71 = st15;
+ Lib_IntVector_Intrinsics_vec256
+ v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
+ Lib_IntVector_Intrinsics_vec256
+ v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec256
+ v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
+ Lib_IntVector_Intrinsics_vec256
+ v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256
+ v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
+ Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4;
+ Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4;
+ Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4;
+ Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4;
+ Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4;
+ Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4;
+ Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4;
+ Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4;
+ Lib_IntVector_Intrinsics_vec256
+ v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5);
+ Lib_IntVector_Intrinsics_vec256
+ v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5);
+ Lib_IntVector_Intrinsics_vec256
+ v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5);
+ Lib_IntVector_Intrinsics_vec256
+ v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256
+ v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5);
+ Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11;
+ Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11;
+ Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11;
+ Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11;
+ Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11;
+ Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11;
+ Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11;
+ Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11;
+ Lib_IntVector_Intrinsics_vec256
+ v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12);
+ Lib_IntVector_Intrinsics_vec256
+ v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12);
+ Lib_IntVector_Intrinsics_vec256
+ v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12);
+ Lib_IntVector_Intrinsics_vec256
+ v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256
+ v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12);
+ Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21;
+ Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21;
+ Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21;
+ Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21;
+ Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21;
+ Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21;
+ Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21;
+ Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21;
+ Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22;
+ Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22;
+ Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22;
+ Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22;
+ Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22;
+ Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22;
+ Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22;
+ Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22;
+ Lib_IntVector_Intrinsics_vec256 v8 = v0_6;
+ Lib_IntVector_Intrinsics_vec256 v9 = v2_6;
+ Lib_IntVector_Intrinsics_vec256 v10 = v1_6;
+ Lib_IntVector_Intrinsics_vec256 v11 = v3_6;
+ Lib_IntVector_Intrinsics_vec256 v12 = v4_6;
+ Lib_IntVector_Intrinsics_vec256 v13 = v6_6;
+ Lib_IntVector_Intrinsics_vec256 v14 = v5_6;
+ Lib_IntVector_Intrinsics_vec256 v15 = v7_6;
+ k[0U] = v0;
+ k[1U] = v8;
+ k[2U] = v1;
+ k[3U] = v9;
+ k[4U] = v2;
+ k[5U] = v10;
+ k[6U] = v3;
+ k[7U] = v11;
+ k[8U] = v4;
+ k[9U] = v12;
+ k[10U] = v5;
+ k[11U] = v13;
+ k[12U] = v6;
+ k[13U] = v14;
+ k[14U] = v7;
+ k[15U] = v15;
+ KRML_MAYBE_FOR16(i, /* XOR the whole 512-byte scratch block with k in place, 32 bytes per step */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ Lib_IntVector_Intrinsics_vec256
+ x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]);
+ Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y););
+ memcpy(uu____2, plain, rem * sizeof(uint8_t)); /* copy out only the rem meaningful bytes */
+ }
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h
new file mode 100644
index 0000000000..5ee5f39262
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h
@@ -0,0 +1,60 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Chacha20_Vec256_H
+#define __Hacl_Chacha20_Vec256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+void
+Hacl_Chacha20_Vec256_chacha20_encrypt_256(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+void
+Hacl_Chacha20_Vec256_chacha20_decrypt_256(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Chacha20_Vec256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c
new file mode 100644
index 0000000000..a485d04c63
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c
@@ -0,0 +1,320 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Curve25519_51.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum25519_51.h"
+
+static const uint8_t g25519[32U] = { (uint8_t)9U }; /* byte 0 is 9, the remaining 31 bytes are implicitly zero; copied as the basepoint by Hacl_Curve25519_51_secret_to_public */
+
+static void /* Combined add-and-double step called by montgomery_ladder: q supplies x1 (its first 5 limbs), p01_tmp1 holds two points plus scratch, tmp2 is forwarded to the multiply/square helpers. Field helper semantics below are inferred from their names; they are defined elsewhere. */
+point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_uint128 *tmp2)
+{
+ uint64_t *nq = p01_tmp1; /* first point: limbs [0,10), x (5 limbs) then z (5 limbs) */
+ uint64_t *nq_p1 = p01_tmp1 + (uint32_t)10U; /* second point: limbs [10,20) */
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; /* scratch: limbs [20,40) */
+ uint64_t *x1 = q;
+ uint64_t *x2 = nq;
+ uint64_t *z2 = nq + (uint32_t)5U;
+ uint64_t *z3 = nq_p1 + (uint32_t)5U;
+ uint64_t *a = tmp1; /* scratch limbs [0,5) */
+ uint64_t *b = tmp1 + (uint32_t)5U; /* scratch limbs [5,10) */
+ uint64_t *ab = tmp1; /* a and b viewed as one 10-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)10U; /* d and c viewed as one 10-limb pair */
+ Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); /* a = x2 + z2 */
+ Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); /* b = x2 - z2 */
+ uint64_t *x3 = nq_p1;
+ uint64_t *z31 = nq_p1 + (uint32_t)5U; /* same address as z3 */
+ uint64_t *d0 = dc;
+ uint64_t *c0 = dc + (uint32_t)5U;
+ Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); /* c0 = x3 + z3 */
+ Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); /* d0 = x3 - z3 */
+ Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); /* NOTE(review): presumably two independent products (d0 * a, c0 * b) written back to dc - confirm against fmul2 */
+ Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); /* x3 = d0 + c0 */
+ Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); /* z3 = d0 - c0 */
+ uint64_t *a1 = tmp1; /* the pointers below re-alias the same scratch limbs as a, b, d0, c0 */
+ uint64_t *b1 = tmp1 + (uint32_t)5U;
+ uint64_t *d = tmp1 + (uint32_t)10U;
+ uint64_t *c = tmp1 + (uint32_t)15U;
+ uint64_t *ab1 = tmp1;
+ uint64_t *dc1 = tmp1 + (uint32_t)10U;
+ Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); /* NOTE(review): presumably (d, c) = (a^2, b^2) - confirm against fsqr2 */
+ Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); /* presumably squares x3 and z3 in place */
+ a1[0U] = c[0U]; /* copy the 5 limbs of c into a1 before c is overwritten below */
+ a1[1U] = c[1U];
+ a1[2U] = c[2U];
+ a1[3U] = c[3U];
+ a1[4U] = c[4U];
+ Hacl_Impl_Curve25519_Field51_fsub(c, d, c); /* c = d - c */
+ Hacl_Impl_Curve25519_Field51_fmul1(b1, c, (uint64_t)121665U); /* b1 = c * 121665; 121665 equals the a24 constant given in RFC 7748 */
+ Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); /* b1 = b1 + d */
+ Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); /* presumably (x2, z2) = (d * a1, c * b1): overwrites the first point */
+ Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); /* z3 = z3 * x1 */
+}
+
+static void /* Doubling step: updates the 10-limb point nq in place using 20 limbs of scratch at tmp1; tmp2 is forwarded to the multiply/square helpers. Field helper semantics are inferred from their names; they are defined elsewhere. */
+point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2)
+{
+ uint64_t *x2 = nq; /* limbs [0,5) of the point */
+ uint64_t *z2 = nq + (uint32_t)5U; /* limbs [5,10) of the point */
+ uint64_t *a = tmp1; /* scratch limbs [0,5) */
+ uint64_t *b = tmp1 + (uint32_t)5U; /* scratch limbs [5,10) */
+ uint64_t *d = tmp1 + (uint32_t)10U; /* scratch limbs [10,15) */
+ uint64_t *c = tmp1 + (uint32_t)15U; /* scratch limbs [15,20) */
+ uint64_t *ab = tmp1; /* a and b viewed as one 10-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)10U; /* d and c viewed as one 10-limb pair */
+ Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); /* a = x2 + z2 */
+ Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); /* b = x2 - z2 */
+ Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); /* NOTE(review): presumably (d, c) = (a^2, b^2) - confirm against fsqr2 */
+ a[0U] = c[0U]; /* copy the 5 limbs of c into a before c is overwritten below */
+ a[1U] = c[1U];
+ a[2U] = c[2U];
+ a[3U] = c[3U];
+ a[4U] = c[4U];
+ Hacl_Impl_Curve25519_Field51_fsub(c, d, c); /* c = d - c */
+ Hacl_Impl_Curve25519_Field51_fmul1(b, c, (uint64_t)121665U); /* b = c * 121665; 121665 equals the a24 constant given in RFC 7748 */
+ Hacl_Impl_Curve25519_Field51_fadd(b, b, d); /* b = b + d */
+ Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); /* presumably (x2, z2) = (d * a, c * b): writes the doubled point */
+}
+
+static void /* Runs the ladder over key bits 253..3 starting from the points (1 : 0) and init, applies three doublings, and copies the resulting 10-limb point to out. out is written only by the final memcpy, so it may alias init (the visible caller passes the same buffer). */
+montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init)
+{
+ FStar_UInt128_uint128 tmp2[10U]; /* wide buffer handed down to the point helpers */
+ for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i)
+ tmp2[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); /* zero-initialize tmp2 */
+ uint64_t p01_tmp1_swap[41U] = { 0U }; /* layout: [0,10) point 0, [10,20) point 1, [20,40) scratch, [40] pending swap bit */
+ uint64_t *p0 = p01_tmp1_swap;
+ uint64_t *p01 = p01_tmp1_swap;
+ uint64_t *p03 = p01;
+ uint64_t *p11 = p01 + (uint32_t)10U;
+ memcpy(p11, init, (uint32_t)10U * sizeof(uint64_t)); /* point 1 = init */
+ uint64_t *x0 = p03;
+ uint64_t *z0 = p03 + (uint32_t)5U;
+ x0[0U] = (uint64_t)1U; /* point 0 = (x = 1, z = 0) */
+ x0[1U] = (uint64_t)0U;
+ x0[2U] = (uint64_t)0U;
+ x0[3U] = (uint64_t)0U;
+ x0[4U] = (uint64_t)0U;
+ z0[0U] = (uint64_t)0U;
+ z0[1U] = (uint64_t)0U;
+ z0[2U] = (uint64_t)0U;
+ z0[3U] = (uint64_t)0U;
+ z0[4U] = (uint64_t)0U;
+ uint64_t *p01_tmp1 = p01_tmp1_swap;
+ uint64_t *p01_tmp11 = p01_tmp1_swap;
+ uint64_t *nq1 = p01_tmp1_swap;
+ uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U;
+ uint64_t *swap = p01_tmp1_swap + (uint32_t)40U;
+ Hacl_Impl_Curve25519_Field51_cswap2((uint64_t)1U, nq1, nq_p11); /* first step uses a constant bit of 1 instead of reading the key (cswap2 is defined elsewhere; presumably swaps the two points when the bit is 1) */
+ point_add_and_double(init, p01_tmp11, tmp2);
+ swap[0U] = (uint64_t)1U; /* remember the bit used for the step just performed */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { /* processes key bits 253 down to 3; bits 0..2, 254 and 255 are never read. NOTE(review): the constant first step and the three trailing doublings presumably stand in for X25519 scalar clamping - confirm */
+ uint64_t *p01_tmp12 = p01_tmp1_swap;
+ uint64_t *swap1 = p01_tmp1_swap + (uint32_t)40U;
+ uint64_t *nq2 = p01_tmp12;
+ uint64_t *nq_p12 = p01_tmp12 + (uint32_t)10U;
+ uint64_t
+ bit =
+ (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); /* bit number (253 - i) of key, least significant bit first within each byte */
+ uint64_t sw = swap1[0U] ^ bit; /* 1 only when this bit differs from the previous one */
+ Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12);
+ point_add_and_double(init, p01_tmp12, tmp2);
+ swap1[0U] = bit;
+ }
+ uint64_t sw = swap[0U]; /* last key bit processed */
+ Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); /* final conditional swap driven by the last bit */
+ uint64_t *nq10 = p01_tmp1;
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U;
+ point_double(nq10, tmp1, tmp2); /* three doublings of point 0 */
+ point_double(nq10, tmp1, tmp2);
+ point_double(nq10, tmp1, tmp2);
+ memcpy(out, p0, (uint32_t)10U * sizeof(uint64_t)); /* result = point 0 (x then z, 5 limbs each) */
+}
+
+void /* Applies fsqr n times in total: once from inp into o, then n - 1 times on o in place. Precondition: n >= 1. */
+Hacl_Curve25519_51_fsquare_times(
+ uint64_t *o,
+ uint64_t *inp,
+ FStar_UInt128_uint128 *tmp,
+ uint32_t n)
+{
+ Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); /* first squaring reads inp, writes o */
+ for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { /* n - 1 is unsigned: n == 0 would wrap to 0xFFFFFFFF iterations, so callers must pass n >= 1 */
+ Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp);
+ }
+}
+
+void /* Fixed square-and-multiply chain writing i^(2^255 - 21) to o; exponent annotations assume fsqr squares and fmul multiplies (helpers defined elsewhere). 2^255 - 21 = p - 2 for p = 2^255 - 19. */
+Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp)
+{
+ uint64_t t1[20U] = { 0U }; /* four 5-limb temporaries: a [0,5), b [5,10), c [10,15), t0 [15,20) */
+ uint64_t *a1 = t1;
+ uint64_t *b1 = t1 + (uint32_t)5U;
+ uint64_t *t010 = t1 + (uint32_t)15U;
+ FStar_UInt128_uint128 *tmp10 = tmp;
+ Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, (uint32_t)1U); /* a = i^2 */
+ Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)2U); /* t0 = i^8 */
+ Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); /* b = i^9 */
+ Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); /* a = i^11 */
+ Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)1U); /* t0 = i^22 */
+ Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); /* b = i^31 = i^(2^5 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, (uint32_t)5U); /* t0 = i^(2^10 - 2^5) */
+ Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); /* b = i^(2^10 - 1) */
+ uint64_t *b10 = t1 + (uint32_t)5U; /* re-aliases b */
+ uint64_t *c10 = t1 + (uint32_t)10U;
+ uint64_t *t011 = t1 + (uint32_t)15U; /* re-aliases t0 */
+ FStar_UInt128_uint128 *tmp11 = tmp;
+ Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)10U); /* t0 = i^(2^20 - 2^10) */
+ Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); /* c = i^(2^20 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, (uint32_t)20U); /* t0 = i^(2^40 - 2^20) */
+ Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); /* t0 = i^(2^40 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, (uint32_t)10U); /* t0 = i^(2^50 - 2^10) */
+ Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); /* b = i^(2^50 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)50U); /* t0 = i^(2^100 - 2^50) */
+ Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); /* c = i^(2^100 - 1) */
+ uint64_t *b11 = t1 + (uint32_t)5U; /* re-aliases b */
+ uint64_t *c1 = t1 + (uint32_t)10U; /* re-aliases c */
+ uint64_t *t01 = t1 + (uint32_t)15U; /* re-aliases t0 */
+ FStar_UInt128_uint128 *tmp1 = tmp;
+ Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, (uint32_t)100U); /* t0 = i^(2^200 - 2^100) */
+ Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); /* t0 = i^(2^200 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)50U); /* t0 = i^(2^250 - 2^50) */
+ Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); /* t0 = i^(2^250 - 1) */
+ Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)5U); /* t0 = i^(2^255 - 2^5) */
+ uint64_t *a = t1; /* still holds i^11 */
+ uint64_t *t0 = t1 + (uint32_t)15U;
+ Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); /* o = i^(2^255 - 32 + 11) = i^(2^255 - 21) */
+}
+
+static void /* Turns the 10-limb point i = (x, z) into 32 output bytes: computes x times the finv of z, packs it with store_felem, and writes four little-endian 64-bit words to o. */
+encode_point(uint8_t *o, uint64_t *i)
+{
+ uint64_t *x = i; /* limbs [0,5) */
+ uint64_t *z = i + (uint32_t)5U; /* limbs [5,10) */
+ uint64_t tmp[5U] = { 0U };
+ uint64_t u64s[4U] = { 0U };
+ FStar_UInt128_uint128 tmp_w[10U]; /* wide buffer forwarded to finv and fmul */
+ for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i)
+ tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); /* zero-initialize tmp_w */
+ Hacl_Curve25519_51_finv(tmp, z, tmp_w); /* tmp = finv(z) */
+ Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); /* tmp = tmp * x */
+ Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); /* 5 limbs -> 4 words; helper defined elsewhere, presumably also fully reduces the value */
+ KRML_MAYBE_FOR4(i0,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); /* word i0 goes to bytes [8*i0, 8*i0 + 8) of o, little-endian */
+}
+
+/**
+Compute the scalar multiple of a point. The top bit (bit 255) of pub is ignored.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where the public point is read from.
+*/
+void
+Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+ uint64_t init[10U] = { 0U }; /* input point: x in limbs [0,5), z in limbs [5,10) */
+ uint64_t tmp[4U] = { 0U }; /* pub as four 64-bit words */
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = tmp;
+ uint8_t *bj = pub + i * (uint32_t)8U;
+ uint64_t u = load64_le(bj); /* bytes [8*i, 8*i + 8) of pub, little-endian */
+ uint64_t r = u;
+ uint64_t x = r;
+ os[i] = x;);
+ uint64_t tmp3 = tmp[3U];
+ tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; /* clear bit 255 of the public value */
+ uint64_t *x = init;
+ uint64_t *z = init + (uint32_t)5U;
+ z[0U] = (uint64_t)1U; /* z = 1 */
+ z[1U] = (uint64_t)0U;
+ z[2U] = (uint64_t)0U;
+ z[3U] = (uint64_t)0U;
+ z[4U] = (uint64_t)0U;
+ uint64_t f0l = tmp[0U] & (uint64_t)0x7ffffffffffffU; /* the lines below repack 4 x 64-bit words into 5 x 51-bit limbs; this is the low 51 bits of word 0 */
+ uint64_t f0h = tmp[0U] >> (uint32_t)51U; /* top 13 bits of word 0 */
+ uint64_t f1l = (tmp[1U] & (uint64_t)0x3fffffffffU) << (uint32_t)13U; /* low 38 bits of word 1, placed above those 13 bits */
+ uint64_t f1h = tmp[1U] >> (uint32_t)38U; /* top 26 bits of word 1 */
+ uint64_t f2l = (tmp[2U] & (uint64_t)0x1ffffffU) << (uint32_t)26U; /* low 25 bits of word 2 */
+ uint64_t f2h = tmp[2U] >> (uint32_t)25U; /* top 39 bits of word 2 */
+ uint64_t f3l = (tmp[3U] & (uint64_t)0xfffU) << (uint32_t)39U; /* low 12 bits of word 3 */
+ uint64_t f3h = tmp[3U] >> (uint32_t)12U; /* remaining bits of word 3 (bit 63 was cleared above) */
+ x[0U] = f0l;
+ x[1U] = f0h | f1l;
+ x[2U] = f1h | f2l;
+ x[3U] = f2h | f3l;
+ x[4U] = f3h;
+ montgomery_ladder(init, priv, init); /* result overwrites init; the same buffer is passed as both output and input point */
+ encode_point(out, init); /* serialize the resulting point into the 32 bytes at out */
+}
+
+/**
+Calculate a public point from a secret/private key.
+
+This computes a scalar multiplication of the secret/private key with the curve's basepoint (the g25519 constant).
+
+@param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+*/
+void
+Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv)
+{
+ uint8_t basepoint[32U] = { 0U }; /* writable copy, since scalarmult takes a non-const pointer */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { /* byte-wise copy of g25519 into basepoint */
+ uint8_t *os = basepoint;
+ uint8_t x = g25519[i];
+ os[i] = x;
+ }
+ Hacl_Curve25519_51_scalarmult(pub, priv, basepoint);
+}
+
+/**
+Execute the Diffie-Hellman key exchange. Returns false when the computed output is all zero bytes, true otherwise.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where **our** secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where **their** public point is read from.
+*/
+bool
+Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+ uint8_t zeros[32U] = { 0U }; /* all-zero reference value */
+ Hacl_Curve25519_51_scalarmult(out, priv, pub); /* out is written even when the function goes on to return false */
+ uint8_t res = (uint8_t)255U;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { /* always scans all 32 bytes; no early exit */
+ uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); /* defined elsewhere; presumably 0xFF when the bytes are equal and 0 otherwise */
+ res = uu____0 & res;
+ }
+ uint8_t z = res;
+ bool r = z == (uint8_t)255U; /* true only if every byte comparison produced 0xFF */
+ return !r; /* false for an all-zero output, true otherwise */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h
new file mode 100644
index 0000000000..dc666dc9c4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h
@@ -0,0 +1,72 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Curve25519_51_H
+#define __Hacl_Curve25519_51_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+
+/**
+Compute the scalar multiple of a point.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where the public point is read from.
+*/
+void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+/**
+Calculate a public point from a secret/private key.
+
+This computes a scalar multiplication of the secret/private key with the curve's basepoint.
+
+@param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+*/
+void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv);
+
+/**
+Execute the Diffie-Hellman key exchange. Returns false when the computed output is all zero bytes, true otherwise.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where **our** secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where **their** public point is read from.
+*/
+bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Curve25519_51_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c
new file mode 100644
index 0000000000..6dbdf736f9
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c
@@ -0,0 +1,424 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Curve25519_64.h"
+
+#include "internal/Vale.h"
+#include "internal/Hacl_Krmllib.h"
+#include "config.h"
+#include "curve25519-inline.h"
+
+static inline void /* Dispatches to add_scalar when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to add_scalar_e; both callees are declared elsewhere. */
+add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ add_scalar(out, f1, f2);
+#else /* inline asm not available */
+ uint64_t uu____0 = add_scalar_e(out, f1, f2); /* return value is stored but never read */
+#endif
+}
+
+static inline void /* Dispatches to fadd when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fadd_e; both callees are declared elsewhere. */
+fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fadd(out, f1, f2);
+#else /* inline asm not available */
+ uint64_t uu____0 = fadd_e(out, f1, f2); /* return value is stored but never read */
+#endif
+}
+
+static inline void /* Dispatches to fsub when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fsub_e; both callees are declared elsewhere. */
+fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fsub(out, f1, f2);
+#else /* inline asm not available */
+ uint64_t uu____0 = fsub_e(out, f1, f2); /* return value is stored but never read */
+#endif
+}
+
+static inline void /* Dispatches to fmul when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fmul_e; both callees are declared elsewhere. */
+fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fmul(out, f1, f2, tmp);
+#else /* inline asm not available */
+ uint64_t uu____0 = fmul_e(tmp, f1, out, f2); /* note the different argument order: (tmp, f1, out, f2); return value is never read */
+#endif
+}
+
+static inline void /* Dispatches to fmul2 when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fmul2_e; both callees are declared elsewhere. */
+fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fmul2(out, f1, f2, tmp);
+#else /* inline asm not available */
+ uint64_t uu____0 = fmul2_e(tmp, f1, out, f2); /* note the different argument order: (tmp, f1, out, f2); return value is never read */
+#endif
+}
+
+static inline void /* Dispatches to fmul_scalar when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fmul_scalar_e; both callees are declared elsewhere. */
+fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fmul_scalar(out, f1, f2);
+#else /* inline asm not available */
+ uint64_t uu____0 = fmul_scalar_e(out, f1, f2); /* return value is stored but never read */
+#endif
+}
+
+static inline void /* Dispatches to fsqr when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fsqr_e; both callees are declared elsewhere. */
+fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fsqr(out, f1, tmp);
+#else /* inline asm not available */
+ uint64_t uu____0 = fsqr_e(tmp, f1, out); /* note the different argument order: (tmp, f1, out); return value is never read */
+#endif
+}
+
+static inline void /* Dispatches to fsqr2 when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to fsqr2_e; both callees are declared elsewhere. */
+fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ fsqr2(out, f, tmp);
+#else /* inline asm not available */
+ uint64_t uu____0 = fsqr2_e(tmp, f, out); /* note the different argument order: (tmp, f, out); return value is never read */
+#endif
+}
+
+static inline void /* Dispatches to cswap2 when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise to cswap2_e; both callees are declared elsewhere. */
+cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2)
+{
+#if HACL_CAN_COMPILE_INLINE_ASM
+ cswap2(bit, p1, p2);
+#else /* inline asm not available */
+ uint64_t uu____0 = cswap2_e(bit, p1, p2); /* return value is stored but never read */
+#endif
+}
+
+static const uint8_t g25519[32U] = { (uint8_t)9U }; /* byte 0 is 9, the remaining 31 bytes are implicitly zero; copied as the basepoint by Hacl_Curve25519_64_secret_to_public */
+
+static void /* Combined add-and-double step called by montgomery_ladder: q supplies x1 (its first 4 limbs), p01_tmp1 holds two points plus scratch, tmp2 is forwarded to the multiply/square wrappers. Arithmetic semantics below are inferred from the wrapper names. */
+point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2)
+{
+ uint64_t *nq = p01_tmp1; /* first point: limbs [0,8), x (4 limbs) then z (4 limbs) */
+ uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U; /* second point: limbs [8,16) */
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; /* scratch: limbs [16,32) */
+ uint64_t *x1 = q;
+ uint64_t *x2 = nq;
+ uint64_t *z2 = nq + (uint32_t)4U;
+ uint64_t *z3 = nq_p1 + (uint32_t)4U;
+ uint64_t *a = tmp1; /* scratch limbs [0,4) */
+ uint64_t *b = tmp1 + (uint32_t)4U; /* scratch limbs [4,8) */
+ uint64_t *ab = tmp1; /* a and b viewed as one 8-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)8U; /* d and c viewed as one 8-limb pair */
+ fadd0(a, x2, z2); /* a = x2 + z2 */
+ fsub0(b, x2, z2); /* b = x2 - z2 */
+ uint64_t *x3 = nq_p1;
+ uint64_t *z31 = nq_p1 + (uint32_t)4U; /* same address as z3 */
+ uint64_t *d0 = dc;
+ uint64_t *c0 = dc + (uint32_t)4U;
+ fadd0(c0, x3, z31); /* c0 = x3 + z3 */
+ fsub0(d0, x3, z31); /* d0 = x3 - z3 */
+ fmul20(dc, dc, ab, tmp2); /* NOTE(review): presumably two independent products (d0 * a, c0 * b) written back to dc - confirm against fmul2 */
+ fadd0(x3, d0, c0); /* x3 = d0 + c0 */
+ fsub0(z31, d0, c0); /* z3 = d0 - c0 */
+ uint64_t *a1 = tmp1; /* the pointers below re-alias the same scratch limbs as a, b, d0, c0 */
+ uint64_t *b1 = tmp1 + (uint32_t)4U;
+ uint64_t *d = tmp1 + (uint32_t)8U;
+ uint64_t *c = tmp1 + (uint32_t)12U;
+ uint64_t *ab1 = tmp1;
+ uint64_t *dc1 = tmp1 + (uint32_t)8U;
+ fsqr20(dc1, ab1, tmp2); /* NOTE(review): presumably (d, c) = (a^2, b^2) - confirm against fsqr2 */
+ fsqr20(nq_p1, nq_p1, tmp2); /* presumably squares x3 and z3 in place */
+ a1[0U] = c[0U]; /* copy the 4 limbs of c into a1 before c is overwritten below */
+ a1[1U] = c[1U];
+ a1[2U] = c[2U];
+ a1[3U] = c[3U];
+ fsub0(c, d, c); /* c = d - c */
+ fmul_scalar0(b1, c, (uint64_t)121665U); /* b1 = c * 121665; 121665 equals the a24 constant given in RFC 7748 */
+ fadd0(b1, b1, d); /* b1 = b1 + d */
+ fmul20(nq, dc1, ab1, tmp2); /* presumably (x2, z2) = (d * a1, c * b1): overwrites the first point */
+ fmul0(z3, z3, x1, tmp2); /* z3 = z3 * x1 */
+}
+
+static void /* Doubling step: updates the 8-limb point nq in place using 16 limbs of scratch at tmp1; tmp2 is forwarded to the multiply/square wrappers. Arithmetic semantics are inferred from the wrapper names. */
+point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2)
+{
+ uint64_t *x2 = nq; /* limbs [0,4) of the point */
+ uint64_t *z2 = nq + (uint32_t)4U; /* limbs [4,8) of the point */
+ uint64_t *a = tmp1; /* scratch limbs [0,4) */
+ uint64_t *b = tmp1 + (uint32_t)4U; /* scratch limbs [4,8) */
+ uint64_t *d = tmp1 + (uint32_t)8U; /* scratch limbs [8,12) */
+ uint64_t *c = tmp1 + (uint32_t)12U; /* scratch limbs [12,16) */
+ uint64_t *ab = tmp1; /* a and b viewed as one 8-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)8U; /* d and c viewed as one 8-limb pair */
+ fadd0(a, x2, z2); /* a = x2 + z2 */
+ fsub0(b, x2, z2); /* b = x2 - z2 */
+ fsqr20(dc, ab, tmp2); /* NOTE(review): presumably (d, c) = (a^2, b^2) - confirm against fsqr2 */
+ a[0U] = c[0U]; /* copy the 4 limbs of c into a before c is overwritten below */
+ a[1U] = c[1U];
+ a[2U] = c[2U];
+ a[3U] = c[3U];
+ fsub0(c, d, c); /* c = d - c */
+ fmul_scalar0(b, c, (uint64_t)121665U); /* b = c * 121665; 121665 equals the a24 constant given in RFC 7748 */
+ fadd0(b, b, d); /* b = b + d */
+ fmul20(nq, dc, ab, tmp2); /* presumably (x2, z2) = (d * a, c * b): writes the doubled point */
+}
+
+static void /* Runs the ladder over key bits 253..3 starting from the points (1 : 0) and init, applies three doublings, and copies the resulting 8-limb point to out. out is written only by the final memcpy, so it may alias init (the visible caller passes the same buffer). */
+montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init)
+{
+ uint64_t tmp2[16U] = { 0U }; /* buffer handed down to the point helpers */
+ uint64_t p01_tmp1_swap[33U] = { 0U }; /* layout: [0,8) point 0, [8,16) point 1, [16,32) scratch, [32] pending swap bit */
+ uint64_t *p0 = p01_tmp1_swap;
+ uint64_t *p01 = p01_tmp1_swap;
+ uint64_t *p03 = p01;
+ uint64_t *p11 = p01 + (uint32_t)8U;
+ memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t)); /* point 1 = init */
+ uint64_t *x0 = p03;
+ uint64_t *z0 = p03 + (uint32_t)4U;
+ x0[0U] = (uint64_t)1U; /* point 0 = (x = 1, z = 0) */
+ x0[1U] = (uint64_t)0U;
+ x0[2U] = (uint64_t)0U;
+ x0[3U] = (uint64_t)0U;
+ z0[0U] = (uint64_t)0U;
+ z0[1U] = (uint64_t)0U;
+ z0[2U] = (uint64_t)0U;
+ z0[3U] = (uint64_t)0U;
+ uint64_t *p01_tmp1 = p01_tmp1_swap;
+ uint64_t *p01_tmp11 = p01_tmp1_swap;
+ uint64_t *nq1 = p01_tmp1_swap;
+ uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U;
+ uint64_t *swap = p01_tmp1_swap + (uint32_t)32U;
+ cswap20((uint64_t)1U, nq1, nq_p11); /* first step uses a constant bit of 1 instead of reading the key (presumably swaps the two points when the bit is 1) */
+ point_add_and_double(init, p01_tmp11, tmp2);
+ swap[0U] = (uint64_t)1U; /* remember the bit used for the step just performed */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { /* processes key bits 253 down to 3; bits 0..2, 254 and 255 are never read. NOTE(review): the constant first step and the three trailing doublings presumably stand in for X25519 scalar clamping - confirm */
+ uint64_t *p01_tmp12 = p01_tmp1_swap;
+ uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U;
+ uint64_t *nq2 = p01_tmp12;
+ uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U;
+ uint64_t
+ bit =
+ (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); /* bit number (253 - i) of key, least significant bit first within each byte */
+ uint64_t sw = swap1[0U] ^ bit; /* 1 only when this bit differs from the previous one */
+ cswap20(sw, nq2, nq_p12);
+ point_add_and_double(init, p01_tmp12, tmp2);
+ swap1[0U] = bit;
+ }
+ uint64_t sw = swap[0U]; /* last key bit processed */
+ cswap20(sw, nq1, nq_p11); /* final conditional swap driven by the last bit */
+ uint64_t *nq10 = p01_tmp1;
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U;
+ point_double(nq10, tmp1, tmp2); /* three doublings of point 0 */
+ point_double(nq10, tmp1, tmp2);
+ point_double(nq10, tmp1, tmp2);
+ memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t)); /* result = point 0 (x then z, 4 limbs each) */
+}
+
+static void /* Applies fsqr0 n times in total: once from inp into o, then n - 1 times on o in place. Precondition: n >= 1. */
+fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n)
+{
+ fsqr0(o, inp, tmp); /* first squaring reads inp, writes o */
+ for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { /* n - 1 is unsigned: n == 0 would wrap to 0xFFFFFFFF iterations, so callers must pass n >= 1 */
+ fsqr0(o, o, tmp);
+ }
+}
+
+static void /* Fixed square-and-multiply chain writing i^(2^255 - 21) to o; exponent annotations assume fsqr0 squares and fmul0 multiplies. 2^255 - 21 = p - 2 for p = 2^255 - 19. */
+finv(uint64_t *o, uint64_t *i, uint64_t *tmp)
+{
+ uint64_t t1[16U] = { 0U }; /* four 4-limb temporaries: a [0,4), b [4,8), c [8,12), t0 [12,16) */
+ uint64_t *a1 = t1;
+ uint64_t *b1 = t1 + (uint32_t)4U;
+ uint64_t *t010 = t1 + (uint32_t)12U;
+ uint64_t *tmp10 = tmp;
+ fsquare_times(a1, i, tmp10, (uint32_t)1U); /* a = i^2 */
+ fsquare_times(t010, a1, tmp10, (uint32_t)2U); /* t0 = i^8 */
+ fmul0(b1, t010, i, tmp); /* b = i^9 */
+ fmul0(a1, b1, a1, tmp); /* a = i^11 */
+ fsquare_times(t010, a1, tmp10, (uint32_t)1U); /* t0 = i^22 */
+ fmul0(b1, t010, b1, tmp); /* b = i^31 = i^(2^5 - 1) */
+ fsquare_times(t010, b1, tmp10, (uint32_t)5U); /* t0 = i^(2^10 - 2^5) */
+ fmul0(b1, t010, b1, tmp); /* b = i^(2^10 - 1) */
+ uint64_t *b10 = t1 + (uint32_t)4U; /* re-aliases b */
+ uint64_t *c10 = t1 + (uint32_t)8U;
+ uint64_t *t011 = t1 + (uint32_t)12U; /* re-aliases t0 */
+ uint64_t *tmp11 = tmp;
+ fsquare_times(t011, b10, tmp11, (uint32_t)10U); /* t0 = i^(2^20 - 2^10) */
+ fmul0(c10, t011, b10, tmp); /* c = i^(2^20 - 1) */
+ fsquare_times(t011, c10, tmp11, (uint32_t)20U); /* t0 = i^(2^40 - 2^20) */
+ fmul0(t011, t011, c10, tmp); /* t0 = i^(2^40 - 1) */
+ fsquare_times(t011, t011, tmp11, (uint32_t)10U); /* t0 = i^(2^50 - 2^10) */
+ fmul0(b10, t011, b10, tmp); /* b = i^(2^50 - 1) */
+ fsquare_times(t011, b10, tmp11, (uint32_t)50U); /* t0 = i^(2^100 - 2^50) */
+ fmul0(c10, t011, b10, tmp); /* c = i^(2^100 - 1) */
+ uint64_t *b11 = t1 + (uint32_t)4U; /* re-aliases b */
+ uint64_t *c1 = t1 + (uint32_t)8U; /* re-aliases c */
+ uint64_t *t01 = t1 + (uint32_t)12U; /* re-aliases t0 */
+ uint64_t *tmp1 = tmp;
+ fsquare_times(t01, c1, tmp1, (uint32_t)100U); /* t0 = i^(2^200 - 2^100) */
+ fmul0(t01, t01, c1, tmp); /* t0 = i^(2^200 - 1) */
+ fsquare_times(t01, t01, tmp1, (uint32_t)50U); /* t0 = i^(2^250 - 2^50) */
+ fmul0(t01, t01, b11, tmp); /* t0 = i^(2^250 - 1) */
+ fsquare_times(t01, t01, tmp1, (uint32_t)5U); /* t0 = i^(2^255 - 2^5) */
+ uint64_t *a = t1; /* still holds i^11 */
+ uint64_t *t0 = t1 + (uint32_t)12U;
+ fmul0(o, t0, a, tmp); /* o = i^(2^255 - 32 + 11) = i^(2^255 - 21) */
+}
+
+static void /* Reduces the 4-limb value f to its canonical residue modulo 2^255 - 19 and writes it to b. Note: f itself is modified in place by the first two folding steps. */
+store_felem(uint64_t *b, uint64_t *f)
+{
+ uint64_t f30 = f[3U];
+ uint64_t top_bit0 = f30 >> (uint32_t)63U; /* bit 255 of the value */
+ f[3U] = f30 & (uint64_t)0x7fffffffffffffffU; /* clear bit 255 ... */
+ add_scalar0(f, f, (uint64_t)19U * top_bit0); /* ... and add 19 in its place (2^255 is congruent to 19 modulo 2^255 - 19) */
+ uint64_t f31 = f[3U];
+ uint64_t top_bit = f31 >> (uint32_t)63U; /* repeat once, in case the addition set bit 255 again */
+ f[3U] = f31 & (uint64_t)0x7fffffffffffffffU;
+ add_scalar0(f, f, (uint64_t)19U * top_bit);
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t f3 = f[3U];
+ uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU); /* mask helpers are defined elsewhere; presumably all-ones when the comparison holds, zero otherwise */
+ uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU);
+ uint64_t m2 = FStar_UInt64_eq_mask(f2, (uint64_t)0xffffffffffffffffU);
+ uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU);
+ uint64_t mask = ((m0 & m1) & m2) & m3; /* set exactly when the value is >= 2^255 - 19 (the four constants are the limbs of 2^255 - 19) */
+ uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU); /* branch-free conditional subtraction of 2^255 - 19, limb by limb */
+ uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU);
+ uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU);
+ uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU);
+ uint64_t o0 = f0_;
+ uint64_t o1 = f1_;
+ uint64_t o2 = f2_;
+ uint64_t o3 = f3_;
+ b[0U] = o0;
+ b[1U] = o1;
+ b[2U] = o2;
+ b[3U] = o3;
+}
+
+static void /* Turns the 8-limb point i = (x, z) into 32 output bytes: computes x times the finv of z, reduces it with store_felem, and writes four little-endian 64-bit words to o. */
+encode_point(uint8_t *o, uint64_t *i)
+{
+ uint64_t *x = i; /* limbs [0,4) */
+ uint64_t *z = i + (uint32_t)4U; /* limbs [4,8) */
+ uint64_t tmp[4U] = { 0U };
+ uint64_t u64s[4U] = { 0U };
+ uint64_t tmp_w[16U] = { 0U }; /* buffer forwarded to finv and fmul0 */
+ finv(tmp, z, tmp_w); /* tmp = finv(z) */
+ fmul0(tmp, tmp, x, tmp_w); /* tmp = tmp * x */
+ store_felem(u64s, tmp); /* canonical reduction; also modifies tmp in place */
+ KRML_MAYBE_FOR4(i0,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); /* word i0 goes to bytes [8*i0, 8*i0 + 8) of o, little-endian */
+}
+
+/**
+Compute the scalar multiple of a point. The top bit (bit 255) of pub is ignored.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where the public point is read from.
+*/
+void
+Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+ uint64_t init[8U] = { 0U }; /* input point: x in limbs [0,4), z in limbs [4,8) */
+ uint64_t tmp[4U] = { 0U }; /* pub as four 64-bit words */
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = tmp;
+ uint8_t *bj = pub + i * (uint32_t)8U;
+ uint64_t u = load64_le(bj); /* bytes [8*i, 8*i + 8) of pub, little-endian */
+ uint64_t r = u;
+ uint64_t x = r;
+ os[i] = x;);
+ uint64_t tmp3 = tmp[3U];
+ tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; /* clear bit 255 of the public value */
+ uint64_t *x = init;
+ uint64_t *z = init + (uint32_t)4U;
+ z[0U] = (uint64_t)1U; /* z = 1 */
+ z[1U] = (uint64_t)0U;
+ z[2U] = (uint64_t)0U;
+ z[3U] = (uint64_t)0U;
+ x[0U] = tmp[0U]; /* x = the four loaded words, used directly as limbs */
+ x[1U] = tmp[1U];
+ x[2U] = tmp[2U];
+ x[3U] = tmp[3U];
+ montgomery_ladder(init, priv, init); /* result overwrites init; the same buffer is passed as both output and input point */
+ encode_point(out, init); /* serialize the resulting point into the 32 bytes at out */
+}
+
+/**
+Calculate a public point from a secret/private key.
+
+This computes a scalar multiplication of the secret/private key with the curve's basepoint (the g25519 constant).
+
+@param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+*/
+void
+Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv)
+{
+ uint8_t basepoint[32U] = { 0U }; /* writable copy, since scalarmult takes a non-const pointer */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { /* byte-wise copy of g25519 into basepoint */
+ uint8_t *os = basepoint;
+ uint8_t x = g25519[i];
+ os[i] = x;
+ }
+ Hacl_Curve25519_64_scalarmult(pub, priv, basepoint);
+}
+
+/**
+Execute the Diffie-Hellman key exchange. Returns false when the computed output is all zero bytes, true otherwise.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where **our** secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where **their** public point is read from.
+*/
+bool
+Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+ uint8_t zeros[32U] = { 0U }; /* all-zero reference value */
+ Hacl_Curve25519_64_scalarmult(out, priv, pub); /* out is written even when the function goes on to return false */
+ uint8_t res = (uint8_t)255U;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { /* always scans all 32 bytes; no early exit */
+ uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); /* defined elsewhere; presumably 0xFF when the bytes are equal and 0 otherwise */
+ res = uu____0 & res;
+ }
+ uint8_t z = res;
+ bool r = z == (uint8_t)255U; /* true only if every byte comparison produced 0xFF */
+ return !r; /* false for an all-zero output, true otherwise */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h
new file mode 100644
index 0000000000..777e57cf86
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h
@@ -0,0 +1,72 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Curve25519_64_H
+#define __Hacl_Curve25519_64_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+
+/**
+Compute the scalar multiple of a point.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where the public point is read from.
+*/
+void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+/**
+Calculate a public point from a secret/private key.
+
+This computes a scalar multiplication of the secret/private key with the curve's basepoint.
+
+@param pub Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where the secret/private key is read from.
+*/
+void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv);
+
+/**
+Execute the Diffie-Hellman key exchange. Returns false when the computed output is all zero bytes, true otherwise.
+
+@param out Pointer to 32 bytes of memory, allocated by the caller, where the resulting point is written to.
+@param priv Pointer to 32 bytes of memory where **our** secret/private key is read from.
+@param pub Pointer to 32 bytes of memory where **their** public point is read from.
+*/
+bool Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Curve25519_64_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.c b/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.c
new file mode 100644
index 0000000000..3cd1091267
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.c
@@ -0,0 +1,713 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Hash_SHA3.h"
+
+/**
+ * Block (rate) length, in bytes, of the given Keccak-based algorithm.
+ *
+ * A value with no case below takes the KaRaMeL "incomplete match" path:
+ * a diagnostic is emitted and KRML_HOST_EXIT(253U) is invoked.
+ */
+static uint32_t
+block_len(Spec_Hash_Definitions_hash_alg a)
+{
+    switch (a) {
+        case Spec_Hash_Definitions_SHA3_224:
+            return 144U;
+        case Spec_Hash_Definitions_SHA3_256:
+        case Spec_Hash_Definitions_Shake256:
+            return 136U;
+        case Spec_Hash_Definitions_SHA3_384:
+            return 104U;
+        case Spec_Hash_Definitions_SHA3_512:
+            return 72U;
+        case Spec_Hash_Definitions_Shake128:
+            return 168U;
+        default:
+            KRML_HOST_EPRINTF("KaRaMeL incomplete match at %s:%d\n", __FILE__, __LINE__);
+            KRML_HOST_EXIT(253U);
+    }
+}
+
+/**
+ * Digest length, in bytes, of the fixed-output SHA-3 variants.
+ *
+ * Only the four SHA3_* algorithms have a case; anything else (including the
+ * SHAKE variants) takes the KaRaMeL "incomplete match" path, which emits a
+ * diagnostic and invokes KRML_HOST_EXIT(253U).
+ */
+static uint32_t
+hash_len(Spec_Hash_Definitions_hash_alg a)
+{
+    switch (a) {
+        case Spec_Hash_Definitions_SHA3_224:
+            return 28U;
+        case Spec_Hash_Definitions_SHA3_256:
+            return 32U;
+        case Spec_Hash_Definitions_SHA3_384:
+            return 48U;
+        case Spec_Hash_Definitions_SHA3_512:
+            return 64U;
+        default:
+            KRML_HOST_EPRINTF("KaRaMeL incomplete match at %s:%d\n", __FILE__, __LINE__);
+            KRML_HOST_EXIT(253U);
+    }
+}
+
+/**
+ * Absorb `n_blocks` consecutive full blocks into the Keccak state `s`.
+ *
+ * @param a        Algorithm; selects the block length via block_len().
+ * @param s        Keccak state words, updated in place.
+ * @param blocks   Input holding n_blocks * block_len(a) bytes.
+ * @param n_blocks Number of blocks to absorb; zero is a no-op.
+ */
+void
+Hacl_Hash_SHA3_update_multi_sha3(
+    Spec_Hash_Definitions_hash_alg a,
+    uint64_t *s,
+    uint8_t *blocks,
+    uint32_t n_blocks)
+{
+    uint32_t idx = 0U;
+    while (idx < n_blocks) {
+        /* Looked up per iteration so that n_blocks == 0 never calls block_len(). */
+        uint32_t rate = block_len(a);
+        Hacl_Impl_SHA3_absorb_inner(rate, blocks + idx * rate, s);
+        idx++;
+    }
+}
+
+/**
+ * Absorb the final chunk of input and apply the padding.
+ *
+ * The domain-separation suffix is 0x1f for the SHAKE variants and 0x06
+ * otherwise. When `input_len` equals the block length, the input is absorbed
+ * as one full block first and the padding is applied to an empty remainder.
+ *
+ * @param a         Algorithm; selects suffix and block length.
+ * @param s         Keccak state words, updated in place.
+ * @param input     Final input bytes.
+ * @param input_len Number of bytes at `input`.
+ */
+void
+Hacl_Hash_SHA3_update_last_sha3(
+    Spec_Hash_Definitions_hash_alg a,
+    uint64_t *s,
+    uint8_t *input,
+    uint32_t input_len)
+{
+    uint8_t suffix =
+        (a == Spec_Hash_Definitions_Shake128 || a == Spec_Hash_Definitions_Shake256)
+            ? (uint8_t)0x1fU
+            : (uint8_t)0x06U;
+    uint32_t rate = block_len(a);
+
+    /* Bytes that still have to go into the padded block. */
+    uint8_t *tail = input;
+    uint32_t tail_len = input_len;
+    if (input_len == rate) {
+        Hacl_Impl_SHA3_absorb_inner(rate, input, s);
+        tail = input + input_len;
+        tail_len = (uint32_t)0U;
+    }
+
+    /* Remaining bytes followed by the suffix byte, zero-filled to 200 bytes. */
+    uint8_t padded[200U] = { 0U };
+    memcpy(padded, tail, tail_len * sizeof(uint8_t));
+    padded[tail_len] = suffix;
+    Hacl_Impl_SHA3_loadState(rate, padded, s);
+    if ((suffix & (uint8_t)0x80U) != (uint8_t)0U && tail_len == rate - (uint32_t)1U) {
+        Hacl_Impl_SHA3_state_permute(s);
+    }
+
+    /* Closing 0x80 byte in the last position of the block. */
+    uint8_t closing[200U] = { 0U };
+    closing[rate - (uint32_t)1U] = (uint8_t)0x80U;
+    Hacl_Impl_SHA3_loadState(rate, closing, s);
+    Hacl_Impl_SHA3_state_permute(s);
+}
+
+/*
+ * Pair of (algorithm, state pointer) values. In this file `fst` is used as the
+ * source and `snd` as the destination when one Keccak state is copied to another.
+ */
+typedef struct hash_buf2_s {
+ Hacl_Streaming_Keccak_hash_buf fst;
+ Hacl_Streaming_Keccak_hash_buf snd;
+} hash_buf2;
+
+/**
+ * Return the algorithm tag recorded in a streaming state.
+ *
+ * @param s Streaming state; must not be NULL.
+ */
+Spec_Hash_Definitions_hash_alg
+Hacl_Streaming_Keccak_get_alg(Hacl_Streaming_Keccak_state *s)
+{
+    return s->block_state.fst;
+}
+
+/**
+ * Allocate and initialise a streaming state for algorithm `a`.
+ *
+ * Three heap objects are created: a block buffer of block_len(a) bytes, the
+ * 25-word Keccak state, and the state structure itself. The Keccak state is
+ * zeroed and the running length starts at 0.
+ *
+ * @param a Algorithm the state will compute.
+ * @return  The new state, to be released with Hacl_Streaming_Keccak_free(),
+ *          or NULL if any allocation fails (nothing is leaked in that case).
+ */
+Hacl_Streaming_Keccak_state *
+Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_hash_alg a)
+{
+    KRML_CHECK_SIZE(sizeof(uint8_t), block_len(a));
+    uint8_t *block_buf = (uint8_t *)KRML_HOST_CALLOC(block_len(a), sizeof(uint8_t));
+    if (block_buf == NULL) {
+        return NULL;
+    }
+    uint64_t *keccak_state = (uint64_t *)KRML_HOST_CALLOC((uint32_t)25U, sizeof(uint64_t));
+    if (keccak_state == NULL) {
+        KRML_HOST_FREE(block_buf);
+        return NULL;
+    }
+    Hacl_Streaming_Keccak_state
+        *p = (Hacl_Streaming_Keccak_state *)KRML_HOST_MALLOC(sizeof(Hacl_Streaming_Keccak_state));
+    if (p == NULL) {
+        /* Undo the partial allocations instead of writing through NULL. */
+        KRML_HOST_FREE(keccak_state);
+        KRML_HOST_FREE(block_buf);
+        return NULL;
+    }
+    Hacl_Streaming_Keccak_hash_buf block_state = { .fst = a, .snd = keccak_state };
+    Hacl_Streaming_Keccak_state
+        s = { .block_state = block_state, .buf = block_buf, .total_len = (uint64_t)(uint32_t)0U };
+    p[0U] = s;
+    /* Explicit initialisation of the Keccak state, kept from the original code. */
+    memset(keccak_state, 0U, (uint32_t)25U * sizeof(uint64_t));
+    return p;
+}
+
+/**
+ * Release a streaming state together with the two buffers it points to.
+ *
+ * @param s State to free; it is dereferenced, so it must not be NULL.
+ */
+void
+Hacl_Streaming_Keccak_free(Hacl_Streaming_Keccak_state *s)
+{
+    /* Read both pointers before the structure that holds them is freed. */
+    uint64_t *keccak_state = s->block_state.snd;
+    uint8_t *block_buf = s->buf;
+
+    KRML_HOST_FREE(keccak_state);
+    KRML_HOST_FREE(block_buf);
+    KRML_HOST_FREE(s);
+}
+
+/**
+ * Create an independent heap-allocated duplicate of a streaming state.
+ *
+ * The block buffer (block_len bytes), the 25-word Keccak state and the running
+ * length are all copied; the duplicate shares no memory with `s0`.
+ *
+ * @param s0 State to duplicate; must not be NULL.
+ * @return   The duplicate, to be released with Hacl_Streaming_Keccak_free(),
+ *           or NULL if any allocation fails (nothing is leaked in that case).
+ */
+Hacl_Streaming_Keccak_state *
+Hacl_Streaming_Keccak_copy(Hacl_Streaming_Keccak_state *s0)
+{
+    Hacl_Streaming_Keccak_state src = *s0;
+    Spec_Hash_Definitions_hash_alg alg = src.block_state.fst;
+
+    KRML_CHECK_SIZE(sizeof(uint8_t), block_len(alg));
+    uint8_t *block_buf = (uint8_t *)KRML_HOST_CALLOC(block_len(alg), sizeof(uint8_t));
+    if (block_buf == NULL) {
+        return NULL;
+    }
+    memcpy(block_buf, src.buf, block_len(alg) * sizeof(uint8_t));
+
+    uint64_t *keccak_state = (uint64_t *)KRML_HOST_CALLOC((uint32_t)25U, sizeof(uint64_t));
+    if (keccak_state == NULL) {
+        KRML_HOST_FREE(block_buf);
+        return NULL;
+    }
+    memcpy(keccak_state, src.block_state.snd, (uint32_t)25U * sizeof(uint64_t));
+
+    Hacl_Streaming_Keccak_state
+        *p = (Hacl_Streaming_Keccak_state *)KRML_HOST_MALLOC(sizeof(Hacl_Streaming_Keccak_state));
+    if (p == NULL) {
+        /* Undo the partial allocations instead of writing through NULL. */
+        KRML_HOST_FREE(keccak_state);
+        KRML_HOST_FREE(block_buf);
+        return NULL;
+    }
+    Hacl_Streaming_Keccak_hash_buf block_state = { .fst = alg, .snd = keccak_state };
+    Hacl_Streaming_Keccak_state
+        s = { .block_state = block_state, .buf = block_buf, .total_len = src.total_len };
+    p[0U] = s;
+    return p;
+}
+
+/**
+ * Return a streaming state to its initial condition so it can be reused.
+ *
+ * The Keccak state words are zeroed and the running length is set to 0; the
+ * algorithm tag and both buffer pointers are left as they are.
+ *
+ * @param s State to reset; must not be NULL.
+ */
+void
+Hacl_Streaming_Keccak_reset(Hacl_Streaming_Keccak_state *s)
+{
+    uint64_t *keccak_state = s->block_state.snd;
+    memset(keccak_state, 0U, (uint32_t)25U * sizeof(uint64_t));
+    s->total_len = (uint64_t)(uint32_t)0U;
+}
+
+/**
+ * Feed `len` bytes of `data` into the streaming state `p`.
+ *
+ * Input is staged in the state's block buffer. The buffer is absorbed into the
+ * Keccak state only once more data arrives after it has filled up, so after a
+ * non-empty update the buffer always retains between 1 and block_len bytes.
+ *
+ * Returns Hacl_Streaming_Types_MaximumLengthExceeded, leaving the state
+ * untouched, if total_len + len would exceed 2^64 - 1; otherwise returns
+ * Hacl_Streaming_Types_Success.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_update(Hacl_Streaming_Keccak_state *p, uint8_t *data, uint32_t len)
+{
+ Hacl_Streaming_Keccak_state s = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state = s.block_state;
+ uint64_t total_len = s.total_len;
+ Spec_Hash_Definitions_hash_alg i = block_state.fst;
+ if ((uint64_t)len > (uint64_t)0xFFFFFFFFFFFFFFFFU - total_len) {
+ return Hacl_Streaming_Types_MaximumLengthExceeded;
+ }
+ /* sz: bytes currently staged in the buffer. A non-zero total that is an exact
+  * multiple of the block length means a full buffer, not an empty one. */
+ uint32_t sz;
+ if (total_len % (uint64_t)block_len(i) == (uint64_t)0U && total_len > (uint64_t)0U) {
+ sz = block_len(i);
+ } else {
+ sz = (uint32_t)(total_len % (uint64_t)block_len(i));
+ }
+ if (len <= block_len(i) - sz) {
+ /* Case 1: the new data fits in the free space of the buffer; just append it. */
+ Hacl_Streaming_Keccak_state s1 = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state1 = s1.block_state;
+ uint8_t *buf = s1.buf;
+ uint64_t total_len1 = s1.total_len;
+ uint32_t sz1;
+ if (total_len1 % (uint64_t)block_len(i) == (uint64_t)0U && total_len1 > (uint64_t)0U) {
+ sz1 = block_len(i);
+ } else {
+ sz1 = (uint32_t)(total_len1 % (uint64_t)block_len(i));
+ }
+ uint8_t *buf2 = buf + sz1;
+ memcpy(buf2, data, len * sizeof(uint8_t));
+ uint64_t total_len2 = total_len1 + (uint64_t)len;
+ *p =
+ ((Hacl_Streaming_Keccak_state){
+ .block_state = block_state1,
+ .buf = buf,
+ .total_len = total_len2 });
+ } else if (sz == (uint32_t)0U) {
+ /* Case 2: nothing is staged; absorb whole blocks straight from `data` and
+  * stage the remainder. */
+ Hacl_Streaming_Keccak_state s1 = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state1 = s1.block_state;
+ uint8_t *buf = s1.buf;
+ uint64_t total_len1 = s1.total_len;
+ uint32_t sz1;
+ if (total_len1 % (uint64_t)block_len(i) == (uint64_t)0U && total_len1 > (uint64_t)0U) {
+ sz1 = block_len(i);
+ } else {
+ sz1 = (uint32_t)(total_len1 % (uint64_t)block_len(i));
+ }
+ if (!(sz1 == (uint32_t)0U)) {
+ Spec_Hash_Definitions_hash_alg a1 = block_state1.fst;
+ uint64_t *s2 = block_state1.snd;
+ Hacl_Hash_SHA3_update_multi_sha3(a1, s2, buf, block_len(i) / block_len(a1));
+ }
+ /* ite: bytes held back for the buffer (1..block_len when len > 0). */
+ uint32_t ite;
+ if ((uint64_t)len % (uint64_t)block_len(i) == (uint64_t)0U && (uint64_t)len > (uint64_t)0U) {
+ ite = block_len(i);
+ } else {
+ ite = (uint32_t)((uint64_t)len % (uint64_t)block_len(i));
+ }
+ uint32_t n_blocks = (len - ite) / block_len(i);
+ uint32_t data1_len = n_blocks * block_len(i);
+ uint32_t data2_len = len - data1_len;
+ uint8_t *data1 = data;
+ uint8_t *data2 = data + data1_len;
+ Spec_Hash_Definitions_hash_alg a1 = block_state1.fst;
+ uint64_t *s2 = block_state1.snd;
+ Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1));
+ uint8_t *dst = buf;
+ memcpy(dst, data2, data2_len * sizeof(uint8_t));
+ *p =
+ ((Hacl_Streaming_Keccak_state){
+ .block_state = block_state1,
+ .buf = buf,
+ .total_len = total_len1 + (uint64_t)len });
+ } else {
+ /* Case 3: the buffer is partly filled and the data does not fit. First top the
+  * buffer up with `diff` bytes, then handle the rest as in case 2. */
+ uint32_t diff = block_len(i) - sz;
+ uint8_t *data1 = data;
+ uint8_t *data2 = data + diff;
+ Hacl_Streaming_Keccak_state s1 = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state10 = s1.block_state;
+ uint8_t *buf0 = s1.buf;
+ uint64_t total_len10 = s1.total_len;
+ uint32_t sz10;
+ if (total_len10 % (uint64_t)block_len(i) == (uint64_t)0U && total_len10 > (uint64_t)0U) {
+ sz10 = block_len(i);
+ } else {
+ sz10 = (uint32_t)(total_len10 % (uint64_t)block_len(i));
+ }
+ uint8_t *buf2 = buf0 + sz10;
+ memcpy(buf2, data1, diff * sizeof(uint8_t));
+ uint64_t total_len2 = total_len10 + (uint64_t)diff;
+ *p =
+ ((Hacl_Streaming_Keccak_state){
+ .block_state = block_state10,
+ .buf = buf0,
+ .total_len = total_len2 });
+ /* Re-read the state just written; the buffer is absorbed below if non-empty. */
+ Hacl_Streaming_Keccak_state s10 = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state1 = s10.block_state;
+ uint8_t *buf = s10.buf;
+ uint64_t total_len1 = s10.total_len;
+ uint32_t sz1;
+ if (total_len1 % (uint64_t)block_len(i) == (uint64_t)0U && total_len1 > (uint64_t)0U) {
+ sz1 = block_len(i);
+ } else {
+ sz1 = (uint32_t)(total_len1 % (uint64_t)block_len(i));
+ }
+ if (!(sz1 == (uint32_t)0U)) {
+ Spec_Hash_Definitions_hash_alg a1 = block_state1.fst;
+ uint64_t *s2 = block_state1.snd;
+ Hacl_Hash_SHA3_update_multi_sha3(a1, s2, buf, block_len(i) / block_len(a1));
+ }
+ /* ite: bytes of the remaining input held back for the buffer. */
+ uint32_t ite;
+ if (
+ (uint64_t)(len - diff) % (uint64_t)block_len(i) == (uint64_t)0U && (uint64_t)(len - diff) > (uint64_t)0U) {
+ ite = block_len(i);
+ } else {
+ ite = (uint32_t)((uint64_t)(len - diff) % (uint64_t)block_len(i));
+ }
+ uint32_t n_blocks = (len - diff - ite) / block_len(i);
+ uint32_t data1_len = n_blocks * block_len(i);
+ uint32_t data2_len = len - diff - data1_len;
+ uint8_t *data11 = data2;
+ uint8_t *data21 = data2 + data1_len;
+ Spec_Hash_Definitions_hash_alg a1 = block_state1.fst;
+ uint64_t *s2 = block_state1.snd;
+ Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data11, data1_len / block_len(a1));
+ uint8_t *dst = buf;
+ memcpy(dst, data21, data2_len * sizeof(uint8_t));
+ *p =
+ ((Hacl_Streaming_Keccak_state){
+ .block_state = block_state1,
+ .buf = buf,
+ .total_len = total_len1 + (uint64_t)(len - diff) });
+ }
+ return Hacl_Streaming_Types_Success;
+}
+
+/*
+ * Shared tail of Hacl_Streaming_Keccak_finish and Hacl_Streaming_Keccak_squeeze.
+ *
+ * The Keccak state is copied into a local array and all further work happens
+ * on that copy, so the streaming state behind `p` is only read, never written.
+ * The staged bytes are absorbed with padding, then output is written to `dst`:
+ * `l` bytes for the SHAKE variants, hash_len(a) bytes otherwise (`l` is ignored
+ * in that case).
+ */
+static void
+finish_(
+ Spec_Hash_Definitions_hash_alg a,
+ Hacl_Streaming_Keccak_state *p,
+ uint8_t *dst,
+ uint32_t l)
+{
+ Hacl_Streaming_Keccak_state scrut0 = *p;
+ Hacl_Streaming_Keccak_hash_buf block_state = scrut0.block_state;
+ uint8_t *buf_ = scrut0.buf;
+ uint64_t total_len = scrut0.total_len;
+ /* r: bytes staged in the buffer (a full buffer when total_len is a non-zero
+  * multiple of the block length). */
+ uint32_t r;
+ if (total_len % (uint64_t)block_len(a) == (uint64_t)0U && total_len > (uint64_t)0U) {
+ r = block_len(a);
+ } else {
+ r = (uint32_t)(total_len % (uint64_t)block_len(a));
+ }
+ uint8_t *buf_1 = buf_;
+ uint64_t buf[25U] = { 0U };
+ Hacl_Streaming_Keccak_hash_buf tmp_block_state = { .fst = a, .snd = buf };
+ hash_buf2 scrut = { .fst = block_state, .snd = tmp_block_state };
+ uint64_t *s_dst = scrut.snd.snd;
+ uint64_t *s_src = scrut.fst.snd;
+ memcpy(s_dst, s_src, (uint32_t)25U * sizeof(uint64_t));
+ /* r never exceeds the block length, so ite == r and buf_last == buf_1 below. */
+ uint32_t ite;
+ if (r % block_len(a) == (uint32_t)0U && r > (uint32_t)0U) {
+ ite = block_len(a);
+ } else {
+ ite = r % block_len(a);
+ }
+ uint8_t *buf_last = buf_1 + r - ite;
+ uint8_t *buf_multi = buf_1;
+ Spec_Hash_Definitions_hash_alg a1 = tmp_block_state.fst;
+ uint64_t *s0 = tmp_block_state.snd;
+ /* Called with a block count of zero, so nothing is absorbed here. */
+ Hacl_Hash_SHA3_update_multi_sha3(a1, s0, buf_multi, (uint32_t)0U / block_len(a1));
+ Spec_Hash_Definitions_hash_alg a10 = tmp_block_state.fst;
+ uint64_t *s1 = tmp_block_state.snd;
+ Hacl_Hash_SHA3_update_last_sha3(a10, s1, buf_last, r);
+ Spec_Hash_Definitions_hash_alg a11 = tmp_block_state.fst;
+ uint64_t *s = tmp_block_state.snd;
+ if (a11 == Spec_Hash_Definitions_Shake128 || a11 == Spec_Hash_Definitions_Shake256) {
+ Hacl_Impl_SHA3_squeeze(s, block_len(a11), l, dst);
+ return;
+ }
+ Hacl_Impl_SHA3_squeeze(s, block_len(a11), hash_len(a11), dst);
+}
+
+/**
+ * Write the digest of everything fed so far to `dst` (fixed-output SHA-3 only).
+ *
+ * @param s   Streaming state; it is not modified by this call.
+ * @param dst Output buffer; hash_len bytes for the state's algorithm are written.
+ * @return    Hacl_Streaming_Types_InvalidAlgorithm for a SHAKE state (nothing
+ *            is written), Hacl_Streaming_Types_Success otherwise.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_finish(Hacl_Streaming_Keccak_state *s, uint8_t *dst)
+{
+    Spec_Hash_Definitions_hash_alg alg = Hacl_Streaming_Keccak_get_alg(s);
+    bool is_xof =
+        alg == Spec_Hash_Definitions_Shake128 || alg == Spec_Hash_Definitions_Shake256;
+
+    if (is_xof) {
+        return Hacl_Streaming_Types_InvalidAlgorithm;
+    }
+    finish_(alg, s, dst, hash_len(alg));
+    return Hacl_Streaming_Types_Success;
+}
+
+/**
+ * Write `l` bytes of SHAKE output for everything fed so far to `dst`.
+ *
+ * @param s   Streaming state; it is not modified by this call.
+ * @param dst Output buffer of at least `l` bytes.
+ * @param l   Requested output length in bytes; must be non-zero.
+ * @return    Hacl_Streaming_Types_InvalidAlgorithm if the state is not a SHAKE
+ *            state, Hacl_Streaming_Types_InvalidLength if `l` is 0,
+ *            Hacl_Streaming_Types_Success otherwise.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_squeeze(Hacl_Streaming_Keccak_state *s, uint8_t *dst, uint32_t l)
+{
+    Spec_Hash_Definitions_hash_alg alg = Hacl_Streaming_Keccak_get_alg(s);
+
+    if (alg != Spec_Hash_Definitions_Shake128 && alg != Spec_Hash_Definitions_Shake256) {
+        return Hacl_Streaming_Types_InvalidAlgorithm;
+    }
+    if (l == (uint32_t)0U) {
+        return Hacl_Streaming_Types_InvalidLength;
+    }
+    finish_(alg, s, dst, l);
+    return Hacl_Streaming_Types_Success;
+}
+
+/** Block (rate) length in bytes for the algorithm recorded in `s`. */
+uint32_t
+Hacl_Streaming_Keccak_block_len(Hacl_Streaming_Keccak_state *s)
+{
+    return block_len(Hacl_Streaming_Keccak_get_alg(s));
+}
+
+/**
+ * Digest length in bytes for the algorithm recorded in `s`.
+ *
+ * hash_len() has no case for the SHAKE variants, so calling this on a SHAKE
+ * state takes its "incomplete match" exit path.
+ */
+uint32_t
+Hacl_Streaming_Keccak_hash_len(Hacl_Streaming_Keccak_state *s)
+{
+    return hash_len(Hacl_Streaming_Keccak_get_alg(s));
+}
+
+/** True when the state's algorithm is Shake128 or Shake256. */
+bool
+Hacl_Streaming_Keccak_is_shake(Hacl_Streaming_Keccak_state *s)
+{
+    Spec_Hash_Definitions_hash_alg alg = Hacl_Streaming_Keccak_get_alg(s);
+
+    if (alg == Spec_Hash_Definitions_Shake128) {
+        return true;
+    }
+    return alg == Spec_Hash_Definitions_Shake256;
+}
+
+/**
+ * One-shot SHAKE128: rate 1344 bits, capacity 256 bits, suffix 0x1F.
+ *
+ * @param inputByteLen  Length of `input` in bytes.
+ * @param input         Message bytes.
+ * @param outputByteLen Number of output bytes to produce.
+ * @param output        Destination for `outputByteLen` bytes.
+ */
+void
+Hacl_SHA3_shake128_hacl(
+    uint32_t inputByteLen,
+    uint8_t *input,
+    uint32_t outputByteLen,
+    uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)1344U;
+    uint32_t capacity_bits = (uint32_t)256U;
+    uint8_t suffix = (uint8_t)0x1FU;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, suffix, outputByteLen, output);
+}
+
+/**
+ * One-shot SHAKE256: rate 1088 bits, capacity 512 bits, suffix 0x1F.
+ *
+ * @param inputByteLen  Length of `input` in bytes.
+ * @param input         Message bytes.
+ * @param outputByteLen Number of output bytes to produce.
+ * @param output        Destination for `outputByteLen` bytes.
+ */
+void
+Hacl_SHA3_shake256_hacl(
+    uint32_t inputByteLen,
+    uint8_t *input,
+    uint32_t outputByteLen,
+    uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)1088U;
+    uint32_t capacity_bits = (uint32_t)512U;
+    uint8_t suffix = (uint8_t)0x1FU;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, suffix, outputByteLen, output);
+}
+
+/**
+ * One-shot SHA3-224: rate 1152 bits, capacity 448 bits, suffix 0x06.
+ *
+ * @param inputByteLen Length of `input` in bytes.
+ * @param input        Message bytes.
+ * @param output       Destination for the 28-byte digest.
+ */
+void
+Hacl_SHA3_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)1152U;
+    uint32_t capacity_bits = (uint32_t)448U;
+    uint32_t digest_len = (uint32_t)28U;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, (uint8_t)0x06U, digest_len, output);
+}
+
+/**
+ * One-shot SHA3-256: rate 1088 bits, capacity 512 bits, suffix 0x06.
+ *
+ * @param inputByteLen Length of `input` in bytes.
+ * @param input        Message bytes.
+ * @param output       Destination for the 32-byte digest.
+ */
+void
+Hacl_SHA3_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)1088U;
+    uint32_t capacity_bits = (uint32_t)512U;
+    uint32_t digest_len = (uint32_t)32U;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, (uint8_t)0x06U, digest_len, output);
+}
+
+/**
+ * One-shot SHA3-384: rate 832 bits, capacity 768 bits, suffix 0x06.
+ *
+ * @param inputByteLen Length of `input` in bytes.
+ * @param input        Message bytes.
+ * @param output       Destination for the 48-byte digest.
+ */
+void
+Hacl_SHA3_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)832U;
+    uint32_t capacity_bits = (uint32_t)768U;
+    uint32_t digest_len = (uint32_t)48U;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, (uint8_t)0x06U, digest_len, output);
+}
+
+/**
+ * One-shot SHA3-512: rate 576 bits, capacity 1024 bits, suffix 0x06.
+ *
+ * @param inputByteLen Length of `input` in bytes.
+ * @param input        Message bytes.
+ * @param output       Destination for the 64-byte digest.
+ */
+void
+Hacl_SHA3_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+{
+    uint32_t rate_bits = (uint32_t)576U;
+    uint32_t capacity_bits = (uint32_t)1024U;
+    uint32_t digest_len = (uint32_t)64U;
+
+    Hacl_Impl_SHA3_keccak(rate_bits, capacity_bits, inputByteLen, input, (uint8_t)0x06U, digest_len, output);
+}
+
+/*
+ * Left-rotation amounts, one per step of the 24-step lane loop in
+ * Hacl_Impl_SHA3_state_permute. Every entry lies in 1..62, so the matching
+ * right shift by (64 - r) there is always a valid shift count.
+ */
+static const uint32_t
+ keccak_rotc[24U] = {
+ (uint32_t)1U, (uint32_t)3U, (uint32_t)6U, (uint32_t)10U, (uint32_t)15U, (uint32_t)21U,
+ (uint32_t)28U, (uint32_t)36U, (uint32_t)45U, (uint32_t)55U, (uint32_t)2U, (uint32_t)14U,
+ (uint32_t)27U, (uint32_t)41U, (uint32_t)56U, (uint32_t)8U, (uint32_t)25U, (uint32_t)43U,
+ (uint32_t)62U, (uint32_t)18U, (uint32_t)39U, (uint32_t)61U, (uint32_t)20U, (uint32_t)44U
+ };
+
+/*
+ * Destination lane index (into the 25-word state) for each step of the
+ * 24-step lane loop in Hacl_Impl_SHA3_state_permute.
+ */
+static const uint32_t
+ keccak_piln[24U] = {
+ (uint32_t)10U, (uint32_t)7U, (uint32_t)11U, (uint32_t)17U, (uint32_t)18U, (uint32_t)3U,
+ (uint32_t)5U, (uint32_t)16U, (uint32_t)8U, (uint32_t)21U, (uint32_t)24U, (uint32_t)4U,
+ (uint32_t)15U, (uint32_t)23U, (uint32_t)19U, (uint32_t)13U, (uint32_t)12U, (uint32_t)2U,
+ (uint32_t)20U, (uint32_t)14U, (uint32_t)22U, (uint32_t)9U, (uint32_t)6U, (uint32_t)1U
+ };
+
+/*
+ * Per-round constants: entry i0 is XORed into state word 0 at the end of
+ * round i0 of Hacl_Impl_SHA3_state_permute.
+ */
+static const uint64_t
+ keccak_rndc[24U] = {
+ (uint64_t)0x0000000000000001U, (uint64_t)0x0000000000008082U, (uint64_t)0x800000000000808aU,
+ (uint64_t)0x8000000080008000U, (uint64_t)0x000000000000808bU, (uint64_t)0x0000000080000001U,
+ (uint64_t)0x8000000080008081U, (uint64_t)0x8000000000008009U, (uint64_t)0x000000000000008aU,
+ (uint64_t)0x0000000000000088U, (uint64_t)0x0000000080008009U, (uint64_t)0x000000008000000aU,
+ (uint64_t)0x000000008000808bU, (uint64_t)0x800000000000008bU, (uint64_t)0x8000000000008089U,
+ (uint64_t)0x8000000000008003U, (uint64_t)0x8000000000008002U, (uint64_t)0x8000000000000080U,
+ (uint64_t)0x000000000000800aU, (uint64_t)0x800000008000000aU, (uint64_t)0x8000000080008081U,
+ (uint64_t)0x8000000000008080U, (uint64_t)0x0000000080000001U, (uint64_t)0x8000000080008008U
+ };
+
+/**
+ * Apply the 24-round permutation to the 25-word state `s`, in place.
+ *
+ * Each round, in order:
+ *   1. computes the XOR of each of the five columns into _C and folds a
+ *      combination of neighbouring column parities (_D) back into every word
+ *      of each column (the theta step of Keccak-f);
+ *   2. walks the 24 lanes listed in keccak_piln, rotating the carried value
+ *      left by keccak_rotc[i] and storing it in lane keccak_piln[i]
+ *      (the rho and pi steps);
+ *   3. replaces each row of five words with x ^ (~next & next-next) (chi);
+ *   4. XORs keccak_rndc[round] into s[0] (iota).
+ */
+void
+Hacl_Impl_SHA3_state_permute(uint64_t *s)
+{
+ for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)24U; i0++) {
+ uint64_t _C[5U] = { 0U };
+ KRML_MAYBE_FOR5(i,
+ (uint32_t)0U,
+ (uint32_t)5U,
+ (uint32_t)1U,
+ _C[i] =
+ s[i + (uint32_t)0U] ^
+ (s[i + (uint32_t)5U] ^ (s[i + (uint32_t)10U] ^ (s[i + (uint32_t)15U] ^ s[i + (uint32_t)20U]))););
+ KRML_MAYBE_FOR5(i1,
+ (uint32_t)0U,
+ (uint32_t)5U,
+ (uint32_t)1U,
+ uint64_t uu____0 = _C[(i1 + (uint32_t)1U) % (uint32_t)5U];
+ uint64_t
+ _D =
+ _C[(i1 + (uint32_t)4U) % (uint32_t)5U] ^ (uu____0 << (uint32_t)1U | uu____0 >> (uint32_t)63U);
+ KRML_MAYBE_FOR5(i,
+ (uint32_t)0U,
+ (uint32_t)5U,
+ (uint32_t)1U,
+ s[i1 + (uint32_t)5U * i] = s[i1 + (uint32_t)5U * i] ^ _D;););
+ uint64_t x = s[1U];
+ uint64_t current = x;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)24U; i++) {
+ uint32_t _Y = keccak_piln[i];
+ uint32_t r = keccak_rotc[i];
+ uint64_t temp = s[_Y];
+ uint64_t uu____1 = current;
+ s[_Y] = uu____1 << r | uu____1 >> ((uint32_t)64U - r);
+ current = temp;
+ }
+ KRML_MAYBE_FOR5(i,
+ (uint32_t)0U,
+ (uint32_t)5U,
+ (uint32_t)1U,
+ uint64_t
+ v0 =
+ s[(uint32_t)0U + (uint32_t)5U * i] ^ (~s[(uint32_t)1U + (uint32_t)5U * i] & s[(uint32_t)2U + (uint32_t)5U * i]);
+ uint64_t
+ v1 =
+ s[(uint32_t)1U + (uint32_t)5U * i] ^ (~s[(uint32_t)2U + (uint32_t)5U * i] & s[(uint32_t)3U + (uint32_t)5U * i]);
+ uint64_t
+ v2 =
+ s[(uint32_t)2U + (uint32_t)5U * i] ^ (~s[(uint32_t)3U + (uint32_t)5U * i] & s[(uint32_t)4U + (uint32_t)5U * i]);
+ uint64_t
+ v3 =
+ s[(uint32_t)3U + (uint32_t)5U * i] ^ (~s[(uint32_t)4U + (uint32_t)5U * i] & s[(uint32_t)0U + (uint32_t)5U * i]);
+ uint64_t
+ v4 =
+ s[(uint32_t)4U + (uint32_t)5U * i] ^ (~s[(uint32_t)0U + (uint32_t)5U * i] & s[(uint32_t)1U + (uint32_t)5U * i]);
+ s[(uint32_t)0U + (uint32_t)5U * i] = v0;
+ s[(uint32_t)1U + (uint32_t)5U * i] = v1;
+ s[(uint32_t)2U + (uint32_t)5U * i] = v2;
+ s[(uint32_t)3U + (uint32_t)5U * i] = v3;
+ s[(uint32_t)4U + (uint32_t)5U * i] = v4;);
+ uint64_t c = keccak_rndc[i0];
+ s[0U] = s[0U] ^ c;
+ }
+}
+
+/**
+ * XOR the first `rateInBytes` bytes of `input` into the state.
+ *
+ * The bytes are copied into a zero-filled 200-byte scratch block, which is then
+ * read as 25 little-endian 64-bit words and XORed word by word into `s`; words
+ * beyond the rate therefore XOR with zero and are unchanged.
+ *
+ * @param rateInBytes Number of input bytes to use; must not exceed 200.
+ * @param input       Source bytes.
+ * @param s           25-word state, updated in place.
+ */
+void
+Hacl_Impl_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s)
+{
+    uint8_t scratch[200U] = { 0U };
+    memcpy(scratch, input, rateInBytes * sizeof(uint8_t));
+
+    uint8_t *lane_bytes = scratch;
+    for (uint32_t lane = (uint32_t)0U; lane < (uint32_t)25U; lane++) {
+        uint64_t word = load64_le(lane_bytes);
+        s[lane] ^= word;
+        lane_bytes += 8U;
+    }
+}
+
+/*
+ * Serialise the 25-word state as little-endian bytes into a 200-byte scratch
+ * block and copy its first `rateInBytes` bytes to `res`.
+ */
+static void
+storeState(uint32_t rateInBytes, uint64_t *s, uint8_t *res)
+{
+    uint8_t scratch[200U] = { 0U };
+    uint32_t lane = (uint32_t)0U;
+
+    while (lane < (uint32_t)25U) {
+        uint64_t word = s[lane];
+        store64_le(scratch + lane * (uint32_t)8U, word);
+        lane++;
+    }
+    memcpy(res, scratch, rateInBytes * sizeof(uint8_t));
+}
+
+/**
+ * Absorb one block: XOR `rateInBytes` bytes of `block` into the state `s`
+ * (via Hacl_Impl_SHA3_loadState) and then run the permutation on `s`.
+ */
+void
+Hacl_Impl_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s)
+{
+ Hacl_Impl_SHA3_loadState(rateInBytes, block, s);
+ Hacl_Impl_SHA3_state_permute(s);
+}
+
+/*
+ * Absorb a whole message into the state `s`, including the final padding.
+ *
+ * Every complete `rateInBytes` block is absorbed in turn. The leftover bytes
+ * are then placed in a zero-filled block followed by `delimitedSuffix`, XORed
+ * into the state, and a closing block with 0x80 in position rateInBytes - 1 is
+ * XORed in before the final permutation.
+ */
+static void
+absorb(
+    uint64_t *s,
+    uint32_t rateInBytes,
+    uint32_t inputByteLen,
+    uint8_t *input,
+    uint8_t delimitedSuffix)
+{
+    uint32_t full_blocks = inputByteLen / rateInBytes;
+    uint32_t leftover = inputByteLen % rateInBytes;
+
+    uint32_t k = (uint32_t)0U;
+    while (k < full_blocks) {
+        Hacl_Impl_SHA3_absorb_inner(rateInBytes, input + k * rateInBytes, s);
+        k++;
+    }
+
+    /* Leftover bytes plus the suffix byte. */
+    uint8_t padded[200U] = { 0U };
+    memcpy(padded, input + full_blocks * rateInBytes, leftover * sizeof(uint8_t));
+    padded[leftover] = delimitedSuffix;
+    Hacl_Impl_SHA3_loadState(rateInBytes, padded, s);
+
+    /* Extra permutation only when the suffix has bit 7 set and fills the block. */
+    if ((delimitedSuffix & (uint8_t)0x80U) != (uint8_t)0U && leftover == rateInBytes - (uint32_t)1U) {
+        Hacl_Impl_SHA3_state_permute(s);
+    }
+
+    uint8_t closing[200U] = { 0U };
+    closing[rateInBytes - (uint32_t)1U] = (uint8_t)0x80U;
+    Hacl_Impl_SHA3_loadState(rateInBytes, closing, s);
+    Hacl_Impl_SHA3_state_permute(s);
+}
+
+/**
+ * Produce `outputByteLen` bytes of output from the state `s`.
+ *
+ * For every complete `rateInBytes` block of output, the first `rateInBytes`
+ * bytes of the serialised state are written and the state is permuted. The
+ * remaining outputByteLen % rateInBytes bytes are written without a further
+ * permutation.
+ *
+ * @param s             25-word state; permuted once per full output block.
+ * @param rateInBytes   Bytes emitted per permutation; must be non-zero.
+ * @param outputByteLen Total number of bytes to write.
+ * @param output        Destination buffer.
+ */
+void
+Hacl_Impl_SHA3_squeeze(
+    uint64_t *s,
+    uint32_t rateInBytes,
+    uint32_t outputByteLen,
+    uint8_t *output)
+{
+    uint32_t full_blocks = outputByteLen / rateInBytes;
+    uint32_t tail_len = outputByteLen % rateInBytes;
+    uint8_t *tail = output + outputByteLen - tail_len;
+
+    uint32_t k = (uint32_t)0U;
+    while (k < full_blocks) {
+        storeState(rateInBytes, s, output + k * rateInBytes);
+        Hacl_Impl_SHA3_state_permute(s);
+        k++;
+    }
+    storeState(tail_len, s, tail);
+}
+
+/**
+ * One-shot sponge: absorb `input` into a fresh zero state, then squeeze
+ * `outputByteLen` bytes into `output`.
+ *
+ * @param rate            Rate in bits; divided by 8 to get the block size in bytes.
+ * @param capacity        Not read by this implementation.
+ * @param inputByteLen    Length of `input` in bytes.
+ * @param input           Message bytes.
+ * @param delimitedSuffix Suffix byte appended to the message before padding.
+ * @param outputByteLen   Number of output bytes to produce.
+ * @param output          Destination buffer.
+ */
+void
+Hacl_Impl_SHA3_keccak(
+    uint32_t rate,
+    uint32_t capacity,
+    uint32_t inputByteLen,
+    uint8_t *input,
+    uint8_t delimitedSuffix,
+    uint32_t outputByteLen,
+    uint8_t *output)
+{
+    uint64_t state[25U] = { 0U };
+    uint32_t rate_bytes = rate / (uint32_t)8U;
+
+    absorb(state, rate_bytes, inputByteLen, input, delimitedSuffix);
+    Hacl_Impl_SHA3_squeeze(state, rate_bytes, outputByteLen, output);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.h b/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.h
new file mode 100644
index 0000000000..fffdc10878
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Hash_SHA3.h
@@ -0,0 +1,121 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Hash_SHA3_H
+#define __Hacl_Hash_SHA3_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Streaming_Types.h"
+
+/** An algorithm tag paired with a pointer to Keccak state words. */
+typedef struct Hacl_Streaming_Keccak_hash_buf_s {
+ Spec_Hash_Definitions_hash_alg fst; /* algorithm identifier */
+ uint64_t *snd; /* Keccak state; the implementation allocates and copies 25 words */
+} Hacl_Streaming_Keccak_hash_buf;
+
+/** Streaming (incremental) hashing state. */
+typedef struct Hacl_Streaming_Keccak_state_s {
+ Hacl_Streaming_Keccak_hash_buf block_state; /* algorithm tag and Keccak state words */
+ uint8_t *buf; /* staging buffer; the implementation allocates block_len bytes */
+ uint64_t total_len; /* total number of input bytes fed so far */
+} Hacl_Streaming_Keccak_state;
+
+/** Return the algorithm tag stored in the state. */
+Spec_Hash_Definitions_hash_alg Hacl_Streaming_Keccak_get_alg(Hacl_Streaming_Keccak_state *s);
+
+/**
+ * Heap-allocate a fresh streaming state for algorithm `a` (zeroed Keccak state,
+ * total length 0). Release it with Hacl_Streaming_Keccak_free.
+ */
+Hacl_Streaming_Keccak_state *Hacl_Streaming_Keccak_malloc(Spec_Hash_Definitions_hash_alg a);
+
+/** Free the state structure together with its staging buffer and Keccak state. */
+void Hacl_Streaming_Keccak_free(Hacl_Streaming_Keccak_state *s);
+
+/**
+ * Heap-allocate an independent duplicate of `s0` (buffer contents, Keccak state
+ * and total length are copied). Release it with Hacl_Streaming_Keccak_free.
+ */
+Hacl_Streaming_Keccak_state *Hacl_Streaming_Keccak_copy(Hacl_Streaming_Keccak_state *s0);
+
+/** Zero the Keccak state and the total length; algorithm and buffers are kept. */
+void Hacl_Streaming_Keccak_reset(Hacl_Streaming_Keccak_state *s);
+
+/**
+ * Feed `len` bytes of `data` into the state. Returns
+ * Hacl_Streaming_Types_MaximumLengthExceeded (state untouched) if the total
+ * input length would exceed 2^64 - 1, Hacl_Streaming_Types_Success otherwise.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_update(Hacl_Streaming_Keccak_state *p, uint8_t *data, uint32_t len);
+
+/**
+ * Write the digest of the input fed so far to `dst` (28, 32, 48 or 64 bytes for
+ * SHA3-224/256/384/512). The state is not modified. Returns
+ * Hacl_Streaming_Types_InvalidAlgorithm for a SHAKE state.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_finish(Hacl_Streaming_Keccak_state *s, uint8_t *dst);
+
+/**
+ * Write `l` bytes of SHAKE output for the input fed so far to `dst`. The state
+ * is not modified. Returns Hacl_Streaming_Types_InvalidAlgorithm for a
+ * non-SHAKE state and Hacl_Streaming_Types_InvalidLength when `l` is 0.
+ */
+Hacl_Streaming_Types_error_code
+Hacl_Streaming_Keccak_squeeze(Hacl_Streaming_Keccak_state *s, uint8_t *dst, uint32_t l);
+
+/** Block (rate) length in bytes of the state's algorithm. */
+uint32_t Hacl_Streaming_Keccak_block_len(Hacl_Streaming_Keccak_state *s);
+
+/**
+ * Digest length in bytes of the state's algorithm. Defined for the four SHA3_*
+ * algorithms only; for a SHAKE state the implementation takes its
+ * "incomplete match" exit path.
+ */
+uint32_t Hacl_Streaming_Keccak_hash_len(Hacl_Streaming_Keccak_state *s);
+
+/** True when the state's algorithm is Shake128 or Shake256. */
+bool Hacl_Streaming_Keccak_is_shake(Hacl_Streaming_Keccak_state *s);
+
+/** One-shot SHAKE128 producing `outputByteLen` bytes. */
+void
+Hacl_SHA3_shake128_hacl(
+ uint32_t inputByteLen,
+ uint8_t *input,
+ uint32_t outputByteLen,
+ uint8_t *output);
+
+/** One-shot SHAKE256 producing `outputByteLen` bytes. */
+void
+Hacl_SHA3_shake256_hacl(
+ uint32_t inputByteLen,
+ uint8_t *input,
+ uint32_t outputByteLen,
+ uint8_t *output);
+
+/** One-shot SHA3-224; writes 28 bytes to `output`. */
+void Hacl_SHA3_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+
+/** One-shot SHA3-256; writes 32 bytes to `output`. */
+void Hacl_SHA3_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+
+/** One-shot SHA3-384; writes 48 bytes to `output`. */
+void Hacl_SHA3_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+
+/** One-shot SHA3-512; writes 64 bytes to `output`. */
+void Hacl_SHA3_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+
+/** XOR one `rateInBytes`-byte block into the state `s`, then permute `s`. */
+void Hacl_Impl_SHA3_absorb_inner(uint32_t rateInBytes, uint8_t *block, uint64_t *s);
+
+/**
+ * Write `outputByteLen` bytes derived from the state `s` to `output`, permuting
+ * `s` after each complete `rateInBytes` block of output.
+ */
+void
+Hacl_Impl_SHA3_squeeze(
+ uint64_t *s,
+ uint32_t rateInBytes,
+ uint32_t outputByteLen,
+ uint8_t *output);
+
+/**
+ * One-shot sponge over a fresh zero state: absorb `input` with
+ * `delimitedSuffix` and padding, then squeeze `outputByteLen` bytes.
+ * `rate` is given in bits; `capacity` is not read by the implementation.
+ */
+void
+Hacl_Impl_SHA3_keccak(
+ uint32_t rate,
+ uint32_t capacity,
+ uint32_t inputByteLen,
+ uint8_t *input,
+ uint8_t delimitedSuffix,
+ uint32_t outputByteLen,
+ uint8_t *output);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Hash_SHA3_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics.h b/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics.h
new file mode 100644
index 0000000000..e414abf30a
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics.h
@@ -0,0 +1,83 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_IntTypes_Intrinsics_H
+#define __Hacl_IntTypes_Intrinsics_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+
+/**
+ * 32-bit add with carry: stores the low 32 bits of x + y + cin in r[0] and
+ * returns bit 32 and above of the 64-bit sum as the carry-out.
+ */
+static inline uint32_t
+Hacl_IntTypes_Intrinsics_add_carry_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r)
+{
+    uint64_t wide = (uint64_t)x;
+    wide += (uint64_t)cin;
+    wide += (uint64_t)y;
+    *r = (uint32_t)wide;
+    return (uint32_t)(wide >> (uint32_t)32U);
+}
+
+/**
+ * 32-bit subtract with borrow: stores the low 32 bits of x - y - cin in r[0]
+ * and returns bit 32 of the wrapped 64-bit difference as the borrow-out.
+ */
+static inline uint32_t
+Hacl_IntTypes_Intrinsics_sub_borrow_u32(uint32_t cin, uint32_t x, uint32_t y, uint32_t *r)
+{
+    uint64_t wide = (uint64_t)x;
+    wide -= (uint64_t)y;
+    wide -= (uint64_t)cin;
+    *r = (uint32_t)wide;
+    return (uint32_t)(wide >> (uint32_t)32U) & (uint32_t)1U;
+}
+
+/**
+ * 64-bit add with carry: stores x + cin + y (mod 2^64) in r[0] and returns the
+ * carry-out, derived from the FStar_UInt64 comparison masks of the sum
+ * against x rather than from a branch.
+ */
+static inline uint64_t
+Hacl_IntTypes_Intrinsics_add_carry_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r)
+{
+    uint64_t sum = x + cin + y;
+    uint64_t sum_ge_x = FStar_UInt64_gte_mask(sum, x);
+    uint64_t sum_eq_x = FStar_UInt64_eq_mask(sum, x);
+    /* Carry when the sum dropped below x, or equals x with a carry-in. */
+    uint64_t carry = (~sum_ge_x | (sum_eq_x & cin)) & (uint64_t)1U;
+    *r = sum;
+    return carry;
+}
+
+/**
+ * 64-bit subtract with borrow: stores x - y - cin (mod 2^64) in r[0] and
+ * returns the borrow-out, derived from the FStar_UInt64 comparison masks of
+ * the difference against x rather than from a branch.
+ */
+static inline uint64_t
+Hacl_IntTypes_Intrinsics_sub_borrow_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r)
+{
+    uint64_t diff = x - y - cin;
+    uint64_t diff_ge_x = FStar_UInt64_gte_mask(diff, x);
+    uint64_t diff_eq_x = FStar_UInt64_eq_mask(diff, x);
+    /* Borrow when the difference rose above x, or equals x with a borrow-in. */
+    uint64_t borrow = ((diff_ge_x & ~diff_eq_x) | (diff_eq_x & cin)) & (uint64_t)1U;
+    *r = diff;
+    return borrow;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_IntTypes_Intrinsics_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics_128.h b/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics_128.h
new file mode 100644
index 0000000000..d1284d201f
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_IntTypes_Intrinsics_128.h
@@ -0,0 +1,72 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_IntTypes_Intrinsics_128_H
+#define __Hacl_IntTypes_Intrinsics_128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+
+/* 64-bit add-with-carry through the FStar_UInt128 helpers: x, cin and y are
+ * widened and summed, the low 64 bits go to r[0], and the value shifted
+ * right by 64 is returned as the carry-out. */
+static inline uint64_t
+Hacl_IntTypes_Intrinsics_128_add_carry_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r)
+{
+    FStar_UInt128_uint128 wide = FStar_UInt128_uint64_to_uint128(x);
+    wide = FStar_UInt128_add_mod(wide, FStar_UInt128_uint64_to_uint128(cin));
+    wide = FStar_UInt128_add_mod(wide, FStar_UInt128_uint64_to_uint128(y));
+    uint64_t carry_out =
+        FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(wide, (uint32_t)64U));
+    r[0U] = FStar_UInt128_uint128_to_uint64(wide);
+    return carry_out;
+}
+
+/* 64-bit subtract-with-borrow through the FStar_UInt128 helpers: computes
+ * x - y - cin in 128 bits, stores the low 64 bits in r[0], and returns the
+ * lowest bit of the upper half as the borrow-out. */
+static inline uint64_t
+Hacl_IntTypes_Intrinsics_128_sub_borrow_u64(uint64_t cin, uint64_t x, uint64_t y, uint64_t *r)
+{
+    FStar_UInt128_uint128 wide = FStar_UInt128_uint64_to_uint128(x);
+    wide = FStar_UInt128_sub_mod(wide, FStar_UInt128_uint64_to_uint128(y));
+    wide = FStar_UInt128_sub_mod(wide, FStar_UInt128_uint64_to_uint128(cin));
+    uint64_t upper_half =
+        FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(wide, (uint32_t)64U));
+    r[0U] = FStar_UInt128_uint128_to_uint64(wide);
+    return upper_half & (uint64_t)1U;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_IntTypes_Intrinsics_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/Hacl_Krmllib.h
new file mode 100644
index 0000000000..7cdfb200d0
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Krmllib.h
@@ -0,0 +1,59 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Krmllib_H
+#define __Hacl_Krmllib_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+static KRML_NOINLINE uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b); /* Declaration only; presumably all-ones when a == b, else 0 -- confirm against the definition. */
+
+static KRML_NOINLINE uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b); /* Declaration only; presumably all-ones when a >= b, else 0 -- confirm against the definition. */
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); /* Declaration only; by its name, 128-bit addition that wraps. */
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); /* Declaration only; by its name, 128-bit subtraction that wraps. */
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); /* Declaration only; shifts a right by s bits. */
+
+static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); /* Declaration only; widens a 64-bit value to the 128-bit type. */
+
+static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); /* Declaration only; callers use the result as the low 64 bits of a. */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Krmllib_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_P256.c b/security/nss/lib/freebl/verified/Hacl_P256.c
new file mode 100644
index 0000000000..892daa30ca
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P256.c
@@ -0,0 +1,1829 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_P256.h"
+
+#include "internal/Hacl_P256_PrecompTable.h"
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum_Base.h"
+#include "lib_intrinsics.h"
+
+/* Compares each of the four limbs of f against zero with
+ * FStar_UInt64_eq_mask and returns the AND of the four per-limb masks. */
+static inline uint64_t
+bn_is_zero_mask4(uint64_t *f)
+{
+    uint64_t zero_limbs[4U] = { 0U };
+    uint64_t acc = (uint64_t)0xFFFFFFFFFFFFFFFFU;
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    uint64_t limb_mask = FStar_UInt64_eq_mask(f[k], zero_limbs[k]);
+                    acc = limb_mask & acc;);
+    return acc;
+}
+
+/* Boolean form of bn_is_zero_mask4: true exactly when that mask is all ones.
+ * The "vartime" suffix marks the result as usable in a branch. */
+static inline bool
+bn_is_zero_vartime4(uint64_t *f)
+{
+    return bn_is_zero_mask4(f) == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+}
+
+/* Limb-wise comparison of the 4-limb numbers a and b: returns the AND of
+ * FStar_UInt64_eq_mask(a[i], b[i]) over all four limbs. */
+static inline uint64_t
+bn_is_eq_mask4(uint64_t *a, uint64_t *b)
+{
+    uint64_t acc = (uint64_t)0xFFFFFFFFFFFFFFFFU;
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    uint64_t limb_mask = FStar_UInt64_eq_mask(a[k], b[k]);
+                    acc = limb_mask & acc;);
+    return acc;
+}
+
+/* Boolean form of bn_is_eq_mask4: true exactly when that mask is all ones. */
+static inline bool
+bn_is_eq_vartime4(uint64_t *a, uint64_t *b)
+{
+    return bn_is_eq_mask4(a, b) == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+}
+
+/* Branch-free 4-limb select into res.  With pick_y being the complement of
+ * FStar_UInt64_eq_mask(cin, 0), each limb becomes x[i] ^ (pick_y & (y[i] ^ x[i])):
+ * x[i] when pick_y is zero, y[i] when it is all ones (assuming eq_mask
+ * yields all-ones on equality and zero otherwise). */
+static inline void
+bn_cmovznz4(uint64_t *res, uint64_t cin, uint64_t *x, uint64_t *y)
+{
+    uint64_t pick_y = ~FStar_UInt64_eq_mask(cin, (uint64_t)0U);
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    uint64_t x_k = x[k];
+                    res[k] = x_k ^ (pick_y & (y[k] ^ x_k)););
+}
+
+/* 4-limb modular addition.  Computes x + y into res (carry kept), then
+ * res - n into a scratch buffer (borrow kept), and finally selects per limb
+ * with the mask carry - borrow: res is kept where mask bits are set, the
+ * reduced value is taken where they are clear.  Limbs are processed from
+ * index 0 upwards; no data-dependent branch is taken. */
+static inline void
+bn_add_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t *y)
+{
+    /* res = x + y, limb by limb. */
+    uint64_t carry = (uint64_t)0U;
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, x[0U], y[0U], &res[0U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, x[1U], y[1U], &res[1U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, x[2U], y[2U], &res[2U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, x[3U], y[3U], &res[3U]);
+
+    /* reduced = res - n, limb by limb. */
+    uint64_t reduced[4U] = { 0U };
+    uint64_t borrow = (uint64_t)0U;
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, res[0U], n[0U], &reduced[0U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, res[1U], n[1U], &reduced[1U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, res[2U], n[2U], &reduced[2U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, res[3U], n[3U], &reduced[3U]);
+
+    /* 0 - 1 wraps to all ones (keep res); equal carry and borrow give 0
+     * (take the reduced value). */
+    uint64_t keep_sum = carry - borrow;
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    res[k] = (keep_sum & res[k]) | (~keep_sum & reduced[k]););
+}
+
+/* 4-limb subtraction res = x - y, chaining the borrow from limb 0 to limb 3.
+ * Returns the borrow out of the last limb. */
+static inline uint64_t
+bn_sub4(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+    uint64_t borrow = (uint64_t)0U;
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[0U], y[0U], &res[0U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[1U], y[1U], &res[1U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[2U], y[2U], &res[2U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[3U], y[3U], &res[3U]);
+    return borrow;
+}
+
+/* 4-limb modular subtraction.  Computes x - y into res (borrow kept), then
+ * res + n into a scratch buffer (its carry is deliberately ignored), and
+ * selects per limb with the mask 0 - borrow: the corrected value is taken
+ * when the first subtraction borrowed, res is kept otherwise.  No
+ * data-dependent branch is taken. */
+static inline void
+bn_sub_mod4(uint64_t *res, uint64_t *n, uint64_t *x, uint64_t *y)
+{
+    /* res = x - y, limb by limb. */
+    uint64_t borrow = (uint64_t)0U;
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[0U], y[0U], &res[0U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[1U], y[1U], &res[1U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[2U], y[2U], &res[2U]);
+    borrow = Lib_IntTypes_Intrinsics_sub_borrow_u64(borrow, x[3U], y[3U], &res[3U]);
+
+    /* corrected = res + n, limb by limb. */
+    uint64_t corrected[4U] = { 0U };
+    uint64_t carry = (uint64_t)0U;
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, res[0U], n[0U], &corrected[0U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, res[1U], n[1U], &corrected[1U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, res[2U], n[2U], &corrected[2U]);
+    carry = Lib_IntTypes_Intrinsics_add_carry_u64(carry, res[3U], n[3U], &corrected[3U]);
+    KRML_HOST_IGNORE(carry);
+
+    /* All ones when the subtraction borrowed, zero otherwise. */
+    uint64_t use_corrected = (uint64_t)0U - borrow;
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    res[k] = (use_corrected & corrected[k]) | (~use_corrected & res[k]););
+}
+
+/* Schoolbook product of two 4-limb numbers into the 8-limb buffer res.
+ * res is zeroed first; for each limb y[j] the four products x[i] * y[j] are
+ * folded into res[j .. j+3] through Hacl_Bignum_Base_mul_wide_add2_u64 and
+ * the final carry of that row is stored in res[4 + j]. */
+static inline void
+bn_mul4(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+    memset(res, 0U, (uint32_t)8U * sizeof(uint64_t));
+    KRML_MAYBE_FOR4(
+        j,
+        (uint32_t)0U,
+        (uint32_t)4U,
+        (uint32_t)1U,
+        uint64_t y_j = y[j];
+        uint64_t *row = res + j;
+        uint64_t carry = (uint64_t)0U;
+        carry = Hacl_Bignum_Base_mul_wide_add2_u64(x[0U], y_j, carry, &row[0U]);
+        carry = Hacl_Bignum_Base_mul_wide_add2_u64(x[1U], y_j, carry, &row[1U]);
+        carry = Hacl_Bignum_Base_mul_wide_add2_u64(x[2U], y_j, carry, &row[2U]);
+        carry = Hacl_Bignum_Base_mul_wide_add2_u64(x[3U], y_j, carry, &row[3U]);
+        res[(uint32_t)4U + j] = carry;);
+}
+
+static inline void
+bn_sqr4(uint64_t *res, uint64_t *x)
+{ /* Squares the 4-limb number x into the 8-limb buffer res in three steps:
+   * (1) accumulate the cross products x[i] * x[i0] for i < i0,
+   * (2) add res to itself through Hacl_Bignum_Addition_bn_add_eq_len_u64,
+   * (3) add the diagonal squares x[i] * x[i].
+   * The carries returned by both additions are ignored.
+   */
+ memset(res, 0U, (uint32_t)8U * sizeof(uint64_t));
+ KRML_MAYBE_FOR4(
+ i0,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *ab = x;
+ uint64_t a_j = x[i0];
+ uint64_t *res_j = res + i0;
+ uint64_t c = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U; i++) { /* i0 < 4 here, so the bound is 0 and this 4-way unrolled loop never runs */
+ uint64_t a_i = ab[(uint32_t)4U * i];
+ uint64_t *res_i0 = res_j + (uint32_t)4U * i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0);
+ uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1);
+ uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2);
+ uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i);
+ } for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) { /* remainder loop: handles every i in 0 .. i0-1 */
+ uint64_t a_i = ab[i];
+ uint64_t *res_i = res_j + i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i);
+ } uint64_t r = c;
+ res[i0 + i0] = r;); /* row carry is stored at limb 2 * i0 */
+ uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64((uint32_t)8U, res, res, res); /* presumably res = res + res, doubling the cross products -- confirm the helper's argument order */
+ KRML_HOST_IGNORE(c0);
+ uint64_t tmp[8U] = { 0U };
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(x[i], x[i]);
+ uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U));
+ uint64_t lo = FStar_UInt128_uint128_to_uint64(res1);
+ tmp[(uint32_t)2U * i] = lo;
+ tmp[(uint32_t)2U * i + (uint32_t)1U] = hi;); /* tmp holds x[i]^2 as a (lo, hi) limb pair at positions 2i and 2i+1 */
+ uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64((uint32_t)8U, res, tmp, res); /* presumably res = res + tmp -- confirm the helper's argument order */
+ KRML_HOST_IGNORE(c1);
+}
+
+/* Serialises the 4-limb number f into the 32 bytes at res.  Limb f[3] is
+ * written first and f[0] last, each as 8 bytes via store64_be.
+ *
+ *   res  output buffer, at least 32 bytes
+ *   f    input number, 4 limbs with f[0] the least significant
+ *
+ * No scratch buffer is needed: every limb is written straight to res. */
+static inline void
+bn_to_bytes_be4(uint8_t *res, uint64_t *f)
+{
+    KRML_MAYBE_FOR4(i,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    store64_be(res + i * (uint32_t)8U, f[(uint32_t)4U - i - (uint32_t)1U]););
+}
+
+/* Parses 32 bytes at b into the 4-limb number res.  Limb res[k] is read with
+ * load64_be from byte offset (3 - k) * 8, so the first eight bytes of b end
+ * up in res[3]. */
+static inline void
+bn_from_bytes_be4(uint64_t *res, uint8_t *b)
+{
+    KRML_MAYBE_FOR4(k,
+                    (uint32_t)0U,
+                    (uint32_t)4U,
+                    (uint32_t)1U,
+                    uint64_t limb = load64_be(b + ((uint32_t)4U - k - (uint32_t)1U) * (uint32_t)8U);
+                    res[k] = limb;);
+}
+
+/* Writes two 4-limb numbers back to back: x into res[0..31] and y into
+ * res[32..63], each through bn_to_bytes_be4. */
+static inline void
+bn2_to_bytes_be4(uint8_t *res, uint64_t *x, uint64_t *y)
+{
+    uint8_t *first_half = res;
+    uint8_t *second_half = res + (uint32_t)32U;
+    bn_to_bytes_be4(first_half, x);
+    bn_to_bytes_be4(second_half, y);
+}
+
+/* Fills n[0..3] (least significant limb first) with the field prime
+ * 2^256 - 2^224 + 2^192 + 2^96 - 1. */
+static inline void
+make_prime(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0xffffffffffffffffU,
+        (uint64_t)0xffffffffU,
+        (uint64_t)0x0U,
+        (uint64_t)0xffffffff00000001U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Fills n[0..3] (least significant limb first) with the constant
+ * 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551;
+ * by the function's name, the group order. */
+static inline void
+make_order(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0xf3b9cac2fc632551U,
+        (uint64_t)0xbce6faada7179e84U,
+        (uint64_t)0xffffffffffffffffU,
+        (uint64_t)0xffffffff00000000U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Fills a[0..3] with the constant used as the curve coefficient "a".
+ * Callers multiply it with Montgomery-form values via fmul0, so it is
+ * presumably stored in Montgomery form -- confirm. */
+static inline void
+make_a_coeff(uint64_t *a)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0xfffffffffffffffcU,
+        (uint64_t)0x3ffffffffU,
+        (uint64_t)0x0U,
+        (uint64_t)0xfffffffc00000004U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        a[k] = limbs[k];
+    }
+}
+
+/* Fills b[0..3] with the constant used as the curve coefficient "b"
+ * (presumably in Montgomery form, like the other field constants here --
+ * confirm). */
+static inline void
+make_b_coeff(uint64_t *b)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0xd89cdf6229c4bddfU,
+        (uint64_t)0xacf005cd78843090U,
+        (uint64_t)0xe5a220abf7212ed6U,
+        (uint64_t)0xdc30061d04874834U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        b[k] = limbs[k];
+    }
+}
+
+/* Fills n[0..3] with the x coordinate constant of the base point
+ * (presumably in Montgomery form, since make_base_point pairs it with
+ * make_fone -- confirm). */
+static inline void
+make_g_x(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0x79e730d418a9143cU,
+        (uint64_t)0x75ba95fc5fedb601U,
+        (uint64_t)0x79fb732b77622510U,
+        (uint64_t)0x18905f76a53755c6U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Fills n[0..3] with the y coordinate constant of the base point
+ * (presumably in Montgomery form, since make_base_point pairs it with
+ * make_fone -- confirm). */
+static inline void
+make_g_y(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0xddf25357ce95560aU,
+        (uint64_t)0x8b4ab8e4ba19e45cU,
+        (uint64_t)0xd2e88688dd21f325U,
+        (uint64_t)0x8571ff1825885d85U
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Fills n[0..3] with the constant that to_mont multiplies by; by its name,
+ * R^2 modulo the field prime for the Montgomery representation. */
+static inline void
+make_fmont_R2(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0x3U,
+        (uint64_t)0xfffffffbffffffffU,
+        (uint64_t)0xfffffffffffffffeU,
+        (uint64_t)0x4fffffffdU
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Sets all four limbs of n to zero. */
+static inline void
+make_fzero(uint64_t *n)
+{
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = (uint64_t)0U;
+    }
+}
+
+/* Fills n[0..3] with 2^224 - 2^192 - 2^96 + 1, which equals 2^256 minus the
+ * prime written by make_prime, i.e. the value 1 in Montgomery form with
+ * R = 2^256. */
+static inline void
+make_fone(uint64_t *n)
+{
+    static const uint64_t limbs[4U] = {
+        (uint64_t)0x1U,
+        (uint64_t)0xffffffff00000000U,
+        (uint64_t)0xffffffffffffffffU,
+        (uint64_t)0xfffffffeU
+    };
+    for (uint32_t k = 0U; k < 4U; k++) {
+        n[k] = limbs[k];
+    }
+}
+
+/* Range check against the field prime: subtracts the prime from f with
+ * bn_sub4 and returns 0 - borrow, i.e. all ones when the subtraction
+ * borrowed (f below the prime) and zero otherwise.  f is not modified. */
+static inline uint64_t
+bn_is_lt_prime_mask4(uint64_t *f)
+{
+    uint64_t scratch[4U] = { 0U };
+    make_prime(scratch);
+    /* The difference overwrites the prime in scratch; only the borrow matters. */
+    uint64_t borrow = bn_sub4(scratch, f, scratch);
+    return (uint64_t)0U - borrow;
+}
+
+/* Field-element equality mask; a thin wrapper over bn_is_eq_mask4. */
+static inline uint64_t
+feq_mask(uint64_t *a, uint64_t *b)
+{
+    return bn_is_eq_mask4(a, b);
+}
+
+/* Field addition: res = x + y reduced by the prime from make_prime, via
+ * bn_add_mod4. */
+static inline void
+fadd0(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+    uint64_t prime[4U] = { 0U };
+    make_prime(prime);
+    bn_add_mod4(res, prime, x, y);
+}
+
+/* Field subtraction: res = x - y reduced by the prime from make_prime, via
+ * bn_sub_mod4. */
+static inline void
+fsub0(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+    uint64_t prime[4U] = { 0U };
+    make_prime(prime);
+    bn_sub_mod4(res, prime, x, y);
+}
+
+/* Replaces f with 0 - f (field subtraction) when is_negate is true and
+ * leaves it untouched otherwise.  Branches on is_negate, hence "vartime". */
+static inline void
+fnegate_conditional_vartime(uint64_t *f, bool is_negate)
+{
+    uint64_t zero[4U] = { 0U };
+    if (!is_negate) {
+        return;
+    }
+    fsub0(f, zero, f);
+}
+
+static inline void
+mont_reduction(uint64_t *res, uint64_t *x)
+{ /* Montgomery reduction of the 8-limb value x modulo the prime from
+   * make_prime; the 4-limb result goes to res.
+   * x is used as working storage and is overwritten.
+   * Four rounds each add a multiple of the prime at limb offset i0; the upper
+   * four limbs of x are then copied to res and reduced once more by a
+   * conditional, branch-free subtraction of the prime.
+   */
+ uint64_t n[4U] = { 0U };
+ make_prime(n);
+ uint64_t c0 = (uint64_t)0U;
+ KRML_MAYBE_FOR4(
+ i0,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t qj = (uint64_t)1U * x[i0]; /* per-round multiplier: the current limb times the constant 1 */
+ uint64_t *res_j0 = x + i0;
+ uint64_t c = (uint64_t)0U;
+ {
+ uint64_t a_i = n[(uint32_t)4U * (uint32_t)0U];
+ uint64_t *res_i0 = res_j0 + (uint32_t)4U * (uint32_t)0U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); /* presumably *res_i0 += a_i * qj + c, returning the new carry -- confirm against the helper */
+ uint64_t a_i0 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t *res_i1 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1);
+ uint64_t a_i1 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t *res_i2 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2);
+ uint64_t a_i2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t *res_i = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i);
+ } uint64_t r = c;
+ uint64_t c1 = r;
+ uint64_t *resb = x + (uint32_t)4U + i0;
+ uint64_t res_j = x[(uint32_t)4U + i0];
+ c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); /* fold the row carry into limb 4 + i0, chaining c0 across rounds */
+ memcpy(res, x + (uint32_t)4U, (uint32_t)4U * sizeof(uint64_t)); /* the candidate result is the upper half of x */
+ uint64_t c00 = c0;
+ uint64_t tmp[4U] = { 0U };
+ uint64_t c = (uint64_t)0U;
+ { /* tmp = res - n, limb by limb, keeping the borrow in c */
+ uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U];
+ uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U];
+ uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0);
+ uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1);
+ uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2);
+ uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i);
+ }
+ uint64_t c1 = c;
+ uint64_t c2 = c00 - c1; /* 0 - 1 wraps to all ones (keep res); equal carry and borrow give 0 (take tmp) */
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = res;
+ uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]);
+ os[i] = x1;);
+}
+
+/* Field multiplication: the 8-limb product of x and y from bn_mul4 is passed
+ * through mont_reduction into res. */
+static inline void
+fmul0(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+    uint64_t product[8U] = { 0U };
+    bn_mul4(product, x, y);
+    mont_reduction(res, product);
+}
+
+/* Field squaring: the 8-limb square of x from bn_sqr4 is passed through
+ * mont_reduction into res. */
+static inline void
+fsqr0(uint64_t *res, uint64_t *x)
+{
+    uint64_t square[8U] = { 0U };
+    bn_sqr4(square, x);
+    mont_reduction(res, square);
+}
+
+/* Leaves the Montgomery domain: a is copied into the low half of a zeroed
+ * 8-limb buffer, which is then run through mont_reduction into res. */
+static inline void
+from_mont(uint64_t *res, uint64_t *a)
+{
+    uint64_t widened[8U] = { 0U };
+    memcpy(widened, a, (uint32_t)4U * sizeof(uint64_t));
+    mont_reduction(res, widened);
+}
+
+/* Enters the Montgomery domain: res = fmul0(a, C), where C is the constant
+ * written by make_fmont_R2. */
+static inline void
+to_mont(uint64_t *res, uint64_t *a)
+{
+    uint64_t r_squared[4U] = { 0U };
+    make_fmont_R2(r_squared);
+    fmul0(res, a, r_squared);
+}
+
+/* res = fmul0(b, x), where b is the constant written by make_b_coeff. */
+static inline void
+fmul_by_b_coeff(uint64_t *res, uint64_t *x)
+{
+    uint64_t curve_b[4U] = { 0U };
+    make_b_coeff(curve_b);
+    fmul0(res, curve_b, x);
+}
+
+/* Field cube: one squaring followed by one multiplication by x. */
+static inline void
+fcube(uint64_t *res, uint64_t *x)
+{
+    fsqr0(res, x);     /* res = x^2 */
+    fmul0(res, res, x); /* res = x^3 */
+}
+
+static inline void
+finv(uint64_t *res, uint64_t *a)
+{ /* Field inversion by exponentiation: raises a to the power p - 2, where
+   * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, using a fixed chain of fsqr0 and
+   * fmul0 calls.  The chain does not depend on the value of a.
+   * Exponents noted below are the powers of a held after each step.
+   */
+ uint64_t tmp[16U] = { 0U };
+ uint64_t *x30 = tmp;
+ uint64_t *x2 = tmp + (uint32_t)4U;
+ uint64_t *tmp1 = tmp + (uint32_t)8U;
+ uint64_t *tmp2 = tmp + (uint32_t)12U;
+ memcpy(x2, a, (uint32_t)4U * sizeof(uint64_t));
+ {
+ fsqr0(x2, x2);
+ }
+ fmul0(x2, x2, a); /* x2 = a^(2^2 - 1) */
+ memcpy(x30, x2, (uint32_t)4U * sizeof(uint64_t));
+ {
+ fsqr0(x30, x30);
+ }
+ fmul0(x30, x30, a); /* x30 = a^(2^3 - 1) for now; reused below for 2^30 - 1 */
+ memcpy(tmp1, x30, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, fsqr0(tmp1, tmp1););
+ fmul0(tmp1, tmp1, x30); /* tmp1 = a^(2^6 - 1) */
+ memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, fsqr0(tmp2, tmp2););
+ fmul0(tmp2, tmp2, tmp1); /* tmp2 = a^(2^12 - 1) */
+ memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, fsqr0(tmp1, tmp1););
+ fmul0(tmp1, tmp1, x30); /* tmp1 = a^(2^15 - 1) */
+ memcpy(x30, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR15(i, (uint32_t)0U, (uint32_t)15U, (uint32_t)1U, fsqr0(x30, x30););
+ fmul0(x30, x30, tmp1); /* x30 = a^(2^30 - 1) */
+ memcpy(tmp1, x30, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(tmp1, tmp1););
+ fmul0(tmp1, tmp1, x2); /* tmp1 = a^(2^32 - 1) */
+ memcpy(x2, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+ fsqr0(x2, x2);
+ }
+ fmul0(x2, x2, a); /* x2 = a^(2^64 - 2^32 + 1) */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)128U; i++) {
+ fsqr0(x2, x2);
+ }
+ fmul0(x2, x2, tmp1); /* x2 = a^(2^192 - 2^160 + 2^128 + 2^32 - 1) */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+ fsqr0(x2, x2);
+ }
+ fmul0(x2, x2, tmp1); /* x2 = a^(2^224 - 2^192 + 2^160 + 2^64 - 1) */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)30U; i++) {
+ fsqr0(x2, x2);
+ }
+ fmul0(x2, x2, x30); /* x2 = a^(2^254 - 2^222 + 2^190 + 2^94 - 1) */
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(x2, x2););
+ fmul0(tmp1, x2, a); /* tmp1 = a^(2^256 - 2^224 + 2^192 + 2^96 - 3) = a^(p - 2) */
+ memcpy(res, tmp1, (uint32_t)4U * sizeof(uint64_t));
+}
+
+static inline void
+fsqrt(uint64_t *res, uint64_t *a)
+{ /* Candidate square root by exponentiation: raises a to the power
+   * 2^254 - 2^222 + 2^190 + 2^94, which is (p + 1) / 4 for
+   * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, using a fixed chain of fsqr0 and
+   * fmul0 calls.  The function does not check that the result squares back
+   * to a; the caller in this file does that itself.
+   * Exponents noted below are the powers of a held after each step.
+   */
+ uint64_t tmp[8U] = { 0U };
+ uint64_t *tmp1 = tmp;
+ uint64_t *tmp2 = tmp + (uint32_t)4U;
+ memcpy(tmp1, a, (uint32_t)4U * sizeof(uint64_t));
+ {
+ fsqr0(tmp1, tmp1);
+ }
+ fmul0(tmp1, tmp1, a); /* tmp1 = a^(2^2 - 1) */
+ memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, fsqr0(tmp2, tmp2););
+ fmul0(tmp2, tmp2, tmp1); /* tmp2 = a^(2^4 - 1) */
+ memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, fsqr0(tmp1, tmp1););
+ fmul0(tmp1, tmp1, tmp2); /* tmp1 = a^(2^8 - 1) */
+ memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR8(i, (uint32_t)0U, (uint32_t)8U, (uint32_t)1U, fsqr0(tmp2, tmp2););
+ fmul0(tmp2, tmp2, tmp1); /* tmp2 = a^(2^16 - 1) */
+ memcpy(tmp1, tmp2, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR16(i, (uint32_t)0U, (uint32_t)16U, (uint32_t)1U, fsqr0(tmp1, tmp1););
+ fmul0(tmp1, tmp1, tmp2); /* tmp1 = a^(2^32 - 1) */
+ memcpy(tmp2, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+ fsqr0(tmp2, tmp2);
+ }
+ fmul0(tmp2, tmp2, a); /* tmp2 = a^(2^64 - 2^32 + 1) */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)96U; i++) {
+ fsqr0(tmp2, tmp2);
+ }
+ fmul0(tmp2, tmp2, a); /* tmp2 = a^(2^160 - 2^128 + 2^96 + 1) */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)94U; i++) {
+ fsqr0(tmp2, tmp2);
+ } /* tmp2 = a^(2^254 - 2^222 + 2^190 + 2^94) */
+ memcpy(res, tmp2, (uint32_t)4U * sizeof(uint64_t));
+}
+
+/* Writes the base point into the 12-limb buffer p: make_g_x into limbs 0..3,
+ * make_g_y into limbs 4..7 and make_fone into limbs 8..11. */
+static inline void
+make_base_point(uint64_t *p)
+{
+    make_g_x(p);
+    make_g_y(p + (uint32_t)4U);
+    make_fone(p + (uint32_t)8U);
+}
+
+/* Writes the point at infinity into the 12-limb buffer p as (0, one, 0):
+ * make_fzero into limbs 0..3, make_fone into limbs 4..7 and make_fzero into
+ * limbs 8..11. */
+static inline void
+make_point_at_inf(uint64_t *p)
+{
+    make_fzero(p);
+    make_fone(p + (uint32_t)4U);
+    make_fzero(p + (uint32_t)8U);
+}
+
+/* A point is treated as the point at infinity when its third coordinate
+ * (limbs 8..11 of p) is zero. */
+static inline bool
+is_point_at_inf_vartime(uint64_t *p)
+{
+    return bn_is_zero_vartime4(p + (uint32_t)8U);
+}
+
+/* Projective to affine conversion.  p holds X, Y, Z in limbs 0..3, 4..7 and
+ * 8..11; res receives x = X / Z in limbs 0..3 and y = Y / Z in limbs 4..7,
+ * both passed through from_mont. */
+static inline void
+to_aff_point(uint64_t *res, uint64_t *p)
+{
+    uint64_t z_inverse[4U] = { 0U };
+    uint64_t *out_x = res;
+    uint64_t *out_y = res + (uint32_t)4U;
+    finv(z_inverse, p + (uint32_t)8U);
+    fmul0(out_x, p, z_inverse);
+    fmul0(out_y, p + (uint32_t)4U, z_inverse);
+    from_mont(out_x, out_x);
+    from_mont(out_y, out_y);
+}
+
+/* Like to_aff_point but for the x coordinate only: res = X / Z, passed
+ * through from_mont, with X in limbs 0..3 and Z in limbs 8..11 of p. */
+static inline void
+to_aff_point_x(uint64_t *res, uint64_t *p)
+{
+    uint64_t z_inverse[4U] = { 0U };
+    finv(z_inverse, p + (uint32_t)8U);
+    fmul0(res, p, z_inverse);
+    from_mont(res, res);
+}
+
+/* Affine to projective conversion.  The x and y coordinates in limbs 0..3
+ * and 4..7 of p are passed through to_mont into the same positions of res,
+ * and limbs 8..11 of res are set with make_fone. */
+static inline void
+to_proj_point(uint64_t *res, uint64_t *p)
+{
+    to_mont(res, p);
+    to_mont(res + (uint32_t)4U, p + (uint32_t)4U);
+    make_fone(res + (uint32_t)8U);
+}
+
+/* Checks the curve equation y^2 = x^3 + a*x + b for the affine point p
+ * (x in limbs 0..3, y in limbs 4..7).  Both coordinates are first passed
+ * through to_mont; the two sides are compared with feq_mask and the result
+ * is returned as a bool. */
+static inline bool
+is_on_curve_vartime(uint64_t *p)
+{
+    uint64_t rhs[4U] = { 0U };
+    uint64_t x_mont[4U] = { 0U };
+    uint64_t y_mont[4U] = { 0U };
+    to_mont(x_mont, p);
+    to_mont(y_mont, p + (uint32_t)4U);
+
+    /* rhs = x^3 + a*x + b */
+    uint64_t term[4U] = { 0U };
+    fcube(rhs, x_mont);
+    make_a_coeff(term);
+    fmul0(term, term, x_mont);
+    fadd0(rhs, term, rhs);
+    make_b_coeff(term);
+    fadd0(rhs, term, rhs);
+
+    /* lhs = y^2, computed in place */
+    fsqr0(y_mont, y_mont);
+    return feq_mask(y_mont, rhs) == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+}
+
+/* Serialises the affine point p (x in limbs 0..3, y in limbs 4..7) as 64
+ * bytes at res, x first, through bn2_to_bytes_be4. */
+static inline void
+aff_point_store(uint8_t *res, uint64_t *p)
+{
+    bn2_to_bytes_be4(res, p, p + (uint32_t)4U);
+}
+
+/* Serialises the projective point p as 64 bytes at res: converts it with
+ * to_aff_point, then writes it with aff_point_store. */
+static inline void
+point_store(uint8_t *res, uint64_t *p)
+{
+    uint64_t affine[8U] = { 0U };
+    to_aff_point(affine, p);
+    aff_point_store(res, affine);
+}
+
+/* Parses a 64-byte encoding (x then y, 32 bytes each) into the 8-limb
+ * buffer p and validates it.  Returns false when either coordinate fails
+ * the bn_is_lt_prime_mask4 range check, otherwise the result of
+ * is_on_curve_vartime.  p is written even when the function returns false. */
+static inline bool
+aff_point_load_vartime(uint64_t *p, uint8_t *b)
+{
+    uint64_t *coord_x = p;
+    uint64_t *coord_y = p + (uint32_t)4U;
+    bn_from_bytes_be4(coord_x, b);
+    bn_from_bytes_be4(coord_y, b + (uint32_t)32U);
+
+    uint64_t x_in_range = bn_is_lt_prime_mask4(coord_x);
+    uint64_t y_in_range = bn_is_lt_prime_mask4(coord_y);
+    uint64_t both_in_range = x_in_range & y_in_range;
+    if (both_in_range != (uint64_t)0xFFFFFFFFFFFFFFFFU) {
+        return false;
+    }
+    return is_on_curve_vartime(p);
+}
+
+/* Parses and validates a 64-byte point encoding with aff_point_load_vartime
+ * and, on success, converts it into the projective buffer p with
+ * to_proj_point.  On failure p is left untouched and false is returned. */
+static inline bool
+load_point_vartime(uint64_t *p, uint8_t *b)
+{
+    uint64_t affine[8U] = { 0U };
+    if (!aff_point_load_vartime(affine, b)) {
+        return false;
+    }
+    to_proj_point(p, affine);
+    return true;
+}
+
+/* Decompresses a point from s: one tag byte (0x02 or 0x03) followed by the
+ * 32-byte x coordinate.  On success x and y hold the affine coordinates and
+ * true is returned.  Returns false when the tag is neither 0x02 nor 0x03,
+ * when x fails the bn_is_lt_prime_mask4 range check, or when the candidate
+ * root from fsqrt does not square back to x^3 + a*x + b.
+ * x is written before the range check and y before the root check, so both
+ * may be modified even when false is returned. */
+static inline bool
+aff_point_decompress_vartime(uint64_t *x, uint64_t *y, uint8_t *s)
+{
+    uint8_t tag = s[0U];
+    if (tag != (uint8_t)0x02U && tag != (uint8_t)0x03U) {
+        return false;
+    }
+
+    bn_from_bytes_be4(x, s + (uint32_t)1U);
+    uint64_t x_in_range = bn_is_lt_prime_mask4(x);
+    bool want_odd_y = tag == (uint8_t)0x03U;
+    if (x_in_range != (uint64_t)0xFFFFFFFFFFFFFFFFU) {
+        return false;
+    }
+
+    /* rhs = x^3 + a*x + b on the to_mont image of x. */
+    uint64_t rhs[4U] = { 0U };
+    uint64_t x_mont[4U] = { 0U };
+    uint64_t y_mont[4U] = { 0U };
+    to_mont(x_mont, x);
+    uint64_t term[4U] = { 0U };
+    fcube(rhs, x_mont);
+    make_a_coeff(term);
+    fmul0(term, term, x_mont);
+    fadd0(rhs, term, rhs);
+    make_b_coeff(term);
+    fadd0(rhs, term, rhs);
+
+    /* Candidate root; it is only valid if it squares back to rhs. */
+    fsqrt(y_mont, rhs);
+    from_mont(y, y_mont);
+    fsqr0(y_mont, y_mont);
+    if (feq_mask(y_mont, rhs) != (uint64_t)0xFFFFFFFFFFFFFFFFU) {
+        return false;
+    }
+
+    /* Negate y when its parity differs from the one the tag asks for. */
+    bool got_odd_y = (y[0U] & (uint64_t)1U) == (uint64_t)1U;
+    fnegate_conditional_vartime(y, got_odd_y != want_odd_y);
+    return true;
+}
+
+static inline void
+point_double(uint64_t *res, uint64_t *p)
+{ /* Doubles the projective point p into res.  Both buffers hold three
+   * 4-limb coordinates at limb offsets 0, 4 and 8.
+   * The body is a straight-line sequence of fadd0/fsub0/fmul0/fsqr0 calls
+   * with no branches; presumably a complete doubling formula for this
+   * curve -- confirm against the HACL* specification.
+   * The statement order matters: x3, y3 and z3 are reused as temporaries.
+   */
+ uint64_t tmp[20U] = { 0U }; /* five 4-limb temporaries t0..t4 */
+ uint64_t *x = p; /* same storage as x1 below */
+ uint64_t *z = p + (uint32_t)8U; /* same storage as z1 below */
+ uint64_t *x3 = res;
+ uint64_t *y3 = res + (uint32_t)4U;
+ uint64_t *z3 = res + (uint32_t)8U;
+ uint64_t *t0 = tmp;
+ uint64_t *t1 = tmp + (uint32_t)4U;
+ uint64_t *t2 = tmp + (uint32_t)8U;
+ uint64_t *t3 = tmp + (uint32_t)12U;
+ uint64_t *t4 = tmp + (uint32_t)16U;
+ uint64_t *x1 = p;
+ uint64_t *y = p + (uint32_t)4U;
+ uint64_t *z1 = p + (uint32_t)8U;
+ fsqr0(t0, x1); /* t0 = X^2 */
+ fsqr0(t1, y); /* t1 = Y^2 */
+ fsqr0(t2, z1); /* t2 = Z^2 */
+ fmul0(t3, x1, y);
+ fadd0(t3, t3, t3); /* t3 = 2*X*Y */
+ fmul0(t4, y, z1); /* t4 = Y*Z */
+ fmul0(z3, x, z); /* last read of p: everything below uses only tmp and res */
+ fadd0(z3, z3, z3); /* z3 = 2*X*Z */
+ fmul_by_b_coeff(y3, t2);
+ fsub0(y3, y3, z3);
+ fadd0(x3, y3, y3);
+ fadd0(y3, x3, y3);
+ fsub0(x3, t1, y3);
+ fadd0(y3, t1, y3);
+ fmul0(y3, x3, y3);
+ fmul0(x3, x3, t3);
+ fadd0(t3, t2, t2);
+ fadd0(t2, t2, t3); /* t2 = 3 * Z^2 */
+ fmul_by_b_coeff(z3, z3);
+ fsub0(z3, z3, t2);
+ fsub0(z3, z3, t0);
+ fadd0(t3, z3, z3);
+ fadd0(z3, z3, t3);
+ fadd0(t3, t0, t0);
+ fadd0(t0, t3, t0); /* t0 = 3 * X^2 */
+ fsub0(t0, t0, t2);
+ fmul0(t0, t0, z3);
+ fadd0(y3, y3, t0);
+ fadd0(t0, t4, t4); /* t0 = 2*Y*Z */
+ fmul0(z3, t0, z3);
+ fsub0(x3, x3, z3);
+ fmul0(z3, t0, t1);
+ fadd0(z3, z3, z3);
+ fadd0(z3, z3, z3);
+}
+
+static inline void /* Point addition on 12-limb (X, Y, Z) points: res receives the sum of p and q. */
+point_add(uint64_t *res, uint64_t *p, uint64_t *q) /* res may alias p or q: the result is built in local storage and copied out at the end */
+{ /* NOTE(review): the step sequence appears to match the complete a = -3 addition formulas of Renes-Costello-Batina (Alg. 4) -- confirm against upstream. */
+ uint64_t tmp[36U] = { 0U }; /* 24 limbs of temporaries followed by 12 limbs for the staged result */
+ uint64_t *t0 = tmp;
+ uint64_t *t1 = tmp + (uint32_t)24U; /* staged result (x3, y3, z3) */
+ uint64_t *x3 = t1;
+ uint64_t *y3 = t1 + (uint32_t)4U;
+ uint64_t *z3 = t1 + (uint32_t)8U;
+ uint64_t *t01 = t0;
+ uint64_t *t11 = t0 + (uint32_t)4U;
+ uint64_t *t2 = t0 + (uint32_t)8U;
+ uint64_t *t3 = t0 + (uint32_t)12U;
+ uint64_t *t4 = t0 + (uint32_t)16U;
+ uint64_t *t5 = t0 + (uint32_t)20U;
+ uint64_t *x1 = p;
+ uint64_t *y1 = p + (uint32_t)4U;
+ uint64_t *z10 = p + (uint32_t)8U;
+ uint64_t *x20 = q;
+ uint64_t *y20 = q + (uint32_t)4U;
+ uint64_t *z20 = q + (uint32_t)8U;
+ fmul0(t01, x1, x20); /* t01 = X1*X2 (assuming fmul0/fadd0/fsub0 are field mul/add/sub; defined outside this chunk) */
+ fmul0(t11, y1, y20); /* t11 = Y1*Y2 */
+ fmul0(t2, z10, z20); /* t2 = Z1*Z2 */
+ fadd0(t3, x1, y1);
+ fadd0(t4, x20, y20);
+ fmul0(t3, t3, t4);
+ fadd0(t4, t01, t11);
+ uint64_t *y10 = p + (uint32_t)4U; /* the pointers below re-derive coordinates of p and q already named above */
+ uint64_t *z11 = p + (uint32_t)8U;
+ uint64_t *y2 = q + (uint32_t)4U;
+ uint64_t *z21 = q + (uint32_t)8U;
+ fsub0(t3, t3, t4); /* t3 = X1*Y2 + X2*Y1 */
+ fadd0(t4, y10, z11);
+ fadd0(t5, y2, z21);
+ fmul0(t4, t4, t5);
+ fadd0(t5, t11, t2);
+ fsub0(t4, t4, t5); /* t4 = Y1*Z2 + Y2*Z1 */
+ uint64_t *x10 = p;
+ uint64_t *z1 = p + (uint32_t)8U;
+ uint64_t *x2 = q;
+ uint64_t *z2 = q + (uint32_t)8U;
+ fadd0(x3, x10, z1);
+ fadd0(y3, x2, z2);
+ fmul0(x3, x3, y3);
+ fadd0(y3, t01, t2);
+ fsub0(y3, x3, y3); /* y3 = X1*Z2 + X2*Z1 */
+ fmul_by_b_coeff(z3, t2); /* presumably z3 = b * Z1*Z2 */
+ fsub0(x3, y3, z3);
+ fadd0(z3, x3, x3);
+ fadd0(x3, x3, z3); /* x3 tripled */
+ fsub0(z3, t11, x3);
+ fadd0(x3, t11, x3);
+ fmul_by_b_coeff(y3, y3);
+ fadd0(t11, t2, t2);
+ fadd0(t2, t11, t2); /* t2 = 3*Z1*Z2 */
+ fsub0(y3, y3, t2);
+ fsub0(y3, y3, t01);
+ fadd0(t11, y3, y3);
+ fadd0(y3, t11, y3); /* y3 tripled */
+ fadd0(t11, t01, t01);
+ fadd0(t01, t11, t01); /* t01 = 3*X1*X2 */
+ fsub0(t01, t01, t2);
+ fmul0(t11, t4, y3);
+ fmul0(t2, t01, y3);
+ fmul0(y3, x3, z3);
+ fadd0(y3, y3, t2); /* y3 holds its final value */
+ fmul0(x3, t3, x3);
+ fsub0(x3, x3, t11); /* x3 holds its final value */
+ fmul0(z3, t4, z3);
+ fmul0(t11, t3, t01);
+ fadd0(z3, z3, t11); /* z3 holds its final value */
+ memcpy(res, t1, (uint32_t)12U * sizeof(uint64_t)); /* publish the staged 12-limb result */
+}
+
+static inline void /* Scalar multiplication with a 4-bit fixed window: res receives scalar times p. */
+point_mul(uint64_t *res, uint64_t *scalar, uint64_t *p) /* res, p: 12-limb points; scalar: 4 limbs (256 bits are consumed below) */
+{
+ uint64_t table[192U] = { 0U }; /* 16 entries of 12 limbs; entry j ends up holding j times p */
+ uint64_t tmp[12U] = { 0U };
+ uint64_t *t0 = table;
+ uint64_t *t1 = table + (uint32_t)12U;
+ make_point_at_inf(t0); /* entry 0 */
+ memcpy(t1, p, (uint32_t)12U * sizeof(uint64_t)); /* entry 1 = p */
+ KRML_MAYBE_FOR7(i, /* presumably expands to a loop over i = 0..6 (macro defined outside this chunk) */
+ (uint32_t)0U,
+ (uint32_t)7U,
+ (uint32_t)1U,
+ uint64_t *t11 = table + (i + (uint32_t)1U) * (uint32_t)12U;
+ point_double(tmp, t11); /* entry 2i+2 = double of entry i+1 */
+ memcpy(table + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U,
+ tmp,
+ (uint32_t)12U * sizeof(uint64_t));
+ uint64_t *t2 = table + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U;
+ point_add(tmp, p, t2); /* entry 2i+3 = p + entry 2i+2 */
+ memcpy(table + ((uint32_t)2U * i + (uint32_t)3U) * (uint32_t)12U,
+ tmp,
+ (uint32_t)12U * sizeof(uint64_t)););
+ make_point_at_inf(res); /* accumulator starts at the point at infinity */
+ uint64_t tmp0[12U] = { 0U };
+ for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)64U; i0++) { /* 64 windows of 4 bits */
+ KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, point_double(res, res);); /* four in-place doublings per window */
+ uint32_t k = (uint32_t)256U - (uint32_t)4U * i0 - (uint32_t)4U; /* bit offset of the window: 252 down to 0 */
+ uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar, k, (uint32_t)4U); /* presumably the 4 scalar bits starting at bit k */
+ memcpy(tmp0, (uint64_t *)table, (uint32_t)12U * sizeof(uint64_t)); /* start from entry 0, then blend in the matching entry */
+ KRML_MAYBE_FOR15(i1, /* every entry 1..15 is read and masked, so no table index depends on bits_l */
+ (uint32_t)0U,
+ (uint32_t)15U,
+ (uint32_t)1U,
+ uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i1 + (uint32_t)1U)); /* presumably all-ones when bits_l == i1 + 1, else zero */
+ const uint64_t *res_j = table + (i1 + (uint32_t)1U) * (uint32_t)12U;
+ KRML_MAYBE_FOR12(i,
+ (uint32_t)0U,
+ (uint32_t)12U,
+ (uint32_t)1U,
+ uint64_t *os = tmp0;
+ uint64_t x = (c & res_j[i]) | (~c & tmp0[i]); /* limb-wise select between table entry and current value */
+ os[i] = x;););
+ point_add(res, res, tmp0); /* add the selected multiple into the accumulator */
+ }
+}
+
+static inline void /* Copy the 12-limb entry number bits_l of a 16-entry table into tmp by scanning and masking every entry. */
+precomp_get_consttime(const uint64_t *table, uint64_t bits_l, uint64_t *tmp) /* table: entries of 12 limbs, entries 0..15 are read; tmp: 12-limb output */
+{
+ memcpy(tmp, (uint64_t *)table, (uint32_t)12U * sizeof(uint64_t)); /* default: entry 0 (kept when no mask below matches) */
+ KRML_MAYBE_FOR15(i0, /* presumably expands to a loop over i0 = 0..14 (macro defined outside this chunk) */
+ (uint32_t)0U,
+ (uint32_t)15U,
+ (uint32_t)1U,
+ uint64_t c = FStar_UInt64_eq_mask(bits_l, (uint64_t)(i0 + (uint32_t)1U)); /* presumably all-ones when bits_l == i0 + 1, else zero */
+ const uint64_t *res_j = table + (i0 + (uint32_t)1U) * (uint32_t)12U; /* address depends only on the loop counter, not on bits_l */
+ KRML_MAYBE_FOR12(i,
+ (uint32_t)0U,
+ (uint32_t)12U,
+ (uint32_t)1U,
+ uint64_t *os = tmp;
+ uint64_t x = (c & res_j[i]) | (~c & tmp[i]); /* take the table limb when c is all-ones, otherwise keep tmp[i] */
+ os[i] = x;););
+}
+
+static inline void /* Fixed-base scalar multiplication using four precomputed 4-bit-window tables, one per 64-bit limb of scalar. */
+point_mul_g(uint64_t *res, uint64_t *scalar) /* res: 12-limb output point; scalar: 4 limbs */
+{
+ uint64_t q1[12U] = { 0U }; /* q1..q4 are only passed to KRML_HOST_IGNORE at the end; the loop below reads the external tables instead */
+ make_base_point(q1);
+ uint64_t
+ q2[12U] = {
+ (uint64_t)1499621593102562565U, (uint64_t)16692369783039433128U,
+ (uint64_t)15337520135922861848U, (uint64_t)5455737214495366228U,
+ (uint64_t)17827017231032529600U, (uint64_t)12413621606240782649U,
+ (uint64_t)2290483008028286132U, (uint64_t)15752017553340844820U,
+ (uint64_t)4846430910634234874U, (uint64_t)10861682798464583253U,
+ (uint64_t)15404737222404363049U, (uint64_t)363586619281562022U
+ };
+ uint64_t
+ q3[12U] = {
+ (uint64_t)14619254753077084366U, (uint64_t)13913835116514008593U,
+ (uint64_t)15060744674088488145U, (uint64_t)17668414598203068685U,
+ (uint64_t)10761169236902342334U, (uint64_t)15467027479157446221U,
+ (uint64_t)14989185522423469618U, (uint64_t)14354539272510107003U,
+ (uint64_t)14298211796392133693U, (uint64_t)13270323784253711450U,
+ (uint64_t)13380964971965046957U, (uint64_t)8686204248456909699U
+ };
+ uint64_t
+ q4[12U] = {
+ (uint64_t)7870395003430845958U, (uint64_t)18001862936410067720U,
+ (uint64_t)8006461232116967215U, (uint64_t)5921313779532424762U,
+ (uint64_t)10702113371959864307U, (uint64_t)8070517410642379879U,
+ (uint64_t)7139806720777708306U, (uint64_t)8253938546650739833U,
+ (uint64_t)17490482834545705718U, (uint64_t)1065249776797037500U,
+ (uint64_t)5018258455937968775U, (uint64_t)14100621120178668337U
+ };
+ uint64_t *r1 = scalar; /* limb 0, paired with the base-point table */
+ uint64_t *r2 = scalar + (uint32_t)1U; /* limb 1, paired with the g_pow2_64 table */
+ uint64_t *r3 = scalar + (uint32_t)2U; /* limb 2, paired with the g_pow2_128 table */
+ uint64_t *r4 = scalar + (uint32_t)3U; /* limb 3, paired with the g_pow2_192 table */
+ make_point_at_inf(res); /* accumulator starts at the point at infinity */
+ uint64_t tmp[12U] = { 0U };
+ KRML_MAYBE_FOR16(i, /* presumably a loop over i = 0..15: sixteen 4-bit windows per 64-bit limb */
+ (uint32_t)0U,
+ (uint32_t)16U,
+ (uint32_t)1U,
+ KRML_MAYBE_FOR4(i0, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, point_double(res, res);); /* four in-place doublings shared by all four limbs */
+ uint32_t k = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U; /* window offset inside the limb: 60 down to 0 (k0, k1, k2 below are the same value) */
+ uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r4, k, (uint32_t)4U);
+ precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4, bits_l, tmp); /* masked lookup, then accumulate */
+ point_add(res, res, tmp);
+ uint32_t k0 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U;
+ uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r3, k0, (uint32_t)4U);
+ precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4, bits_l0, tmp);
+ point_add(res, res, tmp);
+ uint32_t k1 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U;
+ uint64_t bits_l1 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r2, k1, (uint32_t)4U);
+ precomp_get_consttime(Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4, bits_l1, tmp);
+ point_add(res, res, tmp);
+ uint32_t k2 = (uint32_t)64U - (uint32_t)4U * i - (uint32_t)4U;
+ uint64_t bits_l2 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)1U, r1, k2, (uint32_t)4U);
+ precomp_get_consttime(Hacl_P256_PrecompTable_precomp_basepoint_table_w4, bits_l2, tmp);
+ point_add(res, res, tmp););
+ KRML_HOST_IGNORE(q1); /* presumably marks the otherwise unused locals as used -- macro defined outside this chunk */
+ KRML_HOST_IGNORE(q2);
+ KRML_HOST_IGNORE(q3);
+ KRML_HOST_IGNORE(q4);
+}
+
+static inline void /* Double scalar multiplication with 5-bit windows: scalar1 indexes the precomputed base-point table, scalar2 a local table of multiples of q2. */
+point_mul_double_g(uint64_t *res, uint64_t *scalar1, uint64_t *scalar2, uint64_t *q2) /* res, q2: 12-limb points; scalar1, scalar2: 4 limbs each */
+{ /* NOTE: table entries are addressed directly by scalar bits here (no masked scan), so the memory access pattern depends on both scalars. */
+ uint64_t q1[12U] = { 0U }; /* filled with the base point but not read again in this function */
+ make_base_point(q1);
+ uint64_t table2[384U] = { 0U }; /* 32 entries of 12 limbs; entry j ends up holding j times q2 */
+ uint64_t tmp[12U] = { 0U };
+ uint64_t *t0 = table2;
+ uint64_t *t1 = table2 + (uint32_t)12U;
+ make_point_at_inf(t0); /* entry 0 */
+ memcpy(t1, q2, (uint32_t)12U * sizeof(uint64_t)); /* entry 1 = q2 */
+ KRML_MAYBE_FOR15(i, /* presumably a loop over i = 0..14, filling entries 2..31 */
+ (uint32_t)0U,
+ (uint32_t)15U,
+ (uint32_t)1U,
+ uint64_t *t11 = table2 + (i + (uint32_t)1U) * (uint32_t)12U;
+ point_double(tmp, t11); /* entry 2i+2 = double of entry i+1 */
+ memcpy(table2 + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U,
+ tmp,
+ (uint32_t)12U * sizeof(uint64_t));
+ uint64_t *t2 = table2 + ((uint32_t)2U * i + (uint32_t)2U) * (uint32_t)12U;
+ point_add(tmp, q2, t2); /* entry 2i+3 = q2 + entry 2i+2 */
+ memcpy(table2 + ((uint32_t)2U * i + (uint32_t)3U) * (uint32_t)12U,
+ tmp,
+ (uint32_t)12U * sizeof(uint64_t)););
+ uint64_t tmp0[12U] = { 0U };
+ uint32_t i0 = (uint32_t)255U; /* leading window starts at bit 255 */
+ uint64_t bits_c = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar1, i0, (uint32_t)5U); /* presumably the bits of scalar1 from bit 255 upward -- confirm helper semantics */
+ uint32_t bits_l32 = (uint32_t)bits_c;
+ const uint64_t
+ *a_bits_l = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l32 * (uint32_t)12U; /* direct index into the base-point table */
+ memcpy(res, (uint64_t *)a_bits_l, (uint32_t)12U * sizeof(uint64_t)); /* accumulator initialised from the scalar1 leading window */
+ uint32_t i1 = (uint32_t)255U;
+ uint64_t bits_c0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar2, i1, (uint32_t)5U);
+ uint32_t bits_l320 = (uint32_t)bits_c0;
+ const uint64_t *a_bits_l0 = table2 + bits_l320 * (uint32_t)12U; /* direct index into the q2 table */
+ memcpy(tmp0, (uint64_t *)a_bits_l0, (uint32_t)12U * sizeof(uint64_t));
+ point_add(res, res, tmp0); /* add the scalar2 leading-window contribution */
+ uint64_t tmp1[12U] = { 0U };
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)51U; i++) { /* 51 windows of 5 bits cover bits 254..0 */
+ KRML_MAYBE_FOR5(i2, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, point_double(res, res);); /* five in-place doublings per window */
+ uint32_t k = (uint32_t)255U - (uint32_t)5U * i - (uint32_t)5U; /* window offset: 250 down to 0 */
+ uint64_t bits_l = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar2, k, (uint32_t)5U);
+ uint32_t bits_l321 = (uint32_t)bits_l;
+ const uint64_t *a_bits_l1 = table2 + bits_l321 * (uint32_t)12U;
+ memcpy(tmp1, (uint64_t *)a_bits_l1, (uint32_t)12U * sizeof(uint64_t));
+ point_add(res, res, tmp1); /* scalar2 window contribution (multiple of q2) */
+ uint32_t k0 = (uint32_t)255U - (uint32_t)5U * i - (uint32_t)5U; /* same value as k */
+ uint64_t bits_l0 = Hacl_Bignum_Lib_bn_get_bits_u64((uint32_t)4U, scalar1, k0, (uint32_t)5U);
+ uint32_t bits_l322 = (uint32_t)bits_l0;
+ const uint64_t
+ *a_bits_l2 = Hacl_P256_PrecompTable_precomp_basepoint_table_w5 + bits_l322 * (uint32_t)12U;
+ memcpy(tmp1, (uint64_t *)a_bits_l2, (uint32_t)12U * sizeof(uint64_t));
+ point_add(res, res, tmp1); /* scalar1 window contribution (base-point table) */
+ }
+}
+
+static inline uint64_t /* Mask-valued comparison of the 4-limb value f against the value produced by make_order: returns 0 - c, i.e. all-ones when c == 1 and zero when c == 0. */
+bn_is_lt_order_mask4(uint64_t *f)
+{
+ uint64_t tmp[4U] = { 0U };
+ make_order(tmp); /* presumably loads the group order n (helper defined outside this chunk) */
+ uint64_t c = bn_sub4(tmp, f, tmp); /* tmp = f - n; c is presumably the final borrow, 1 exactly when f < n -- confirm in bn_sub4 */
+ return (uint64_t)0U - c; /* unsigned wrap-around turns 1 into 0xFFFFFFFFFFFFFFFF */
+}
+
+static inline uint64_t /* Combines two masks: set where f passes bn_is_lt_order_mask4 and is not flagged by bn_is_zero_mask4. */
+bn_is_lt_order_and_gt_zero_mask4(uint64_t *f) /* callers compare the result against 0xFFFFFFFFFFFFFFFF to accept f */
+{
+ uint64_t is_lt_order = bn_is_lt_order_mask4(f);
+ uint64_t is_eq_zero = bn_is_zero_mask4(f); /* presumably all-ones when f == 0 (helper defined outside this chunk) */
+ return is_lt_order & ~is_eq_zero; /* "below the order" AND NOT "zero" */
+}
+
+static inline void /* Single conditional subtraction of the order: res receives either x or x minus the order, chosen by the borrow of the subtraction. */
+qmod_short(uint64_t *res, uint64_t *x) /* res, x: 4 limbs; callers in this file pass res == x */
+{
+ uint64_t tmp[4U] = { 0U };
+ make_order(tmp); /* presumably loads the group order n */
+ uint64_t c = bn_sub4(tmp, x, tmp); /* tmp = x - n, c presumably the borrow */
+ bn_cmovznz4(res, c, tmp, x); /* presumably res = tmp when c == 0, else x -- confirm argument order in bn_cmovznz4 */
+}
+
+static inline void /* Addition of two 4-limb values through bn_add_mod4, using the value from make_order as modulus. */
+qadd(uint64_t *res, uint64_t *x, uint64_t *y)
+{
+ uint64_t n[4U] = { 0U };
+ make_order(n); /* presumably loads the group order n */
+ bn_add_mod4(res, n, x, y); /* presumably res = (x + y) mod n (helper defined outside this chunk) */
+}
+
+static inline void /* Word-by-word reduction of the 8-limb value x into the 4-limb res, followed by one conditional subtraction of n. */
+qmont_reduction(uint64_t *res, uint64_t *x) /* x is used as scratch and is overwritten; callers in this file pass a local 8-limb buffer */
+{ /* NOTE(review): this has the shape of a Montgomery reduction modulo the group order -- confirm against the upstream specification. */
+ uint64_t n[4U] = { 0U };
+ make_order(n); /* presumably loads the group order n */
+ uint64_t c0 = (uint64_t)0U; /* carry chained across the four outer iterations */
+ KRML_MAYBE_FOR4(
+ i0,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t qj = (uint64_t)0xccd1c8aaee00bc4fU * x[i0]; /* per-limb multiplier; the constant is presumably -n^-1 mod 2^64 -- TODO confirm */
+ uint64_t *res_j0 = x + i0;
+ uint64_t c = (uint64_t)0U;
+ {
+ uint64_t a_i = n[(uint32_t)4U * (uint32_t)0U];
+ uint64_t *res_i0 = res_j0 + (uint32_t)4U * (uint32_t)0U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, qj, c, res_i0); /* presumably *res_i0 += a_i * qj + c, returning the high word */
+ uint64_t a_i0 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t *res_i1 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, qj, c, res_i1);
+ uint64_t a_i1 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t *res_i2 = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, qj, c, res_i2);
+ uint64_t a_i2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t *res_i = res_j0 + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, qj, c, res_i);
+ } uint64_t r = c;
+ uint64_t c1 = r;
+ uint64_t *resb = x + (uint32_t)4U + i0;
+ uint64_t res_j = x[(uint32_t)4U + i0];
+ c0 = Lib_IntTypes_Intrinsics_add_carry_u64(c0, c1, res_j, resb);); /* fold the row carry into limb 4 + i0, keeping the outer carry in c0 */
+ memcpy(res, x + (uint32_t)4U, (uint32_t)4U * sizeof(uint64_t)); /* the upper four limbs of x are the candidate result */
+ uint64_t c00 = c0; /* carry out of the candidate */
+ uint64_t tmp[4U] = { 0U };
+ uint64_t c = (uint64_t)0U;
+ { /* tmp = res - n, limb by limb, with the borrow threaded through c */
+ uint64_t t1 = res[(uint32_t)4U * (uint32_t)0U];
+ uint64_t t20 = n[(uint32_t)4U * (uint32_t)0U];
+ uint64_t *res_i0 = tmp + (uint32_t)4U * (uint32_t)0U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0);
+ uint64_t t10 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t t21 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t *res_i1 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1);
+ uint64_t t11 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t t22 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t *res_i2 = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2);
+ uint64_t t12 = res[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t t2 = n[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t *res_i = tmp + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i);
+ }
+ uint64_t c1 = c;
+ uint64_t c2 = c00 - c1; /* all-ones when c00 == 0 and c1 == 1 (no carry-out, subtraction borrowed); zero when c00 == c1 */
+ KRML_MAYBE_FOR4(i,
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = res;
+ uint64_t x1 = (c2 & res[i]) | (~c2 & tmp[i]); /* keep res where c2 is set, otherwise take the subtracted value */
+ os[i] = x1;);
+}
+
+static inline void /* Runs qmont_reduction on x zero-extended to 8 limbs (presumably leaving the Montgomery representation modulo the order). */
+from_qmont(uint64_t *res, uint64_t *x) /* res, x: 4 limbs; x itself is not modified because a local copy is reduced */
+{
+ uint64_t tmp[8U] = { 0U }; /* upper four limbs stay zero */
+ memcpy(tmp, x, (uint32_t)4U * sizeof(uint64_t));
+ qmont_reduction(res, tmp); /* qmont_reduction overwrites its input, hence the scratch copy */
+}
+
+static inline void /* Product of two 4-limb values: full 8-limb product via bn_mul4, then qmont_reduction into res. */
+qmul(uint64_t *res, uint64_t *x, uint64_t *y) /* callers in this file pass res aliasing x and/or y; the product is formed in a local buffer first */
+{
+ uint64_t tmp[8U] = { 0U };
+ bn_mul4(tmp, x, y); /* presumably tmp = x * y as an 8-limb integer (helper defined outside this chunk) */
+ qmont_reduction(res, tmp);
+}
+
+static inline void /* Square of a 4-limb value: full 8-limb square via bn_sqr4, then qmont_reduction into res. */
+qsqr(uint64_t *res, uint64_t *x) /* callers in this file pass res == x; the square is formed in a local buffer first */
+{
+ uint64_t tmp[8U] = { 0U };
+ bn_sqr4(tmp, x); /* presumably tmp = x * x as an 8-limb integer (helper defined outside this chunk) */
+ qmont_reduction(res, tmp);
+}
+
+bool /* Derives a public key from private_key; returns true only when the private key passed bn_is_lt_order_and_gt_zero_mask4. */
+Hacl_Impl_P256_DH_ecp256dh_i(uint8_t *public_key, uint8_t *private_key) /* NOTE: public_key is written on every call, including when false is returned */
+{
+ uint64_t tmp[16U] = { 0U }; /* limbs 0..3: scalar sk, limbs 4..15: point pk */
+ uint64_t *sk = tmp;
+ uint64_t *pk = tmp + (uint32_t)4U;
+ bn_from_bytes_be4(sk, private_key);
+ uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(sk); /* all-ones when sk is accepted */
+ uint64_t oneq[4U] = { 0U };
+ oneq[0U] = (uint64_t)1U; /* oneq = the integer 1 */
+ oneq[1U] = (uint64_t)0U;
+ oneq[2U] = (uint64_t)0U;
+ oneq[3U] = (uint64_t)0U;
+ KRML_MAYBE_FOR4(i, /* branch-free select: keep sk when the mask is all-ones, replace it by 1 when the mask is zero */
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = sk;
+ uint64_t uu____0 = oneq[i];
+ uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0));
+ os[i] = x;);
+ uint64_t is_sk_valid = is_b_valid;
+ point_mul_g(pk, sk); /* runs regardless of validity, on the possibly substituted scalar */
+ point_store(public_key, pk); /* presumably serialises pk into the caller's buffer (helper defined outside this chunk) */
+ return is_sk_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+}
+
+bool /* Computes a shared secret from their_pubkey and private_key; returns true only when both inputs were accepted. */
+Hacl_Impl_P256_DH_ecp256dh_r(
+ uint8_t *shared_secret, /* written only when their_pubkey was accepted; in that case it is written even if private_key was rejected */
+ uint8_t *their_pubkey,
+ uint8_t *private_key)
+{
+ uint64_t tmp[16U] = { 0U }; /* limbs 0..3: scalar sk, limbs 4..15: point pk */
+ uint64_t *sk = tmp;
+ uint64_t *pk = tmp + (uint32_t)4U;
+ bool is_pk_valid = load_point_vartime(pk, their_pubkey); /* parse and validate the peer point */
+ bn_from_bytes_be4(sk, private_key);
+ uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(sk); /* all-ones when sk is accepted */
+ uint64_t oneq[4U] = { 0U };
+ oneq[0U] = (uint64_t)1U; /* oneq = the integer 1 */
+ oneq[1U] = (uint64_t)0U;
+ oneq[2U] = (uint64_t)0U;
+ oneq[3U] = (uint64_t)0U;
+ KRML_MAYBE_FOR4(i, /* branch-free select: keep sk when the mask is all-ones, replace it by 1 when the mask is zero */
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = sk;
+ uint64_t uu____0 = oneq[i];
+ uint64_t x = uu____0 ^ (is_b_valid & (sk[i] ^ uu____0));
+ os[i] = x;);
+ uint64_t is_sk_valid = is_b_valid;
+ uint64_t ss_proj[12U] = { 0U };
+ if (is_pk_valid) { /* this branch depends only on the peer's public key */
+ point_mul(ss_proj, sk, pk);
+ point_store(shared_secret, ss_proj);
+ }
+ return is_sk_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU && is_pk_valid;
+}
+
+static inline void /* Fixed chain of qsqr/qmul calls raising r to a constant exponent; presumably r^(n-2), the inverse modulo the group order n -- TODO confirm the exponent. */
+qinv(uint64_t *res, uint64_t *r) /* res, r: 4 limbs; "r^k" in the comments below means k-fold product under qmul */
+{
+ uint64_t tmp[28U] = { 0U }; /* seven 4-limb slots for small precomputed powers */
+ uint64_t *x6 = tmp;
+ uint64_t *x_11 = tmp + (uint32_t)4U;
+ uint64_t *x_101 = tmp + (uint32_t)8U;
+ uint64_t *x_111 = tmp + (uint32_t)12U;
+ uint64_t *x_1111 = tmp + (uint32_t)16U;
+ uint64_t *x_10101 = tmp + (uint32_t)20U;
+ uint64_t *x_101111 = tmp + (uint32_t)24U;
+ memcpy(x6, r, (uint32_t)4U * sizeof(uint64_t));
+ {
+ qsqr(x6, x6); /* x6 = r^2 */
+ }
+ qmul(x_11, x6, r); /* r^3, exponent 0b11 */
+ qmul(x_101, x6, x_11); /* r^5, exponent 0b101 */
+ qmul(x_111, x6, x_101); /* r^7, exponent 0b111 */
+ memcpy(x6, x_101, (uint32_t)4U * sizeof(uint64_t));
+ {
+ qsqr(x6, x6); /* x6 = r^10 */
+ }
+ qmul(x_1111, x_101, x6); /* r^15, exponent 0b1111 */
+ {
+ qsqr(x6, x6); /* x6 = r^20 */
+ }
+ qmul(x_10101, x6, r); /* r^21, exponent 0b10101 */
+ memcpy(x6, x_10101, (uint32_t)4U * sizeof(uint64_t));
+ {
+ qsqr(x6, x6); /* x6 = r^42 */
+ }
+ qmul(x_101111, x_101, x6); /* r^47, exponent 0b101111 */
+ qmul(x6, x_10101, x6); /* x6 = r^63, exponent 2^6 - 1 */
+ uint64_t tmp1[4U] = { 0U };
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(x6, x6););
+ qmul(x6, x6, x_11); /* x6: exponent 2^8 - 1 */
+ memcpy(tmp1, x6, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR8(i, (uint32_t)0U, (uint32_t)8U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x6); /* tmp1: exponent 2^16 - 1 */
+ memcpy(x6, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ KRML_MAYBE_FOR16(i, (uint32_t)0U, (uint32_t)16U, (uint32_t)1U, qsqr(x6, x6););
+ qmul(x6, x6, tmp1); /* x6: exponent 2^32 - 1 */
+ memcpy(tmp1, x6, (uint32_t)4U * sizeof(uint64_t));
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)64U; i++) {
+ qsqr(tmp1, tmp1);
+ }
+ qmul(tmp1, tmp1, x6); /* tmp1: exponent 0xffffffff00000000ffffffff */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) {
+ qsqr(tmp1, tmp1);
+ }
+ qmul(tmp1, tmp1, x6); /* tmp1: exponent 0xffffffff00000000ffffffffffffffff */
+ KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1);); /* from here on: k squarings shift the exponent left by k bits, then a multiply appends the named bit pattern */
+ qmul(tmp1, tmp1, x_101111);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_111);
+ KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_11);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_1111);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_10101);
+ KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101);
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101);
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_111);
+ KRML_MAYBE_FOR9(i, (uint32_t)0U, (uint32_t)9U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101111);
+ KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_1111);
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, r);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, r);
+ KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_1111);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_111);
+ KRML_MAYBE_FOR4(i, (uint32_t)0U, (uint32_t)4U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_111);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_111);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101);
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_11);
+ KRML_MAYBE_FOR10(i, (uint32_t)0U, (uint32_t)10U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_101111);
+ KRML_MAYBE_FOR2(i, (uint32_t)0U, (uint32_t)2U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_11);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_11);
+ KRML_MAYBE_FOR5(i, (uint32_t)0U, (uint32_t)5U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_11);
+ KRML_MAYBE_FOR3(i, (uint32_t)0U, (uint32_t)3U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, r);
+ KRML_MAYBE_FOR7(i, (uint32_t)0U, (uint32_t)7U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_10101);
+ KRML_MAYBE_FOR6(i, (uint32_t)0U, (uint32_t)6U, (uint32_t)1U, qsqr(tmp1, tmp1););
+ qmul(tmp1, tmp1, x_1111); /* last step of the chain */
+ memcpy(x6, tmp1, (uint32_t)4U * sizeof(uint64_t));
+ memcpy(res, x6, (uint32_t)4U * sizeof(uint64_t)); /* res is written only here, after the last read of r */
+}
+
+static inline void /* res = qmul(sinv, from_qmont(b)). NOTE: the output is the LAST parameter, unlike the other q* helpers in this file. */
+qmul_mont(uint64_t *sinv, uint64_t *b, uint64_t *res) /* all three are 4-limb values; b is not modified */
+{
+ uint64_t tmp[4U] = { 0U };
+ from_qmont(tmp, b); /* one reduction pass over b into a local */
+ qmul(res, sinv, tmp);
+}
+
+static inline bool /* ECDSA verification core: checks signature (r, s) for the already-reduced message scalar m_q under public_key. */
+ecdsa_verify_msg_as_qelem(
+ uint64_t *m_q, /* 4-limb message scalar prepared by the caller */
+ uint8_t *public_key, /* bytes parsed by load_point_vartime */
+ uint8_t *signature_r, /* bytes parsed by bn_from_bytes_be4 */
+ uint8_t *signature_s)
+{
+ uint64_t tmp[28U] = { 0U }; /* limbs 0..11: pk, 12..15: r_q, 16..19: s_q, 20..23: u1, 24..27: u2 */
+ uint64_t *pk = tmp;
+ uint64_t *r_q = tmp + (uint32_t)12U;
+ uint64_t *s_q = tmp + (uint32_t)16U;
+ uint64_t *u1 = tmp + (uint32_t)20U;
+ uint64_t *u2 = tmp + (uint32_t)24U;
+ bool is_pk_valid = load_point_vartime(pk, public_key);
+ bn_from_bytes_be4(r_q, signature_r);
+ bn_from_bytes_be4(s_q, signature_s);
+ uint64_t is_r_valid = bn_is_lt_order_and_gt_zero_mask4(r_q);
+ uint64_t is_s_valid = bn_is_lt_order_and_gt_zero_mask4(s_q);
+ bool
+ is_rs_valid =
+ is_r_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU && is_s_valid == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+ if (!(is_pk_valid && is_rs_valid)) { /* reject before any curve arithmetic when key, r or s is rejected */
+ return false;
+ }
+ uint64_t sinv[4U] = { 0U };
+ qinv(sinv, s_q); /* sinv derived from s (presumably its modular inverse) */
+ qmul_mont(sinv, m_q, u1); /* u1 from sinv and m_q (output is the third argument) */
+ qmul_mont(sinv, r_q, u2); /* u2 from sinv and r_q */
+ uint64_t res[12U] = { 0U };
+ point_mul_double_g(res, u1, u2, pk); /* u1 is paired with the base-point table, u2 with multiples of pk */
+ if (is_point_at_inf_vartime(res)) { /* the point at infinity is rejected */
+ return false;
+ }
+ uint64_t x[4U] = { 0U };
+ to_aff_point_x(x, res); /* presumably the affine x coordinate of res (helper defined outside this chunk) */
+ qmod_short(x, x); /* one conditional subtraction of the order before comparing with r */
+ bool res1 = bn_is_eq_vartime4(x, r_q); /* signature is accepted when x matches r */
+ return res1;
+}
+
+static inline bool /* ECDSA signing core: writes r and s into signature; returns true only when key and nonce were accepted and neither r nor s is zero. */
+ecdsa_sign_msg_as_qelem(
+ uint8_t *signature, /* written on every call, including when false is returned */
+ uint64_t *m_q, /* 4-limb message scalar; NOTE: modified in place by from_qmont below */
+ uint8_t *private_key,
+ uint8_t *nonce)
+{ /* NOTE(review): the locals holding the key, nonce and kinv are not wiped in this function. */
+ uint64_t rsdk_q[16U] = { 0U }; /* limbs 0..3: r_q, 4..7: s_q, 8..11: d_a (key), 12..15: k_q (nonce) */
+ uint64_t *r_q = rsdk_q;
+ uint64_t *s_q = rsdk_q + (uint32_t)4U;
+ uint64_t *d_a = rsdk_q + (uint32_t)8U;
+ uint64_t *k_q = rsdk_q + (uint32_t)12U;
+ bn_from_bytes_be4(d_a, private_key);
+ uint64_t is_b_valid0 = bn_is_lt_order_and_gt_zero_mask4(d_a); /* all-ones when the private key is accepted */
+ uint64_t oneq0[4U] = { 0U };
+ oneq0[0U] = (uint64_t)1U; /* oneq0 = the integer 1 */
+ oneq0[1U] = (uint64_t)0U;
+ oneq0[2U] = (uint64_t)0U;
+ oneq0[3U] = (uint64_t)0U;
+ KRML_MAYBE_FOR4(i, /* branch-free select: keep d_a when the mask is all-ones, replace it by 1 when the mask is zero */
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = d_a;
+ uint64_t uu____0 = oneq0[i];
+ uint64_t x = uu____0 ^ (is_b_valid0 & (d_a[i] ^ uu____0));
+ os[i] = x;);
+ uint64_t is_sk_valid = is_b_valid0;
+ bn_from_bytes_be4(k_q, nonce);
+ uint64_t is_b_valid = bn_is_lt_order_and_gt_zero_mask4(k_q); /* all-ones when the nonce is accepted */
+ uint64_t oneq[4U] = { 0U };
+ oneq[0U] = (uint64_t)1U; /* oneq = the integer 1 */
+ oneq[1U] = (uint64_t)0U;
+ oneq[2U] = (uint64_t)0U;
+ oneq[3U] = (uint64_t)0U;
+ KRML_MAYBE_FOR4(i, /* same branch-free select for the nonce */
+ (uint32_t)0U,
+ (uint32_t)4U,
+ (uint32_t)1U,
+ uint64_t *os = k_q;
+ uint64_t uu____1 = oneq[i];
+ uint64_t x = uu____1 ^ (is_b_valid & (k_q[i] ^ uu____1));
+ os[i] = x;);
+ uint64_t is_nonce_valid = is_b_valid;
+ uint64_t are_sk_nonce_valid = is_sk_valid & is_nonce_valid;
+ uint64_t p[12U] = { 0U };
+ point_mul_g(p, k_q); /* fixed-base multiplication by the (possibly substituted) nonce */
+ to_aff_point_x(r_q, p); /* presumably the affine x coordinate of p */
+ qmod_short(r_q, r_q); /* r after one conditional subtraction of the order */
+ uint64_t kinv[4U] = { 0U };
+ qinv(kinv, k_q); /* kinv derived from the nonce (presumably its modular inverse) */
+ qmul(s_q, r_q, d_a); /* s_q = qmul of r and the private key */
+ from_qmont(m_q, m_q); /* overwrites the caller's m_q */
+ qadd(s_q, m_q, s_q); /* add the message term */
+ qmul(s_q, kinv, s_q); /* final s */
+ bn2_to_bytes_be4(signature, r_q, s_q); /* presumably serialises r then s into signature (helper defined outside this chunk) */
+ uint64_t is_r_zero = bn_is_zero_mask4(r_q);
+ uint64_t is_s_zero = bn_is_zero_mask4(s_q);
+ uint64_t m = are_sk_nonce_valid & (~is_r_zero & ~is_s_zero); /* success needs accepted key and nonce and non-zero r and s */
+ bool res = m == (uint64_t)0xFFFFFFFFFFFFFFFFU;
+ return res;
+}
+
+/*******************************************************************************
+
+ Verified C library for ECDSA and ECDH functions over the P-256 NIST curve.
+
+ This module implements signing and verification, key validation, conversions
+ between various point representations, and ECDH key agreement.
+
+*******************************************************************************/
+
+/*****************/
+/* ECDSA signing */
+/*****************/
+
+/**
+Create an ECDSA signature WITHOUT hashing first.
+
+ This function is intended to receive a hash of the input.
+ For convenience, we recommend using one of the hash-and-sign combined functions above.
+
+ The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`).
+
+ NOTE: The equivalent functions in OpenSSL and Fiat-Crypto both accept inputs
+ smaller than 32 bytes. These libraries left-pad the input with enough zeroes to
+ reach the minimum 32 byte size. Clients who need behavior identical to OpenSSL
+ need to perform the left-padding themselves.
+
+ The function returns `true` for successful creation of an ECDSA signature and `false` otherwise.
+
+ The outparam `signature` (R || S) points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
+ The arguments `private_key` and `nonce` point to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `private_key` and `nonce` are valid values:
+ • 0 < `private_key` < the order of the curve
+ • 0 < `nonce` < the order of the curve
+*/
+bool
+Hacl_P256_ecdsa_sign_p256_without_hash(
+ uint8_t *signature,
+ uint32_t msg_len, /* not checked here: the caller must guarantee msg_len >= 32 */
+ uint8_t *msg, /* only the first 32 bytes are read */
+ uint8_t *private_key,
+ uint8_t *nonce)
+{
+ uint64_t m_q[4U] = { 0U };
+ uint8_t mHash[32U] = { 0U };
+ memcpy(mHash, msg, (uint32_t)32U * sizeof(uint8_t)); /* unconditional 32-byte read from msg */
+ KRML_HOST_IGNORE(msg_len); /* msg_len is deliberately unused */
+ uint8_t *mHash32 = mHash;
+ bn_from_bytes_be4(m_q, mHash32);
+ qmod_short(m_q, m_q); /* one conditional subtraction of the order on the message scalar */
+ bool res = ecdsa_sign_msg_as_qelem(signature, m_q, private_key, nonce);
+ return res;
+}
+
+/**********************/
+/* ECDSA verification */
+/**********************/
+
+/**
+Verify an ECDSA signature WITHOUT hashing first.
+
+ This function is intended to receive a hash of the input.
+ For convenience, we recommend using one of the hash-and-verify combined functions above.
+
+ The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`).
+
+ The function returns `true` if the signature is valid and `false` otherwise.
+
+ The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
+ The argument `public_key` (x || y) points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The arguments `signature_r` and `signature_s` point to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `public_key` is valid
+*/
+bool
+Hacl_P256_ecdsa_verif_without_hash(
+ uint32_t msg_len, /* not checked here: the caller must guarantee msg_len >= 32 */
+ uint8_t *msg, /* only the first 32 bytes are read */
+ uint8_t *public_key,
+ uint8_t *signature_r,
+ uint8_t *signature_s)
+{
+ uint64_t m_q[4U] = { 0U };
+ uint8_t mHash[32U] = { 0U };
+ memcpy(mHash, msg, (uint32_t)32U * sizeof(uint8_t)); /* unconditional 32-byte read from msg */
+ KRML_HOST_IGNORE(msg_len); /* msg_len is deliberately unused */
+ uint8_t *mHash32 = mHash;
+ bn_from_bytes_be4(m_q, mHash32);
+ qmod_short(m_q, m_q); /* one conditional subtraction of the order on the message scalar */
+ bool res = ecdsa_verify_msg_as_qelem(m_q, public_key, signature_r, signature_s);
+ return res;
+}
+
+/******************/
+/* Key validation */
+/******************/
+
+/**
+Public key validation.
+
+ The function returns `true` if a public key is valid and `false` otherwise.
+
+ The argument `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The public key (x || y) is valid (with respect to SP 800-56A):
+ • the public key is not the “point at infinity”, represented as O.
+ • the affine x and y coordinates of the point represented by the public key are
+ in the range [0, p – 1] where p is the prime defining the finite field.
+ • y^2 = x^3 + ax + b where a and b are the coefficients of the curve equation.
+ The last extract is taken from: https://neilmadden.blog/2017/05/17/so-how-do-you-validate-nist-ecdh-public-keys/
+*/
+bool
+Hacl_P256_validate_public_key(uint8_t *public_key)
+{
+ uint64_t point_jac[12U] = { 0U }; /* scratch for the parsed 12-limb point; discarded after the check */
+ bool res = load_point_vartime(point_jac, public_key); /* validity is exactly the loader's success flag */
+ return res;
+}
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool
+Hacl_P256_validate_private_key(uint8_t *private_key)
+{
+ uint64_t bn_sk[4U] = { 0U };
+ bn_from_bytes_be4(bn_sk, private_key); /* parse the key bytes into 4 limbs */
+ uint64_t res = bn_is_lt_order_and_gt_zero_mask4(bn_sk); /* mask-valued range check */
+ return res == (uint64_t)0xFFFFFFFFFFFFFFFFU; /* only the all-ones mask counts as valid */
+}
+
+/*******************************************************************************
+ Parsing and Serializing public keys.
+
+ A public key is a point (x, y) on the P-256 NIST curve.
+
+ The point can be represented in the following three ways.
+ • raw = [ x || y ], 64 bytes
+ • uncompressed = [ 0x04 || x || y ], 65 bytes
+ • compressed = [ (0x02 for even `y` and 0x03 for odd `y`) || x ], 33 bytes
+
+*******************************************************************************/
+
+/**
+Convert a public key from uncompressed to its raw form.
+
+ The function returns `true` for successful conversion of a public key and `false` otherwise.
+
+ The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `pk` points to 65 bytes of valid memory, i.e., uint8_t[65].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+bool
+Hacl_P256_uncompressed_to_raw(uint8_t *pk, uint8_t *pk_raw)
+{
+ uint8_t pk0 = pk[0U];
+ if (pk0 != (uint8_t)0x04U) { /* anything but the 0x04 prefix is rejected; pk_raw is left untouched */
+ return false;
+ }
+ memcpy(pk_raw, pk + (uint32_t)1U, (uint32_t)64U * sizeof(uint8_t)); /* drop the prefix byte and copy the 64 bytes that follow */
+ return true;
+}
+
+/**
+Convert a public key from compressed to its raw form.
+
+ The function returns `true` for successful conversion of a public key and `false` otherwise.
+
+ The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
+
+ The function also checks whether (x, y) is a valid point.
+*/
+bool
+Hacl_P256_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw)
+{
+ uint64_t xa[4U] = { 0U };
+ uint64_t ya[4U] = { 0U };
+ uint8_t *pk_xb = pk + (uint32_t)1U; /* x bytes follow the prefix byte */
+ bool b = aff_point_decompress_vartime(xa, ya, pk);
+ if (b) { /* pk_raw is written only on success */
+ memcpy(pk_raw, pk_xb, (uint32_t)32U * sizeof(uint8_t)); /* x is copied from the input bytes rather than re-serialised from xa */
+ bn_to_bytes_be4(pk_raw + (uint32_t)32U, ya); /* recovered y goes into the second half */
+ }
+ return b;
+}
+
+/**
+Convert a public key from raw to its uncompressed form.
+
+ The outparam `pk` points to 65 bytes of valid memory, i.e., uint8_t[65].
+ The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+void
+Hacl_P256_raw_to_uncompressed(uint8_t *pk_raw, uint8_t *pk)
+{
+ pk[0U] = (uint8_t)0x04U; /* prefix byte of the uncompressed form */
+ memcpy(pk + (uint32_t)1U, pk_raw, (uint32_t)64U * sizeof(uint8_t)); /* the 64 raw bytes follow unchanged */
+}
+
+/**
+Convert a public key from raw to its compressed form.
+
+ The outparam `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
+ The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+void
+Hacl_P256_raw_to_compressed(uint8_t *pk_raw, uint8_t *pk)
+{
+ uint8_t *pk_x = pk_raw; /* first 32 bytes: x */
+ uint8_t *pk_y = pk_raw + (uint32_t)32U; /* next 32 bytes: y */
+ uint64_t bn_f[4U] = { 0U };
+ bn_from_bytes_be4(bn_f, pk_y); /* y is parsed only to obtain its parity */
+ uint64_t is_odd_f = bn_f[0U] & (uint64_t)1U; /* low bit of limb 0 */
+ pk[0U] = (uint8_t)is_odd_f + (uint8_t)0x02U; /* 0x02 when the bit is 0, 0x03 when it is 1 */
+ memcpy(pk + (uint32_t)1U, pk_x, (uint32_t)32U * sizeof(uint8_t));
+}
+
+/******************/
+/* ECDH agreement */
+/******************/
+
+/**
+Compute the public key from the private key.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The outparam `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve.
+*/
+bool
+Hacl_P256_dh_initiator(uint8_t *public_key, uint8_t *private_key)
+{
+ return Hacl_Impl_P256_DH_ecp256dh_i(public_key, private_key); /* thin public wrapper; note the callee writes public_key even when it returns false */
+}
+
+/**
+Execute the Diffie-Hellman key exchange.
+
+ The function returns `true` for successful creation of an ECDH shared secret and
+ `false` otherwise.
+
+ The outparam `shared_secret` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `their_pubkey` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `private_key` and `their_pubkey` are valid.
+*/
+bool
+Hacl_P256_dh_responder(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t *private_key)
+{
+ return Hacl_Impl_P256_DH_ecp256dh_r(shared_secret, their_pubkey, private_key); /* thin public wrapper; the callee writes shared_secret only when their_pubkey was accepted */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_P256.h b/security/nss/lib/freebl/verified/Hacl_P256.h
new file mode 100644
index 0000000000..a725df844e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P256.h
@@ -0,0 +1,237 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_P256_H
+#define __Hacl_P256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+#include "lib_intrinsics.h"
+
+/*******************************************************************************
+
+ Verified C library for ECDSA and ECDH functions over the P-256 NIST curve.
+
+ This module implements signing and verification, key validation, conversions
+ between various point representations, and ECDH key agreement.
+
+*******************************************************************************/
+
+/*****************/
+/* ECDSA signing */
+/*****************/
+
+/**
+Create an ECDSA signature WITHOUT hashing first.
+
+ This function is intended to receive a hash of the input.
+ For convenience, we recommend using a hash-and-sign combined function where one is available (none is declared in this header).
+
+ The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`).
+
+ NOTE: The equivalent functions in OpenSSL and Fiat-Crypto both accept inputs
+ smaller than 32 bytes. These libraries left-pad the input with enough zeroes to
+ reach the minimum 32 byte size. Clients who need behavior identical to OpenSSL
+ need to perform the left-padding themselves.
+
+ The function returns `true` for successful creation of an ECDSA signature and `false` otherwise.
+
+ The outparam `signature` (R || S) points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
+ The arguments `private_key` and `nonce` point to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `private_key` and `nonce` are valid values:
+ • 0 < `private_key` < the order of the curve
+ • 0 < `nonce` < the order of the curve
+*/
+bool
+Hacl_P256_ecdsa_sign_p256_without_hash(
+ uint8_t *signature,
+ uint32_t msg_len,
+ uint8_t *msg,
+ uint8_t *private_key,
+ uint8_t *nonce);
+
+/**********************/
+/* ECDSA verification */
+/**********************/
+
+/**
+Verify an ECDSA signature WITHOUT hashing first.
+
+ This function is intended to receive a hash of the input.
+ For convenience, we recommend using a hash-and-verify combined function where one is available (none is declared in this header).
+
+ The argument `msg` MUST be at least 32 bytes (i.e. `msg_len >= 32`).
+
+ The function returns `true` if the signature is valid and `false` otherwise.
+
+ The argument `msg` points to `msg_len` bytes of valid memory, i.e., uint8_t[msg_len].
+ The argument `public_key` (x || y) points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The arguments `signature_r` and `signature_s` point to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `public_key` is valid
+*/
+bool
+Hacl_P256_ecdsa_verif_without_hash(
+ uint32_t msg_len,
+ uint8_t *msg,
+ uint8_t *public_key,
+ uint8_t *signature_r,
+ uint8_t *signature_s);
+
+/******************/
+/* Key validation */
+/******************/
+
+/**
+Public key validation.
+
+ The function returns `true` if a public key is valid and `false` otherwise.
+
+ The argument `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The public key (x || y) is valid (with respect to SP 800-56A):
+ • the public key is not the “point at infinity”, represented as O.
+ • the affine x and y coordinates of the point represented by the public key are
+ in the range [0, p – 1] where p is the prime defining the finite field.
+ • y^2 = x^3 + ax + b where a and b are the coefficients of the curve equation.
+ The last extract is taken from: https://neilmadden.blog/2017/05/17/so-how-do-you-validate-nist-ecdh-public-keys/
+*/
+bool Hacl_P256_validate_public_key(uint8_t *public_key);
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool Hacl_P256_validate_private_key(uint8_t *private_key);
+
+/*******************************************************************************
+ Parsing and Serializing public keys.
+
+ A public key is a point (x, y) on the P-256 NIST curve.
+
+ The point can be represented in the following three ways.
+ • raw = [ x || y ], 64 bytes
+ • uncompressed = [ 0x04 || x || y ], 65 bytes
+ • compressed = [ (0x02 for even `y` and 0x03 for odd `y`) || x ], 33 bytes
+
+*******************************************************************************/
+
+/**
+Convert a public key from uncompressed to its raw form.
+
+ The function returns `true` for successful conversion of a public key and `false` otherwise.
+
+ The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `pk` points to 65 bytes of valid memory, i.e., uint8_t[65].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+bool Hacl_P256_uncompressed_to_raw(uint8_t *pk, uint8_t *pk_raw);
+
+/**
+Convert a public key from compressed to its raw form.
+
+ The function returns `true` for successful conversion of a public key and `false` otherwise.
+
+ The outparam `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
+
+ The function also checks whether (x, y) is a valid point.
+*/
+bool Hacl_P256_compressed_to_raw(uint8_t *pk, uint8_t *pk_raw);
+
+/**
+Convert a public key from raw to its uncompressed form.
+
+ The outparam `pk` points to 65 bytes of valid memory, i.e., uint8_t[65].
+ The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+void Hacl_P256_raw_to_uncompressed(uint8_t *pk_raw, uint8_t *pk);
+
+/**
+Convert a public key from raw to its compressed form.
+
+ The outparam `pk` points to 33 bytes of valid memory, i.e., uint8_t[33].
+ The argument `pk_raw` points to 64 bytes of valid memory, i.e., uint8_t[64].
+
+ The function DOESN'T check whether (x, y) is a valid point.
+*/
+void Hacl_P256_raw_to_compressed(uint8_t *pk_raw, uint8_t *pk);
+
+/******************/
+/* ECDH agreement */
+/******************/
+
+/**
+Compute the public key from the private key.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The outparam `public_key` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve.
+*/
+bool Hacl_P256_dh_initiator(uint8_t *public_key, uint8_t *private_key);
+
+/**
+Execute the Diffie-Hellman key exchange.
+
+ The function returns `true` for successful creation of an ECDH shared secret and
+ `false` otherwise.
+
+ The outparam `shared_secret` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `their_pubkey` points to 64 bytes of valid memory, i.e., uint8_t[64].
+ The argument `private_key` points to 32 bytes of valid memory, i.e., uint8_t[32].
+
+ The function also checks whether `private_key` and `their_pubkey` are valid.
+*/
+bool
+Hacl_P256_dh_responder(uint8_t *shared_secret, uint8_t *their_pubkey, uint8_t *private_key);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_P256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_P384.c b/security/nss/lib/freebl/verified/Hacl_P384.c
new file mode 100644
index 0000000000..bd06958f2b
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P384.c
@@ -0,0 +1,126 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_P384.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum_Base.h"
+
+static inline uint64_t
+bn_is_eq_mask(uint64_t *x, uint64_t *y) /* returns the AND of the per-limb equality masks of the 6-limb values x and y */
+{
+ uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; /* start from all-ones; each limb comparison can only clear bits */
+ KRML_MAYBE_FOR6(i, /* i = 0..5, one step per 64-bit limb */
+ (uint32_t)0U,
+ (uint32_t)6U,
+ (uint32_t)1U,
+ uint64_t uu____0 = FStar_UInt64_eq_mask(x[i], y[i]); /* defined elsewhere; presumably all-ones when equal, zero otherwise */
+ mask = uu____0 & mask;);
+ uint64_t mask1 = mask;
+ return mask1;
+}
+
+static inline uint64_t
+bn_sub(uint64_t *a, uint64_t *b, uint64_t *c) /* limb-wise a = b - c over 6 limbs with a chained borrow; returns the final borrow (intrinsic semantics assumed from its name) */
+{
+ uint64_t c1 = (uint64_t)0U; /* running borrow, initially none */
+ { /* limbs 0..3, written out explicitly */
+ uint64_t t1 = b[(uint32_t)4U * (uint32_t)0U];
+ uint64_t t20 = c[(uint32_t)4U * (uint32_t)0U]; /* both operands are read before a[k] is written, so a may alias b or c */
+ uint64_t *res_i0 = a + (uint32_t)4U * (uint32_t)0U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0);
+ uint64_t t10 = b[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t t21 = c[(uint32_t)4U * (uint32_t)0U + (uint32_t)1U];
+ uint64_t *res_i1 = a + (uint32_t)4U * (uint32_t)0U + (uint32_t)1U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1);
+ uint64_t t11 = b[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t t22 = c[(uint32_t)4U * (uint32_t)0U + (uint32_t)2U];
+ uint64_t *res_i2 = a + (uint32_t)4U * (uint32_t)0U + (uint32_t)2U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2);
+ uint64_t t12 = b[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t t2 = c[(uint32_t)4U * (uint32_t)0U + (uint32_t)3U];
+ uint64_t *res_i = a + (uint32_t)4U * (uint32_t)0U + (uint32_t)3U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i);
+ }
+ KRML_MAYBE_FOR2(i, /* remaining limbs 4 and 5 */
+ (uint32_t)4U,
+ (uint32_t)6U,
+ (uint32_t)1U,
+ uint64_t t1 = b[i];
+ uint64_t t2 = c[i];
+ uint64_t *res_i = a + i;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i););
+ uint64_t c10 = c1;
+ return c10; /* borrow out of the most significant limb */
+}
+
+static inline void
+bn_from_bytes_be(uint64_t *a, uint8_t *b) /* fills the 6 limbs of a from the 48 bytes at b; a[0] comes from the last 8 bytes */
+{
+ KRML_MAYBE_FOR6(i, /* i = 0..5 */
+ (uint32_t)0U,
+ (uint32_t)6U,
+ (uint32_t)1U,
+ uint64_t *os = a;
+ uint64_t u = load64_be(b + ((uint32_t)6U - i - (uint32_t)1U) * (uint32_t)8U); /* limb i is read from byte offset (5 - i) * 8 */
+ uint64_t x = u;
+ os[i] = x;);
+}
+
+static inline void
+p384_make_order(uint64_t *n) /* stores six fixed 64-bit limbs into n[0..5]; used by the caller as the curve order, least-significant limb first */
+{
+ n[0U] = (uint64_t)0xecec196accc52973U; /* least-significant limb */
+ n[1U] = (uint64_t)0x581a0db248b0a77aU;
+ n[2U] = (uint64_t)0xc7634d81f4372ddfU;
+ n[3U] = (uint64_t)0xffffffffffffffffU;
+ n[4U] = (uint64_t)0xffffffffffffffffU;
+ n[5U] = (uint64_t)0xffffffffffffffffU; /* most-significant limb */
+}
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 48 bytes of valid memory, i.e., uint8_t[48].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool
+Hacl_P384_validate_private_key(uint8_t *private_key)
+{
+ uint64_t bn_sk[6U] = { 0U };
+ bn_from_bytes_be(bn_sk, private_key); /* parse the 48 input bytes into 6 limbs */
+ uint64_t tmp[6U] = { 0U };
+ p384_make_order(tmp); /* tmp = curve order */
+ uint64_t c = bn_sub(tmp, bn_sk, tmp); /* tmp <- bn_sk - order, overwriting the order; c is the returned borrow */
+ uint64_t is_lt_order = (uint64_t)0U - c; /* for c in {0, 1}: 0 - 1 wraps to all-ones, 0 - 0 stays zero */
+ uint64_t bn_zero[6U] = { 0U };
+ uint64_t res = bn_is_eq_mask(bn_sk, bn_zero); /* mask for bn_sk == 0 */
+ uint64_t is_eq_zero = res;
+ uint64_t res0 = is_lt_order & ~is_eq_zero; /* below the order AND not zero */
+ return res0 == (uint64_t)0xFFFFFFFFFFFFFFFFU; /* true only when every bit of the combined mask is set */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_P384.h b/security/nss/lib/freebl/verified/Hacl_P384.h
new file mode 100644
index 0000000000..4109947e6b
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P384.h
@@ -0,0 +1,68 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_P384_H
+#define __Hacl_P384_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+
+#include "lib_intrinsics.h"
+
+/*******************************************************************************
+
+ Verified C library for ECDSA and ECDH functions over the P-384 NIST curve.
+
+ Of the operations in that library, this header declares only private-key
+ validation (see Hacl_P384_validate_private_key below).
+
+*******************************************************************************/
+
+/******************/
+/* Key validation */
+/******************/
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 48 bytes of valid memory, i.e., uint8_t[48].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool Hacl_P384_validate_private_key(uint8_t *private_key);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_P384_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_P521.c b/security/nss/lib/freebl/verified/Hacl_P521.c
new file mode 100644
index 0000000000..481a64b96f
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P521.c
@@ -0,0 +1,131 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_P521.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum_Base.h"
+
+static inline uint64_t
+bn_is_eq_mask(uint64_t *x, uint64_t *y) /* returns the AND of the per-limb equality masks of the 9-limb values x and y */
+{
+ uint64_t mask = (uint64_t)0xFFFFFFFFFFFFFFFFU; /* start from all-ones; each limb comparison can only clear bits */
+ KRML_MAYBE_FOR9(i, /* i = 0..8, one step per 64-bit limb */
+ (uint32_t)0U,
+ (uint32_t)9U,
+ (uint32_t)1U,
+ uint64_t uu____0 = FStar_UInt64_eq_mask(x[i], y[i]); /* defined elsewhere; presumably all-ones when equal, zero otherwise */
+ mask = uu____0 & mask;);
+ uint64_t mask1 = mask;
+ return mask1;
+}
+
+static inline uint64_t
+bn_sub(uint64_t *a, uint64_t *b, uint64_t *c) /* limb-wise a = b - c over 9 limbs with a chained borrow; returns the final borrow (intrinsic semantics assumed from its name) */
+{
+ uint64_t c1 = (uint64_t)0U; /* running borrow, initially none */
+ KRML_MAYBE_FOR2(i, /* i = 0, 1: each step handles the four limbs 4i .. 4i+3, i.e. limbs 0..7 in total */
+ (uint32_t)0U,
+ (uint32_t)2U,
+ (uint32_t)1U,
+ uint64_t t1 = b[(uint32_t)4U * i];
+ uint64_t t20 = c[(uint32_t)4U * i]; /* both operands are read before a[k] is written, so a may alias b or c */
+ uint64_t *res_i0 = a + (uint32_t)4U * i;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t20, res_i0);
+ uint64_t t10 = b[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t t21 = c[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = a + (uint32_t)4U * i + (uint32_t)1U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t10, t21, res_i1);
+ uint64_t t11 = b[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t t22 = c[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = a + (uint32_t)4U * i + (uint32_t)2U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t11, t22, res_i2);
+ uint64_t t12 = b[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t t2 = c[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = a + (uint32_t)4U * i + (uint32_t)3U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t12, t2, res_i););
+ { /* final limb 8 */
+ uint64_t t1 = b[8U];
+ uint64_t t2 = c[8U];
+ uint64_t *res_i = a + (uint32_t)8U;
+ c1 = Lib_IntTypes_Intrinsics_sub_borrow_u64(c1, t1, t2, res_i);
+ }
+ uint64_t c10 = c1;
+ return c10; /* borrow out of the most significant limb */
+}
+
+static inline void
+bn_from_bytes_be(uint64_t *a, uint8_t *b) /* fills the 9 limbs of a from the 66 bytes at b; a[0] comes from the last 8 bytes */
+{
+ uint8_t tmp[72U] = { 0U }; /* 9 * 8 bytes, zero-initialised */
+ memcpy(tmp + (uint32_t)6U, b, (uint32_t)66U * sizeof(uint8_t)); /* left-pad the 66 input bytes with 6 zero bytes to fill 72 */
+ KRML_MAYBE_FOR9(i, /* i = 0..8 */
+ (uint32_t)0U,
+ (uint32_t)9U,
+ (uint32_t)1U,
+ uint64_t *os = a;
+ uint64_t u = load64_be(tmp + ((uint32_t)9U - i - (uint32_t)1U) * (uint32_t)8U); /* limb i is read from byte offset (8 - i) * 8 of the padded buffer */
+ uint64_t x = u;
+ os[i] = x;);
+}
+
+static inline void
+p521_make_order(uint64_t *n) /* stores nine fixed 64-bit limbs into n[0..8]; used by the caller as the curve order, least-significant limb first */
+{
+ n[0U] = (uint64_t)0xbb6fb71e91386409U; /* least-significant limb */
+ n[1U] = (uint64_t)0x3bb5c9b8899c47aeU;
+ n[2U] = (uint64_t)0x7fcc0148f709a5d0U;
+ n[3U] = (uint64_t)0x51868783bf2f966bU;
+ n[4U] = (uint64_t)0xfffffffffffffffaU;
+ n[5U] = (uint64_t)0xffffffffffffffffU;
+ n[6U] = (uint64_t)0xffffffffffffffffU;
+ n[7U] = (uint64_t)0xffffffffffffffffU;
+ n[8U] = (uint64_t)0x1ffU; /* most-significant limb: only the low 9 bits are set */
+}
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool
+Hacl_P521_validate_private_key(uint8_t *private_key)
+{
+ uint64_t bn_sk[9U] = { 0U };
+ bn_from_bytes_be(bn_sk, private_key); /* parse the 66 input bytes into 9 limbs */
+ uint64_t tmp[9U] = { 0U };
+ p521_make_order(tmp); /* tmp = curve order */
+ uint64_t c = bn_sub(tmp, bn_sk, tmp); /* tmp <- bn_sk - order, overwriting the order; c is the returned borrow */
+ uint64_t is_lt_order = (uint64_t)0U - c; /* for c in {0, 1}: 0 - 1 wraps to all-ones, 0 - 0 stays zero */
+ uint64_t bn_zero[9U] = { 0U };
+ uint64_t res = bn_is_eq_mask(bn_sk, bn_zero); /* mask for bn_sk == 0 */
+ uint64_t is_eq_zero = res;
+ uint64_t res0 = is_lt_order & ~is_eq_zero; /* below the order AND not zero */
+ return res0 == (uint64_t)0xFFFFFFFFFFFFFFFFU; /* true only when every bit of the combined mask is set */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_P521.h b/security/nss/lib/freebl/verified/Hacl_P521.h
new file mode 100644
index 0000000000..d7afebdc28
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_P521.h
@@ -0,0 +1,59 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_P521_H
+#define __Hacl_P521_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+
+#include "lib_intrinsics.h"
+
+/******************/
+/* Key validation */
+/******************/
+
+/**
+Private key validation.
+
+ The function returns `true` if a private key is valid and `false` otherwise.
+
+ The argument `private_key` points to 66 bytes of valid memory, i.e., uint8_t[66].
+
+ The private key is valid:
+ • 0 < `private_key` < the order of the curve
+*/
+bool Hacl_P521_validate_private_key(uint8_t *private_key);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_P521_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c
new file mode 100644
index 0000000000..41de9bc0d6
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c
@@ -0,0 +1,1617 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Poly1305_128.h"
+/*
+ * Hacl_Impl_Poly1305_Field32xN_128_load_acc2
+ *
+ * Loads two vectors from b and b + 16, splits the resulting (lo, hi) pair
+ * into five 26-bit limb vectors, sets bit 24 of the top limb (the value
+ * 2^128, because limb 4 starts at bit 4 * 26 = 104), and adds those limbs
+ * into acc.
+ *
+ * Before the addition each accumulator limb is passed through
+ * Lib_IntVector_Intrinsics_vec128_insert64(acc_i, 0, 1).
+ * NOTE(review): presumably this overwrites 64-bit lane 1 with zero and keeps
+ * lane 0 -- confirm against the intrinsics header.
+ *
+ * acc: five limb vectors, read and then overwritten in place (acc[0..4]).
+ * b:   input bytes. NOTE(review): presumably 32 readable bytes (two 16-byte
+ *      loads, each as two little-endian 64-bit lanes) -- confirm against
+ *      Lib_IntVector_Intrinsics_vec128_load64_le.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b)
+{
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; /* scratch limbs for the loaded input */
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(b);
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load64_le(b + (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); /* used below as bits 0..63 of each value */
+ Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); /* used below as bits 64..127 of each value */
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f10 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo, then low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); /* limb 4: bits 40..63 of hi */
+ Lib_IntVector_Intrinsics_vec128 f02 = f00;
+ Lib_IntVector_Intrinsics_vec128 f12 = f10;
+ Lib_IntVector_Intrinsics_vec128 f22 = f20;
+ Lib_IntVector_Intrinsics_vec128 f32 = f30;
+ Lib_IntVector_Intrinsics_vec128 f42 = f40;
+ e[0U] = f02;
+ e[1U] = f12;
+ e[2U] = f22;
+ e[3U] = f32;
+ e[4U] = f42;
+ uint64_t b10 = (uint64_t)0x1000000U; /* bit 24 of limb 4, i.e. 2^(104 + 24) = 2^128 */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b10);
+ Lib_IntVector_Intrinsics_vec128 f43 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f43, mask); /* set the 2^128 bit on the loaded value */
+ Lib_IntVector_Intrinsics_vec128 acc0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 acc1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 acc2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 acc3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 acc4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 e0 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 e1 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 e2 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 e3 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 e4 = e[4U];
+ Lib_IntVector_Intrinsics_vec128
+ f0 = Lib_IntVector_Intrinsics_vec128_insert64(acc0, (uint64_t)0U, (uint32_t)1U); /* accumulator limb with 0 inserted at index 1 */
+ Lib_IntVector_Intrinsics_vec128
+ f1 = Lib_IntVector_Intrinsics_vec128_insert64(acc1, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f2 = Lib_IntVector_Intrinsics_vec128_insert64(acc2, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f3 = Lib_IntVector_Intrinsics_vec128_insert64(acc3, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f4 = Lib_IntVector_Intrinsics_vec128_insert64(acc4, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128 f01 = Lib_IntVector_Intrinsics_vec128_add64(f0, e0); /* limb-wise sum; no carry propagation here */
+ Lib_IntVector_Intrinsics_vec128 f11 = Lib_IntVector_Intrinsics_vec128_add64(f1, e1);
+ Lib_IntVector_Intrinsics_vec128 f21 = Lib_IntVector_Intrinsics_vec128_add64(f2, e2);
+ Lib_IntVector_Intrinsics_vec128 f31 = Lib_IntVector_Intrinsics_vec128_add64(f3, e3);
+ Lib_IntVector_Intrinsics_vec128 f41 = Lib_IntVector_Intrinsics_vec128_add64(f4, e4);
+ Lib_IntVector_Intrinsics_vec128 acc01 = f01;
+ Lib_IntVector_Intrinsics_vec128 acc11 = f11;
+ Lib_IntVector_Intrinsics_vec128 acc21 = f21;
+ Lib_IntVector_Intrinsics_vec128 acc31 = f31;
+ Lib_IntVector_Intrinsics_vec128 acc41 = f41;
+ acc[0U] = acc01;
+ acc[1U] = acc11;
+ acc[2U] = acc21;
+ acc[3U] = acc31;
+ acc[4U] = acc41;
+}
+/*
+ * Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize
+ *
+ * Multiplies the five-limb vector value in out by a multiplier assembled
+ * from the table p, carry-propagates the product back towards 26-bit limbs,
+ * adds the "high" interleave of every limb onto itself, runs a final carry
+ * chain and stores the five resulting limbs back into out.
+ *
+ * The multiplier limb i is interleave_low64(p[10 + i], p[i]). Products that
+ * would land above limb 4 are instead multiplied by 5 and added to the low
+ * limbs (five 26-bit limbs span 130 bits, and 2^130 is congruent to 5 modulo
+ * the Poly1305 prime 2^130 - 5).
+ *
+ * out: five limb vectors, read and overwritten in place (out[0..4]).
+ * p:   table of limb vectors; this function reads p[0..4] and p[10..14].
+ *      NOTE(review): presumably the `pre` table filled in by
+ *      Hacl_Poly1305_128_poly1305_init (r at offset 0, r*r at offset 10) --
+ *      confirm at the call sites.
+ *
+ * NOTE(review): the lane behaviour of interleave_low64 / interleave_high64
+ * is defined in the intrinsics header, not here; the per-line notes below
+ * describe only what this function does with their results.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(
+ Lib_IntVector_Intrinsics_vec128 *out,
+ Lib_IntVector_Intrinsics_vec128 *p)
+{
+ Lib_IntVector_Intrinsics_vec128 *r = p; /* first operand table: p[0..4] */
+ Lib_IntVector_Intrinsics_vec128 *r2 = p + (uint32_t)10U; /* second operand table: p[10..14] */
+ Lib_IntVector_Intrinsics_vec128 a0 = out[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = out[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = out[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = out[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = out[4U];
+ Lib_IntVector_Intrinsics_vec128 r10 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r12 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r13 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r14 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r20 = r2[0U];
+ Lib_IntVector_Intrinsics_vec128 r21 = r2[1U];
+ Lib_IntVector_Intrinsics_vec128 r22 = r2[2U];
+ Lib_IntVector_Intrinsics_vec128 r23 = r2[3U];
+ Lib_IntVector_Intrinsics_vec128 r24 = r2[4U];
+ Lib_IntVector_Intrinsics_vec128
+ r201 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r20, r10); /* multiplier limb 0, built from r2[0] and r[0] */
+ Lib_IntVector_Intrinsics_vec128
+ r211 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r21, r11); /* multiplier limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ r221 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r22, r12); /* multiplier limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ r231 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r23, r13); /* multiplier limb 3 */
+ Lib_IntVector_Intrinsics_vec128
+ r241 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r24, r14); /* multiplier limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ r251 = Lib_IntVector_Intrinsics_vec128_smul64(r211, (uint64_t)5U); /* 5 * multiplier limb 1, for wrap-around terms */
+ Lib_IntVector_Intrinsics_vec128
+ r252 = Lib_IntVector_Intrinsics_vec128_smul64(r221, (uint64_t)5U); /* 5 * multiplier limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ r253 = Lib_IntVector_Intrinsics_vec128_smul64(r231, (uint64_t)5U); /* 5 * multiplier limb 3 */
+ Lib_IntVector_Intrinsics_vec128
+ r254 = Lib_IntVector_Intrinsics_vec128_smul64(r241, (uint64_t)5U); /* 5 * multiplier limb 4 */
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_mul64(r201, a0); /* contributions of a0 to result limbs 0..4 */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_mul64(r211, a0);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_mul64(r221, a0);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_mul64(r231, a0);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_mul64(r241, a0);
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a1)); /* add contributions of a1 (limb 0 uses the 5x wrap-around) */
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r221, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r231, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a2)); /* add contributions of a2 */
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r221, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r252, a3)); /* add contributions of a3 */
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r251, a4)); /* add contributions of a4 */
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r252, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a4));
+ Lib_IntVector_Intrinsics_vec128 t0 = a05; /* t0..t4: product limbs before carry propagation */
+ Lib_IntVector_Intrinsics_vec128 t1 = a15;
+ Lib_IntVector_Intrinsics_vec128 t2 = a25;
+ Lib_IntVector_Intrinsics_vec128 t3 = a35;
+ Lib_IntVector_Intrinsics_vec128 t4 = a45;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* 26-bit limb mask */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); /* 4 * z11 */
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* 5 * z11: top carry wraps into limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02; /* o0, o10..o40: carried product limbs */
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42;
+ Lib_IntVector_Intrinsics_vec128
+ o01 =
+ Lib_IntVector_Intrinsics_vec128_add64(o0,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o0, o0)); /* limb plus its own high interleave; presumably sums the two lanes -- TODO confirm */
+ Lib_IntVector_Intrinsics_vec128
+ o11 =
+ Lib_IntVector_Intrinsics_vec128_add64(o10,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o10, o10));
+ Lib_IntVector_Intrinsics_vec128
+ o21 =
+ Lib_IntVector_Intrinsics_vec128_add64(o20,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o20, o20));
+ Lib_IntVector_Intrinsics_vec128
+ o31 =
+ Lib_IntVector_Intrinsics_vec128_add64(o30,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o30, o30));
+ Lib_IntVector_Intrinsics_vec128
+ o41 =
+ Lib_IntVector_Intrinsics_vec128_add64(o40,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o40, o40));
+ Lib_IntVector_Intrinsics_vec128
+ l = Lib_IntVector_Intrinsics_vec128_add64(o01, Lib_IntVector_Intrinsics_vec128_zero); /* final carry chain starts with a zero carry-in */
+ Lib_IntVector_Intrinsics_vec128
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec128_and(l,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); /* carry limb 0 -> limb 1 */
+ Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(o11, c0);
+ Lib_IntVector_Intrinsics_vec128
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec128_and(l0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); /* carry limb 1 -> limb 2 */
+ Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(o21, c1);
+ Lib_IntVector_Intrinsics_vec128
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec128_and(l1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); /* carry limb 2 -> limb 3 */
+ Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(o31, c2);
+ Lib_IntVector_Intrinsics_vec128
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec128_and(l2,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U); /* carry limb 3 -> limb 4 */
+ Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(o41, c3);
+ Lib_IntVector_Intrinsics_vec128
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec128_and(l3,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ o00 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); /* fold the top carry back into limb 0 as 5 * c4 */
+ Lib_IntVector_Intrinsics_vec128 o1 = tmp1;
+ Lib_IntVector_Intrinsics_vec128 o2 = tmp2;
+ Lib_IntVector_Intrinsics_vec128 o3 = tmp3;
+ Lib_IntVector_Intrinsics_vec128 o4 = tmp4;
+ out[0U] = o00;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+/*
+ * Hacl_Poly1305_128_poly1305_init
+ *
+ * Initializes the context ctx from key. The context is used as an array of
+ * limb vectors with this layout (offsets in vectors):
+ *   ctx[0..4]    acc   set to Lib_IntVector_Intrinsics_vec128_zero
+ *   ctx[5..9]    r     key[0..15] loaded with load64_le as two 64-bit words,
+ *                      masked, and split into five 26-bit limbs
+ *   ctx[10..14]  r5    5 * r, limb by limb
+ *   ctx[15..19]  rn    r * r, with wrap-around terms scaled by 5 and carries
+ *                      propagated
+ *   ctx[20..24]  rn_5  5 * rn, limb by limb
+ *
+ * ctx: must have room for at least 25 vectors (ctx[24] is written).
+ * key: only key[0..15] is read by this function.
+ *
+ * The two masks match the Poly1305 "clamp" of r described in RFC 8439.
+ */
+void
+Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key)
+{
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* accumulator limbs: ctx[0..4] */
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; /* precomputed table: ctx[5..24] */
+ uint8_t *kr = key;
+ acc[0U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[1U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[2U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[3U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[4U] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(kr); /* key bytes 0..7 */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U); /* key bytes 8..15 */
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; /* clamp mask for the low word */
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; /* clamp mask for the high word */
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ Lib_IntVector_Intrinsics_vec128 *r = pre; /* pre[0..4] */
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* pre[5..9] */
+ Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; /* pre[10..14] */
+ Lib_IntVector_Intrinsics_vec128 *rn_5 = pre + (uint32_t)15U; /* pre[15..19] */
+ Lib_IntVector_Intrinsics_vec128 r_vec0 = Lib_IntVector_Intrinsics_vec128_load64(lo1);
+ Lib_IntVector_Intrinsics_vec128 r_vec1 = Lib_IntVector_Intrinsics_vec128_load64(hi1);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(r_vec0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of the low word */
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of the low word */
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(r_vec1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of the low word, then low 14 bits of the high word */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of the high word */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, (uint32_t)40U); /* limb 4: bits 40..63 of the high word */
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ r[0U] = f0;
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ Lib_IntVector_Intrinsics_vec128 f200 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 f210 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 f220 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 f230 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 f240 = r[4U];
+ r5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f200, (uint64_t)5U); /* r5 = 5 * r, limb by limb */
+ r5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f210, (uint64_t)5U);
+ r5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f220, (uint64_t)5U);
+ r5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f230, (uint64_t)5U);
+ r5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f240, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = r[0U]; /* f10..f14 re-read r[0..4]: the product below is r * r */
+ Lib_IntVector_Intrinsics_vec128 f11 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); /* contributions of f10 to result limbs 0..4 */
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f11)); /* add contributions of f11 (limb 0 uses the 5x wrap-around) */
+ Lib_IntVector_Intrinsics_vec128
+ a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, Lib_IntVector_Intrinsics_vec128_mul64(r0, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, Lib_IntVector_Intrinsics_vec128_mul64(r1, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, Lib_IntVector_Intrinsics_vec128_mul64(r2, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, Lib_IntVector_Intrinsics_vec128_mul64(r3, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f12)); /* add contributions of f12 */
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f13)); /* add contributions of f13 */
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f14)); /* add contributions of f14 */
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f14));
+ Lib_IntVector_Intrinsics_vec128 t0 = a04; /* t0..t4: product limbs before carry propagation */
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* 26-bit limb mask */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); /* 4 * z11 */
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* 5 * z11: top carry wraps into limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02; /* o0..o4: carried limbs of r * r */
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ rn[0U] = o0;
+ rn[1U] = o1;
+ rn[2U] = o2;
+ rn[3U] = o3;
+ rn[4U] = o4;
+ Lib_IntVector_Intrinsics_vec128 f201 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f201, (uint64_t)5U); /* rn_5 = 5 * rn, limb by limb */
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f21, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f22, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f23, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f24, (uint64_t)5U);
+}
+
+void
+Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U };
+ uint64_t u0 = load64_le(text);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06;
+ Lib_IntVector_Intrinsics_vec128 t1 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void /* Absorbs len bytes of text into the accumulator kept in ctx[0..4]: 32-byte chunks first, then 16-byte blocks, then a zero-padded partial block. */
+Hacl_Poly1305_128_poly1305_update(
+ Lib_IntVector_Intrinsics_vec128 *ctx, /* in/out state: ctx[0..4] is the accumulator; multipliers are read from ctx[5] up to ctx[24] */
+ uint32_t len, /* number of input bytes at text */
+ uint8_t *text) /* input bytes */
+{
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; /* values stored after the five accumulator limbs; used as multipliers below */
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* accumulator, five limb vectors */
+ uint32_t sz_block = (uint32_t)32U;
+ uint32_t len0 = len / sz_block * sz_block; /* len rounded down to a multiple of 32 */
+ uint8_t *t0 = text;
+ if (len0 > (uint32_t)0U) { /* bulk path: at least one full 32-byte chunk */
+ uint32_t bs = (uint32_t)32U;
+ uint8_t *text0 = t0;
+ Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc, text0); /* helper defined elsewhere; handed the first 32-byte chunk - presumably loads it into acc (confirm) */
+ uint32_t len1 = len0 - bs; /* bulk bytes left after the first chunk */
+ uint8_t *text1 = t0 + bs;
+ uint32_t nb = len1 / bs; /* number of remaining 32-byte chunks */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; /* limbs of the chunk being absorbed */
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); /* first 16 bytes of the chunk */
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); /* second 16 bytes of the chunk */
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); /* presumably the low 64-bit halves of b1 and b2, one per lane (confirm intrinsic) */
+ Lib_IntVector_Intrinsics_vec128
+ hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); /* presumably the high 64-bit halves of b1 and b2 (confirm intrinsic) */
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f25 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); /* limb 4: remaining top 24 bits of hi */
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f25;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f0;
+ e[1U] = f1;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4, i.e. bit 4*26 + 24 = 128 of the block value */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); /* set the bit just above the 16 data bytes */
+ Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; /* multiplier for the bulk loop - presumably a power of r set up at init (confirm) */
+ Lib_IntVector_Intrinsics_vec128 *rn5 = pre + (uint32_t)15U; /* companion values used for the wrap-around terms - presumably 5 * rn (confirm) */
+ Lib_IntVector_Intrinsics_vec128 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; /* rn5[0] is not used */
+ Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f110 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f120 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f130 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f140 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); /* start of acc * rn: one running sum per output limb, beginning with the acc[0] terms */
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); /* add the acc[1] terms; the term that wraps past limb 4 uses r54 */
+ Lib_IntVector_Intrinsics_vec128
+ a11 =
+ Lib_IntVector_Intrinsics_vec128_add64(a1,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a21 =
+ Lib_IntVector_Intrinsics_vec128_add64(a2,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a31 =
+ Lib_IntVector_Intrinsics_vec128_add64(a3,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a41 =
+ Lib_IntVector_Intrinsics_vec128_add64(a4,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); /* add the acc[2] terms */
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); /* add the acc[3] terms */
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); /* add the acc[4] terms */
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec128 t01 = a04; /* unreduced product limbs */
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* keeps the low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11; added into limb 0 below */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o00 = x02;
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42;
+ acc[0U] = o00; /* acc now holds the carried product acc * rn */
+ acc[1U] = o10;
+ acc[2U] = o20;
+ acc[3U] = o30;
+ acc[4U] = o40;
+ Lib_IntVector_Intrinsics_vec128 f100 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); /* limb-wise add of the new chunk: acc = acc * rn + e */
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24);
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc, pre); /* helper defined elsewhere; presumably folds the two-lane accumulator into one value before the tail blocks (confirm) */
+ }
+ uint32_t len1 = len - len0; /* tail length; always below 32 */
+ uint8_t *t1 = text + len0;
+ uint32_t nb = len1 / (uint32_t)16U; /* full 16-byte blocks in the tail (0 or 1, since len1 < 32) */
+ uint32_t rem = len1 % (uint32_t)16U; /* trailing bytes that do not fill a block */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t1 + i * (uint32_t)16U;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; /* limbs of the block being absorbed */
+ uint64_t u0 = load64_le(block); /* low 8 bytes of the block, little-endian */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U); /* high 8 bytes of the block, little-endian */
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); /* lane layout of load64 is not visible here - presumably the same value in each lane (confirm) */
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); /* limb 4: remaining top 24 bits of hi */
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4, i.e. bit 4*26 + 24 = 128 of the block value */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); /* set the bit just above the 16 data bytes */
+ Lib_IntVector_Intrinsics_vec128 *r = pre; /* multiplier for single-block steps - presumably the key value r (confirm) */
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* companion values used for the wrap-around terms - presumably 5 * r (confirm) */
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; /* r5[0] is not used */
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); /* here the block is added first (acc + e) and the sum is multiplied afterwards */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); /* start of (acc + e) * r: one running sum per output limb */
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); /* terms that wrap past limb 4 use the r5 values */
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06; /* unreduced product limbs */
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* keeps the low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11; added into limb 0 below */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0; /* acc = carried result of (acc + e) * r */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) { /* final partial block of rem (1..15) bytes */
+ uint8_t *last = t1 + nb * (uint32_t)16U;
+ KRML_PRE_ALIGN(16)
+ Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; /* limbs of the padded partial block */
+ uint8_t tmp[16U] = { 0U }; /* zero-filled so the bytes past rem stay 0 */
+ memcpy(tmp, last, rem * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp); /* low 8 bytes of the padded block, little-endian */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U); /* high 8 bytes of the padded block, little-endian */
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); /* lane layout of load64 is not visible here - presumably the same value in each lane (confirm) */
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U)); /* limb 2: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); /* limb 4: remaining top 24 bits of hi */
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; /* shift count is (rem * 8) % 26: position of bit rem*8 inside its 26-bit limb */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 fi = e[rem * (uint32_t)8U / (uint32_t)26U]; /* limb index (rem * 8) / 26 is at most 120 / 26 = 4 */
+ e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); /* set the bit just above the rem data bytes */
+ Lib_IntVector_Intrinsics_vec128 *r = pre; /* multiplier for single-block steps - presumably the key value r (confirm) */
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* companion values used for the wrap-around terms - presumably 5 * r (confirm) */
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; /* r5[0] is not used */
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); /* add the padded block first (acc + e); the sum is multiplied afterwards */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); /* start of (acc + e) * r: one running sum per output limb */
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); /* terms that wrap past limb 4 use the r5 values */
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06; /* unreduced product limbs */
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* keeps the low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11; added into limb 0 below */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0; /* acc = carried result of (acc + e) * r */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return; /* no statements follow the if, so the function ends here either way */
+ }
+}
+
+void
+Hacl_Poly1305_128_poly1305_finish(
+ uint8_t *tag, /* out: 16 bytes, written as two little-endian 64-bit words */
+ uint8_t *key, /* in: only key[16..31] is read by this function */
+ Lib_IntVector_Intrinsics_vec128 *ctx) /* in/out: ctx[0..4] hold the accumulator limbs and are overwritten with the reduced value */
+{ /* Reduces the accumulator modulo 2^130 - 5, adds the 128-bit value at key + 16 modulo 2^128, and stores the result in tag. */
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* accumulator = first five vectors of ctx, one 26-bit limb per vector */
+ uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of the key; added to the accumulator at the end */
+ Lib_IntVector_Intrinsics_vec128 f0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f23 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f33 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f40 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 /* carry pass 1: keep the low 26 bits of each limb (tmpN0) and push the overflow (cN0) into the next limb */
+ l0 = Lib_IntVector_Intrinsics_vec128_add64(f0, Lib_IntVector_Intrinsics_vec128_zero);
+ Lib_IntVector_Intrinsics_vec128
+ tmp00 =
+ Lib_IntVector_Intrinsics_vec128_and(l0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c00 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(f13, c00);
+ Lib_IntVector_Intrinsics_vec128
+ tmp10 =
+ Lib_IntVector_Intrinsics_vec128_and(l1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c10 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(f23, c10);
+ Lib_IntVector_Intrinsics_vec128
+ tmp20 =
+ Lib_IntVector_Intrinsics_vec128_and(l2,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c20 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(f33, c20);
+ Lib_IntVector_Intrinsics_vec128
+ tmp30 =
+ Lib_IntVector_Intrinsics_vec128_and(l3,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c30 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l4 = Lib_IntVector_Intrinsics_vec128_add64(f40, c30);
+ Lib_IntVector_Intrinsics_vec128
+ tmp40 =
+ Lib_IntVector_Intrinsics_vec128_and(l4,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c40 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 /* the carry out of limb 4 has weight 2^130, which is congruent to 5 mod 2^130 - 5, so it re-enters limb 0 multiplied by 5 */
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp00,
+ Lib_IntVector_Intrinsics_vec128_smul64(c40, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec128 f110 = tmp10;
+ Lib_IntVector_Intrinsics_vec128 f210 = tmp20;
+ Lib_IntVector_Intrinsics_vec128 f310 = tmp30;
+ Lib_IntVector_Intrinsics_vec128 f410 = tmp40;
+ Lib_IntVector_Intrinsics_vec128 /* carry pass 2: same mask-and-carry chain, applied to the output of pass 1 */
+ l = Lib_IntVector_Intrinsics_vec128_add64(f010, Lib_IntVector_Intrinsics_vec128_zero);
+ Lib_IntVector_Intrinsics_vec128
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec128_and(l,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l5 = Lib_IntVector_Intrinsics_vec128_add64(f110, c0);
+ Lib_IntVector_Intrinsics_vec128
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec128_and(l5,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l5, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l6 = Lib_IntVector_Intrinsics_vec128_add64(f210, c1);
+ Lib_IntVector_Intrinsics_vec128
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec128_and(l6,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l6, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l7 = Lib_IntVector_Intrinsics_vec128_add64(f310, c2);
+ Lib_IntVector_Intrinsics_vec128
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec128_and(l7,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l7, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l8 = Lib_IntVector_Intrinsics_vec128_add64(f410, c3);
+ Lib_IntVector_Intrinsics_vec128
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec128_and(l8,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l8, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 /* wrap the top carry into limb 0 again (times 5); f02 is not masked after this addition */
+ f02 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec128 f12 = tmp1;
+ Lib_IntVector_Intrinsics_vec128 f22 = tmp2;
+ Lib_IntVector_Intrinsics_vec128 f32 = tmp3;
+ Lib_IntVector_Intrinsics_vec128 f42 = tmp4;
+ Lib_IntVector_Intrinsics_vec128 /* (ml, mh, mh, mh, mh) are the limbs of p = 2^130 - 5: mh = 2^26 - 1, ml = 2^26 - 5 */
+ mh = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ ml = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffffbU);
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_eq64(f42, mh); /* build mask4: set only where limbs 1..4 all equal mh and f02 >= ml, i.e. the value is >= p (assumes eq64/gt64 yield all-ones/all-zero lanes -- TODO confirm in libintvector.h) */
+ Lib_IntVector_Intrinsics_vec128
+ mask1 =
+ Lib_IntVector_Intrinsics_vec128_and(mask,
+ Lib_IntVector_Intrinsics_vec128_eq64(f32, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask2 =
+ Lib_IntVector_Intrinsics_vec128_and(mask1,
+ Lib_IntVector_Intrinsics_vec128_eq64(f22, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask3 =
+ Lib_IntVector_Intrinsics_vec128_and(mask2,
+ Lib_IntVector_Intrinsics_vec128_eq64(f12, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask4 =
+ Lib_IntVector_Intrinsics_vec128_and(mask3,
+ Lib_IntVector_Intrinsics_vec128_lognot(Lib_IntVector_Intrinsics_vec128_gt64(ml, f02)));
+ Lib_IntVector_Intrinsics_vec128 ph = Lib_IntVector_Intrinsics_vec128_and(mask4, mh); /* ph/pl: the limbs of p where mask4 is set, zero elsewhere */
+ Lib_IntVector_Intrinsics_vec128 pl = Lib_IntVector_Intrinsics_vec128_and(mask4, ml);
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_sub64(f02, pl); /* branch-free conditional subtraction of p, limb by limb */
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_sub64(f12, ph);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_sub64(f22, ph);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_sub64(f32, ph);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_sub64(f42, ph);
+ Lib_IntVector_Intrinsics_vec128 f011 = o0;
+ Lib_IntVector_Intrinsics_vec128 f111 = o1;
+ Lib_IntVector_Intrinsics_vec128 f211 = o2;
+ Lib_IntVector_Intrinsics_vec128 f311 = o3;
+ Lib_IntVector_Intrinsics_vec128 f411 = o4;
+ acc[0U] = f011; /* side effect: the reduced limbs are written back into ctx[0..4] */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ Lib_IntVector_Intrinsics_vec128 f00 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f4 = acc[4U];
+ uint64_t f01 = Lib_IntVector_Intrinsics_vec128_extract64(f00, (uint32_t)0U); /* only lane 0 of each limb vector contributes to the tag */
+ uint64_t f112 = Lib_IntVector_Intrinsics_vec128_extract64(f1, (uint32_t)0U);
+ uint64_t f212 = Lib_IntVector_Intrinsics_vec128_extract64(f2, (uint32_t)0U);
+ uint64_t f312 = Lib_IntVector_Intrinsics_vec128_extract64(f3, (uint32_t)0U);
+ uint64_t f41 = Lib_IntVector_Intrinsics_vec128_extract64(f4, (uint32_t)0U);
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* pack the 26-bit limbs into 128 bits: lo = bits 0..63 */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* hi = bits 64..127; the top two bits of limb 4 (bits 128..129) are shifted out, i.e. the value is taken mod 2^128 */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* low 64 bits of key[16..31], little-endian */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U); /* high 64 bits of key[16..31], little-endian */
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two 64-bit halves; wrap-around of the low half is detected below */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* branch-free carry: the top bit is set exactly when r0 < f20, i.e. when the low-half addition wrapped */
+ uint64_t r11 = r1 + c; /* propagate the carry; any carry out of the high half is discarded */
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30); /* tag[0..7] = low half, little-endian */
+ store64_le(tag + (uint32_t)8U, f31); /* tag[8..15] = high half, little-endian */
+}
+
+void
+Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{ /* One-shot MAC over len bytes of text: runs init, update and finish on a local context and leaves the result in tag. */
+ KRML_PRE_ALIGN(16) /* alignment attribute for ctx; macro is defined outside this chunk (presumably requests 16-byte alignment -- TODO confirm) */
+ Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; /* zero-initialised working state of 25 vectors; finish only touches ctx[0..4] */
+ Hacl_Poly1305_128_poly1305_init(ctx, key); /* definition not shown here; presumably derives the per-key state from key */
+ Hacl_Poly1305_128_poly1305_update(ctx, len, text); /* definition not shown here; presumably absorbs the message into ctx */
+ Hacl_Poly1305_128_poly1305_finish(tag, key, ctx); /* reads key[16..31] and writes 16 bytes to tag; NOTE(review): ctx is not wiped before returning */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
new file mode 100644
index 0000000000..7108ba0a60
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
@@ -0,0 +1,64 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Poly1305_128_H
+#define __Hacl_Poly1305_128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "libintvector.h"
+
+typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx; /* context handle: pointer to an array of vec128; the one-shot mac in Hacl_Poly1305_128.c backs it with 25 elements */
+
+void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key); /* called first by poly1305_mac on a zeroed ctx; presumably sets up the state from key -- confirm against the .c file */
+
+void Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text); /* presumably absorbs a single block read from text -- confirm against the .c file */
+
+void
+Hacl_Poly1305_128_poly1305_update(
+ Lib_IntVector_Intrinsics_vec128 *ctx,
+ uint32_t len,
+ uint8_t *text); /* called by poly1305_mac between init and finish; presumably absorbs len bytes of text -- confirm against the .c file */
+
+void
+Hacl_Poly1305_128_poly1305_finish(
+ uint8_t *tag,
+ uint8_t *key,
+ Lib_IntVector_Intrinsics_vec128 *ctx); /* writes the 16-byte tag; reads key[16..31] and overwrites ctx[0..4] with the reduced accumulator */
+
+void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); /* one-shot: init + update + finish on a local context; tag receives 16 bytes */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Poly1305_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
new file mode 100644
index 0000000000..612e3be33c
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
@@ -0,0 +1,2088 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Poly1305_256.h"
+
+void
+Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b)
+{ /* Splits the data read from b into five limb vectors, ORs 2^24 into limb 4, adds lane 0 of the old acc[0..4] to lane 0 of the result, and stores the sum back into acc[0..4]. */
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; /* scratch for the five limbs of the freshly loaded data */
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(b); /* first vector read from b (presumably 32 bytes, since the next load starts at b + 32) */
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load64_le(b + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* limb mask, 2^26 - 1 */
+ Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); /* interleave/shift shuffle below presumably regroups the input so that each 64-bit lane carries one 16-byte block -- TODO confirm against libintvector.h */
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256 t0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256 t3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t2 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t2, mask26); /* limb 2: 26 bits of t3 after a 4-bit shift */
+ Lib_IntVector_Intrinsics_vec256
+ t1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t1, mask26); /* limb 1: 26 bits of t0 after a 26-bit shift */
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); /* limb 0: low 26 bits of t0 */
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask26); /* limb 3: 26 bits of t3 after a 30-bit shift */
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); /* limb 4: m4 shifted right by 40, not masked */
+ Lib_IntVector_Intrinsics_vec256 o0 = o5;
+ Lib_IntVector_Intrinsics_vec256 o1 = o10;
+ Lib_IntVector_Intrinsics_vec256 o2 = o20;
+ Lib_IntVector_Intrinsics_vec256 o3 = o30;
+ Lib_IntVector_Intrinsics_vec256 o4 = o40;
+ e[0U] = o0;
+ e[1U] = o1;
+ e[2U] = o2;
+ e[3U] = o3;
+ e[4U] = o4;
+ uint64_t b1 = (uint64_t)0x1000000U; /* 2^24; with 26-bit limbs, limb 4 has weight 2^104, so this is bit 128 of the block */
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b1);
+ Lib_IntVector_Intrinsics_vec256 f40 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f40, mask); /* set that bit in limb 4 */
+ Lib_IntVector_Intrinsics_vec256 acc0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 acc1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 acc2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 acc3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 acc4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 e0 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 e1 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 e2 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 e3 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 e4 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 r0 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r1 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r2 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r3 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r4 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 /* rN1 = zero vector with lane 0 replaced by lane 0 of accN, so only lane 0 of the old accumulator is carried over */
+ r01 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r0,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc0, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r11 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r1,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc1, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r21 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r2,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc2, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r31 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r3,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc3, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r41 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r4,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc4, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_add64(r01, e0); /* limb-wise addition; no carry propagation is done in this function */
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_add64(r11, e1);
+ Lib_IntVector_Intrinsics_vec256 f2 = Lib_IntVector_Intrinsics_vec256_add64(r21, e2);
+ Lib_IntVector_Intrinsics_vec256 f3 = Lib_IntVector_Intrinsics_vec256_add64(r31, e3);
+ Lib_IntVector_Intrinsics_vec256 f4 = Lib_IntVector_Intrinsics_vec256_add64(r41, e4);
+ Lib_IntVector_Intrinsics_vec256 acc01 = f0;
+ Lib_IntVector_Intrinsics_vec256 acc11 = f1;
+ Lib_IntVector_Intrinsics_vec256 acc21 = f2;
+ Lib_IntVector_Intrinsics_vec256 acc31 = f3;
+ Lib_IntVector_Intrinsics_vec256 acc41 = f4;
+ acc[0U] = acc01; /* overwrite the accumulator with the combined limbs */
+ acc[1U] = acc11;
+ acc[2U] = acc21;
+ acc[3U] = acc31;
+ acc[4U] = acc41;
+}
+
+void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(
+ Lib_IntVector_Intrinsics_vec256 *out,
+ Lib_IntVector_Intrinsics_vec256 *p)
+{
+ Lib_IntVector_Intrinsics_vec256 *r = p;
+ Lib_IntVector_Intrinsics_vec256 *r_5 = p + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *r4 = p + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 a0 = out[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = out[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = out[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = out[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = out[4U];
+ Lib_IntVector_Intrinsics_vec256 r10 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r12 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r13 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r14 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r151 = r_5[1U];
+ Lib_IntVector_Intrinsics_vec256 r152 = r_5[2U];
+ Lib_IntVector_Intrinsics_vec256 r153 = r_5[3U];
+ Lib_IntVector_Intrinsics_vec256 r154 = r_5[4U];
+ Lib_IntVector_Intrinsics_vec256 r40 = r4[0U];
+ Lib_IntVector_Intrinsics_vec256 r41 = r4[1U];
+ Lib_IntVector_Intrinsics_vec256 r42 = r4[2U];
+ Lib_IntVector_Intrinsics_vec256 r43 = r4[3U];
+ Lib_IntVector_Intrinsics_vec256 r44 = r4[4U];
+ Lib_IntVector_Intrinsics_vec256 a010 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r10);
+ Lib_IntVector_Intrinsics_vec256 a110 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r10);
+ Lib_IntVector_Intrinsics_vec256 a210 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r10);
+ Lib_IntVector_Intrinsics_vec256 a310 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r10);
+ Lib_IntVector_Intrinsics_vec256 a410 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r10);
+ Lib_IntVector_Intrinsics_vec256
+ a020 =
+ Lib_IntVector_Intrinsics_vec256_add64(a010,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a120 =
+ Lib_IntVector_Intrinsics_vec256_add64(a110,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a220 =
+ Lib_IntVector_Intrinsics_vec256_add64(a210,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a320 =
+ Lib_IntVector_Intrinsics_vec256_add64(a310,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a420 =
+ Lib_IntVector_Intrinsics_vec256_add64(a410,
+ Lib_IntVector_Intrinsics_vec256_mul64(r13, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a030 =
+ Lib_IntVector_Intrinsics_vec256_add64(a020,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a130 =
+ Lib_IntVector_Intrinsics_vec256_add64(a120,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a230 =
+ Lib_IntVector_Intrinsics_vec256_add64(a220,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a330 =
+ Lib_IntVector_Intrinsics_vec256_add64(a320,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a430 =
+ Lib_IntVector_Intrinsics_vec256_add64(a420,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a040 =
+ Lib_IntVector_Intrinsics_vec256_add64(a030,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a140 =
+ Lib_IntVector_Intrinsics_vec256_add64(a130,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a240 =
+ Lib_IntVector_Intrinsics_vec256_add64(a230,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a340 =
+ Lib_IntVector_Intrinsics_vec256_add64(a330,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a440 =
+ Lib_IntVector_Intrinsics_vec256_add64(a430,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a050 =
+ Lib_IntVector_Intrinsics_vec256_add64(a040,
+ Lib_IntVector_Intrinsics_vec256_mul64(r151, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a150 =
+ Lib_IntVector_Intrinsics_vec256_add64(a140,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a250 =
+ Lib_IntVector_Intrinsics_vec256_add64(a240,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a350 =
+ Lib_IntVector_Intrinsics_vec256_add64(a340,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a450 =
+ Lib_IntVector_Intrinsics_vec256_add64(a440,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r14));
+ Lib_IntVector_Intrinsics_vec256 t00 = a050;
+ Lib_IntVector_Intrinsics_vec256 t10 = a150;
+ Lib_IntVector_Intrinsics_vec256 t20 = a250;
+ Lib_IntVector_Intrinsics_vec256 t30 = a350;
+ Lib_IntVector_Intrinsics_vec256 t40 = a450;
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260);
+ Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260);
+ Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00);
+ Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10);
+ Lib_IntVector_Intrinsics_vec256
+ z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5);
+ Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260);
+ Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260);
+ Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010);
+ Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260);
+ Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260);
+ Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020);
+ Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130);
+ Lib_IntVector_Intrinsics_vec256
+ z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260);
+ Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030);
+ Lib_IntVector_Intrinsics_vec256 r20 = x020;
+ Lib_IntVector_Intrinsics_vec256 r21 = x120;
+ Lib_IntVector_Intrinsics_vec256 r22 = x210;
+ Lib_IntVector_Intrinsics_vec256 r23 = x320;
+ Lib_IntVector_Intrinsics_vec256 r24 = x420;
+ Lib_IntVector_Intrinsics_vec256 a011 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r20);
+ Lib_IntVector_Intrinsics_vec256 a111 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r20);
+ Lib_IntVector_Intrinsics_vec256 a211 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r20);
+ Lib_IntVector_Intrinsics_vec256 a311 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r20);
+ Lib_IntVector_Intrinsics_vec256 a411 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r20);
+ Lib_IntVector_Intrinsics_vec256
+ a021 =
+ Lib_IntVector_Intrinsics_vec256_add64(a011,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a121 =
+ Lib_IntVector_Intrinsics_vec256_add64(a111,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a221 =
+ Lib_IntVector_Intrinsics_vec256_add64(a211,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a321 =
+ Lib_IntVector_Intrinsics_vec256_add64(a311,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a421 =
+ Lib_IntVector_Intrinsics_vec256_add64(a411,
+ Lib_IntVector_Intrinsics_vec256_mul64(r13, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a031 =
+ Lib_IntVector_Intrinsics_vec256_add64(a021,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a131 =
+ Lib_IntVector_Intrinsics_vec256_add64(a121,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a231 =
+ Lib_IntVector_Intrinsics_vec256_add64(a221,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a331 =
+ Lib_IntVector_Intrinsics_vec256_add64(a321,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a431 =
+ Lib_IntVector_Intrinsics_vec256_add64(a421,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a041 =
+ Lib_IntVector_Intrinsics_vec256_add64(a031,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a141 =
+ Lib_IntVector_Intrinsics_vec256_add64(a131,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a241 =
+ Lib_IntVector_Intrinsics_vec256_add64(a231,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a341 =
+ Lib_IntVector_Intrinsics_vec256_add64(a331,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a441 =
+ Lib_IntVector_Intrinsics_vec256_add64(a431,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a051 =
+ Lib_IntVector_Intrinsics_vec256_add64(a041,
+ Lib_IntVector_Intrinsics_vec256_mul64(r151, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a151 =
+ Lib_IntVector_Intrinsics_vec256_add64(a141,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a251 =
+ Lib_IntVector_Intrinsics_vec256_add64(a241,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a351 =
+ Lib_IntVector_Intrinsics_vec256_add64(a341,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a451 =
+ Lib_IntVector_Intrinsics_vec256_add64(a441,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r24));
+ Lib_IntVector_Intrinsics_vec256 t01 = a051;
+ Lib_IntVector_Intrinsics_vec256 t11 = a151;
+ Lib_IntVector_Intrinsics_vec256 t21 = a251;
+ Lib_IntVector_Intrinsics_vec256 t31 = a351;
+ Lib_IntVector_Intrinsics_vec256 t41 = a451;
+ Lib_IntVector_Intrinsics_vec256
+ mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z04 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z14 = Lib_IntVector_Intrinsics_vec256_shift_right64(t31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x03 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261);
+ Lib_IntVector_Intrinsics_vec256 x33 = Lib_IntVector_Intrinsics_vec256_and(t31, mask261);
+ Lib_IntVector_Intrinsics_vec256 x13 = Lib_IntVector_Intrinsics_vec256_add64(t11, z04);
+ Lib_IntVector_Intrinsics_vec256 x43 = Lib_IntVector_Intrinsics_vec256_add64(t41, z14);
+ Lib_IntVector_Intrinsics_vec256
+ z011 = Lib_IntVector_Intrinsics_vec256_shift_right64(x13, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z111 = Lib_IntVector_Intrinsics_vec256_shift_right64(x43, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t6 = Lib_IntVector_Intrinsics_vec256_shift_left64(z111, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z111, t6);
+ Lib_IntVector_Intrinsics_vec256 x111 = Lib_IntVector_Intrinsics_vec256_and(x13, mask261);
+ Lib_IntVector_Intrinsics_vec256 x411 = Lib_IntVector_Intrinsics_vec256_and(x43, mask261);
+ Lib_IntVector_Intrinsics_vec256 x22 = Lib_IntVector_Intrinsics_vec256_add64(t21, z011);
+ Lib_IntVector_Intrinsics_vec256 x011 = Lib_IntVector_Intrinsics_vec256_add64(x03, z120);
+ Lib_IntVector_Intrinsics_vec256
+ z021 = Lib_IntVector_Intrinsics_vec256_shift_right64(x22, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z131 = Lib_IntVector_Intrinsics_vec256_shift_right64(x011, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x211 = Lib_IntVector_Intrinsics_vec256_and(x22, mask261);
+ Lib_IntVector_Intrinsics_vec256 x021 = Lib_IntVector_Intrinsics_vec256_and(x011, mask261);
+ Lib_IntVector_Intrinsics_vec256 x311 = Lib_IntVector_Intrinsics_vec256_add64(x33, z021);
+ Lib_IntVector_Intrinsics_vec256 x121 = Lib_IntVector_Intrinsics_vec256_add64(x111, z131);
+ Lib_IntVector_Intrinsics_vec256
+ z031 = Lib_IntVector_Intrinsics_vec256_shift_right64(x311, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x321 = Lib_IntVector_Intrinsics_vec256_and(x311, mask261);
+ Lib_IntVector_Intrinsics_vec256 x421 = Lib_IntVector_Intrinsics_vec256_add64(x411, z031);
+ Lib_IntVector_Intrinsics_vec256 r30 = x021;
+ Lib_IntVector_Intrinsics_vec256 r31 = x121;
+ Lib_IntVector_Intrinsics_vec256 r32 = x211;
+ Lib_IntVector_Intrinsics_vec256 r33 = x321;
+ Lib_IntVector_Intrinsics_vec256 r34 = x421;
+ Lib_IntVector_Intrinsics_vec256
+ v12120 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r20, r10);
+ Lib_IntVector_Intrinsics_vec256
+ v34340 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r40, r30);
+ Lib_IntVector_Intrinsics_vec256
+ r12340 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34340, v12120);
+ Lib_IntVector_Intrinsics_vec256
+ v12121 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r21, r11);
+ Lib_IntVector_Intrinsics_vec256
+ v34341 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r41, r31);
+ Lib_IntVector_Intrinsics_vec256
+ r12341 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34341, v12121);
+ Lib_IntVector_Intrinsics_vec256
+ v12122 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r22, r12);
+ Lib_IntVector_Intrinsics_vec256
+ v34342 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r42, r32);
+ Lib_IntVector_Intrinsics_vec256
+ r12342 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34342, v12122);
+ Lib_IntVector_Intrinsics_vec256
+ v12123 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r23, r13);
+ Lib_IntVector_Intrinsics_vec256
+ v34343 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r43, r33);
+ Lib_IntVector_Intrinsics_vec256
+ r12343 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34343, v12123);
+ Lib_IntVector_Intrinsics_vec256
+ v12124 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r24, r14);
+ Lib_IntVector_Intrinsics_vec256
+ v34344 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r44, r34);
+ Lib_IntVector_Intrinsics_vec256
+ r12344 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34344, v12124);
+ Lib_IntVector_Intrinsics_vec256
+ r123451 = Lib_IntVector_Intrinsics_vec256_smul64(r12341, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123452 = Lib_IntVector_Intrinsics_vec256_smul64(r12342, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123453 = Lib_IntVector_Intrinsics_vec256_smul64(r12343, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123454 = Lib_IntVector_Intrinsics_vec256_smul64(r12344, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_mul64(r12340, a0);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_mul64(r12341, a0);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_mul64(r12342, a0);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_mul64(r12343, a0);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_mul64(r12344, a0);
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12342, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12343, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12342, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123452, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123451, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123452, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a4));
+ Lib_IntVector_Intrinsics_vec256 t0 = a05;
+ Lib_IntVector_Intrinsics_vec256 t1 = a15;
+ Lib_IntVector_Intrinsics_vec256 t2 = a25;
+ Lib_IntVector_Intrinsics_vec256 t3 = a35;
+ Lib_IntVector_Intrinsics_vec256 t4 = a45;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z121 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z121);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o10 = x12;
+ Lib_IntVector_Intrinsics_vec256 o20 = x21;
+ Lib_IntVector_Intrinsics_vec256 o30 = x32;
+ Lib_IntVector_Intrinsics_vec256 o40 = x42;
+ Lib_IntVector_Intrinsics_vec256
+ v00 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o0, o0);
+ Lib_IntVector_Intrinsics_vec256 v10 = Lib_IntVector_Intrinsics_vec256_add64(o0, v00);
+ Lib_IntVector_Intrinsics_vec256
+ v10h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v10, v10);
+ Lib_IntVector_Intrinsics_vec256 v20 = Lib_IntVector_Intrinsics_vec256_add64(v10, v10h);
+ Lib_IntVector_Intrinsics_vec256
+ v01 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o10, o10);
+ Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_add64(o10, v01);
+ Lib_IntVector_Intrinsics_vec256
+ v11h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v11, v11);
+ Lib_IntVector_Intrinsics_vec256 v21 = Lib_IntVector_Intrinsics_vec256_add64(v11, v11h);
+ Lib_IntVector_Intrinsics_vec256
+ v02 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o20, o20);
+ Lib_IntVector_Intrinsics_vec256 v12 = Lib_IntVector_Intrinsics_vec256_add64(o20, v02);
+ Lib_IntVector_Intrinsics_vec256
+ v12h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v12, v12);
+ Lib_IntVector_Intrinsics_vec256 v22 = Lib_IntVector_Intrinsics_vec256_add64(v12, v12h);
+ Lib_IntVector_Intrinsics_vec256
+ v03 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o30, o30);
+ Lib_IntVector_Intrinsics_vec256 v13 = Lib_IntVector_Intrinsics_vec256_add64(o30, v03);
+ Lib_IntVector_Intrinsics_vec256
+ v13h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v13, v13);
+ Lib_IntVector_Intrinsics_vec256 v23 = Lib_IntVector_Intrinsics_vec256_add64(v13, v13h);
+ Lib_IntVector_Intrinsics_vec256
+ v04 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o40, o40);
+ Lib_IntVector_Intrinsics_vec256 v14 = Lib_IntVector_Intrinsics_vec256_add64(o40, v04);
+ Lib_IntVector_Intrinsics_vec256
+ v14h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v14, v14);
+ Lib_IntVector_Intrinsics_vec256 v24 = Lib_IntVector_Intrinsics_vec256_add64(v14, v14h);
+ Lib_IntVector_Intrinsics_vec256
+ l = Lib_IntVector_Intrinsics_vec256_add64(v20, Lib_IntVector_Intrinsics_vec256_zero);
+ Lib_IntVector_Intrinsics_vec256
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec256_and(l,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l0 = Lib_IntVector_Intrinsics_vec256_add64(v21, c0);
+ Lib_IntVector_Intrinsics_vec256
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec256_and(l0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(v22, c1);
+ Lib_IntVector_Intrinsics_vec256
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec256_and(l1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(v23, c2);
+ Lib_IntVector_Intrinsics_vec256
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec256_and(l2,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(v24, c3);
+ Lib_IntVector_Intrinsics_vec256
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec256_and(l3,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ o00 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec256 o1 = tmp1;
+ Lib_IntVector_Intrinsics_vec256 o2 = tmp2;
+ Lib_IntVector_Intrinsics_vec256 o3 = tmp3;
+ Lib_IntVector_Intrinsics_vec256 o4 = tmp4;
+ out[0U] = o00;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+void
+Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key)
+{ /*
+ * Set up a Poly1305 context (256-bit vector variant) from a key.
+ *
+ * ctx: vector array; this function writes ctx[0] through ctx[24], laid out as
+ *   ctx[0..4]    acc   accumulator limbs, set to zero here
+ *   ctx[5..9]    r     masked key value split into five 26-bit limbs
+ *   ctx[10..14]  r5    5 * r, limb by limb
+ *   ctx[15..19]  rn    r multiplied by itself, then that result multiplied
+ *                      by itself again (i.e. the fourth power of r)
+ *   ctx[20..24]  rn_5  5 * rn, limb by limb
+ * key: only the first 16 bytes are read here, as two 64-bit words obtained
+ *      through load64_le.
+ *
+ * Both multiplications are a fully unrolled 5x5 limb product. Partial
+ * products that would land at limb index 5 or above use the "5 *" copies
+ * of the operand instead; with 26-bit limbs (5 * 26 = 130 bits) this folds
+ * them back as in arithmetic modulo 2^130 - 5. Each product is followed by
+ * a carry pass that brings the limbs back to (about) 26 bits.
+ *
+ * NOTE(review): the Lib_IntVector_Intrinsics_vec256_* helpers are defined
+ * elsewhere. load64 presumably broadcasts its argument to every 64-bit lane
+ * and mul64 presumably multiplies the low 32 bits of each lane -- confirm
+ * against the intrinsics header.
+ */
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx; /* accumulator limbs: ctx[0..4] */
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; /* precomputed key material: ctx[5..24] */
+ uint8_t *kr = key;
+ acc[0U] = Lib_IntVector_Intrinsics_vec256_zero; /* clear all five accumulator limbs */
+ acc[1U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[2U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[3U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[4U] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(kr); /* key bytes 0..7 */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U); /* key bytes 8..15 */
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; /* low half of the Poly1305 clamp constant (RFC 8439, section 2.5) */
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; /* high half of the same clamp constant */
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ Lib_IntVector_Intrinsics_vec256 *r = pre; /* ctx[5..9] */
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; /* ctx[10..14] */
+ Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; /* ctx[15..19] */
+ Lib_IntVector_Intrinsics_vec256 *rn_5 = pre + (uint32_t)15U; /* ctx[20..24] */
+ Lib_IntVector_Intrinsics_vec256 r_vec0 = Lib_IntVector_Intrinsics_vec256_load64(lo1);
+ Lib_IntVector_Intrinsics_vec256 r_vec1 = Lib_IntVector_Intrinsics_vec256_load64(hi1);
+ Lib_IntVector_Intrinsics_vec256
+ f00 = /* limb 0: bits 0..25 of lo1 */
+ Lib_IntVector_Intrinsics_vec256_and(r_vec0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f15 = /* limb 1: bits 26..51 of lo1 */
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 = /* limb 2: bits 52..63 of lo1, with the low 14 bits of hi1 placed above them */
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(r_vec1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 = /* limb 3: bits 14..39 of hi1 */
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, (uint32_t)40U); /* limb 4: bits 40..63 of hi1 */
+ Lib_IntVector_Intrinsics_vec256 f0 = f00;
+ Lib_IntVector_Intrinsics_vec256 f1 = f15;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ r[0U] = f0; /* store the five limbs of r */
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ Lib_IntVector_Intrinsics_vec256 f200 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 f210 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 f220 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 f230 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 f240 = r[4U];
+ r5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f200, (uint64_t)5U); /* r5 = 5 * r per limb; r5[0] is not read back in this function */
+ r5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f210, (uint64_t)5U);
+ r5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f220, (uint64_t)5U);
+ r5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f230, (uint64_t)5U);
+ r5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f240, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; /* first multiplication: r * r */
+ Lib_IntVector_Intrinsics_vec256 r10 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r20 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r30 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r40 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r510 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r520 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r530 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r540 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f100 = r[0U]; /* second operand is r as well */
+ Lib_IntVector_Intrinsics_vec256 f110 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 a00 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f100); /* column sums start with the limb-0 products */
+ Lib_IntVector_Intrinsics_vec256 a10 = Lib_IntVector_Intrinsics_vec256_mul64(r10, f100);
+ Lib_IntVector_Intrinsics_vec256 a20 = Lib_IntVector_Intrinsics_vec256_mul64(r20, f100);
+ Lib_IntVector_Intrinsics_vec256 a30 = Lib_IntVector_Intrinsics_vec256_mul64(r30, f100);
+ Lib_IntVector_Intrinsics_vec256 a40 = Lib_IntVector_Intrinsics_vec256_mul64(r40, f100);
+ Lib_IntVector_Intrinsics_vec256
+ a010 = /* add the limb-1 products; the wrapped term uses 5 * r[4] */
+ Lib_IntVector_Intrinsics_vec256_add64(a00,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a110 =
+ Lib_IntVector_Intrinsics_vec256_add64(a10,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a210 =
+ Lib_IntVector_Intrinsics_vec256_add64(a20,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a310 =
+ Lib_IntVector_Intrinsics_vec256_add64(a30,
+ Lib_IntVector_Intrinsics_vec256_mul64(r20, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a410 =
+ Lib_IntVector_Intrinsics_vec256_add64(a40,
+ Lib_IntVector_Intrinsics_vec256_mul64(r30, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a020 = /* add the limb-2 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a010,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a120 =
+ Lib_IntVector_Intrinsics_vec256_add64(a110,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a220 =
+ Lib_IntVector_Intrinsics_vec256_add64(a210,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a320 =
+ Lib_IntVector_Intrinsics_vec256_add64(a310,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a420 =
+ Lib_IntVector_Intrinsics_vec256_add64(a410,
+ Lib_IntVector_Intrinsics_vec256_mul64(r20, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a030 = /* add the limb-3 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a020,
+ Lib_IntVector_Intrinsics_vec256_mul64(r520, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a130 =
+ Lib_IntVector_Intrinsics_vec256_add64(a120,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a230 =
+ Lib_IntVector_Intrinsics_vec256_add64(a220,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a330 =
+ Lib_IntVector_Intrinsics_vec256_add64(a320,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a430 =
+ Lib_IntVector_Intrinsics_vec256_add64(a420,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a040 = /* add the limb-4 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a030,
+ Lib_IntVector_Intrinsics_vec256_mul64(r510, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a140 =
+ Lib_IntVector_Intrinsics_vec256_add64(a130,
+ Lib_IntVector_Intrinsics_vec256_mul64(r520, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a240 =
+ Lib_IntVector_Intrinsics_vec256_add64(a230,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a340 =
+ Lib_IntVector_Intrinsics_vec256_add64(a330,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a440 =
+ Lib_IntVector_Intrinsics_vec256_add64(a430,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t00 = a040; /* column sums before carry propagation */
+ Lib_IntVector_Intrinsics_vec256 t10 = a140;
+ Lib_IntVector_Intrinsics_vec256 t20 = a240;
+ Lib_IntVector_Intrinsics_vec256 t30 = a340;
+ Lib_IntVector_Intrinsics_vec256 t40 = a440;
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* 26-bit limb mask */
+ Lib_IntVector_Intrinsics_vec256
+ z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec256
+ z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260);
+ Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260);
+ Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00);
+ Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10);
+ Lib_IntVector_Intrinsics_vec256
+ z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec256
+ z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec256
+ t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); /* 4 * (carry out of limb 4) */
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); /* 5 * that carry, folded back into limb 0 below */
+ Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260);
+ Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260);
+ Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010);
+ Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec256
+ z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260);
+ Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260);
+ Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020);
+ Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130);
+ Lib_IntVector_Intrinsics_vec256
+ z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260);
+ Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030);
+ Lib_IntVector_Intrinsics_vec256 o00 = x020; /* result limbs; limbs 1 and 4 received a last carry without a further mask */
+ Lib_IntVector_Intrinsics_vec256 o10 = x120;
+ Lib_IntVector_Intrinsics_vec256 o20 = x210;
+ Lib_IntVector_Intrinsics_vec256 o30 = x320;
+ Lib_IntVector_Intrinsics_vec256 o40 = x420;
+ rn[0U] = o00; /* rn = r * r */
+ rn[1U] = o10;
+ rn[2U] = o20;
+ rn[3U] = o30;
+ rn[4U] = o40;
+ Lib_IntVector_Intrinsics_vec256 f201 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 f211 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f221 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f231 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f241 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f201, (uint64_t)5U); /* rn_5 = 5 * rn per limb */
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f211, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f221, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f231, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f241, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 r00 = rn[0U]; /* second multiplication: rn * rn, same scheme as the first */
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn_5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn_5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn_5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn_5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = rn[0U]; /* second operand is rn as well */
+ Lib_IntVector_Intrinsics_vec256 f11 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r00, f10); /* limb-0 products */
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 = /* add the limb-1 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, Lib_IntVector_Intrinsics_vec256_mul64(r1, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, Lib_IntVector_Intrinsics_vec256_mul64(r2, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, Lib_IntVector_Intrinsics_vec256_mul64(r3, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a02 = /* add the limb-2 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a03 = /* add the limb-3 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a04 = /* add the limb-4 products */
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f14));
+ Lib_IntVector_Intrinsics_vec256 t0 = a04; /* column sums before carry propagation */
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* 26-bit limb mask */
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); /* 4 * (carry out of limb 4) */
+ Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); /* 5 * that carry, folded back into limb 0 below */
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z120);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); /* second carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); /* second carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02; /* result limbs; limbs 1 and 4 received a last carry without a further mask */
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ rn[0U] = o0; /* rn now holds (r * r) * (r * r) */
+ rn[1U] = o1;
+ rn[2U] = o2;
+ rn[3U] = o3;
+ rn[4U] = o4;
+ Lib_IntVector_Intrinsics_vec256 f202 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f202, (uint64_t)5U); /* refresh rn_5 = 5 * rn for the final rn */
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f21, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f22, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f23, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f24, (uint64_t)5U);
+}
+
+void
+Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint64_t u0 = load64_le(text);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06;
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void
+Hacl_Poly1305_256_poly1305_update(
+ Lib_IntVector_Intrinsics_vec256 *ctx,
+ uint32_t len,
+ uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ uint32_t sz_block = (uint32_t)64U;
+ uint32_t len0 = len / sz_block * sz_block;
+ uint8_t *t0 = text;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)64U;
+ uint8_t *text0 = t0;
+ Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t0 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block);
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 o00 = o5;
+ Lib_IntVector_Intrinsics_vec256 o11 = o10;
+ Lib_IntVector_Intrinsics_vec256 o21 = o20;
+ Lib_IntVector_Intrinsics_vec256 o31 = o30;
+ Lib_IntVector_Intrinsics_vec256 o41 = o40;
+ e[0U] = o00;
+ e[1U] = o11;
+ e[2U] = o21;
+ e[3U] = o31;
+ e[4U] = o41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 *rn5 = pre + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f110 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 =
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a21 =
+ Lib_IntVector_Intrinsics_vec256_add64(a2,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a31 =
+ Lib_IntVector_Intrinsics_vec256_add64(a3,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a41 =
+ Lib_IntVector_Intrinsics_vec256_add64(a4,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t01 = a04;
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o01 = x02;
+ Lib_IntVector_Intrinsics_vec256 o12 = x12;
+ Lib_IntVector_Intrinsics_vec256 o22 = x21;
+ Lib_IntVector_Intrinsics_vec256 o32 = x32;
+ Lib_IntVector_Intrinsics_vec256 o42 = x42;
+ acc[0U] = o01;
+ acc[1U] = o12;
+ acc[2U] = o22;
+ acc[3U] = o32;
+ acc[4U] = o42;
+ Lib_IntVector_Intrinsics_vec256 f100 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc, pre);
+ }
+ uint32_t len1 = len - len0;
+ uint8_t *t1 = text + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t1 + i * (uint32_t)16U;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) {
+ uint8_t *last = t1 + nb * (uint32_t)16U;
+ KRML_PRE_ALIGN(32)
+ Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 fi = e[rem * (uint32_t)8U / (uint32_t)26U];
+ e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+void /* Finish a Poly1305 computation: reduce the accumulator, add the key's second half, emit the 16-byte tag. */
+Hacl_Poly1305_256_poly1305_finish(
+ uint8_t *tag, /* out: receives 16 bytes via the two store64_le calls at the end */
+ uint8_t *key, /* in: only key[16..31] is read by this function */
+ Lib_IntVector_Intrinsics_vec256 *ctx) /* in/out: ctx[0..4] are read and overwritten */
+{
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx; /* accumulator limbs are the first five vectors of ctx */
+ uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of the key */
+ Lib_IntVector_Intrinsics_vec256 f0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f23 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f33 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f40 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256
+ l0 = Lib_IntVector_Intrinsics_vec256_add64(f0, Lib_IntVector_Intrinsics_vec256_zero); /* first carry pass starts here; incoming carry is zero */
+ Lib_IntVector_Intrinsics_vec256
+ tmp00 =
+ Lib_IntVector_Intrinsics_vec256_and(l0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); /* keep the low 26 bits of the limb */
+ Lib_IntVector_Intrinsics_vec256
+ c00 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); /* everything above bit 25 is the carry into the next limb */
+ Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(f13, c00);
+ Lib_IntVector_Intrinsics_vec256
+ tmp10 =
+ Lib_IntVector_Intrinsics_vec256_and(l1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c10 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(f23, c10);
+ Lib_IntVector_Intrinsics_vec256
+ tmp20 =
+ Lib_IntVector_Intrinsics_vec256_and(l2,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c20 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(f33, c20);
+ Lib_IntVector_Intrinsics_vec256
+ tmp30 =
+ Lib_IntVector_Intrinsics_vec256_and(l3,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c30 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l4 = Lib_IntVector_Intrinsics_vec256_add64(f40, c30);
+ Lib_IntVector_Intrinsics_vec256
+ tmp40 =
+ Lib_IntVector_Intrinsics_vec256_and(l4,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c40 = Lib_IntVector_Intrinsics_vec256_shift_right64(l4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp00,
+ Lib_IntVector_Intrinsics_vec256_smul64(c40, (uint64_t)5U)); /* carry out of limb 4 (weight 2^130) re-enters limb 0 times 5 */
+ Lib_IntVector_Intrinsics_vec256 f110 = tmp10;
+ Lib_IntVector_Intrinsics_vec256 f210 = tmp20;
+ Lib_IntVector_Intrinsics_vec256 f310 = tmp30;
+ Lib_IntVector_Intrinsics_vec256 f410 = tmp40;
+ Lib_IntVector_Intrinsics_vec256
+ l = Lib_IntVector_Intrinsics_vec256_add64(f010, Lib_IntVector_Intrinsics_vec256_zero); /* second carry pass, same shape as the first */
+ Lib_IntVector_Intrinsics_vec256
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec256_and(l,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l5 = Lib_IntVector_Intrinsics_vec256_add64(f110, c0);
+ Lib_IntVector_Intrinsics_vec256
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec256_and(l5,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l5, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l6 = Lib_IntVector_Intrinsics_vec256_add64(f210, c1);
+ Lib_IntVector_Intrinsics_vec256
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec256_and(l6,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l6, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l7 = Lib_IntVector_Intrinsics_vec256_add64(f310, c2);
+ Lib_IntVector_Intrinsics_vec256
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec256_and(l7,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l7, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l8 = Lib_IntVector_Intrinsics_vec256_add64(f410, c3);
+ Lib_IntVector_Intrinsics_vec256
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec256_and(l8,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l8, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ f02 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec256 f12 = tmp1;
+ Lib_IntVector_Intrinsics_vec256 f22 = tmp2;
+ Lib_IntVector_Intrinsics_vec256 f32 = tmp3;
+ Lib_IntVector_Intrinsics_vec256 f42 = tmp4;
+ Lib_IntVector_Intrinsics_vec256
+ mh = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* value compared against limbs 1..4 */
+ Lib_IntVector_Intrinsics_vec256
+ ml = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffffbU); /* value compared against limb 0; (ml, mh x4) as 26-bit limbs is 2^130 - 5 */
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_eq64(f42, mh);
+ Lib_IntVector_Intrinsics_vec256
+ mask1 =
+ Lib_IntVector_Intrinsics_vec256_and(mask,
+ Lib_IntVector_Intrinsics_vec256_eq64(f32, mh));
+ Lib_IntVector_Intrinsics_vec256
+ mask2 =
+ Lib_IntVector_Intrinsics_vec256_and(mask1,
+ Lib_IntVector_Intrinsics_vec256_eq64(f22, mh));
+ Lib_IntVector_Intrinsics_vec256
+ mask3 =
+ Lib_IntVector_Intrinsics_vec256_and(mask2,
+ Lib_IntVector_Intrinsics_vec256_eq64(f12, mh));
+ Lib_IntVector_Intrinsics_vec256
+ mask4 =
+ Lib_IntVector_Intrinsics_vec256_and(mask3,
+ Lib_IntVector_Intrinsics_vec256_lognot(Lib_IntVector_Intrinsics_vec256_gt64(ml, f02))); /* NOTE(review): presumably all-ones lanes when limbs 1..4 equal mh and limb 0 >= ml; eq64/gt64 are defined in libintvector.h, confirm */
+ Lib_IntVector_Intrinsics_vec256 ph = Lib_IntVector_Intrinsics_vec256_and(mask4, mh); /* mh where mask4 is set, else 0 */
+ Lib_IntVector_Intrinsics_vec256 pl = Lib_IntVector_Intrinsics_vec256_and(mask4, ml); /* ml where mask4 is set, else 0 */
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_sub64(f02, pl); /* branch-free conditional subtraction, limb by limb */
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_sub64(f12, ph);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_sub64(f22, ph);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_sub64(f32, ph);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_sub64(f42, ph);
+ Lib_IntVector_Intrinsics_vec256 f011 = o0;
+ Lib_IntVector_Intrinsics_vec256 f111 = o1;
+ Lib_IntVector_Intrinsics_vec256 f211 = o2;
+ Lib_IntVector_Intrinsics_vec256 f311 = o3;
+ Lib_IntVector_Intrinsics_vec256 f411 = o4;
+ acc[0U] = f011; /* the reduced limbs are written back into ctx */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ Lib_IntVector_Intrinsics_vec256 f00 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f4 = acc[4U];
+ uint64_t f01 = Lib_IntVector_Intrinsics_vec256_extract64(f00, (uint32_t)0U); /* only element 0 of each limb vector feeds the tag */
+ uint64_t f112 = Lib_IntVector_Intrinsics_vec256_extract64(f1, (uint32_t)0U);
+ uint64_t f212 = Lib_IntVector_Intrinsics_vec256_extract64(f2, (uint32_t)0U);
+ uint64_t f312 = Lib_IntVector_Intrinsics_vec256_extract64(f3, (uint32_t)0U);
+ uint64_t f41 = Lib_IntVector_Intrinsics_vec256_extract64(f4, (uint32_t)0U);
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* bits 0..63 of the packed limbs (limb 2 straddles the word boundary) */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* bits 64..127; higher bits of limb 4 are shifted out */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* key bytes 16..23 */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U); /* key bytes 24..31 */
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two wrapping 64-bit additions */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* 0 or 1: branch-free test of r0 < f20, i.e. the carry out of the low word */
+ uint64_t r11 = r1 + c; /* any carry out of the high word is discarded */
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30); /* tag[0..7] */
+ store64_le(tag + (uint32_t)8U, f31); /* tag[8..15] */
+}
+
+void /* One-shot MAC: initialise a fresh state from key, absorb len bytes of text, then write the tag. */
+Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{
+ KRML_PRE_ALIGN(32) /* alignment annotation for the state array declared on the next line */
+ Lib_IntVector_Intrinsics_vec256 state[25U] KRML_POST_ALIGN(32) = { 0U }; /* 25-vector working state, zero-initialised */
+ Hacl_Poly1305_256_poly1305_init(state, key); /* set up the state from the key */
+ Hacl_Poly1305_256_poly1305_update(state, len, text); /* absorb the whole message */
+ Hacl_Poly1305_256_poly1305_finish(tag, key, state); /* finish() reads key[16..31] and stores 16 bytes to tag */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h
new file mode 100644
index 0000000000..5d591e1137
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h
@@ -0,0 +1,64 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Poly1305_256_H
+#define __Hacl_Poly1305_256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "libintvector.h"
+
+typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx; /* state is an array of vec256; poly1305_mac uses 25 elements */
+
+void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key); /* NOTE(review): presumably prepares ctx from key like the _32 variant; confirm against the definition */
+
+void Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text); /* NOTE(review): presumably absorbs one 16-byte block; confirm against the definition */
+
+void /* absorbs len bytes of text into ctx; a trailing len % 16 bytes is handled as a final partial block */
+Hacl_Poly1305_256_poly1305_update(
+ Lib_IntVector_Intrinsics_vec256 *ctx,
+ uint32_t len,
+ uint8_t *text);
+
+void /* reduces ctx[0..4], adds key[16..31], and stores the 16-byte result to tag */
+Hacl_Poly1305_256_poly1305_finish(
+ uint8_t *tag,
+ uint8_t *key,
+ Lib_IntVector_Intrinsics_vec256 *ctx);
+
+void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); /* one-shot: init + update + finish on a local state */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Poly1305_256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
new file mode 100644
index 0000000000..09def133bc
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
@@ -0,0 +1,573 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Poly1305_32.h"
+
+void /* Initialise a Poly1305 state: zero the accumulator and precompute r, 5*r and their copies from key[0..15]. */
+Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key)
+{
+ uint64_t *acc = ctx; /* ctx[0..4]: accumulator limbs */
+ uint64_t *pre = ctx + (uint32_t)5U; /* ctx[5..24]: precomputed values written below */
+ uint8_t *kr = key; /* only the first 16 key bytes are read here */
+ acc[0U] = (uint64_t)0U;
+ acc[1U] = (uint64_t)0U;
+ acc[2U] = (uint64_t)0U;
+ acc[3U] = (uint64_t)0U;
+ acc[4U] = (uint64_t)0U;
+ uint64_t u0 = load64_le(kr); /* key bytes 0..7, little-endian */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U); /* key bytes 8..15, little-endian */
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; /* clamp mask for the low word */
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; /* clamp mask for the high word */
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ uint64_t *r = pre; /* ctx[5..9] */
+ uint64_t *r5 = pre + (uint32_t)5U; /* ctx[10..14] */
+ uint64_t *rn = pre + (uint32_t)10U; /* ctx[15..19] */
+ uint64_t *rn_5 = pre + (uint32_t)15U; /* ctx[20..24] */
+ uint64_t r_vec0 = lo1;
+ uint64_t r_vec1 = hi1;
+ uint64_t f00 = r_vec0 & (uint64_t)0x3ffffffU; /* split the 128-bit clamped value into five limbs of 26 bits: bits 0..25 */
+ uint64_t f10 = r_vec0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = r_vec0 >> (uint32_t)52U | (r_vec1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77, straddling both words */
+ uint64_t f30 = r_vec1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = r_vec1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f0 = f00;
+ uint64_t f1 = f10;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ r[0U] = f0;
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ uint64_t f200 = r[0U];
+ uint64_t f21 = r[1U];
+ uint64_t f22 = r[2U];
+ uint64_t f23 = r[3U];
+ uint64_t f24 = r[4U];
+ r5[0U] = f200 * (uint64_t)5U; /* r5[i] = 5 * r[i], used by the multiplication in update1/update */
+ r5[1U] = f21 * (uint64_t)5U;
+ r5[2U] = f22 * (uint64_t)5U;
+ r5[3U] = f23 * (uint64_t)5U;
+ r5[4U] = f24 * (uint64_t)5U;
+ rn[0U] = r[0U]; /* rn is a plain copy of r in this variant */
+ rn[1U] = r[1U];
+ rn[2U] = r[2U];
+ rn[3U] = r[3U];
+ rn[4U] = r[4U];
+ rn_5[0U] = r5[0U]; /* rn_5 is a plain copy of r5 in this variant */
+ rn_5[1U] = r5[1U];
+ rn_5[2U] = r5[2U];
+ rn_5[3U] = r5[3U];
+ rn_5[4U] = r5[4U];
+}
+
+void /* Absorb exactly one 16-byte block: acc = (acc + block + 2^128) * r, stored as five partially reduced limbs. */
+Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text)
+{
+ uint64_t *pre = ctx + (uint32_t)5U; /* precomputed r (pre[0..4]) and 5*r (pre[5..9]) */
+ uint64_t *acc = ctx; /* ctx[0..4]: accumulator limbs, read and overwritten */
+ uint64_t e[5U] = { 0U }; /* the block as five 26-bit limbs */
+ uint64_t u0 = load64_le(text); /* text bytes 0..7 */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U); /* text bytes 8..15 */
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77 */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4 = bit 4*26+24 = 128 of the block value */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask; /* append the 2^128 pad bit of a full block */
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U]; /* r5[0] is not needed by the schoolbook product below */
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* limb-wise acc + block, no carry propagation yet */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* column sums of (acc + block) * r; terms that wrap past limb 4 use 5*r instead of r */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry chain: 0->1 and 3->4 first */
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U; /* then 1->2 and 4->0 */
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* z11 + 4*z11 = 5*z11: carry out of limb 4 folded back into limb 0 */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U; /* then 2->3 and 0->1 */
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U; /* finally 3->4; limbs 1 and 4 are left unmasked after their last addition */
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void /* Absorb len bytes of text: len / 16 full blocks, then a zero-padded partial block if len % 16 != 0. */
+Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text)
+{
+ uint64_t *pre = ctx + (uint32_t)5U; /* precomputed r (pre[0..4]) and 5*r (pre[5..9]) */
+ uint64_t *acc = ctx; /* ctx[0..4]: accumulator limbs, read and overwritten */
+ uint32_t nb = len / (uint32_t)16U; /* number of full 16-byte blocks */
+ uint32_t rem = len % (uint32_t)16U; /* bytes in the trailing partial block, 0..15 */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* the loop body performs the same steps as Hacl_Poly1305_32_poly1305_update1 on block i */
+ uint8_t *block = text + i * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* split the block into five 26-bit limbs */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4 = bit 128 of the block value */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask; /* append the 2^128 pad bit of a full block */
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* limb-wise acc + block */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* column sums of (acc + block) * r; wrapped terms use 5*r */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry chain, same order as in update1 */
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* 5 * z11: carry out of limb 4 folded back into limb 0 */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) { /* trailing partial block of 1..15 bytes */
+ uint8_t *last = text + nb * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint8_t tmp[16U] = { 0U }; /* zero-filled so the bytes past rem read as 0 */
+ memcpy(tmp, last, rem * sizeof(uint8_t));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; /* parses as 1 << ((rem * 8) % 26): pad bit position within its limb */
+ uint64_t mask = b;
+ uint64_t fi = e[rem * (uint32_t)8U / (uint32_t)26U]; /* limb that holds bit rem * 8; index is 0..4 for rem in 1..15 */
+ e[rem * (uint32_t)8U / (uint32_t)26U] = fi | mask; /* set bit 8 * rem, directly above the last message byte (no 2^128 bit here) */
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* from here on: identical add, multiply and carry steps to the full-block path */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return; /* redundant: nothing follows the if block */
+ }
+}
+
+void /* Finish: fully reduce ctx[0..4], add key[16..31] modulo 2^128, and store the 16-byte result to tag. */
+Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx)
+{
+ uint64_t *acc = ctx; /* ctx[0..4]: accumulator limbs, read and overwritten */
+ uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of the key */
+ uint64_t f0 = acc[0U];
+ uint64_t f13 = acc[1U];
+ uint64_t f23 = acc[2U];
+ uint64_t f33 = acc[3U];
+ uint64_t f40 = acc[4U];
+ uint64_t l0 = f0 + (uint64_t)0U; /* first carry pass; incoming carry is zero */
+ uint64_t tmp00 = l0 & (uint64_t)0x3ffffffU; /* keep the low 26 bits */
+ uint64_t c00 = l0 >> (uint32_t)26U; /* the rest carries into the next limb */
+ uint64_t l1 = f13 + c00;
+ uint64_t tmp10 = l1 & (uint64_t)0x3ffffffU;
+ uint64_t c10 = l1 >> (uint32_t)26U;
+ uint64_t l2 = f23 + c10;
+ uint64_t tmp20 = l2 & (uint64_t)0x3ffffffU;
+ uint64_t c20 = l2 >> (uint32_t)26U;
+ uint64_t l3 = f33 + c20;
+ uint64_t tmp30 = l3 & (uint64_t)0x3ffffffU;
+ uint64_t c30 = l3 >> (uint32_t)26U;
+ uint64_t l4 = f40 + c30;
+ uint64_t tmp40 = l4 & (uint64_t)0x3ffffffU;
+ uint64_t c40 = l4 >> (uint32_t)26U;
+ uint64_t f010 = tmp00 + c40 * (uint64_t)5U; /* carry out of limb 4 (weight 2^130) re-enters limb 0 times 5 */
+ uint64_t f110 = tmp10;
+ uint64_t f210 = tmp20;
+ uint64_t f310 = tmp30;
+ uint64_t f410 = tmp40;
+ uint64_t l = f010 + (uint64_t)0U; /* second carry pass, same shape as the first */
+ uint64_t tmp0 = l & (uint64_t)0x3ffffffU;
+ uint64_t c0 = l >> (uint32_t)26U;
+ uint64_t l5 = f110 + c0;
+ uint64_t tmp1 = l5 & (uint64_t)0x3ffffffU;
+ uint64_t c1 = l5 >> (uint32_t)26U;
+ uint64_t l6 = f210 + c1;
+ uint64_t tmp2 = l6 & (uint64_t)0x3ffffffU;
+ uint64_t c2 = l6 >> (uint32_t)26U;
+ uint64_t l7 = f310 + c2;
+ uint64_t tmp3 = l7 & (uint64_t)0x3ffffffU;
+ uint64_t c3 = l7 >> (uint32_t)26U;
+ uint64_t l8 = f410 + c3;
+ uint64_t tmp4 = l8 & (uint64_t)0x3ffffffU;
+ uint64_t c4 = l8 >> (uint32_t)26U;
+ uint64_t f02 = tmp0 + c4 * (uint64_t)5U;
+ uint64_t f12 = tmp1;
+ uint64_t f22 = tmp2;
+ uint64_t f32 = tmp3;
+ uint64_t f42 = tmp4;
+ uint64_t mh = (uint64_t)0x3ffffffU; /* value compared against limbs 1..4 */
+ uint64_t ml = (uint64_t)0x3fffffbU; /* value compared against limb 0; (ml, mh x4) as 26-bit limbs is 2^130 - 5 */
+ uint64_t mask = FStar_UInt64_eq_mask(f42, mh); /* NOTE(review): presumably all-ones when equal, else 0; helper is defined outside this file, confirm */
+ uint64_t mask1 = mask & FStar_UInt64_eq_mask(f32, mh);
+ uint64_t mask2 = mask1 & FStar_UInt64_eq_mask(f22, mh);
+ uint64_t mask3 = mask2 & FStar_UInt64_eq_mask(f12, mh);
+ uint64_t mask4 = mask3 & ~~FStar_UInt64_gte_mask(f02, ml); /* the two ~ cancel out, so this is mask3 & gte_mask(f02, ml) */
+ uint64_t ph = mask4 & mh; /* mh when mask4 is set, else 0 */
+ uint64_t pl = mask4 & ml; /* ml when mask4 is set, else 0 */
+ uint64_t o0 = f02 - pl; /* branch-free conditional subtraction, limb by limb */
+ uint64_t o1 = f12 - ph;
+ uint64_t o2 = f22 - ph;
+ uint64_t o3 = f32 - ph;
+ uint64_t o4 = f42 - ph;
+ uint64_t f011 = o0;
+ uint64_t f111 = o1;
+ uint64_t f211 = o2;
+ uint64_t f311 = o3;
+ uint64_t f411 = o4;
+ acc[0U] = f011; /* the reduced limbs are written back into ctx */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ uint64_t f00 = acc[0U];
+ uint64_t f1 = acc[1U];
+ uint64_t f2 = acc[2U];
+ uint64_t f3 = acc[3U];
+ uint64_t f4 = acc[4U];
+ uint64_t f01 = f00;
+ uint64_t f112 = f1;
+ uint64_t f212 = f2;
+ uint64_t f312 = f3;
+ uint64_t f41 = f4;
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* bits 0..63 of the packed limbs (limb 2 straddles the word boundary) */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* bits 64..127; higher bits of limb 4 are shifted out */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* key bytes 16..23 */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U); /* key bytes 24..31 */
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two wrapping 64-bit additions */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* 0 or 1: branch-free test of r0 < f20, i.e. the carry out of the low word */
+ uint64_t r11 = r1 + c; /* any carry out of the high word is discarded */
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30); /* tag[0..7] */
+ store64_le(tag + (uint32_t)8U, f31); /* tag[8..15] */
+}
+
+void /* One-shot MAC: initialise a fresh state from key, absorb len bytes of text, then write the 16-byte tag. */
+Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{
+ uint64_t state[25U] = { 0U }; /* 5 accumulator limbs followed by 20 precomputed words filled in by init */
+ Hacl_Poly1305_32_poly1305_init(state, key); /* reads key[0..15] */
+ Hacl_Poly1305_32_poly1305_update(state, len, text); /* absorbs the whole message, including a partial last block */
+ Hacl_Poly1305_32_poly1305_finish(tag, key, state); /* reads key[16..31] and stores 16 bytes to tag */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
new file mode 100644
index 0000000000..abbc3d11f1
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
@@ -0,0 +1,56 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Poly1305_32_H
+#define __Hacl_Poly1305_32_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+
+typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; /* context is a plain uint64_t buffer; the one-shot mac in this patch uses 25 words */
+
+void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key); /* NOTE(review): presumably prepares ctx from key; definition not visible here - confirm */
+
+void Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text); /* NOTE(review): no length argument, presumably consumes one fixed-size block of text - confirm */
+
+void Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text); /* NOTE(review): presumably absorbs len bytes of text into ctx - confirm */
+
+void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx); /* NOTE(review): presumably writes the final tag from ctx and key - confirm output size against the definition */
+
+void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); /* one-shot: init, update, finish on a local zero-initialised 25-word context */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Poly1305_32_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Streaming_Types.h b/security/nss/lib/freebl/verified/Hacl_Streaming_Types.h
new file mode 100644
index 0000000000..0c897e9d97
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Streaming_Types.h
@@ -0,0 +1,78 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Streaming_Types_H
+#define __Hacl_Streaming_Types_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#define Spec_Hash_Definitions_SHA2_224 0
+#define Spec_Hash_Definitions_SHA2_256 1
+#define Spec_Hash_Definitions_SHA2_384 2
+#define Spec_Hash_Definitions_SHA2_512 3
+#define Spec_Hash_Definitions_SHA1 4
+#define Spec_Hash_Definitions_MD5 5
+#define Spec_Hash_Definitions_Blake2S 6
+#define Spec_Hash_Definitions_Blake2B 7
+#define Spec_Hash_Definitions_SHA3_256 8
+#define Spec_Hash_Definitions_SHA3_224 9
+#define Spec_Hash_Definitions_SHA3_384 10
+#define Spec_Hash_Definitions_SHA3_512 11
+#define Spec_Hash_Definitions_Shake128 12
+#define Spec_Hash_Definitions_Shake256 13
+
+typedef uint8_t Spec_Hash_Definitions_hash_alg; /* 8-bit tag; NOTE(review): presumably holds one of the Spec_Hash_Definitions_* constants (0..13) above - confirm */
+
+#define Hacl_Streaming_Types_Success 0
+#define Hacl_Streaming_Types_InvalidAlgorithm 1
+#define Hacl_Streaming_Types_InvalidLength 2
+#define Hacl_Streaming_Types_MaximumLengthExceeded 3
+
+typedef uint8_t Hacl_Streaming_Types_error_code; /* 8-bit status; NOTE(review): presumably holds one of the Hacl_Streaming_Types_* constants (0..3) above - confirm */
+
+typedef struct Hacl_Streaming_MD_state_32_s { /* streaming state whose block_state is an array of 32-bit words */
+ uint32_t *block_state; /* NOTE(review): presumably the running hash state; ownership and length are not visible here - confirm */
+ uint8_t *buf; /* NOTE(review): presumably buffers input that has not yet been processed - confirm size with the users of this type */
+ uint64_t total_len; /* NOTE(review): presumably the number of bytes fed so far - confirm */
+} Hacl_Streaming_MD_state_32;
+
+typedef struct Hacl_Streaming_MD_state_64_s { /* same layout as Hacl_Streaming_MD_state_32 except block_state points at 64-bit words */
+ uint64_t *block_state; /* NOTE(review): presumably the running hash state; ownership and length are not visible here - confirm */
+ uint8_t *buf; /* NOTE(review): presumably buffers input that has not yet been processed - confirm size with the users of this type */
+ uint64_t total_len; /* NOTE(review): presumably the number of bytes fed so far - confirm */
+} Hacl_Streaming_MD_state_64;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Streaming_Types_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/config.h b/security/nss/lib/freebl/verified/config.h
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/security/nss/lib/freebl/verified/config.h
diff --git a/security/nss/lib/freebl/verified/curve25519-inline.h b/security/nss/lib/freebl/verified/curve25519-inline.h
new file mode 100644
index 0000000000..690e75a1b9
--- /dev/null
+++ b/security/nss/lib/freebl/verified/curve25519-inline.h
@@ -0,0 +1,942 @@
+#ifdef __GNUC__
+#if defined(__x86_64__) || defined(_M_X64)
+#pragma once
+#include <inttypes.h>
+
+// Computes out <- f1 + f2, where f1 and out are four 64-bit limbs (least significant limb at offset 0) and f2 is one 64-bit value.
+// The carry out of the top limb is gathered in rax, which is only a clobbered scratch register here: this void function does not hand it back to the caller.
+static inline void
+add_scalar(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+ __asm__ volatile( // operands: %0 = f2 (read-write), %1 = out, %2 = f1
+ // Zero the scratch registers that receive limbs 1..3 and the final carry
+ " xor %%r8d, %%r8d;"
+ " xor %%r9d, %%r9d;"
+ " xor %%r10d, %%r10d;"
+ " xor %%r11d, %%r11d;"
+ " xor %%eax, %%eax;"
+
+ // Begin addition chain
+ " addq 0(%2), %0;" // f2 += f1[0], sets CF
+ " movq %0, 0(%1);" // out[0]
+ " adcxq 8(%2), %%r8;" // r8 = f1[1] + CF
+ " movq %%r8, 8(%1);" // out[1]
+ " adcxq 16(%2), %%r9;" // r9 = f1[2] + CF
+ " movq %%r9, 16(%1);" // out[2]
+ " adcxq 24(%2), %%r10;" // r10 = f1[3] + CF
+ " movq %%r10, 24(%1);" // out[3]
+
+ // Collect the final carry bit in rax (r11 is zero); it is not propagated to C code
+ " adcx %%r11, %%rax;"
+ : "+&r"(f2)
+ : "r"(out), "r"(f1)
+ : "%rax", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+}
+
+// Computes the field addition out <- f1 + f2 on four-limb elements; a carry out of 2^256 is folded back in as 38 (2^256 is congruent to 38 modulo 2^255 - 19)
+static inline void
+fadd(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+ __asm__ volatile( // operands: %0 = f2 (read-write, later reused to hold the constant 38), %1 = out, %2 = f1
+ // Compute the raw addition of f1 + f2
+ " movq 0(%0), %%r8;"
+ " addq 0(%2), %%r8;"
+ " movq 8(%0), %%r9;"
+ " adcxq 8(%2), %%r9;"
+ " movq 16(%0), %%r10;"
+ " adcxq 16(%2), %%r10;"
+ " movq 24(%0), %%r11;"
+ " adcxq 24(%2), %%r11;"
+
+ /////// Wrap the result back into the field //////
+
+ // Step 1: Compute carry*38
+ " mov $0, %%rax;" // mov leaves CF from the addition chain intact
+ " mov $38, %0;" // the f2 pointer is no longer needed
+ " cmovc %0, %%rax;" // rax = CF ? 38 : 0
+
+ // Step 2: Add carry*38 to the original sum
+ " xor %%ecx, %%ecx;" // rcx = 0, used to propagate the carry
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %0, %%rax;" // %0 still holds 38
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r"(f2)
+ : "r"(out), "r"(f1)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+}
+
+// Computes the field subtraction out <- f1 - f2 on four-limb elements; a borrow out of the top limb is compensated by subtracting 38
+static inline void
+fsub(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+ __asm__ volatile( // operands (all inputs): %0 = out, %1 = f1, %2 = f2
+ // Compute the raw subtraction of f1-f2
+ " movq 0(%1), %%r8;"
+ " subq 0(%2), %%r8;"
+ " movq 8(%1), %%r9;"
+ " sbbq 8(%2), %%r9;"
+ " movq 16(%1), %%r10;"
+ " sbbq 16(%2), %%r10;"
+ " movq 24(%1), %%r11;"
+ " sbbq 24(%2), %%r11;"
+
+ /////// Wrap the result back into the field //////
+
+ // Step 1: Compute carry*38
+ " mov $0, %%rax;" // mov leaves the borrow flag (CF) intact
+ " mov $38, %%rcx;"
+ " cmovc %%rcx, %%rax;" // rax = borrow ? 38 : 0
+
+ // Step 2: Subtract carry*38 from the original difference
+ " sub %%rax, %%r8;"
+ " sbb $0, %%r9;"
+ " sbb $0, %%r10;"
+ " sbb $0, %%r11;"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rcx, %%rax;" // rcx still holds 38
+ " sub %%rax, %%r8;"
+
+ // Store the result
+ " movq %%r8, 0(%0);"
+ " movq %%r9, 8(%0);"
+ " movq %%r10, 16(%0);"
+ " movq %%r11, 24(%0);"
+ :
+ : "r"(out), "r"(f1), "r"(f2)
+ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc");
+}
+
+// Computes a field multiplication: out <- f1 * f2 (four 64-bit limbs each; the upper half of the product is folded back in multiplied by 38)
+// Uses the 8-element buffer tmp for the raw 8-limb product; the registers holding f1, f2 and tmp are overwritten inside the asm
+static inline void
+fmul(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+ __asm__ volatile( // operands: %0 = f1 (src1), %1 = f2 (src2), %2 = tmp, %3 = out
+
+ /////// Compute the raw multiplication: tmp <- src1 * src2 //////
+
+ // Compute src1[0] * src2
+ " movq 0(%0), %%rdx;" // mulx takes its implicit multiplicand from rdx
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;" // clears CF and OF before the carry chains
+ " movq %%r8, 0(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 8(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
+ // Compute src1[1] * src2
+ " movq 8(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 8(%2), %%r8;"
+ " movq %%r8, 8(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 16(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;" // r8 = 0, added below to absorb the last CF
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[2] * src2
+ " movq 16(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 16(%2), %%r8;"
+ " movq %%r8, 16(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 24(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[3] * src2
+ " movq 24(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 24(%2), %%r8;"
+ " movq %%r8, 24(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 32(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 40(%2);"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 48(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 56(%2);"
+
+ // Line up pointers
+ " mov %2, %0;" // %0 now points at tmp
+ " mov %3, %2;" // %2 now points at out
+
+ /////// Wrap the result back into the field //////
+
+ // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %k1, %k1;" // %1 becomes the zero register; also clears CF and OF
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;" // rax = overflow limb * 38
+
+ // Step 2: Fold the carry back into dst
+ " add %%rax, %%r8;"
+ " adcx %1, %%r9;"
+ " movq %%r9, 8(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 16(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 24(%2);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;" // rdx still holds 38
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%2);"
+ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc");
+}
+
+// Computes two field multiplications (here f1[i], f2[i], out[i] denote the i-th four-limb element):
+// out[0] <- f1[0] * f2[0]
+// out[1] <- f1[1] * f2[1]
+// Uses the 16-element buffer tmp for intermediate results (one raw 8-limb product per multiplication):
+static inline void
+fmul2(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp)
+{
+ __asm__ volatile( // operands: %0 = f1 (src1), %1 = f2 (src2), %2 = tmp, %3 = out
+
+ /////// Compute the raw multiplication tmp[0] <- f1[0] * f2[0] //////
+
+ // Compute src1[0] * src2
+ " movq 0(%0), %%rdx;" // mulx takes its implicit multiplicand from rdx
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;" // clears CF and OF before the carry chains
+ " movq %%r8, 0(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 8(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
+ // Compute src1[1] * src2
+ " movq 8(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 8(%2), %%r8;"
+ " movq %%r8, 8(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 16(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;" // r8 = 0, added below to absorb the last CF
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[2] * src2
+ " movq 16(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 16(%2), %%r8;"
+ " movq %%r8, 16(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 24(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[3] * src2
+ " movq 24(%0), %%rdx;"
+ " mulxq 0(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 24(%2), %%r8;"
+ " movq %%r8, 24(%2);"
+ " mulxq 8(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 32(%2);"
+ " mulxq 16(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 40(%2);"
+ " mov $0, %%r8;"
+ " mulxq 24(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 48(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 56(%2);"
+
+ /////// Compute the raw multiplication tmp[1] <- f1[1] * f2[1] //////
+
+ // Compute src1[0] * src2
+ " movq 32(%0), %%rdx;" // second element: inputs start at byte offset 32, product goes to tmp + 64
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " movq %%r8, 64(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " movq %%r10, 72(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+
+ // Compute src1[1] * src2
+ " movq 40(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 72(%2), %%r8;"
+ " movq %%r8, 72(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 80(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[2] * src2
+ " movq 48(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 80(%2), %%r8;"
+ " movq %%r8, 80(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 88(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+
+ // Compute src1[3] * src2
+ " movq 56(%0), %%rdx;"
+ " mulxq 32(%1), %%r8, %%r9;"
+ " xor %%r10d, %%r10d;"
+ " adcxq 88(%2), %%r8;"
+ " movq %%r8, 88(%2);"
+ " mulxq 40(%1), %%r10, %%r11;"
+ " adox %%r9, %%r10;"
+ " adcx %%rbx, %%r10;"
+ " movq %%r10, 96(%2);"
+ " mulxq 48(%1), %%rbx, %%r13;"
+ " adox %%r11, %%rbx;"
+ " adcx %%r14, %%rbx;"
+ " movq %%rbx, 104(%2);"
+ " mov $0, %%r8;"
+ " mulxq 56(%1), %%r14, %%rdx;"
+ " adox %%r13, %%r14;"
+ " adcx %%rax, %%r14;"
+ " movq %%r14, 112(%2);"
+ " mov $0, %%rax;"
+ " adox %%rdx, %%rax;"
+ " adcx %%r8, %%rax;"
+ " movq %%rax, 120(%2);"
+
+ // Line up pointers
+ " mov %2, %0;" // %0 now points at tmp
+ " mov %3, %2;" // %2 now points at out
+
+ /////// Wrap the results back into the field //////
+
+ // Step 1 (first product): Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %k1, %k1;" // %1 becomes the zero register; also clears CF and OF
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;" // rax = overflow limb * 38
+
+ // Step 2: Fold the carry back into dst
+ " add %%rax, %%r8;"
+ " adcx %1, %%r9;"
+ " movq %%r9, 8(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 16(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 24(%2);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;" // rdx still holds 38
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%2);"
+
+ // Step 1 (second product, tmp + 64 bytes): Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 96(%0), %%r8, %%r13;"
+ " xor %k1, %k1;"
+ " adoxq 64(%0), %%r8;"
+ " mulxq 104(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%0), %%r9;"
+ " mulxq 112(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 80(%0), %%r10;"
+ " mulxq 120(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%0), %%r11;"
+ " adcx %1, %%rax;"
+ " adox %1, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ // Step 2: Fold the carry back into dst (second result lives at out + 32 bytes)
+ " add %%rax, %%r8;"
+ " adcx %1, %%r9;"
+ " movq %%r9, 40(%2);"
+ " adcx %1, %%r10;"
+ " movq %%r10, 48(%2);"
+ " adcx %1, %%r11;"
+ " movq %%r11, 56(%2);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%2);"
+ : "+&r"(f1), "+&r"(f2), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc");
+}
+
+// Computes the field multiplication out <- f1 * f2 of the four-limb element f1 with the single 64-bit value f2
+// Requires f2 to be smaller than 2^17
+static inline void
+fmul_scalar(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+ register uint64_t f2_r __asm__("rdx") = f2; // pinned to rdx because mulx reads its implicit multiplicand from rdx
+
+ __asm__ volatile( // operands: %0 = f2_r (rdx, read-write), %1 = out, %2 = f1
+ // Compute the raw multiplication of f1*f2
+ " mulxq 0(%2), %%r8, %%rcx;" // f1[0]*f2
+ " mulxq 8(%2), %%r9, %%rbx;" // f1[1]*f2
+ " add %%rcx, %%r9;"
+ " mov $0, %%rcx;" // rcx = 0 from here on; mov leaves CF intact
+ " mulxq 16(%2), %%r10, %%r13;" // f1[2]*f2
+ " adcx %%rbx, %%r10;"
+ " mulxq 24(%2), %%r11, %%rax;" // f1[3]*f2
+ " adcx %%r13, %%r11;"
+ " adcx %%rcx, %%rax;" // rax = top limb of the five-limb product
+
+ /////// Wrap the result back into the field //////
+
+ // Step 1: Compute carry*38
+ " mov $38, %%rdx;" // f2 is no longer needed in rdx
+ " imul %%rdx, %%rax;"
+
+ // Step 2: Fold the carry back into dst
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;" // rax = CF ? 38 : 0
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r"(f2_r)
+ : "r"(out), "r"(f1)
+ : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", "memory", "cc");
+}
+
+// Conditional swap without branching on bit: when bit is nonzero, p1[i] and p2[i] are exchanged for i = 0..7; when bit is zero both buffers are rewritten unchanged
+static inline void
+cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2)
+{
+ __asm__ volatile( // operands: %0 = bit (read-write), %1 = p1, %2 = p2
+ // Transfer bit into CF flag: adding 2^64 - 1 carries exactly when bit != 0
+ " add $18446744073709551615, %0;"
+
+ // cswap p1[0], p2[0] (the mov/cmov instructions below do not modify CF, so it stays valid for all eight limbs)
+ " movq 0(%1), %%r8;"
+ " movq 0(%2), %%r9;"
+ " mov %%r8, %%r10;" // keep a copy of p1[0]
+ " cmovc %%r9, %%r8;" // r8 = CF ? p2[0] : p1[0]
+ " cmovc %%r10, %%r9;" // r9 = CF ? p1[0] : p2[0]
+ " movq %%r8, 0(%1);"
+ " movq %%r9, 0(%2);"
+
+ // cswap p1[1], p2[1]
+ " movq 8(%1), %%r8;"
+ " movq 8(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 8(%1);"
+ " movq %%r9, 8(%2);"
+
+ // cswap p1[2], p2[2]
+ " movq 16(%1), %%r8;"
+ " movq 16(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 16(%1);"
+ " movq %%r9, 16(%2);"
+
+ // cswap p1[3], p2[3]
+ " movq 24(%1), %%r8;"
+ " movq 24(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 24(%1);"
+ " movq %%r9, 24(%2);"
+
+ // cswap p1[4], p2[4]
+ " movq 32(%1), %%r8;"
+ " movq 32(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 32(%1);"
+ " movq %%r9, 32(%2);"
+
+ // cswap p1[5], p2[5]
+ " movq 40(%1), %%r8;"
+ " movq 40(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 40(%1);"
+ " movq %%r9, 40(%2);"
+
+ // cswap p1[6], p2[6]
+ " movq 48(%1), %%r8;"
+ " movq 48(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 48(%1);"
+ " movq %%r9, 48(%2);"
+
+ // cswap p1[7], p2[7]
+ " movq 56(%1), %%r8;"
+ " movq 56(%2), %%r9;"
+ " mov %%r8, %%r10;"
+ " cmovc %%r9, %%r8;"
+ " cmovc %%r10, %%r9;"
+ " movq %%r8, 56(%1);"
+ " movq %%r9, 56(%2);"
+ : "+&r"(bit)
+ : "r"(p1), "r"(p2)
+ : "%r8", "%r9", "%r10", "memory", "cc");
+}
+
+// Computes the square of a field element: out <- f * f (four 64-bit limbs; the upper half of the product is folded back in multiplied by 38)
+// Uses the 8-element buffer tmp for the raw 8-limb square; the registers holding f and tmp are overwritten inside the asm
+static inline void
+fsqr(uint64_t *out, uint64_t *f, uint64_t *tmp)
+{
+ __asm__ volatile( // operands: %0 = f, %1 = tmp, %2 = out
+
+ /////// Compute the raw multiplication: tmp <- f * f //////
+
+ // Step 1: Compute all partial products (each cross product f[i]*f[j], i != j, once)
+ " movq 0(%0), %%rdx;" // rdx = f[0]
+ " mulxq 8(%0), %%r8, %%r14;" // f[1]*f[0]
+ " xor %%r15d, %%r15d;" // r15 = 0; clears CF and OF
+ " mulxq 16(%0), %%r9, %%r10;" // f[2]*f[0]
+ " adcx %%r14, %%r9;"
+ " mulxq 24(%0), %%rax, %%rcx;" // f[3]*f[0]
+ " adcx %%rax, %%r10;"
+ " movq 24(%0), %%rdx;" // rdx = f[3]
+ " mulxq 8(%0), %%r11, %%rbx;" // f[1]*f[3]
+ " adcx %%rcx, %%r11;"
+ " mulxq 16(%0), %%rax, %%r13;" // f[2]*f[3]
+ " adcx %%rax, %%rbx;"
+ " movq 8(%0), %%rdx;" // rdx = f[1]
+ " adcx %%r15, %%r13;" // absorb the pending carry (r15 is zero)
+ " mulxq 16(%0), %%rax, %%rcx;" // f[2]*f[1]
+ " mov $0, %%r14;"
+
+ // Step 2: Compute two parallel carry chains (the adox chain adds f[2]*f[1]; the adcx chain doubles every cross-product limb)
+ " xor %%r15d, %%r15d;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ // Step 3: Compute intermediate squares f[i]^2, add them in and store the 8-limb result to tmp
+ " movq 0(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[0]^2
+ " movq %%rax, 0(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 8(%1);"
+ " movq 8(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[1]^2
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 16(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 24(%1);"
+ " movq 16(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[2]^2
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 32(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 40(%1);"
+ " movq 24(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[3]^2
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 48(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 56(%1);"
+
+ // Line up pointers
+ " mov %1, %0;" // %0 now points at tmp
+ " mov %2, %1;" // %1 now points at out
+
+ /////// Wrap the result back into the field //////
+
+ // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;" // rcx = 0; clears CF and OF
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;" // rax = overflow limb * 38
+
+ // Step 2: Fold the carry back into dst
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;" // rdx still holds 38
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+ : "+&r"(f), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc");
+}
+
+// Computes two field squarings (here f[i] and out[i] denote the i-th four-limb element):
+// out[0] <- f[0] * f[0]
+// out[1] <- f[1] * f[1]
+// Uses the 16-element buffer tmp for intermediate results (one raw 8-limb square per element)
+static inline void
+fsqr2(uint64_t *out, uint64_t *f, uint64_t *tmp)
+{
+ __asm__ volatile( // operands: %0 = f, %1 = tmp, %2 = out
+ // Step 1 (first element, limbs at byte offsets 0..24): Compute all partial products
+ " movq 0(%0), %%rdx;" // rdx = f[0]
+ " mulxq 8(%0), %%r8, %%r14;" // f[1]*f[0]
+ " xor %%r15d, %%r15d;" // r15 = 0; clears CF and OF
+ " mulxq 16(%0), %%r9, %%r10;" // f[2]*f[0]
+ " adcx %%r14, %%r9;"
+ " mulxq 24(%0), %%rax, %%rcx;" // f[3]*f[0]
+ " adcx %%rax, %%r10;"
+ " movq 24(%0), %%rdx;" // rdx = f[3]
+ " mulxq 8(%0), %%r11, %%rbx;" // f[1]*f[3]
+ " adcx %%rcx, %%r11;"
+ " mulxq 16(%0), %%rax, %%r13;" // f[2]*f[3]
+ " adcx %%rax, %%rbx;"
+ " movq 8(%0), %%rdx;" // rdx = f[1]
+ " adcx %%r15, %%r13;" // absorb the pending carry (r15 is zero)
+ " mulxq 16(%0), %%rax, %%rcx;" // f[2]*f[1]
+ " mov $0, %%r14;"
+
+ // Step 2: Compute two parallel carry chains (the adox chain adds f[2]*f[1]; the adcx chain doubles every cross-product limb)
+ " xor %%r15d, %%r15d;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ // Step 3: Compute intermediate squares, add them in and store the first 8-limb result to tmp
+ " movq 0(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[0]^2
+ " movq %%rax, 0(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 8(%1);"
+ " movq 8(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[1]^2
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 16(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 24(%1);"
+ " movq 16(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[2]^2
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 32(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 40(%1);"
+ " movq 24(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[3]^2
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 48(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 56(%1);"
+
+ // Step 1 (second element, limbs at byte offsets 32..56; limb names in the comments below are relative to it): Compute all partial products
+ " movq 32(%0), %%rdx;" // rdx = f[0]
+ " mulxq 40(%0), %%r8, %%r14;" // f[1]*f[0]
+ " xor %%r15d, %%r15d;" // r15 = 0; clears CF and OF
+ " mulxq 48(%0), %%r9, %%r10;" // f[2]*f[0]
+ " adcx %%r14, %%r9;"
+ " mulxq 56(%0), %%rax, %%rcx;" // f[3]*f[0]
+ " adcx %%rax, %%r10;"
+ " movq 56(%0), %%rdx;" // rdx = f[3]
+ " mulxq 40(%0), %%r11, %%rbx;" // f[1]*f[3]
+ " adcx %%rcx, %%r11;"
+ " mulxq 48(%0), %%rax, %%r13;" // f[2]*f[3]
+ " adcx %%rax, %%rbx;"
+ " movq 40(%0), %%rdx;" // rdx = f[1]
+ " adcx %%r15, %%r13;" // absorb the pending carry (r15 is zero)
+ " mulxq 48(%0), %%rax, %%rcx;" // f[2]*f[1]
+ " mov $0, %%r14;"
+
+ // Step 2: Compute two parallel carry chains
+ " xor %%r15d, %%r15d;"
+ " adox %%rax, %%r10;"
+ " adcx %%r8, %%r8;"
+ " adox %%rcx, %%r11;"
+ " adcx %%r9, %%r9;"
+ " adox %%r15, %%rbx;"
+ " adcx %%r10, %%r10;"
+ " adox %%r15, %%r13;"
+ " adcx %%r11, %%r11;"
+ " adox %%r15, %%r14;"
+ " adcx %%rbx, %%rbx;"
+ " adcx %%r13, %%r13;"
+ " adcx %%r14, %%r14;"
+
+ // Step 3: Compute intermediate squares, add them in and store the second 8-limb result at tmp + 64 bytes
+ " movq 32(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[0]^2
+ " movq %%rax, 64(%1);"
+ " add %%rcx, %%r8;"
+ " movq %%r8, 72(%1);"
+ " movq 40(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[1]^2
+ " adcx %%rax, %%r9;"
+ " movq %%r9, 80(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 88(%1);"
+ " movq 48(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[2]^2
+ " adcx %%rax, %%r11;"
+ " movq %%r11, 96(%1);"
+ " adcx %%rcx, %%rbx;"
+ " movq %%rbx, 104(%1);"
+ " movq 56(%0), %%rdx;"
+ " mulx %%rdx, %%rax, %%rcx;" // f[3]^2
+ " adcx %%rax, %%r13;"
+ " movq %%r13, 112(%1);"
+ " adcx %%rcx, %%r14;"
+ " movq %%r14, 120(%1);"
+
+ // Line up pointers
+ " mov %1, %0;" // %0 now points at tmp
+ " mov %2, %1;" // %1 now points at out
+
+ // Step 1 (first square): Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 32(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;" // rcx = 0; clears CF and OF
+ " adoxq 0(%0), %%r8;"
+ " mulxq 40(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 8(%0), %%r9;"
+ " mulxq 48(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 16(%0), %%r10;"
+ " mulxq 56(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 24(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;" // rax = overflow limb * 38
+
+ // Step 2: Fold the carry back into dst
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 8(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 16(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 24(%1);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;" // rdx still holds 38
+ " add %%rax, %%r8;"
+ " movq %%r8, 0(%1);"
+
+ // Step 1 (second square, tmp + 64 bytes): Compute dst + carry == tmp_hi * 38 + tmp_lo
+ " mov $38, %%rdx;"
+ " mulxq 96(%0), %%r8, %%r13;"
+ " xor %%ecx, %%ecx;"
+ " adoxq 64(%0), %%r8;"
+ " mulxq 104(%0), %%r9, %%rbx;"
+ " adcx %%r13, %%r9;"
+ " adoxq 72(%0), %%r9;"
+ " mulxq 112(%0), %%r10, %%r13;"
+ " adcx %%rbx, %%r10;"
+ " adoxq 80(%0), %%r10;"
+ " mulxq 120(%0), %%r11, %%rax;"
+ " adcx %%r13, %%r11;"
+ " adoxq 88(%0), %%r11;"
+ " adcx %%rcx, %%rax;"
+ " adox %%rcx, %%rax;"
+ " imul %%rdx, %%rax;"
+
+ // Step 2: Fold the carry back into dst (second result lives at out + 32 bytes)
+ " add %%rax, %%r8;"
+ " adcx %%rcx, %%r9;"
+ " movq %%r9, 40(%1);"
+ " adcx %%rcx, %%r10;"
+ " movq %%r10, 48(%1);"
+ " adcx %%rcx, %%r11;"
+ " movq %%r11, 56(%1);"
+
+ // Step 3: Fold the carry bit back in; guaranteed not to carry at this point
+ " mov $0, %%rax;"
+ " cmovc %%rdx, %%rax;"
+ " add %%rax, %%r8;"
+ " movq %%r8, 32(%1);"
+ : "+&r"(f), "+&r"(tmp)
+ : "r"(out)
+ : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc");
+}
+
+#endif /* defined(__x86_64__) || defined(_M_X64) */
+#endif /* __GNUC__ */
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h
new file mode 100644
index 0000000000..bc4ed6f0fa
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h
@@ -0,0 +1,315 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Bignum_H
+#define __internal_Hacl_Bignum_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum_Base.h"
+#include "../Hacl_Bignum.h"
+#include "lib_intrinsics.h"
+/*
+ * Karatsuba multiplication and squaring entry points, declared once per limb
+ * width (uint32_t limbs and uint64_t limbs). Only prototypes appear here.
+ *
+ * The mul variants take two operands (a, b); the sqr variants take one (a).
+ * All four additionally take a length aLen, a tmp pointer and a res pointer,
+ * and return nothing.
+ *
+ * NOTE(review): presumably aLen is the limb count of the operands, tmp is
+ * caller-supplied scratch space and res receives the result. The required
+ * sizes of tmp and res are not stated in this header -- confirm against the
+ * definitions before calling.
+ */
+void
+Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32(
+ uint32_t aLen,
+ uint32_t *a,
+ uint32_t *b,
+ uint32_t *tmp,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64(
+ uint32_t aLen,
+ uint64_t *a,
+ uint64_t *b,
+ uint64_t *tmp,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32(
+ uint32_t aLen,
+ uint32_t *a,
+ uint32_t *tmp,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64(
+ uint32_t aLen,
+ uint64_t *a,
+ uint64_t *tmp,
+ uint64_t *res);
+/*
+ * Prototypes for the add_mod_n / sub_mod_n routines, in uint32_t-limb (_u32)
+ * and uint64_t-limb (_u64) variants. Each takes a length len1 and four limb
+ * pointers n, a, b and res, and returns nothing.
+ *
+ * NOTE(review): judging by the names, these compute (a + b) mod n and
+ * (a - b) mod n into res, with len1 being the limb count shared by all four
+ * buffers. Any precondition on a and b (for example a, b < n) is not visible
+ * in this header -- confirm against the definitions.
+ */
+void
+Hacl_Bignum_bn_add_mod_n_u32(
+ uint32_t len1,
+ uint32_t *n,
+ uint32_t *a,
+ uint32_t *b,
+ uint32_t *res);
+
+void
+Hacl_Bignum_bn_add_mod_n_u64(
+ uint32_t len1,
+ uint64_t *n,
+ uint64_t *a,
+ uint64_t *b,
+ uint64_t *res);
+
+void
+Hacl_Bignum_bn_sub_mod_n_u32(
+ uint32_t len1,
+ uint32_t *n,
+ uint32_t *a,
+ uint32_t *b,
+ uint32_t *res);
+
+void
+Hacl_Bignum_bn_sub_mod_n_u64(
+ uint32_t len1,
+ uint64_t *n,
+ uint64_t *a,
+ uint64_t *b,
+ uint64_t *res);
+/*
+ * Single-limb helpers: each takes one limb n0 by value and returns one limb
+ * of the same width (uint32_t or uint64_t).
+ *
+ * NOTE(review): the name suggests a modular inverse derived from the low limb
+ * of a modulus, of the kind Montgomery arithmetic needs; the exact value
+ * returned and any requirement on n0 (for example oddness) are not visible
+ * here -- confirm against the definitions.
+ */
+uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0);
+
+uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0);
+/*
+ * Montgomery-arithmetic prototypes for uint32_t limbs. Declared here:
+ *   bn_check_modulus   (len, n)                      -> returns uint32_t
+ *   bn_precomp_r2_mod_n(len, nBits, n, res)
+ *   bn_mont_reduction  (len, n, nInv, c, res)
+ *   bn_to_mont         (len, n, nInv, r2, a, aM)
+ *   bn_from_mont       (len, n, nInv_u64, aM, a)
+ *   bn_mont_mul        (len, n, nInv_u64, aM, bM, resM)
+ *   bn_mont_sqr        (len, n, nInv_u64, aM, resM)
+ *
+ * NOTE(review): in from_mont, mont_mul and mont_sqr the per-limb constant is
+ * a uint32_t even though the parameter is spelled nInv_u64; the sibling
+ * prototypes mont_reduction and to_mont call the same-typed parameter nInv.
+ * Parameter names in a prototype do not affect callers.
+ *
+ * NOTE(review): presumably len is the limb count of n and of the operands,
+ * and the uint32_t returned by bn_check_modulus is a mask-style status rather
+ * than a boolean. Neither is established by this header -- confirm against
+ * the definitions.
+ */
+uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n);
+
+void
+Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32(
+ uint32_t len,
+ uint32_t nBits,
+ uint32_t *n,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_reduction_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t nInv,
+ uint32_t *c,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Montgomery_bn_to_mont_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t nInv,
+ uint32_t *r2,
+ uint32_t *a,
+ uint32_t *aM);
+
+void
+Hacl_Bignum_Montgomery_bn_from_mont_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t nInv_u64,
+ uint32_t *aM,
+ uint32_t *a);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_mul_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t nInv_u64,
+ uint32_t *aM,
+ uint32_t *bM,
+ uint32_t *resM);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_sqr_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t nInv_u64,
+ uint32_t *aM,
+ uint32_t *resM);
+/*
+ * Montgomery-arithmetic prototypes for uint64_t limbs. Declared here:
+ *   bn_check_modulus   (len, n)                      -> returns uint64_t
+ *   bn_precomp_r2_mod_n(len, nBits, n, res)
+ *   bn_mont_reduction  (len, n, nInv, c, res)
+ *   bn_to_mont         (len, n, nInv, r2, a, aM)
+ *   bn_from_mont       (len, n, nInv_u64, aM, a)
+ *   bn_mont_mul        (len, n, nInv_u64, aM, bM, resM)
+ *   bn_mont_sqr        (len, n, nInv_u64, aM, resM)
+ *
+ * Lengths (len, nBits) stay uint32_t; only the limb type and the per-limb
+ * constant (nInv / nInv_u64) are uint64_t.
+ *
+ * NOTE(review): presumably len is the limb count of n and of the operands,
+ * and the uint64_t returned by bn_check_modulus is a mask-style status rather
+ * than a boolean. Neither is established by this header -- confirm against
+ * the definitions.
+ */
+uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n);
+
+void
+Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64(
+ uint32_t len,
+ uint32_t nBits,
+ uint64_t *n,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_reduction_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t nInv,
+ uint64_t *c,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Montgomery_bn_to_mont_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t nInv,
+ uint64_t *r2,
+ uint64_t *a,
+ uint64_t *aM);
+
+void
+Hacl_Bignum_Montgomery_bn_from_mont_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t nInv_u64,
+ uint64_t *aM,
+ uint64_t *a);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_mul_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t nInv_u64,
+ uint64_t *aM,
+ uint64_t *bM,
+ uint64_t *resM);
+
+void
+Hacl_Bignum_Montgomery_bn_mont_sqr_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t nInv_u64,
+ uint64_t *aM,
+ uint64_t *resM);
+/*
+ * Modular-exponentiation prototypes for uint32_t limbs.
+ *
+ *   bn_check_mod_exp(len, n, a, bBits, b) returns a uint32_t.
+ *   The four bn_mod_exp_* routines return nothing and write through res.
+ *   They come in two flavours:
+ *     *_precomp_u32 : take the per-limb constant mu and the buffer r2 from
+ *                     the caller;
+ *     *_u32         : take nBits instead of mu and r2.
+ *   Each flavour exists in a "vartime" and a "consttime" spelling with an
+ *   identical parameter list.
+ *
+ * NOTE(review): the names indicate variable-time versus constant-time
+ * implementations; nothing in this header enforces or documents that
+ * property, so pick the variant based on the definitions. Presumably a is the
+ * base, b the exponent of bBits bits and n the modulus of len limbs --
+ * confirm against the definitions.
+ */
+uint32_t
+Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t *a,
+ uint32_t bBits,
+ uint32_t *b);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t mu,
+ uint32_t *r2,
+ uint32_t *a,
+ uint32_t bBits,
+ uint32_t *b,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32(
+ uint32_t len,
+ uint32_t *n,
+ uint32_t mu,
+ uint32_t *r2,
+ uint32_t *a,
+ uint32_t bBits,
+ uint32_t *b,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32(
+ uint32_t len,
+ uint32_t nBits,
+ uint32_t *n,
+ uint32_t *a,
+ uint32_t bBits,
+ uint32_t *b,
+ uint32_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32(
+ uint32_t len,
+ uint32_t nBits,
+ uint32_t *n,
+ uint32_t *a,
+ uint32_t bBits,
+ uint32_t *b,
+ uint32_t *res);
+/*
+ * Modular-exponentiation prototypes for uint64_t limbs.
+ *
+ *   bn_check_mod_exp(len, n, a, bBits, b) returns a uint64_t.
+ *   The four bn_mod_exp_* routines return nothing and write through res.
+ *   They come in two flavours:
+ *     *_precomp_u64 : take the per-limb constant mu (uint64_t) and the
+ *                     buffer r2 from the caller;
+ *     *_u64         : take nBits instead of mu and r2.
+ *   Each flavour exists in a "vartime" and a "consttime" spelling with an
+ *   identical parameter list. Lengths and bit counts stay uint32_t.
+ *
+ * NOTE(review): the names indicate variable-time versus constant-time
+ * implementations; nothing in this header enforces or documents that
+ * property, so pick the variant based on the definitions. Presumably a is the
+ * base, b the exponent of bBits bits and n the modulus of len limbs --
+ * confirm against the definitions.
+ */
+uint64_t
+Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t *a,
+ uint32_t bBits,
+ uint64_t *b);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t mu,
+ uint64_t *r2,
+ uint64_t *a,
+ uint32_t bBits,
+ uint64_t *b,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64(
+ uint32_t len,
+ uint64_t *n,
+ uint64_t mu,
+ uint64_t *r2,
+ uint64_t *a,
+ uint32_t bBits,
+ uint64_t *b,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64(
+ uint32_t len,
+ uint32_t nBits,
+ uint64_t *n,
+ uint64_t *a,
+ uint32_t bBits,
+ uint64_t *b,
+ uint64_t *res);
+
+void
+Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64(
+ uint32_t len,
+ uint32_t nBits,
+ uint64_t *n,
+ uint64_t *a,
+ uint32_t bBits,
+ uint64_t *b,
+ uint64_t *res);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Bignum_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Bignum25519_51.h b/security/nss/lib/freebl/verified/internal/Hacl_Bignum25519_51.h
new file mode 100644
index 0000000000..c3e86ca512
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Bignum25519_51.h
@@ -0,0 +1,680 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Bignum25519_51_H
+#define __internal_Hacl_Bignum25519_51_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "Hacl_Krmllib.h"
+/*
+ * Limb-wise addition of two 5-limb values: out[i] = f1[i] + f2[i], i = 0..4.
+ *
+ * out: receives the five sums.
+ * f1, f2: the two 5-limb inputs.
+ *
+ * All ten input limbs are read into locals before the first store to out.
+ * No carry is propagated between limbs and no reduction is applied; each sum
+ * is an ordinary uint64_t addition.
+ *
+ * NOTE(review): the input limb bounds that keep these sums from wrapping
+ * modulo 2^64 are not stated here -- confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f20 = f2[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f21 = f2[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f22 = f2[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f23 = f2[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f24 = f2[4U];
+ out[0U] = f10 + f20;
+ out[1U] = f11 + f21;
+ out[2U] = f12 + f22;
+ out[3U] = f13 + f23;
+ out[4U] = f14 + f24;
+}
+/*
+ * Limb-wise subtraction with a per-limb bias:
+ *   out[0] = f1[0] + 0x3fffffffffff68 - f2[0]
+ *   out[i] = f1[i] + 0x3ffffffffffff8 - f2[i]   for i = 1..4
+ *
+ * out: receives the five results.
+ * f1, f2: the two 5-limb inputs (minuend and subtrahend).
+ *
+ * The bias constants equal 8 * (2^51 - 19) for limb 0 and 8 * (2^51 - 1) for
+ * limbs 1..4. All ten input limbs are read into locals before the first store
+ * to out; no carry is propagated between limbs.
+ *
+ * NOTE(review): read as radix-2^51 limbs, the bias is 8 * (2^255 - 19), which
+ * would leave the value unchanged modulo 2^255 - 19 while keeping each limb
+ * difference non-negative. That holds only if every f2[i] is at most the
+ * bias; the bound is not stated here -- confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fsub(uint64_t *out, uint64_t *f1, uint64_t *f2)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f20 = f2[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f21 = f2[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f22 = f2[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f23 = f2[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f24 = f2[4U];
+ out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20;
+ out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21;
+ out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22;
+ out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23;
+ out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24;
+}
+/*
+ * Product of two 5-limb values, written to out as five limbs.
+ *
+ * out: receives the five result limbs.
+ * f1, f2: the two 5-limb inputs.
+ * uu___: not referenced anywhere in the body.
+ *
+ * Structure of the computation:
+ *  1. tmp1..tmp4 = 19 * f2[1..4] (64-bit multiplies).
+ *  2. Five 128-bit column sums tmp_w0..tmp_w4, each the sum of five wide
+ *     products f1[i] * x, where x is f2[j] when i + j is the column index
+ *     and 19 * f2[j] when i + j is the column index plus 5.
+ *  3. A carry chain over the columns: each column plus the incoming carry is
+ *     split into its low 51 bits (mask 0x7ffffffffffff) and a carry
+ *     (shift right by 51, truncated to 64 bits).
+ *  4. The carry out of column 4 is multiplied by 19 and added to limb 0;
+ *     limb 0 is masked to 51 bits again and its carry is added to limb 1.
+ *
+ * On return out[0], out[2], out[3] and out[4] are masked to 51 bits; out[1]
+ * is a 51-bit value plus the final carry and is not masked again.
+ *
+ * The 51-bit radix with 5 limbs and the wrap-around factor 19 are consistent
+ * with reduction modulo 2^255 - 19.
+ *
+ * NOTE(review): the input limb bounds needed so that 19 * f2[j], the 128-bit
+ * column sums and the truncated carries do not overflow are not stated
+ * here -- confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul(
+ uint64_t *out,
+ uint64_t *f1,
+ uint64_t *f2,
+ FStar_UInt128_uint128 *uu___)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ uint64_t f23 = f2[3U];
+ uint64_t f24 = f2[4U];
+ uint64_t tmp1 = f21 * (uint64_t)19U; /* 19 * f2[j], used for wrapped-around terms */
+ uint64_t tmp2 = f22 * (uint64_t)19U;
+ uint64_t tmp3 = f23 * (uint64_t)19U;
+ uint64_t tmp4 = f24 * (uint64_t)19U;
+ FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* row f1[0] */
+ FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21);
+ FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22);
+ FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+ FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+ FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); /* row f1[1] */
+ FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20));
+ FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21));
+ FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+ FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+ FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); /* row f1[2] */
+ FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4));
+ FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20));
+ FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21));
+ FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22));
+ FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); /* row f1[3] */
+ FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3));
+ FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4));
+ FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20));
+ FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21));
+ FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); /* row f1[4] */
+ FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2));
+ FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3));
+ FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4));
+ FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20));
+ FStar_UInt128_uint128 tmp_w0 = o04;
+ FStar_UInt128_uint128 tmp_w1 = o14;
+ FStar_UInt128_uint128 tmp_w2 = o24;
+ FStar_UInt128_uint128 tmp_w3 = o34;
+ FStar_UInt128_uint128 tmp_w4 = o44;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* column 0: incoming carry is 0 */
+ uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp11 + c5; /* limb 1 absorbs the last carry and is not re-masked */
+ uint64_t o2 = tmp21;
+ uint64_t o3 = tmp31;
+ uint64_t o4 = tmp41;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+/*
+ * Two independent 5-limb products computed in one call:
+ *   out[0..4] is the product of f1[0..4] and f2[0..4];
+ *   out[5..9] is the product of f1[5..9] and f2[5..9].
+ *
+ * out: receives ten limbs (two 5-limb results back to back).
+ * f1, f2: each points at ten limbs (two 5-limb inputs back to back).
+ * uu___: not referenced anywhere in the body.
+ *
+ * Each half follows the same steps: 19 * (second operand limbs 1..4) is
+ * precomputed, five 128-bit column sums are accumulated, a carry chain splits
+ * each column into its low 51 bits (mask 0x7ffffffffffff) and a carry (shift
+ * right by 51), the carry out of the top column is multiplied by 19 and added
+ * to limb 0, and the carry out of limb 0 is added to limb 1 without masking
+ * limb 1 again.
+ *
+ * All twenty input limbs are read into locals before the first store to out.
+ *
+ * NOTE(review): the input limb bounds needed so that the 64-bit multiplies by
+ * 19, the 128-bit column sums and the truncated carries do not overflow are
+ * not stated here -- confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul2(
+ uint64_t *out,
+ uint64_t *f1,
+ uint64_t *f2,
+ FStar_UInt128_uint128 *uu___)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ uint64_t f23 = f2[3U];
+ uint64_t f24 = f2[4U];
+ uint64_t f30 = f1[5U]; /* second pair: f3x = f1[5..9], f4x = f2[5..9] */
+ uint64_t f31 = f1[6U];
+ uint64_t f32 = f1[7U];
+ uint64_t f33 = f1[8U];
+ uint64_t f34 = f1[9U];
+ uint64_t f40 = f2[5U];
+ uint64_t f41 = f2[6U];
+ uint64_t f42 = f2[7U];
+ uint64_t f43 = f2[8U];
+ uint64_t f44 = f2[9U];
+ uint64_t tmp11 = f21 * (uint64_t)19U; /* 19 * f2[1..4] for the first product */
+ uint64_t tmp12 = f22 * (uint64_t)19U;
+ uint64_t tmp13 = f23 * (uint64_t)19U;
+ uint64_t tmp14 = f24 * (uint64_t)19U;
+ uint64_t tmp21 = f41 * (uint64_t)19U; /* 19 * f2[6..9] for the second product */
+ uint64_t tmp22 = f42 * (uint64_t)19U;
+ uint64_t tmp23 = f43 * (uint64_t)19U;
+ uint64_t tmp24 = f44 * (uint64_t)19U;
+ FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* first product: column sums */
+ FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21);
+ FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22);
+ FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+ FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+ FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14));
+ FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20));
+ FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21));
+ FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+ FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+ FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13));
+ FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14));
+ FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20));
+ FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21));
+ FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22));
+ FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12));
+ FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13));
+ FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14));
+ FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20));
+ FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21));
+ FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11));
+ FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12));
+ FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13));
+ FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14));
+ FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20));
+ FStar_UInt128_uint128 tmp_w10 = o040;
+ FStar_UInt128_uint128 tmp_w11 = o140;
+ FStar_UInt128_uint128 tmp_w12 = o240;
+ FStar_UInt128_uint128 tmp_w13 = o340;
+ FStar_UInt128_uint128 tmp_w14 = o440;
+ FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); /* second product: column sums */
+ FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41);
+ FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42);
+ FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43);
+ FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44);
+ FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24));
+ FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40));
+ FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41));
+ FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42));
+ FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43));
+ FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23));
+ FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24));
+ FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40));
+ FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41));
+ FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42));
+ FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22));
+ FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23));
+ FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24));
+ FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40));
+ FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41));
+ FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21));
+ FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22));
+ FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23));
+ FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24));
+ FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40));
+ FStar_UInt128_uint128 tmp_w20 = o04;
+ FStar_UInt128_uint128 tmp_w21 = o141;
+ FStar_UInt128_uint128 tmp_w22 = o241;
+ FStar_UInt128_uint128 tmp_w23 = o34;
+ FStar_UInt128_uint128 tmp_w24 = o44;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* first product: carry chain */
+ uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00));
+ uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10));
+ uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20));
+ uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30));
+ uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; /* top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c50 = l_4 >> (uint32_t)51U;
+ uint64_t o100 = tmp0_;
+ uint64_t o112 = tmp10 + c50; /* limb 1 is not re-masked */
+ uint64_t o122 = tmp20;
+ uint64_t o132 = tmp30;
+ uint64_t o142 = tmp40;
+ FStar_UInt128_uint128
+ l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* second product: carry chain */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U));
+ FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U));
+ FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U));
+ FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U));
+ FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U));
+ uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_10 >> (uint32_t)51U;
+ uint64_t o200 = tmp0_0;
+ uint64_t o212 = tmp1 + c5; /* limb 1 is not re-masked */
+ uint64_t o222 = tmp2;
+ uint64_t o232 = tmp3;
+ uint64_t o242 = tmp4;
+ uint64_t o10 = o100;
+ uint64_t o11 = o112;
+ uint64_t o12 = o122;
+ uint64_t o13 = o132;
+ uint64_t o14 = o142;
+ uint64_t o20 = o200;
+ uint64_t o21 = o212;
+ uint64_t o22 = o222;
+ uint64_t o23 = o232;
+ uint64_t o24 = o242;
+ out[0U] = o10;
+ out[1U] = o11;
+ out[2U] = o12;
+ out[3U] = o13;
+ out[4U] = o14;
+ out[5U] = o20;
+ out[6U] = o21;
+ out[7U] = o22;
+ out[8U] = o23;
+ out[9U] = o24;
+}
+/*
+ * Product of a 5-limb value and a single 64-bit scalar.
+ *
+ * out: receives the five result limbs.
+ * f1: the 5-limb input.
+ * f2: the scalar, passed by value.
+ *
+ * Each limb is multiplied by f2 into a 128-bit product. A carry chain then
+ * splits each product plus incoming carry into its low 51 bits (mask
+ * 0x7ffffffffffff) and a carry (shift right by 51, truncated to 64 bits).
+ * The carry out of the top limb is multiplied by 19 and added to limb 0;
+ * limb 0 is masked again and its carry is added to limb 1, which is not
+ * masked again.
+ *
+ * NOTE(review): the bounds on f1 and f2 needed so that the truncated carries
+ * and the final 64-bit additions do not overflow are not stated here --
+ * confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fmul1(uint64_t *out, uint64_t *f1, uint64_t f2)
+{
+ uint64_t f10 = f1[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10);
+ FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11);
+ FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12);
+ FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13);
+ FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14);
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* limb 0: incoming carry is 0 */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp1 + c5; /* limb 1 is not re-masked */
+ uint64_t o2 = tmp2;
+ uint64_t o3 = tmp3;
+ uint64_t o4 = tmp4;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+/*
+ * Square of a 5-limb value, written to out as five limbs.
+ *
+ * out: receives the five result limbs.
+ * f: the 5-limb input.
+ * uu___: not referenced anywhere in the body.
+ *
+ * Scaled copies of the input limbs are formed first so that each cross term
+ * is multiplied only once:
+ *   d0 = 2*f0, d1 = 2*f1, d2 = 38*f2, d3 = 19*f3, d419 = 19*f4, d4 = 38*f4.
+ * The five 128-bit column sums are then
+ *   s0 = f0*f0 + d4*f1 + d2*f3
+ *   s1 = d0*f1 + d4*f2 + d3*f3
+ *   s2 = d0*f2 + f1*f1 + d4*f3
+ *   s3 = d0*f3 + d1*f2 + f4*d419
+ *   s4 = d0*f4 + d1*f3 + f2*f2
+ * followed by a carry chain: each column plus incoming carry is split into
+ * its low 51 bits (mask 0x7ffffffffffff) and a carry (shift right by 51,
+ * truncated to 64 bits). The carry out of s4 is multiplied by 19 and added to
+ * limb 0; limb 0 is masked again and its carry is added to limb 1, which is
+ * not masked again.
+ *
+ * NOTE(review): the input limb bounds needed so that the 64-bit scalings by
+ * 2, 19 and 38, the 128-bit sums and the truncated carries do not overflow
+ * are not stated here -- confirm against the callers.
+ */
+static inline void
+Hacl_Impl_Curve25519_Field51_fsqr(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___)
+{
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t f3 = f[3U];
+ uint64_t f4 = f[4U];
+ uint64_t d0 = (uint64_t)2U * f0;
+ uint64_t d1 = (uint64_t)2U * f1;
+ uint64_t d2 = (uint64_t)38U * f2;
+ uint64_t d3 = (uint64_t)19U * f3;
+ uint64_t d419 = (uint64_t)19U * f4;
+ uint64_t d4 = (uint64_t)2U * d419; /* = 38 * f4 */
+ FStar_UInt128_uint128
+ s0 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0),
+ FStar_UInt128_mul_wide(d4, f1)),
+ FStar_UInt128_mul_wide(d2, f3));
+ FStar_UInt128_uint128
+ s1 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1),
+ FStar_UInt128_mul_wide(d4, f2)),
+ FStar_UInt128_mul_wide(d3, f3));
+ FStar_UInt128_uint128
+ s2 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2),
+ FStar_UInt128_mul_wide(f1, f1)),
+ FStar_UInt128_mul_wide(d4, f3));
+ FStar_UInt128_uint128
+ s3 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3),
+ FStar_UInt128_mul_wide(d1, f2)),
+ FStar_UInt128_mul_wide(f4, d419));
+ FStar_UInt128_uint128
+ s4 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4),
+ FStar_UInt128_mul_wide(d1, f3)),
+ FStar_UInt128_mul_wide(f2, f2));
+ FStar_UInt128_uint128 o00 = s0;
+ FStar_UInt128_uint128 o10 = s1;
+ FStar_UInt128_uint128 o20 = s2;
+ FStar_UInt128_uint128 o30 = s3;
+ FStar_UInt128_uint128 o40 = s4;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* column 0: incoming carry is 0 */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp1 + c5; /* limb 1 is not re-masked */
+ uint64_t o2 = tmp2;
+ uint64_t o3 = tmp3;
+ uint64_t o4 = tmp4;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+static inline void
+Hacl_Impl_Curve25519_Field51_fsqr2(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___)
+{
+ uint64_t f10 = f[0U];
+ uint64_t f11 = f[1U];
+ uint64_t f12 = f[2U];
+ uint64_t f13 = f[3U];
+ uint64_t f14 = f[4U];
+ uint64_t f20 = f[5U];
+ uint64_t f21 = f[6U];
+ uint64_t f22 = f[7U];
+ uint64_t f23 = f[8U];
+ uint64_t f24 = f[9U];
+ uint64_t d00 = (uint64_t)2U * f10;
+ uint64_t d10 = (uint64_t)2U * f11;
+ uint64_t d20 = (uint64_t)38U * f12;
+ uint64_t d30 = (uint64_t)19U * f13;
+ uint64_t d4190 = (uint64_t)19U * f14;
+ uint64_t d40 = (uint64_t)2U * d4190;
+ FStar_UInt128_uint128
+ s00 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10),
+ FStar_UInt128_mul_wide(d40, f11)),
+ FStar_UInt128_mul_wide(d20, f13));
+ FStar_UInt128_uint128
+ s10 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11),
+ FStar_UInt128_mul_wide(d40, f12)),
+ FStar_UInt128_mul_wide(d30, f13));
+ FStar_UInt128_uint128
+ s20 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12),
+ FStar_UInt128_mul_wide(f11, f11)),
+ FStar_UInt128_mul_wide(d40, f13));
+ FStar_UInt128_uint128
+ s30 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13),
+ FStar_UInt128_mul_wide(d10, f12)),
+ FStar_UInt128_mul_wide(f14, d4190));
+ FStar_UInt128_uint128
+ s40 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14),
+ FStar_UInt128_mul_wide(d10, f13)),
+ FStar_UInt128_mul_wide(f12, f12));
+ FStar_UInt128_uint128 o100 = s00;
+ FStar_UInt128_uint128 o110 = s10;
+ FStar_UInt128_uint128 o120 = s20;
+ FStar_UInt128_uint128 o130 = s30;
+ FStar_UInt128_uint128 o140 = s40;
+ uint64_t d0 = (uint64_t)2U * f20;
+ uint64_t d1 = (uint64_t)2U * f21;
+ uint64_t d2 = (uint64_t)38U * f22;
+ uint64_t d3 = (uint64_t)19U * f23;
+ uint64_t d419 = (uint64_t)19U * f24;
+ uint64_t d4 = (uint64_t)2U * d419;
+ FStar_UInt128_uint128
+ s0 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20),
+ FStar_UInt128_mul_wide(d4, f21)),
+ FStar_UInt128_mul_wide(d2, f23));
+ FStar_UInt128_uint128
+ s1 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21),
+ FStar_UInt128_mul_wide(d4, f22)),
+ FStar_UInt128_mul_wide(d3, f23));
+ FStar_UInt128_uint128
+ s2 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22),
+ FStar_UInt128_mul_wide(f21, f21)),
+ FStar_UInt128_mul_wide(d4, f23));
+ FStar_UInt128_uint128
+ s3 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23),
+ FStar_UInt128_mul_wide(d1, f22)),
+ FStar_UInt128_mul_wide(f24, d419));
+ FStar_UInt128_uint128
+ s4 =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24),
+ FStar_UInt128_mul_wide(d1, f23)),
+ FStar_UInt128_mul_wide(f22, f22));
+ FStar_UInt128_uint128 o200 = s0;
+ FStar_UInt128_uint128 o210 = s1;
+ FStar_UInt128_uint128 o220 = s2;
+ FStar_UInt128_uint128 o230 = s3;
+ FStar_UInt128_uint128 o240 = s4;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U));
+ uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U));
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00));
+ uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10));
+ uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20));
+ uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30));
+ uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp00 + c40 * (uint64_t)19U;
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c50 = l_4 >> (uint32_t)51U;
+ uint64_t o101 = tmp0_;
+ uint64_t o111 = tmp10 + c50;
+ uint64_t o121 = tmp20;
+ uint64_t o131 = tmp30;
+ uint64_t o141 = tmp40;
+ FStar_UInt128_uint128
+ l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U));
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U));
+ FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U));
+ FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U));
+ FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U));
+ FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U));
+ uint64_t l_10 = tmp0 + c4 * (uint64_t)19U;
+ uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_10 >> (uint32_t)51U;
+ uint64_t o201 = tmp0_0;
+ uint64_t o211 = tmp1 + c5;
+ uint64_t o221 = tmp2;
+ uint64_t o231 = tmp3;
+ uint64_t o241 = tmp4;
+ uint64_t o10 = o101;
+ uint64_t o11 = o111;
+ uint64_t o12 = o121;
+ uint64_t o13 = o131;
+ uint64_t o14 = o141;
+ uint64_t o20 = o201;
+ uint64_t o21 = o211;
+ uint64_t o22 = o221;
+ uint64_t o23 = o231;
+ uint64_t o24 = o241;
+ out[0U] = o10;
+ out[1U] = o11;
+ out[2U] = o12;
+ out[3U] = o13;
+ out[4U] = o14;
+ out[5U] = o20;
+ out[6U] = o21;
+ out[7U] = o22;
+ out[8U] = o23;
+ out[9U] = o24;
+}
+
+static inline void
+Hacl_Impl_Curve25519_Field51_store_felem(uint64_t *u64s, uint64_t *f)
+{ /* Normalises the five-limb value f and packs it into four 64-bit words.
+   *   f    : input; limbs f[0..4] are read. Each limb is treated as a radix-2^51
+   *          digit (masked with 0x7ffffffffffff = 2^51 - 1, excess carried upward).
+   *   u64s : output; words u64s[0..3] are written.
+   * Steps:
+   *   1. carry pass over the limbs; the carry out of limb 4 is multiplied by 19
+   *      and folded back into limb 0, and the resulting carry goes into limb 1;
+   *   2. masked subtraction of the limb vector
+   *      (2^51 - 19, 2^51 - 1, 2^51 - 1, 2^51 - 1, 2^51 - 1), which is the radix-2^51
+   *      representation of 2^255 - 19;
+   *   3. repacking of the 5 x 51 bits into 4 x 64 bits.
+   * There is no branch on limb values anywhere in the body.
+   */
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t f3 = f[3U];
+ uint64_t f4 = f[4U];
+ uint64_t l_ = f0 + (uint64_t)0U;
+ uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; /* keep the low 51 bits of the limb */
+ uint64_t c0 = l_ >> (uint32_t)51U; /* everything above bit 50 is carried into the next limb */
+ uint64_t l_0 = f1 + c0;
+ uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = l_0 >> (uint32_t)51U;
+ uint64_t l_1 = f2 + c1;
+ uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = l_1 >> (uint32_t)51U;
+ uint64_t l_2 = f3 + c2;
+ uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = l_2 >> (uint32_t)51U;
+ uint64_t l_3 = f4 + c3;
+ uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = l_3 >> (uint32_t)51U;
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* the top carry re-enters limb 0 scaled by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t f01 = tmp0_;
+ uint64_t f11 = tmp1 + c5;
+ uint64_t f21 = tmp2;
+ uint64_t f31 = tmp3;
+ uint64_t f41 = tmp4;
+ uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); /* NOTE(review): the *_mask helpers are defined outside this file; presumably all-ones when the comparison holds and zero otherwise - confirm */
+ uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU);
+ uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU);
+ uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU);
+ uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU);
+ uint64_t mask = (((m0 & m1) & m2) & m3) & m4; /* AND of the five per-limb test results */
+ uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); /* 0x7ffffffffffed = 2^51 - 19 */
+ uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); /* 0x7ffffffffffff = 2^51 - 1 */
+ uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f02 = f0_;
+ uint64_t f12 = f1_;
+ uint64_t f22 = f2_;
+ uint64_t f32 = f3_;
+ uint64_t f42 = f4_;
+ uint64_t o00 = f02 | f12 << (uint32_t)51U; /* limb 0, then the low 13 bits of limb 1 in bits 51..63 */
+ uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; /* rest of limb 1, then the low 26 bits of limb 2 */
+ uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; /* rest of limb 2, then the low 39 bits of limb 3 */
+ uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; /* rest of limb 3, then limb 4 from bit 12 upward */
+ uint64_t o0 = o00;
+ uint64_t o1 = o10;
+ uint64_t o2 = o20;
+ uint64_t o3 = o30;
+ u64s[0U] = o0;
+ u64s[1U] = o1;
+ u64s[2U] = o2;
+ u64s[3U] = o3;
+}
+
+static inline void
+Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2)
+{ /* Masked swap of the 64-bit words of p1 and p2, done with XOR and no branch.
+   *   bit    : selector. mask = 0 - bit, so bit == 0 gives an all-zero mask
+   *            (words unchanged) and bit == 1 gives an all-ones mask (words swapped).
+   *            NOTE(review): other values would give a partial mask; callers
+   *            presumably pass only 0 or 1 - confirm.
+   *   p1, p2 : arrays updated in place at index i.
+   * NOTE(review): KRML_MAYBE_FOR10 is a macro defined outside this file; from its
+   * arguments (0, 10, 1) it presumably runs the body for i = 0..9 - confirm.
+   */
+ uint64_t mask = (uint64_t)0U - bit;
+ KRML_MAYBE_FOR10(i,
+ (uint32_t)0U,
+ (uint32_t)10U,
+ (uint32_t)1U,
+ uint64_t dummy = mask & (p1[i] ^ p2[i]);
+ p1[i] = p1[i] ^ dummy;
+ p2[i] = p2[i] ^ dummy;);
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Bignum25519_51_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Bignum_Base.h b/security/nss/lib/freebl/verified/internal/Hacl_Bignum_Base.h
new file mode 100644
index 0000000000..a6e4fe63f0
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Bignum_Base.h
@@ -0,0 +1,444 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Bignum_Base_H
+#define __internal_Hacl_Bignum_Base_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/internal/builtin.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "Hacl_Krmllib.h"
+#include "lib_intrinsics.h"
+
+static inline uint32_t
+Hacl_Bignum_Base_mul_wide_add2_u32(uint32_t a, uint32_t b, uint32_t c_in, uint32_t *out)
+{ /* Single-limb multiply-accumulate: forms a * b + c_in + out[0] in 64 bits,
+   * writes the low 32 bits back to out[0] and returns the high 32 bits.
+   *   a, b : the two 32-bit factors.
+   *   c_in : incoming carry limb, added to the product.
+   *   out  : points at the limb that is both read (accumulated) and overwritten.
+   * The 64-bit sum cannot wrap: (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.
+   */
+ uint32_t out0 = out[0U];
+ uint64_t res = (uint64_t)a * (uint64_t)b + (uint64_t)c_in + (uint64_t)out0;
+ out[0U] = (uint32_t)res; /* low half stays in place */
+ return (uint32_t)(res >> (uint32_t)32U); /* high half is handed back as the outgoing carry */
+}
+
+static inline uint64_t
+Hacl_Bignum_Base_mul_wide_add2_u64(uint64_t a, uint64_t b, uint64_t c_in, uint64_t *out)
+{ /* 64-bit counterpart of the single-limb multiply-accumulate: combines a, b, c_in
+   * and out[0] through the FStar_UInt128 helpers, writes the value converted by
+   * FStar_UInt128_uint128_to_uint64 back to out[0], and returns the result shifted
+   * right by 64 bits.
+   *   a, b : the two 64-bit factors.
+   *   c_in : incoming carry limb.
+   *   out  : points at the limb that is both read and overwritten.
+   * NOTE(review): the FStar_UInt128_* helpers are defined outside this file;
+   * presumably mul_wide is a full 64x64 -> 128 multiply and uint128_to_uint64
+   * keeps the low 64 bits, making this a * b + c_in + out[0] - confirm.
+   */
+ uint64_t out0 = out[0U];
+ FStar_UInt128_uint128
+ res =
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(a, b),
+ FStar_UInt128_uint64_to_uint128(c_in)),
+ FStar_UInt128_uint64_to_uint128(out0));
+ out[0U] = FStar_UInt128_uint128_to_uint64(res);
+ return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res, (uint32_t)64U));
+}
+
+static inline void
+Hacl_Bignum_Convert_bn_from_bytes_be_uint64(uint32_t len, uint8_t *b, uint64_t *res)
+{ /* Converts the len-byte string b into 64-bit limbs stored in res.
+   *   len : number of input bytes.
+   *   b   : input; b[0..len-1] are read.
+   *   res : output; res[0..bnLen-1] are written, bnLen = (len - 1) / 8 + 1.
+   * The bytes are copied to the end of a zero-filled scratch buffer of 8 * bnLen
+   * bytes (so the padding sits in front), then limb i is loaded from the 8-byte
+   * group counted from the END of the buffer: res[0] comes from the last 8 bytes.
+   * The scratch buffer is obtained with alloca, so its size is bounded by the
+   * available stack.
+   * NOTE(review): for len == 0 the unsigned expression (len - 1) wraps to 0xFFFFFFFF,
+   * bnLen becomes 0x20000000 and tmpLen = 8 * bnLen wraps to 0, so the loop would
+   * index far outside tmp and res; callers must pass len >= 1.
+   * NOTE(review): KRML_CHECK_SIZE and load64_be are defined outside this file;
+   * load64_be presumably reads a big-endian 64-bit value - confirm.
+   */
+ uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U;
+ uint32_t tmpLen = (uint32_t)8U * bnLen;
+ KRML_CHECK_SIZE(sizeof(uint8_t), tmpLen);
+ uint8_t *tmp = (uint8_t *)alloca(tmpLen * sizeof(uint8_t));
+ memset(tmp, 0U, tmpLen * sizeof(uint8_t));
+ memcpy(tmp + tmpLen - len, b, len * sizeof(uint8_t)); /* right-align the input inside the scratch buffer */
+ for (uint32_t i = (uint32_t)0U; i < bnLen; i++) {
+ uint64_t *os = res;
+ uint64_t u = load64_be(tmp + (bnLen - i - (uint32_t)1U) * (uint32_t)8U); /* walk the 8-byte groups back to front */
+ uint64_t x = u;
+ os[i] = x;
+ }
+}
+
+static inline void
+Hacl_Bignum_Convert_bn_to_bytes_be_uint64(uint32_t len, uint64_t *b, uint8_t *res)
+{ /* Serialises 64-bit limbs from b into the len-byte string res.
+   *   len : number of output bytes.
+   *   b   : input; b[0..bnLen-1] are read, bnLen = (len - 1) / 8 + 1.
+   *   res : output; res[0..len-1] are written.
+   * The limbs are stored into a scratch buffer of 8 * bnLen bytes in reverse index
+   * order (b[bnLen - 1] first), and only the LAST len bytes of that buffer are
+   * copied out; the leading 8 * bnLen - len bytes are dropped.
+   * The scratch buffer is obtained with alloca, so its size is bounded by the
+   * available stack.
+   * NOTE(review): for len == 0 the unsigned expression (len - 1) wraps and bnLen
+   * becomes 0x20000000 while tmpLen wraps to 0; callers must pass len >= 1.
+   * NOTE(review): KRML_CHECK_SIZE and store64_be are defined outside this file;
+   * store64_be presumably writes a big-endian 64-bit value - confirm.
+   */
+ uint32_t bnLen = (len - (uint32_t)1U) / (uint32_t)8U + (uint32_t)1U;
+ uint32_t tmpLen = (uint32_t)8U * bnLen;
+ KRML_CHECK_SIZE(sizeof(uint8_t), tmpLen);
+ uint8_t *tmp = (uint8_t *)alloca(tmpLen * sizeof(uint8_t));
+ memset(tmp, 0U, tmpLen * sizeof(uint8_t));
+ for (uint32_t i = (uint32_t)0U; i < bnLen; i++) {
+ store64_be(tmp + i * (uint32_t)8U, b[bnLen - i - (uint32_t)1U]); /* highest-index limb goes first */
+ }
+ memcpy(res, tmp + tmpLen - len, len * sizeof(uint8_t)); /* skip the leading padding bytes */
+}
+
+static inline uint32_t
+Hacl_Bignum_Lib_bn_get_top_index_u32(uint32_t len, uint32_t *b)
+{ /* Scans b[0..len-1] and returns an index selected through masks.
+   *   len : number of limbs to scan.
+   *   b   : input limbs.
+   * Each step keeps the running index priv where mask is set and takes i where it
+   * is clear; the loop always runs len times and has no branch on b[i].
+   * Returns 0 when len is 0.
+   * NOTE(review): FStar_UInt32_eq_mask is defined outside this file; presumably it
+   * yields all-ones when its arguments are equal and zero otherwise, in which case
+   * the result is the index of the last non-zero limb (0 if there is none) - confirm.
+   */
+ uint32_t priv = (uint32_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < len; i++) {
+ uint32_t mask = FStar_UInt32_eq_mask(b[i], (uint32_t)0U);
+ priv = (mask & priv) | (~mask & i); /* masked select between the old index and i */
+ }
+ return priv;
+}
+
+static inline uint64_t
+Hacl_Bignum_Lib_bn_get_top_index_u64(uint32_t len, uint64_t *b)
+{ /* Scans b[0..len-1] and returns an index (widened to 64 bits) selected through masks.
+   *   len : number of limbs to scan.
+   *   b   : input limbs.
+   * Each step keeps the running index priv where mask is set and takes i where it
+   * is clear; the loop always runs len times and has no branch on b[i].
+   * Returns 0 when len is 0.
+   * NOTE(review): FStar_UInt64_eq_mask is defined outside this file; presumably it
+   * yields all-ones when its arguments are equal and zero otherwise, in which case
+   * the result is the index of the last non-zero limb (0 if there is none) - confirm.
+   */
+ uint64_t priv = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < len; i++) {
+ uint64_t mask = FStar_UInt64_eq_mask(b[i], (uint64_t)0U);
+ priv = (mask & priv) | (~mask & (uint64_t)i); /* masked select between the old index and i */
+ }
+ return priv;
+}
+
+static inline uint32_t
+Hacl_Bignum_Lib_bn_get_bits_u32(uint32_t len, uint32_t *b, uint32_t i, uint32_t l)
+{ /* Extracts an l-bit window that starts at bit position i of the limb array b.
+   *   len : number of limbs in b; used only to decide whether limb i / 32 + 1 exists.
+   *   b   : input limbs, bit 0 of b[0] being bit position 0.
+   *   i   : position of the lowest bit of the window.
+   *   l   : window width in bits.
+   * Returns the window in the low l bits of the result.
+   * b[i / 32] is read without comparing against len, so the caller must ensure
+   * i / 32 < len. The final mask is built with 1 << l on a 32-bit value, which is
+   * only defined in C for l < 32.
+   */
+ uint32_t i1 = i / (uint32_t)32U; /* limb holding the first bit */
+ uint32_t j = i % (uint32_t)32U; /* offset of the first bit inside that limb */
+ uint32_t p1 = b[i1] >> j;
+ uint32_t ite;
+ if (i1 + (uint32_t)1U < len && (uint32_t)0U < j) { /* j > 0 also keeps the shift count 32 - j below 32 */
+ ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)32U - j);
+ } else {
+ ite = p1;
+ }
+ return ite & (((uint32_t)1U << l) - (uint32_t)1U);
+}
+
+static inline uint64_t
+Hacl_Bignum_Lib_bn_get_bits_u64(uint32_t len, uint64_t *b, uint32_t i, uint32_t l)
+{ /* Extracts an l-bit window that starts at bit position i of the limb array b.
+   *   len : number of limbs in b; used only to decide whether limb i / 64 + 1 exists.
+   *   b   : input limbs, bit 0 of b[0] being bit position 0.
+   *   i   : position of the lowest bit of the window.
+   *   l   : window width in bits.
+   * Returns the window in the low l bits of the result.
+   * b[i / 64] is read without comparing against len, so the caller must ensure
+   * i / 64 < len. The final mask is built with 1 << l on a 64-bit value, which is
+   * only defined in C for l < 64.
+   */
+ uint32_t i1 = i / (uint32_t)64U; /* limb holding the first bit */
+ uint32_t j = i % (uint32_t)64U; /* offset of the first bit inside that limb */
+ uint64_t p1 = b[i1] >> j;
+ uint64_t ite;
+ if (i1 + (uint32_t)1U < len && (uint32_t)0U < j) { /* j > 0 also keeps the shift count 64 - j below 64 */
+ ite = p1 | b[i1 + (uint32_t)1U] << ((uint32_t)64U - j);
+ } else {
+ ite = p1;
+ }
+ return ite & (((uint64_t)1U << l) - (uint64_t)1U);
+}
+
+static inline uint32_t
+Hacl_Bignum_Addition_bn_sub_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res)
+{ /* Chains Lib_IntTypes_Intrinsics_sub_borrow_u32 across the aLen limbs of a and b,
+   * from index 0 upward, producing one limb of res per step.
+   *   aLen : number of limbs in a, b and res.
+   *   a, b : input limbs.
+   *   res  : output limbs; res[k] is handed to the intrinsic only after a[k] and
+   *          b[k] have been loaded.
+   * Returns the value of c left by the last step.
+   * The first loop processes four limbs per iteration; the second loop handles
+   * the remaining aLen % 4 limbs.
+   * NOTE(review): the intrinsic is defined outside this file; presumably it stores
+   * t1 - t2 - c in *res_i and returns the outgoing borrow, making this the
+   * multi-limb subtraction a - b - confirm.
+   */
+ uint32_t c = (uint32_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint32_t t1 = a[(uint32_t)4U * i];
+ uint32_t t20 = b[(uint32_t)4U * i];
+ uint32_t *res_i0 = res + (uint32_t)4U * i;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t20, res_i0);
+ uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t10, t21, res_i1);
+ uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t11, t22, res_i2);
+ uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t12, t2, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint32_t t1 = a[i];
+ uint32_t t2 = b[i];
+ uint32_t *res_i = res + i;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u32(c, t1, t2, res_i);
+ }
+ return c;
+}
+
+static inline uint64_t
+Hacl_Bignum_Addition_bn_sub_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res)
+{ /* Chains Lib_IntTypes_Intrinsics_sub_borrow_u64 across the aLen limbs of a and b,
+   * from index 0 upward, producing one limb of res per step.
+   *   aLen : number of limbs in a, b and res.
+   *   a, b : input limbs.
+   *   res  : output limbs; res[k] is handed to the intrinsic only after a[k] and
+   *          b[k] have been loaded.
+   * Returns the value of c left by the last step.
+   * The first loop processes four limbs per iteration; the second loop handles
+   * the remaining aLen % 4 limbs.
+   * NOTE(review): the intrinsic is defined outside this file; presumably it stores
+   * t1 - t2 - c in *res_i and returns the outgoing borrow, making this the
+   * multi-limb subtraction a - b - confirm.
+   */
+ uint64_t c = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint64_t t1 = a[(uint32_t)4U * i];
+ uint64_t t20 = b[(uint32_t)4U * i];
+ uint64_t *res_i0 = res + (uint32_t)4U * i;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t20, res_i0);
+ uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t10, t21, res_i1);
+ uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t11, t22, res_i2);
+ uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t12, t2, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint64_t t1 = a[i];
+ uint64_t t2 = b[i];
+ uint64_t *res_i = res + i;
+ c = Lib_IntTypes_Intrinsics_sub_borrow_u64(c, t1, t2, res_i);
+ }
+ return c;
+}
+
+static inline uint32_t
+Hacl_Bignum_Addition_bn_add_eq_len_u32(uint32_t aLen, uint32_t *a, uint32_t *b, uint32_t *res)
+{ /* Chains Lib_IntTypes_Intrinsics_add_carry_u32 across the aLen limbs of a and b,
+   * from index 0 upward, producing one limb of res per step.
+   *   aLen : number of limbs in a, b and res.
+   *   a, b : input limbs.
+   *   res  : output limbs; res[k] is handed to the intrinsic only after a[k] and
+   *          b[k] have been loaded.
+   * Returns the value of c left by the last step.
+   * The first loop processes four limbs per iteration; the second loop handles
+   * the remaining aLen % 4 limbs.
+   * NOTE(review): the intrinsic is defined outside this file; presumably it stores
+   * t1 + t2 + c in *res_i and returns the outgoing carry, making this the
+   * multi-limb addition a + b - confirm.
+   */
+ uint32_t c = (uint32_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint32_t t1 = a[(uint32_t)4U * i];
+ uint32_t t20 = b[(uint32_t)4U * i];
+ uint32_t *res_i0 = res + (uint32_t)4U * i;
+ c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t20, res_i0);
+ uint32_t t10 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t t21 = b[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t10, t21, res_i1);
+ uint32_t t11 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t t22 = b[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t11, t22, res_i2);
+ uint32_t t12 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t t2 = b[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t12, t2, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint32_t t1 = a[i];
+ uint32_t t2 = b[i];
+ uint32_t *res_i = res + i;
+ c = Lib_IntTypes_Intrinsics_add_carry_u32(c, t1, t2, res_i);
+ }
+ return c;
+}
+
+static inline uint64_t
+Hacl_Bignum_Addition_bn_add_eq_len_u64(uint32_t aLen, uint64_t *a, uint64_t *b, uint64_t *res)
+{ /* Chains Lib_IntTypes_Intrinsics_add_carry_u64 across the aLen limbs of a and b,
+   * from index 0 upward, producing one limb of res per step.
+   *   aLen : number of limbs in a, b and res.
+   *   a, b : input limbs.
+   *   res  : output limbs; res[k] is handed to the intrinsic only after a[k] and
+   *          b[k] have been loaded.
+   * Returns the value of c left by the last step.
+   * The first loop processes four limbs per iteration; the second loop handles
+   * the remaining aLen % 4 limbs.
+   * NOTE(review): the intrinsic is defined outside this file; presumably it stores
+   * t1 + t2 + c in *res_i and returns the outgoing carry, making this the
+   * multi-limb addition a + b - confirm.
+   */
+ uint64_t c = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint64_t t1 = a[(uint32_t)4U * i];
+ uint64_t t20 = b[(uint32_t)4U * i];
+ uint64_t *res_i0 = res + (uint32_t)4U * i;
+ c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t20, res_i0);
+ uint64_t t10 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t t21 = b[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = res + (uint32_t)4U * i + (uint32_t)1U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t10, t21, res_i1);
+ uint64_t t11 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t t22 = b[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = res + (uint32_t)4U * i + (uint32_t)2U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t11, t22, res_i2);
+ uint64_t t12 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t t2 = b[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = res + (uint32_t)4U * i + (uint32_t)3U;
+ c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t12, t2, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint64_t t1 = a[i];
+ uint64_t t2 = b[i];
+ uint64_t *res_i = res + i;
+ c = Lib_IntTypes_Intrinsics_add_carry_u64(c, t1, t2, res_i);
+ }
+ return c;
+}
+
+static inline void
+Hacl_Bignum_Multiplication_bn_mul_u32(
+ uint32_t aLen,
+ uint32_t *a,
+ uint32_t bLen,
+ uint32_t *b,
+ uint32_t *res)
+{ /* Row-by-row (schoolbook) product of two limb arrays.
+   *   aLen, a : first operand, a[0..aLen-1].
+   *   bLen, b : second operand, b[0..bLen-1].
+   *   res     : output of aLen + bLen limbs; it is zeroed first, so it must not
+   *             overlap a or b.
+   * For every limb b[i0] the inner loops accumulate a[i] * b[i0] into res[i0 + i]
+   * through Hacl_Bignum_Base_mul_wide_add2_u32, threading the carry c from limb to
+   * limb; the carry left at the end of the row is stored in res[aLen + i0].
+   * The inner work is split into a four-limbs-per-iteration loop and a loop for
+   * the remaining aLen % 4 limbs.
+   */
+ memset(res, 0U, (aLen + bLen) * sizeof(uint32_t));
+ for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) {
+ uint32_t bj = b[i0];
+ uint32_t *res_j = res + i0; /* this row is accumulated starting at limb i0 */
+ uint32_t c = (uint32_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint32_t a_i = a[(uint32_t)4U * i];
+ uint32_t *res_i0 = res_j + (uint32_t)4U * i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i0);
+ uint32_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, bj, c, res_i1);
+ uint32_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, bj, c, res_i2);
+ uint32_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, bj, c, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint32_t a_i = a[i];
+ uint32_t *res_i = res_j + i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, bj, c, res_i);
+ }
+ uint32_t r = c;
+ res[aLen + i0] = r; /* final carry of the row */
+ }
+}
+
+static inline void
+Hacl_Bignum_Multiplication_bn_mul_u64(
+ uint32_t aLen,
+ uint64_t *a,
+ uint32_t bLen,
+ uint64_t *b,
+ uint64_t *res)
+{ /* Row-by-row (schoolbook) product of two limb arrays.
+   *   aLen, a : first operand, a[0..aLen-1].
+   *   bLen, b : second operand, b[0..bLen-1].
+   *   res     : output of aLen + bLen limbs; it is zeroed first, so it must not
+   *             overlap a or b.
+   * For every limb b[i0] the inner loops accumulate a[i] * b[i0] into res[i0 + i]
+   * through Hacl_Bignum_Base_mul_wide_add2_u64, threading the carry c from limb to
+   * limb; the carry left at the end of the row is stored in res[aLen + i0].
+   * The inner work is split into a four-limbs-per-iteration loop and a loop for
+   * the remaining aLen % 4 limbs.
+   */
+ memset(res, 0U, (aLen + bLen) * sizeof(uint64_t));
+ for (uint32_t i0 = (uint32_t)0U; i0 < bLen; i0++) {
+ uint64_t bj = b[i0];
+ uint64_t *res_j = res + i0; /* this row is accumulated starting at limb i0 */
+ uint64_t c = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < aLen / (uint32_t)4U; i++) {
+ uint64_t a_i = a[(uint32_t)4U * i];
+ uint64_t *res_i0 = res_j + (uint32_t)4U * i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i0);
+ uint64_t a_i0 = a[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, bj, c, res_i1);
+ uint64_t a_i1 = a[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, bj, c, res_i2);
+ uint64_t a_i2 = a[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, bj, c, res_i);
+ }
+ for (uint32_t i = aLen / (uint32_t)4U * (uint32_t)4U; i < aLen; i++) { /* leftover limbs after the 4-wide loop */
+ uint64_t a_i = a[i];
+ uint64_t *res_i = res_j + i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, bj, c, res_i);
+ }
+ uint64_t r = c;
+ res[aLen + i0] = r; /* final carry of the row */
+ }
+}
+
+static inline void
+Hacl_Bignum_Multiplication_bn_sqr_u32(uint32_t aLen, uint32_t *a, uint32_t *res)
+{ /* Squares the aLen-limb array a into the (aLen + aLen)-limb array res.
+   *   aLen : number of limbs in a.
+   *   a    : input limbs.
+   *   res  : output of aLen + aLen limbs; it is zeroed first, so it must not
+   *          overlap a.
+   * Phase 1: for each i0, accumulate the cross products a[i] * a[i0] for i < i0
+   *          into res[i0 + i] (each pair once); the row carry goes to res[2 * i0].
+   * Phase 2: res = res + res, doubling the cross products; the returned carry is
+   *          not used.
+   * Phase 3: tmp[2 * i] and tmp[2 * i + 1] receive the low and high halves of
+   *          a[i] * a[i]; res = res + tmp; the returned carry is not used.
+   * tmp holds aLen + aLen limbs and is obtained with alloca, so its size is
+   * bounded by the available stack.
+   * NOTE(review): KRML_HOST_IGNORE and KRML_CHECK_SIZE are macros defined outside
+   * this file; KRML_HOST_IGNORE presumably just marks the value as deliberately
+   * unused - confirm.
+   */
+ memset(res, 0U, (aLen + aLen) * sizeof(uint32_t));
+ for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) {
+ uint32_t *ab = a;
+ uint32_t a_j = a[i0];
+ uint32_t *res_j = res + i0;
+ uint32_t c = (uint32_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U; i++) { /* only limbs below i0: the strict lower triangle */
+ uint32_t a_i = ab[(uint32_t)4U * i];
+ uint32_t *res_i0 = res_j + (uint32_t)4U * i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i0);
+ uint32_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U];
+ uint32_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i0, a_j, c, res_i1);
+ uint32_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U];
+ uint32_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i1, a_j, c, res_i2);
+ uint32_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U];
+ uint32_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i2, a_j, c, res_i);
+ }
+ for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) { /* leftover limbs after the 4-wide loop */
+ uint32_t a_i = ab[i];
+ uint32_t *res_i = res_j + i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u32(a_i, a_j, c, res_i);
+ }
+ uint32_t r = c;
+ res[i0 + i0] = r;
+ }
+ uint32_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, res, res); /* in-place doubling of the cross products */
+ KRML_HOST_IGNORE(c0);
+ KRML_CHECK_SIZE(sizeof(uint32_t), aLen + aLen);
+ uint32_t *tmp = (uint32_t *)alloca((aLen + aLen) * sizeof(uint32_t));
+ memset(tmp, 0U, (aLen + aLen) * sizeof(uint32_t));
+ for (uint32_t i = (uint32_t)0U; i < aLen; i++) {
+ uint64_t res1 = (uint64_t)a[i] * (uint64_t)a[i]; /* diagonal term a[i]^2 */
+ uint32_t hi = (uint32_t)(res1 >> (uint32_t)32U);
+ uint32_t lo = (uint32_t)res1;
+ tmp[(uint32_t)2U * i] = lo;
+ tmp[(uint32_t)2U * i + (uint32_t)1U] = hi;
+ }
+ uint32_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u32(aLen + aLen, res, tmp, res); /* add the diagonal terms */
+ KRML_HOST_IGNORE(c1);
+}
+
+static inline void
+Hacl_Bignum_Multiplication_bn_sqr_u64(uint32_t aLen, uint64_t *a, uint64_t *res)
+{ /* Squares the aLen-limb array a into the (aLen + aLen)-limb array res.
+   *   aLen : number of limbs in a.
+   *   a    : input limbs.
+   *   res  : output of aLen + aLen limbs; it is zeroed first, so it must not
+   *          overlap a.
+   * Phase 1: for each i0, accumulate the cross products a[i] * a[i0] for i < i0
+   *          into res[i0 + i] (each pair once); the row carry goes to res[2 * i0].
+   * Phase 2: res = res + res, doubling the cross products; the returned carry is
+   *          not used.
+   * Phase 3: tmp[2 * i] and tmp[2 * i + 1] receive the two 64-bit halves of
+   *          FStar_UInt128_mul_wide(a[i], a[i]); res = res + tmp; the returned
+   *          carry is not used.
+   * tmp holds aLen + aLen limbs and is obtained with alloca, so its size is
+   * bounded by the available stack.
+   * NOTE(review): KRML_HOST_IGNORE, KRML_CHECK_SIZE and the FStar_UInt128_* helpers
+   * are defined outside this file; KRML_HOST_IGNORE presumably just marks the
+   * value as deliberately unused - confirm.
+   */
+ memset(res, 0U, (aLen + aLen) * sizeof(uint64_t));
+ for (uint32_t i0 = (uint32_t)0U; i0 < aLen; i0++) {
+ uint64_t *ab = a;
+ uint64_t a_j = a[i0];
+ uint64_t *res_j = res + i0;
+ uint64_t c = (uint64_t)0U;
+ for (uint32_t i = (uint32_t)0U; i < i0 / (uint32_t)4U; i++) { /* only limbs below i0: the strict lower triangle */
+ uint64_t a_i = ab[(uint32_t)4U * i];
+ uint64_t *res_i0 = res_j + (uint32_t)4U * i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i0);
+ uint64_t a_i0 = ab[(uint32_t)4U * i + (uint32_t)1U];
+ uint64_t *res_i1 = res_j + (uint32_t)4U * i + (uint32_t)1U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i0, a_j, c, res_i1);
+ uint64_t a_i1 = ab[(uint32_t)4U * i + (uint32_t)2U];
+ uint64_t *res_i2 = res_j + (uint32_t)4U * i + (uint32_t)2U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i1, a_j, c, res_i2);
+ uint64_t a_i2 = ab[(uint32_t)4U * i + (uint32_t)3U];
+ uint64_t *res_i = res_j + (uint32_t)4U * i + (uint32_t)3U;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i2, a_j, c, res_i);
+ }
+ for (uint32_t i = i0 / (uint32_t)4U * (uint32_t)4U; i < i0; i++) { /* leftover limbs after the 4-wide loop */
+ uint64_t a_i = ab[i];
+ uint64_t *res_i = res_j + i;
+ c = Hacl_Bignum_Base_mul_wide_add2_u64(a_i, a_j, c, res_i);
+ }
+ uint64_t r = c;
+ res[i0 + i0] = r;
+ }
+ uint64_t c0 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, res, res); /* in-place doubling of the cross products */
+ KRML_HOST_IGNORE(c0);
+ KRML_CHECK_SIZE(sizeof(uint64_t), aLen + aLen);
+ uint64_t *tmp = (uint64_t *)alloca((aLen + aLen) * sizeof(uint64_t));
+ memset(tmp, 0U, (aLen + aLen) * sizeof(uint64_t));
+ for (uint32_t i = (uint32_t)0U; i < aLen; i++) {
+ FStar_UInt128_uint128 res1 = FStar_UInt128_mul_wide(a[i], a[i]); /* diagonal term a[i]^2 */
+ uint64_t hi = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(res1, (uint32_t)64U));
+ uint64_t lo = FStar_UInt128_uint128_to_uint64(res1);
+ tmp[(uint32_t)2U * i] = lo;
+ tmp[(uint32_t)2U * i + (uint32_t)1U] = hi;
+ }
+ uint64_t c1 = Hacl_Bignum_Addition_bn_add_eq_len_u64(aLen + aLen, res, tmp, res); /* add the diagonal terms */
+ KRML_HOST_IGNORE(c1);
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Bignum_Base_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h
new file mode 100644
index 0000000000..d0edbc3cea
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h
@@ -0,0 +1,51 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Chacha20_H
+#define __internal_Hacl_Chacha20_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Chacha20.h"
+
+extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U];
+
+void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr);
+
+void
+Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Chacha20_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h
new file mode 100644
index 0000000000..cb4eb15ffa
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h
@@ -0,0 +1,55 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Curve25519_51_H
+#define __internal_Hacl_Curve25519_51_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum25519_51.h"
+#include "../Hacl_Curve25519_51.h"
+
+void
+Hacl_Curve25519_51_fsquare_times(
+ uint64_t *o,
+ uint64_t *inp,
+ FStar_UInt128_uint128 *tmp,
+ uint32_t n);
+
+void Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Curve25519_51_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA3.h b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA3.h
new file mode 100644
index 0000000000..fedbe967f5
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA3.h
@@ -0,0 +1,62 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Hash_SHA3_H
+#define __internal_Hacl_Hash_SHA3_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Hash_SHA3.h"
+
+void
+Hacl_Hash_SHA3_update_multi_sha3(
+ Spec_Hash_Definitions_hash_alg a,
+ uint64_t *s,
+ uint8_t *blocks,
+ uint32_t n_blocks);
+
+void
+Hacl_Hash_SHA3_update_last_sha3(
+ Spec_Hash_Definitions_hash_alg a,
+ uint64_t *s,
+ uint8_t *input,
+ uint32_t input_len);
+
+void Hacl_Impl_SHA3_state_permute(uint64_t *s);
+
+void Hacl_Impl_SHA3_loadState(uint32_t rateInBytes, uint8_t *input, uint64_t *s);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Hash_SHA3_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h
new file mode 100644
index 0000000000..79836ae790
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h
@@ -0,0 +1,67 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Krmllib_H
+#define __internal_Hacl_Krmllib_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Krmllib.h"
+
+static KRML_NOINLINE uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b);
+
+static KRML_NOINLINE uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b);
+
+static KRML_NOINLINE uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b);
+
+static KRML_NOINLINE uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1);
+
+static inline FStar_UInt128_uint128 load128_be(uint8_t *x0);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Krmllib_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_P256.h b/security/nss/lib/freebl/verified/internal/Hacl_P256.h
new file mode 100644
index 0000000000..6e07194d53
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_P256.h
@@ -0,0 +1,56 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_P256_H
+#define __internal_Hacl_P256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "internal/Hacl_P256_PrecompTable.h"
+#include "internal/Hacl_Krmllib.h"
+#include "internal/Hacl_Bignum_Base.h"
+#include "../Hacl_P256.h"
+#include "lib_intrinsics.h"
+
+bool Hacl_Impl_P256_DH_ecp256dh_i(uint8_t *public_key, uint8_t *private_key); /* Declaration only, external linkage. NOTE(review): looks like the ECDH "initiator" step (public key from private key); buffer lengths and the meaning of the bool result are not visible here -- confirm. */
+
+bool /* Meaning of the result (e.g. success/validity flag) is not visible in this header -- confirm against the definition. */
+Hacl_Impl_P256_DH_ecp256dh_r( /* Declaration only, external linkage. NOTE(review): looks like the ECDH "responder" step -- confirm. */
+ uint8_t *shared_secret, /* presumably the output buffer; required length not visible here */
+ uint8_t *their_pubkey, /* presumably the peer's public key; encoding and length not visible here */
+ uint8_t *private_key); /* presumably the local private scalar; length not visible here */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_P256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_P256_PrecompTable.h b/security/nss/lib/freebl/verified/internal/Hacl_P256_PrecompTable.h
new file mode 100644
index 0000000000..93e2591875
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_P256_PrecompTable.h
@@ -0,0 +1,508 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_P256_PrecompTable_H
+#define __internal_Hacl_P256_PrecompTable_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+static const uint64_t /* Read-only table with internal linkage: each file that includes this header gets its own copy. */
+ Hacl_P256_PrecompTable_precomp_basepoint_table_w4[192U] = { /* 192 words. NOTE(review): presumably 16 points x 12 limbs for 4-bit windows of the base point -- layout inferred from name and size, confirm at the use site. */
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)8784043285714375740U,
+ (uint64_t)8483257759279461889U, (uint64_t)8789745728267363600U, (uint64_t)1770019616739251654U,
+ (uint64_t)15992936863339206154U, (uint64_t)10037038012062884956U,
+ (uint64_t)15197544864945402661U, (uint64_t)9615747158586711429U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)10634854829044225757U, (uint64_t)351552716085025155U, (uint64_t)10645315080955407736U,
+ (uint64_t)3609262091244858135U, (uint64_t)15760741698986874125U,
+ (uint64_t)14936374388219697827U, (uint64_t)15751360096993017895U,
+ (uint64_t)18012233706239762398U, (uint64_t)1993877568177495041U,
+ (uint64_t)10345888787846536528U, (uint64_t)7746511691117935375U,
+ (uint64_t)14517043990409914413U, (uint64_t)14122549297570634151U,
+ (uint64_t)16934610359517083771U, (uint64_t)5724511325497097418U, (uint64_t)8983432969107448705U,
+ (uint64_t)2687429970334080245U, (uint64_t)16525396802810050288U, (uint64_t)7602596488871585854U,
+ (uint64_t)4813919589149203084U, (uint64_t)7680395813780804519U, (uint64_t)6687709583048023590U,
+ (uint64_t)18086445169104142027U, (uint64_t)9637814708330203929U,
+ (uint64_t)14785108459960679090U, (uint64_t)3838023279095023581U, (uint64_t)3555615526157830307U,
+ (uint64_t)5177066488380472871U, (uint64_t)18218186719108038403U,
+ (uint64_t)16281556341699656105U, (uint64_t)1524227924561461191U, (uint64_t)4148060517641909597U,
+ (uint64_t)2858290374115363433U, (uint64_t)8942772026334130620U, (uint64_t)3034451298319885113U,
+ (uint64_t)8447866036736640940U, (uint64_t)11204933433076256578U,
+ (uint64_t)18333595740249588297U, (uint64_t)8259597024804538246U, (uint64_t)9539734295777539786U,
+ (uint64_t)9797290423046626413U, (uint64_t)5777303437849646537U, (uint64_t)8739356909899132020U,
+ (uint64_t)14815960973766782158U, (uint64_t)15286581798204509801U,
+ (uint64_t)17597362577777019682U, (uint64_t)13259283710820519742U,
+ (uint64_t)10501322996899164670U, (uint64_t)1221138904338319642U,
+ (uint64_t)14586685489551951885U, (uint64_t)895326705426031212U, (uint64_t)14398171728560617847U,
+ (uint64_t)9592550823745097391U, (uint64_t)17240998489162206026U, (uint64_t)8085479283308189196U,
+ (uint64_t)14844657737893882826U, (uint64_t)15923425394150618234U,
+ (uint64_t)2997808084773249525U, (uint64_t)494323555453660587U, (uint64_t)1215695327517794764U,
+ (uint64_t)9476207381098391690U, (uint64_t)7480789678419122995U, (uint64_t)15212230329321082489U,
+ (uint64_t)436189395349576388U, (uint64_t)17377474396456660834U, (uint64_t)15237013929655017939U,
+ (uint64_t)11444428846883781676U, (uint64_t)5112749694521428575U, (uint64_t)950829367509872073U,
+ (uint64_t)17665036182057559519U, (uint64_t)17205133339690002313U,
+ (uint64_t)16233765170251334549U, (uint64_t)10122775683257972591U,
+ (uint64_t)3352514236455632420U, (uint64_t)9143148522359954691U, (uint64_t)601191684005658860U,
+ (uint64_t)13398772186646349998U, (uint64_t)15512696600132928431U,
+ (uint64_t)9128416073728948653U, (uint64_t)11233051033546138578U, (uint64_t)6769345682610122833U,
+ (uint64_t)10823233224575054288U, (uint64_t)9997725227559980175U, (uint64_t)6733425642852897415U,
+ (uint64_t)16302206918151466066U, (uint64_t)1669330822143265921U, (uint64_t)2661645605036546002U,
+ (uint64_t)17182558479745802165U, (uint64_t)1165082692376932040U, (uint64_t)9470595929011488359U,
+ (uint64_t)6142147329285324932U, (uint64_t)4829075085998111287U, (uint64_t)10231370681107338930U,
+ (uint64_t)9591876895322495239U, (uint64_t)10316468561384076618U,
+ (uint64_t)11592503647238064235U, (uint64_t)13395813606055179632U, (uint64_t)511127033980815508U,
+ (uint64_t)12434976573147649880U, (uint64_t)3425094795384359127U, (uint64_t)6816971736303023445U,
+ (uint64_t)15444670609021139344U, (uint64_t)9464349818322082360U,
+ (uint64_t)16178216413042376883U, (uint64_t)9595540370774317348U, (uint64_t)7229365182662875710U,
+ (uint64_t)4601177649460012843U, (uint64_t)5455046447382487090U, (uint64_t)10854066421606187521U,
+ (uint64_t)15913416821879788071U, (uint64_t)2297365362023460173U, (uint64_t)2603252216454941350U,
+ (uint64_t)6768791943870490934U, (uint64_t)15705936687122754810U, (uint64_t)9537096567546600694U,
+ (uint64_t)17580538144855035062U, (uint64_t)4496542856965746638U, (uint64_t)8444341625922124942U,
+ (uint64_t)12191263903636183168U, (uint64_t)17427332907535974165U,
+ (uint64_t)14307569739254103736U, (uint64_t)13900598742063266169U,
+ (uint64_t)7176996424355977650U, (uint64_t)5709008170379717479U, (uint64_t)14471312052264549092U,
+ (uint64_t)1464519909491759867U, (uint64_t)3328154641049602121U, (uint64_t)13020349337171136774U,
+ (uint64_t)2772166279972051938U, (uint64_t)10854476939425975292U, (uint64_t)1967189930534630940U,
+ (uint64_t)2802919076529341959U, (uint64_t)14792226094833519208U,
+ (uint64_t)14675640928566522177U, (uint64_t)14838974364643800837U,
+ (uint64_t)17631460696099549980U, (uint64_t)17434186275364935469U,
+ (uint64_t)2665648200587705473U, (uint64_t)13202122464492564051U, (uint64_t)7576287350918073341U,
+ (uint64_t)2272206013910186424U, (uint64_t)14558761641743937843U, (uint64_t)5675729149929979729U,
+ (uint64_t)9043135187561613166U, (uint64_t)11750149293830589225U, (uint64_t)740555197954307911U,
+ (uint64_t)9871738005087190699U, (uint64_t)17178667634283502053U,
+ (uint64_t)18046255991533013265U, (uint64_t)4458222096988430430U, (uint64_t)8452427758526311627U,
+ (uint64_t)13825286929656615266U, (uint64_t)13956286357198391218U,
+ (uint64_t)15875692916799995079U, (uint64_t)10634895319157013920U,
+ (uint64_t)13230116118036304207U, (uint64_t)8795317393614625606U, (uint64_t)7001710806858862020U,
+ (uint64_t)7949746088586183478U, (uint64_t)14677556044923602317U,
+ (uint64_t)11184023437485843904U, (uint64_t)11215864722023085094U,
+ (uint64_t)6444464081471519014U, (uint64_t)1706241174022415217U, (uint64_t)8243975633057550613U,
+ (uint64_t)15502902453836085864U, (uint64_t)3799182188594003953U, (uint64_t)3538840175098724094U
+ };
+
+static const uint64_t /* Read-only table with internal linkage: each file that includes this header gets its own copy. */
+ Hacl_P256_PrecompTable_precomp_g_pow2_64_table_w4[192U] = { /* 192 words. NOTE(review): presumably 16 points x 12 limbs for 4-bit windows of [2^64]G -- layout inferred from name and size, confirm at the use site. */
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1499621593102562565U,
+ (uint64_t)16692369783039433128U, (uint64_t)15337520135922861848U,
+ (uint64_t)5455737214495366228U, (uint64_t)17827017231032529600U,
+ (uint64_t)12413621606240782649U, (uint64_t)2290483008028286132U,
+ (uint64_t)15752017553340844820U, (uint64_t)4846430910634234874U,
+ (uint64_t)10861682798464583253U, (uint64_t)15404737222404363049U, (uint64_t)363586619281562022U,
+ (uint64_t)9866710912401645115U, (uint64_t)1162548847543228595U, (uint64_t)7649967190445130486U,
+ (uint64_t)5212340432230915749U, (uint64_t)7572620550182916491U, (uint64_t)14876145112448665096U,
+ (uint64_t)2063227348838176167U, (uint64_t)3519435548295415847U, (uint64_t)8390400282019023103U,
+ (uint64_t)17666843593163037841U, (uint64_t)9450204148816496323U, (uint64_t)8483374507652916768U,
+ (uint64_t)6254661047265818424U, (uint64_t)16382127809582285023U, (uint64_t)125359443771153172U,
+ (uint64_t)1374336701588437897U, (uint64_t)11362596098420127726U, (uint64_t)2101654420738681387U,
+ (uint64_t)12772780342444840510U, (uint64_t)12546934328908550060U,
+ (uint64_t)8331880412333790397U, (uint64_t)11687262051473819904U, (uint64_t)8926848496503457587U,
+ (uint64_t)9603974142010467857U, (uint64_t)13199952163826973175U, (uint64_t)2189856264898797734U,
+ (uint64_t)11356074861870267226U, (uint64_t)2027714896422561895U, (uint64_t)5261606367808050149U,
+ (uint64_t)153855954337762312U, (uint64_t)6375919692894573986U, (uint64_t)12364041207536146533U,
+ (uint64_t)1891896010455057160U, (uint64_t)1568123795087313171U, (uint64_t)18138710056556660101U,
+ (uint64_t)6004886947510047736U, (uint64_t)4811859325589542932U, (uint64_t)3618763430148954981U,
+ (uint64_t)11434521746258554122U, (uint64_t)10086341535864049427U,
+ (uint64_t)8073421629570399570U, (uint64_t)12680586148814729338U, (uint64_t)9619958020761569612U,
+ (uint64_t)15827203580658384478U, (uint64_t)12832694810937550406U,
+ (uint64_t)14977975484447400910U, (uint64_t)5478002389061063653U,
+ (uint64_t)14731136312639060880U, (uint64_t)4317867687275472033U, (uint64_t)6642650962855259884U,
+ (uint64_t)2514254944289495285U, (uint64_t)14231405641534478436U, (uint64_t)4045448346091518946U,
+ (uint64_t)8985477013445972471U, (uint64_t)8869039454457032149U, (uint64_t)4356978486208692970U,
+ (uint64_t)10805288613335538577U, (uint64_t)12832353127812502042U,
+ (uint64_t)4576590051676547490U, (uint64_t)6728053735138655107U, (uint64_t)17814206719173206184U,
+ (uint64_t)79790138573994940U, (uint64_t)17920293215101822267U, (uint64_t)13422026625585728864U,
+ (uint64_t)5018058010492547271U, (uint64_t)110232326023384102U, (uint64_t)10834264070056942976U,
+ (uint64_t)15222249086119088588U, (uint64_t)15119439519142044997U,
+ (uint64_t)11655511970063167313U, (uint64_t)1614477029450566107U, (uint64_t)3619322817271059794U,
+ (uint64_t)9352862040415412867U, (uint64_t)14017522553242747074U,
+ (uint64_t)13138513643674040327U, (uint64_t)3610195242889455765U, (uint64_t)8371069193996567291U,
+ (uint64_t)12670227996544662654U, (uint64_t)1205961025092146303U,
+ (uint64_t)13106709934003962112U, (uint64_t)4350113471327723407U,
+ (uint64_t)15060941403739680459U, (uint64_t)13639127647823205030U,
+ (uint64_t)10790943339357725715U, (uint64_t)498760574280648264U, (uint64_t)17922071907832082887U,
+ (uint64_t)15122670976670152145U, (uint64_t)6275027991110214322U, (uint64_t)7250912847491816402U,
+ (uint64_t)15206617260142982380U, (uint64_t)3385668313694152877U,
+ (uint64_t)17522479771766801905U, (uint64_t)2965919117476170655U, (uint64_t)1553238516603269404U,
+ (uint64_t)5820770015631050991U, (uint64_t)4999445222232605348U, (uint64_t)9245650860833717444U,
+ (uint64_t)1508811811724230728U, (uint64_t)5190684913765614385U, (uint64_t)15692927070934536166U,
+ (uint64_t)12981978499190500902U, (uint64_t)5143491963193394698U, (uint64_t)7705698092144084129U,
+ (uint64_t)581120653055084783U, (uint64_t)13886552864486459714U, (uint64_t)6290301270652587255U,
+ (uint64_t)8663431529954393128U, (uint64_t)17033405846475472443U, (uint64_t)5206780355442651635U,
+ (uint64_t)12580364474736467688U, (uint64_t)17934601912005283310U,
+ (uint64_t)15119491731028933652U, (uint64_t)17848231399859044858U,
+ (uint64_t)4427673319524919329U, (uint64_t)2673607337074368008U, (uint64_t)14034876464294699949U,
+ (uint64_t)10938948975420813697U, (uint64_t)15202340615298669183U,
+ (uint64_t)5496603454069431071U, (uint64_t)2486526142064906845U, (uint64_t)4507882119510526802U,
+ (uint64_t)13888151172411390059U, (uint64_t)15049027856908071726U,
+ (uint64_t)9667231543181973158U, (uint64_t)6406671575277563202U, (uint64_t)3395801050331215139U,
+ (uint64_t)9813607433539108308U, (uint64_t)2681417728820980381U, (uint64_t)18407064643927113994U,
+ (uint64_t)7707177692113485527U, (uint64_t)14218149384635317074U, (uint64_t)3658668346206375919U,
+ (uint64_t)15404713991002362166U, (uint64_t)10152074687696195207U,
+ (uint64_t)10926946599582128139U, (uint64_t)16907298600007085320U,
+ (uint64_t)16544287219664720279U, (uint64_t)11007075933432813205U,
+ (uint64_t)8652245965145713599U, (uint64_t)7857626748965990384U, (uint64_t)5602306604520095870U,
+ (uint64_t)2525139243938658618U, (uint64_t)14405696176872077447U,
+ (uint64_t)18432270482137885332U, (uint64_t)9913880809120071177U,
+ (uint64_t)16896141737831216972U, (uint64_t)7484791498211214829U,
+ (uint64_t)15635259968266497469U, (uint64_t)8495118537612215624U, (uint64_t)4915477980562575356U,
+ (uint64_t)16453519279754924350U, (uint64_t)14462108244565406969U,
+ (uint64_t)14837837755237096687U, (uint64_t)14130171078892575346U,
+ (uint64_t)15423793222528491497U, (uint64_t)5460399262075036084U,
+ (uint64_t)16085440580308415349U, (uint64_t)26873200736954488U, (uint64_t)5603655807457499550U,
+ (uint64_t)3342202915871129617U, (uint64_t)1604413932150236626U, (uint64_t)9684226585089458974U,
+ (uint64_t)1213229904006618539U, (uint64_t)6782978662408837236U, (uint64_t)11197029877749307372U,
+ (uint64_t)14085968786551657744U, (uint64_t)17352273610494009342U,
+ (uint64_t)7876582961192434984U
+ };
+
+static const uint64_t /* Read-only table with internal linkage: each file that includes this header gets its own copy. */
+ Hacl_P256_PrecompTable_precomp_g_pow2_128_table_w4[192U] = { /* 192 words. NOTE(review): presumably 16 points x 12 limbs for 4-bit windows of [2^128]G -- layout inferred from name and size, confirm at the use site. */
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)14619254753077084366U,
+ (uint64_t)13913835116514008593U, (uint64_t)15060744674088488145U,
+ (uint64_t)17668414598203068685U, (uint64_t)10761169236902342334U,
+ (uint64_t)15467027479157446221U, (uint64_t)14989185522423469618U,
+ (uint64_t)14354539272510107003U, (uint64_t)14298211796392133693U,
+ (uint64_t)13270323784253711450U, (uint64_t)13380964971965046957U,
+ (uint64_t)8686204248456909699U, (uint64_t)17434630286744937066U, (uint64_t)1355903775279084720U,
+ (uint64_t)7554695053550308662U, (uint64_t)11354971222741863570U, (uint64_t)564601613420749879U,
+ (uint64_t)8466325837259054896U, (uint64_t)10752965181772434263U,
+ (uint64_t)11405876547368426319U, (uint64_t)13791894568738930940U,
+ (uint64_t)8230587134406354675U, (uint64_t)12415514098722758608U,
+ (uint64_t)18414183046995786744U, (uint64_t)15508000368227372870U,
+ (uint64_t)5781062464627999307U, (uint64_t)15339429052219195590U,
+ (uint64_t)16038703753810741903U, (uint64_t)9587718938298980714U, (uint64_t)4822658817952386407U,
+ (uint64_t)1376351024833260660U, (uint64_t)1120174910554766702U, (uint64_t)1730170933262569274U,
+ (uint64_t)5187428548444533500U, (uint64_t)16242053503368957131U, (uint64_t)3036811119519868279U,
+ (uint64_t)1760267587958926638U, (uint64_t)170244572981065185U, (uint64_t)8063080791967388171U,
+ (uint64_t)4824892826607692737U, (uint64_t)16286391083472040552U,
+ (uint64_t)11945158615253358747U, (uint64_t)14096887760410224200U,
+ (uint64_t)1613720831904557039U, (uint64_t)14316966673761197523U,
+ (uint64_t)17411006201485445341U, (uint64_t)8112301506943158801U, (uint64_t)2069889233927989984U,
+ (uint64_t)10082848378277483927U, (uint64_t)3609691194454404430U, (uint64_t)6110437205371933689U,
+ (uint64_t)9769135977342231601U, (uint64_t)11977962151783386478U,
+ (uint64_t)18088718692559983573U, (uint64_t)11741637975753055U, (uint64_t)11110390325701582190U,
+ (uint64_t)1341402251566067019U, (uint64_t)3028229550849726478U, (uint64_t)10438984083997451310U,
+ (uint64_t)12730851885100145709U, (uint64_t)11524169532089894189U,
+ (uint64_t)4523375903229602674U, (uint64_t)2028602258037385622U, (uint64_t)17082839063089388410U,
+ (uint64_t)6103921364634113167U, (uint64_t)17066180888225306102U,
+ (uint64_t)11395680486707876195U, (uint64_t)10952892272443345484U,
+ (uint64_t)8792831960605859401U, (uint64_t)14194485427742325139U,
+ (uint64_t)15146020821144305250U, (uint64_t)1654766014957123343U, (uint64_t)7955526243090948551U,
+ (uint64_t)3989277566080493308U, (uint64_t)12229385116397931231U,
+ (uint64_t)13430548930727025562U, (uint64_t)3434892688179800602U, (uint64_t)8431998794645622027U,
+ (uint64_t)12132530981596299272U, (uint64_t)2289461608863966999U,
+ (uint64_t)18345870950201487179U, (uint64_t)13517947207801901576U,
+ (uint64_t)5213113244172561159U, (uint64_t)17632986594098340879U, (uint64_t)4405251818133148856U,
+ (uint64_t)11783009269435447793U, (uint64_t)9332138983770046035U,
+ (uint64_t)12863411548922539505U, (uint64_t)3717030292816178224U,
+ (uint64_t)10026078446427137374U, (uint64_t)11167295326594317220U,
+ (uint64_t)12425328773141588668U, (uint64_t)5760335125172049352U, (uint64_t)9016843701117277863U,
+ (uint64_t)5657892835694680172U, (uint64_t)11025130589305387464U, (uint64_t)1368484957977406173U,
+ (uint64_t)17361351345281258834U, (uint64_t)1907113641956152700U,
+ (uint64_t)16439233413531427752U, (uint64_t)5893322296986588932U,
+ (uint64_t)14000206906171746627U, (uint64_t)14979266987545792900U,
+ (uint64_t)6926291766898221120U, (uint64_t)7162023296083360752U, (uint64_t)14762747553625382529U,
+ (uint64_t)12610831658612406849U, (uint64_t)10462926899548715515U,
+ (uint64_t)4794017723140405312U, (uint64_t)5234438200490163319U, (uint64_t)8019519110339576320U,
+ (uint64_t)7194604241290530100U, (uint64_t)12626770134810813246U,
+ (uint64_t)10793074474236419890U, (uint64_t)11323224347913978783U,
+ (uint64_t)16831128015895380245U, (uint64_t)18323094195124693378U,
+ (uint64_t)2361097165281567692U, (uint64_t)15755578675014279498U,
+ (uint64_t)14289876470325854580U, (uint64_t)12856787656093616839U,
+ (uint64_t)3578928531243900594U, (uint64_t)3847532758790503699U, (uint64_t)8377953190224748743U,
+ (uint64_t)3314546646092744596U, (uint64_t)800810188859334358U, (uint64_t)4626344124229343596U,
+ (uint64_t)6620381605850876621U, (uint64_t)11422073570955989527U,
+ (uint64_t)12676813626484814469U, (uint64_t)16725029886764122240U,
+ (uint64_t)16648497372773830008U, (uint64_t)9135702594931291048U,
+ (uint64_t)16080949688826680333U, (uint64_t)11528096561346602947U,
+ (uint64_t)2632498067099740984U, (uint64_t)11583842699108800714U, (uint64_t)8378404864573610526U,
+ (uint64_t)1076560261627788534U, (uint64_t)13836015994325032828U,
+ (uint64_t)11234295937817067909U, (uint64_t)5893659808396722708U,
+ (uint64_t)11277421142886984364U, (uint64_t)8968549037166726491U,
+ (uint64_t)14841374331394032822U, (uint64_t)9967344773947889341U, (uint64_t)8799244393578496085U,
+ (uint64_t)5094686877301601410U, (uint64_t)8780316747074726862U, (uint64_t)9119697306829835718U,
+ (uint64_t)15381243327921855368U, (uint64_t)2686250164449435196U,
+ (uint64_t)16466917280442198358U, (uint64_t)13791704489163125216U,
+ (uint64_t)16955859337117924272U, (uint64_t)17112836394923783642U,
+ (uint64_t)4639176427338618063U, (uint64_t)16770029310141094964U,
+ (uint64_t)11049953922966416185U, (uint64_t)12012669590884098968U,
+ (uint64_t)4859326885929417214U, (uint64_t)896380084392586061U, (uint64_t)7153028362977034008U,
+ (uint64_t)10540021163316263301U, (uint64_t)9318277998512936585U,
+ (uint64_t)18344496977694796523U, (uint64_t)11374737400567645494U,
+ (uint64_t)17158800051138212954U, (uint64_t)18343197867863253153U,
+ (uint64_t)18204799297967861226U, (uint64_t)15798973531606348828U,
+ (uint64_t)9870158263408310459U, (uint64_t)17578869832774612627U, (uint64_t)8395748875822696932U,
+ (uint64_t)15310679007370670872U, (uint64_t)11205576736030808860U,
+ (uint64_t)10123429210002838967U, (uint64_t)5910544144088393959U,
+ (uint64_t)14016615653353687369U, (uint64_t)11191676704772957822U
+ };
+
+static const uint64_t /* Read-only table with internal linkage: each file that includes this header gets its own copy. */
+ Hacl_P256_PrecompTable_precomp_g_pow2_192_table_w4[192U] = { /* 192 words. NOTE(review): presumably 16 points x 12 limbs for 4-bit windows of [2^192]G -- layout inferred from name and size, confirm at the use site. */
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)7870395003430845958U,
+ (uint64_t)18001862936410067720U, (uint64_t)8006461232116967215U, (uint64_t)5921313779532424762U,
+ (uint64_t)10702113371959864307U, (uint64_t)8070517410642379879U, (uint64_t)7139806720777708306U,
+ (uint64_t)8253938546650739833U, (uint64_t)17490482834545705718U, (uint64_t)1065249776797037500U,
+ (uint64_t)5018258455937968775U, (uint64_t)14100621120178668337U, (uint64_t)8392845221328116213U,
+ (uint64_t)14630296398338540788U, (uint64_t)4268947906723414372U, (uint64_t)9231207002243517909U,
+ (uint64_t)14261219637616504262U, (uint64_t)7786881626982345356U,
+ (uint64_t)11412720751765882139U, (uint64_t)14119585051365330009U,
+ (uint64_t)15281626286521302128U, (uint64_t)6350171933454266732U,
+ (uint64_t)16559468304937127866U, (uint64_t)13200760478271693417U,
+ (uint64_t)6733381546280350776U, (uint64_t)3801404890075189193U, (uint64_t)2741036364686993903U,
+ (uint64_t)3218612940540174008U, (uint64_t)10894914335165419505U,
+ (uint64_t)11862941430149998362U, (uint64_t)4223151729402839584U, (uint64_t)2913215088487087887U,
+ (uint64_t)14562168920104952953U, (uint64_t)2170089393468287453U,
+ (uint64_t)10520900655016579352U, (uint64_t)7040362608949989273U, (uint64_t)8376510559381705307U,
+ (uint64_t)9142237200448131532U, (uint64_t)5696859948123854080U, (uint64_t)925422306716081180U,
+ (uint64_t)11155545953469186421U, (uint64_t)1888208646862572812U,
+ (uint64_t)11151095998248845721U, (uint64_t)15793503271680275267U,
+ (uint64_t)7729877044494854851U, (uint64_t)6235134673193032913U, (uint64_t)7364280682182401564U,
+ (uint64_t)5479679373325519985U, (uint64_t)17966037684582301763U,
+ (uint64_t)14140891609330279185U, (uint64_t)5814744449740463867U, (uint64_t)5652588426712591652U,
+ (uint64_t)774745682988690912U, (uint64_t)13228255573220500373U, (uint64_t)11949122068786859397U,
+ (uint64_t)8021166392900770376U, (uint64_t)7994323710948720063U, (uint64_t)9924618472877849977U,
+ (uint64_t)17618517523141194266U, (uint64_t)2750424097794401714U,
+ (uint64_t)15481749570715253207U, (uint64_t)14646964509921760497U,
+ (uint64_t)1037442848094301355U, (uint64_t)6295995947389299132U, (uint64_t)16915049722317579514U,
+ (uint64_t)10493877400992990313U, (uint64_t)18391008753060553521U, (uint64_t)483942209623707598U,
+ (uint64_t)2017775662838016613U, (uint64_t)5933251998459363553U, (uint64_t)11789135019970707407U,
+ (uint64_t)5484123723153268336U, (uint64_t)13246954648848484954U, (uint64_t)4774374393926023505U,
+ (uint64_t)14863995618704457336U, (uint64_t)13220153167104973625U,
+ (uint64_t)5988445485312390826U, (uint64_t)17580359464028944682U, (uint64_t)7297100131969874771U,
+ (uint64_t)379931507867989375U, (uint64_t)10927113096513421444U, (uint64_t)17688881974428340857U,
+ (uint64_t)4259872578781463333U, (uint64_t)8573076295966784472U, (uint64_t)16389829450727275032U,
+ (uint64_t)1667243868963568259U, (uint64_t)17730726848925960919U,
+ (uint64_t)11408899874569778008U, (uint64_t)3576527582023272268U,
+ (uint64_t)16492920640224231656U, (uint64_t)7906130545972460130U,
+ (uint64_t)13878604278207681266U, (uint64_t)41446695125652041U, (uint64_t)8891615271337333503U,
+ (uint64_t)2594537723613594470U, (uint64_t)7699579176995770924U, (uint64_t)147458463055730655U,
+ (uint64_t)12120406862739088406U, (uint64_t)12044892493010567063U,
+ (uint64_t)8554076749615475136U, (uint64_t)1005097692260929999U, (uint64_t)2687202654471188715U,
+ (uint64_t)9457588752176879209U, (uint64_t)17472884880062444019U, (uint64_t)9792097892056020166U,
+ (uint64_t)2525246678512797150U, (uint64_t)15958903035313115662U,
+ (uint64_t)11336038170342247032U, (uint64_t)11560342382835141123U,
+ (uint64_t)6212009033479929024U, (uint64_t)8214308203775021229U, (uint64_t)8475469210070503698U,
+ (uint64_t)13287024123485719563U, (uint64_t)12956951963817520723U,
+ (uint64_t)10693035819908470465U, (uint64_t)11375478788224786725U,
+ (uint64_t)16934625208487120398U, (uint64_t)10094585729115874495U,
+ (uint64_t)2763884524395905776U, (uint64_t)13535890148969964883U,
+ (uint64_t)13514657411765064358U, (uint64_t)9903074440788027562U,
+ (uint64_t)17324720726421199990U, (uint64_t)2273931039117368789U, (uint64_t)3442641041506157854U,
+ (uint64_t)1119853641236409612U, (uint64_t)12037070344296077989U, (uint64_t)581736433335671746U,
+ (uint64_t)6019150647054369174U, (uint64_t)14864096138068789375U, (uint64_t)6652995210998318662U,
+ (uint64_t)12773883697029175304U, (uint64_t)12751275631451845119U,
+ (uint64_t)11449095003038250478U, (uint64_t)1025805267334366480U, (uint64_t)2764432500300815015U,
+ (uint64_t)18274564429002844381U, (uint64_t)10445634195592600351U,
+ (uint64_t)11814099592837202735U, (uint64_t)5006796893679120289U, (uint64_t)6908397253997261914U,
+ (uint64_t)13266696965302879279U, (uint64_t)7768715053015037430U, (uint64_t)3569923738654785686U,
+ (uint64_t)5844853453464857549U, (uint64_t)1837340805629559110U, (uint64_t)1034657624388283114U,
+ (uint64_t)711244516069456460U, (uint64_t)12519286026957934814U, (uint64_t)2613464944620837619U,
+ (uint64_t)10003023321338286213U, (uint64_t)7291332092642881376U, (uint64_t)9832199564117004897U,
+ (uint64_t)3280736694860799890U, (uint64_t)6416452202849179874U, (uint64_t)7326961381798642069U,
+ (uint64_t)8435688798040635029U, (uint64_t)16630141263910982958U,
+ (uint64_t)17222635514422533318U, (uint64_t)9482787389178881499U, (uint64_t)836561194658263905U,
+ (uint64_t)3405319043337616649U, (uint64_t)2786146577568026518U, (uint64_t)7625483685691626321U,
+ (uint64_t)6728084875304656716U, (uint64_t)1140997959232544268U, (uint64_t)12847384827606303792U,
+ (uint64_t)1719121337754572070U, (uint64_t)12863589482936438532U, (uint64_t)3880712899640530862U,
+ (uint64_t)2748456882813671564U, (uint64_t)4775988900044623019U, (uint64_t)8937847374382191162U,
+ (uint64_t)3767367347172252295U, (uint64_t)13468672401049388646U,
+ (uint64_t)14359032216842397576U, (uint64_t)2002555958685443975U,
+ (uint64_t)16488678606651526810U, (uint64_t)11826135409597474760U,
+ (uint64_t)15296495673182508601U
+ };
+
+static const uint64_t
+ Hacl_P256_PrecompTable_precomp_basepoint_table_w5[384U] = {
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)0U, (uint64_t)8784043285714375740U,
+ (uint64_t)8483257759279461889U, (uint64_t)8789745728267363600U, (uint64_t)1770019616739251654U,
+ (uint64_t)15992936863339206154U, (uint64_t)10037038012062884956U,
+ (uint64_t)15197544864945402661U, (uint64_t)9615747158586711429U, (uint64_t)1U,
+ (uint64_t)18446744069414584320U, (uint64_t)18446744073709551615U, (uint64_t)4294967294U,
+ (uint64_t)10634854829044225757U, (uint64_t)351552716085025155U, (uint64_t)10645315080955407736U,
+ (uint64_t)3609262091244858135U, (uint64_t)15760741698986874125U,
+ (uint64_t)14936374388219697827U, (uint64_t)15751360096993017895U,
+ (uint64_t)18012233706239762398U, (uint64_t)1993877568177495041U,
+ (uint64_t)10345888787846536528U, (uint64_t)7746511691117935375U,
+ (uint64_t)14517043990409914413U, (uint64_t)14122549297570634151U,
+ (uint64_t)16934610359517083771U, (uint64_t)5724511325497097418U, (uint64_t)8983432969107448705U,
+ (uint64_t)2687429970334080245U, (uint64_t)16525396802810050288U, (uint64_t)7602596488871585854U,
+ (uint64_t)4813919589149203084U, (uint64_t)7680395813780804519U, (uint64_t)6687709583048023590U,
+ (uint64_t)18086445169104142027U, (uint64_t)9637814708330203929U,
+ (uint64_t)14785108459960679090U, (uint64_t)3838023279095023581U, (uint64_t)3555615526157830307U,
+ (uint64_t)5177066488380472871U, (uint64_t)18218186719108038403U,
+ (uint64_t)16281556341699656105U, (uint64_t)1524227924561461191U, (uint64_t)4148060517641909597U,
+ (uint64_t)2858290374115363433U, (uint64_t)8942772026334130620U, (uint64_t)3034451298319885113U,
+ (uint64_t)8447866036736640940U, (uint64_t)11204933433076256578U,
+ (uint64_t)18333595740249588297U, (uint64_t)8259597024804538246U, (uint64_t)9539734295777539786U,
+ (uint64_t)9797290423046626413U, (uint64_t)5777303437849646537U, (uint64_t)8739356909899132020U,
+ (uint64_t)14815960973766782158U, (uint64_t)15286581798204509801U,
+ (uint64_t)17597362577777019682U, (uint64_t)13259283710820519742U,
+ (uint64_t)10501322996899164670U, (uint64_t)1221138904338319642U,
+ (uint64_t)14586685489551951885U, (uint64_t)895326705426031212U, (uint64_t)14398171728560617847U,
+ (uint64_t)9592550823745097391U, (uint64_t)17240998489162206026U, (uint64_t)8085479283308189196U,
+ (uint64_t)14844657737893882826U, (uint64_t)15923425394150618234U,
+ (uint64_t)2997808084773249525U, (uint64_t)494323555453660587U, (uint64_t)1215695327517794764U,
+ (uint64_t)9476207381098391690U, (uint64_t)7480789678419122995U, (uint64_t)15212230329321082489U,
+ (uint64_t)436189395349576388U, (uint64_t)17377474396456660834U, (uint64_t)15237013929655017939U,
+ (uint64_t)11444428846883781676U, (uint64_t)5112749694521428575U, (uint64_t)950829367509872073U,
+ (uint64_t)17665036182057559519U, (uint64_t)17205133339690002313U,
+ (uint64_t)16233765170251334549U, (uint64_t)10122775683257972591U,
+ (uint64_t)3352514236455632420U, (uint64_t)9143148522359954691U, (uint64_t)601191684005658860U,
+ (uint64_t)13398772186646349998U, (uint64_t)15512696600132928431U,
+ (uint64_t)9128416073728948653U, (uint64_t)11233051033546138578U, (uint64_t)6769345682610122833U,
+ (uint64_t)10823233224575054288U, (uint64_t)9997725227559980175U, (uint64_t)6733425642852897415U,
+ (uint64_t)16302206918151466066U, (uint64_t)1669330822143265921U, (uint64_t)2661645605036546002U,
+ (uint64_t)17182558479745802165U, (uint64_t)1165082692376932040U, (uint64_t)9470595929011488359U,
+ (uint64_t)6142147329285324932U, (uint64_t)4829075085998111287U, (uint64_t)10231370681107338930U,
+ (uint64_t)9591876895322495239U, (uint64_t)10316468561384076618U,
+ (uint64_t)11592503647238064235U, (uint64_t)13395813606055179632U, (uint64_t)511127033980815508U,
+ (uint64_t)12434976573147649880U, (uint64_t)3425094795384359127U, (uint64_t)6816971736303023445U,
+ (uint64_t)15444670609021139344U, (uint64_t)9464349818322082360U,
+ (uint64_t)16178216413042376883U, (uint64_t)9595540370774317348U, (uint64_t)7229365182662875710U,
+ (uint64_t)4601177649460012843U, (uint64_t)5455046447382487090U, (uint64_t)10854066421606187521U,
+ (uint64_t)15913416821879788071U, (uint64_t)2297365362023460173U, (uint64_t)2603252216454941350U,
+ (uint64_t)6768791943870490934U, (uint64_t)15705936687122754810U, (uint64_t)9537096567546600694U,
+ (uint64_t)17580538144855035062U, (uint64_t)4496542856965746638U, (uint64_t)8444341625922124942U,
+ (uint64_t)12191263903636183168U, (uint64_t)17427332907535974165U,
+ (uint64_t)14307569739254103736U, (uint64_t)13900598742063266169U,
+ (uint64_t)7176996424355977650U, (uint64_t)5709008170379717479U, (uint64_t)14471312052264549092U,
+ (uint64_t)1464519909491759867U, (uint64_t)3328154641049602121U, (uint64_t)13020349337171136774U,
+ (uint64_t)2772166279972051938U, (uint64_t)10854476939425975292U, (uint64_t)1967189930534630940U,
+ (uint64_t)2802919076529341959U, (uint64_t)14792226094833519208U,
+ (uint64_t)14675640928566522177U, (uint64_t)14838974364643800837U,
+ (uint64_t)17631460696099549980U, (uint64_t)17434186275364935469U,
+ (uint64_t)2665648200587705473U, (uint64_t)13202122464492564051U, (uint64_t)7576287350918073341U,
+ (uint64_t)2272206013910186424U, (uint64_t)14558761641743937843U, (uint64_t)5675729149929979729U,
+ (uint64_t)9043135187561613166U, (uint64_t)11750149293830589225U, (uint64_t)740555197954307911U,
+ (uint64_t)9871738005087190699U, (uint64_t)17178667634283502053U,
+ (uint64_t)18046255991533013265U, (uint64_t)4458222096988430430U, (uint64_t)8452427758526311627U,
+ (uint64_t)13825286929656615266U, (uint64_t)13956286357198391218U,
+ (uint64_t)15875692916799995079U, (uint64_t)10634895319157013920U,
+ (uint64_t)13230116118036304207U, (uint64_t)8795317393614625606U, (uint64_t)7001710806858862020U,
+ (uint64_t)7949746088586183478U, (uint64_t)14677556044923602317U,
+ (uint64_t)11184023437485843904U, (uint64_t)11215864722023085094U,
+ (uint64_t)6444464081471519014U, (uint64_t)1706241174022415217U, (uint64_t)8243975633057550613U,
+ (uint64_t)15502902453836085864U, (uint64_t)3799182188594003953U, (uint64_t)3538840175098724094U,
+ (uint64_t)13240193491554624643U, (uint64_t)12365034249541329920U,
+ (uint64_t)2924326828590977357U, (uint64_t)5687195797140589099U, (uint64_t)16880427227292834531U,
+ (uint64_t)9691471435758991112U, (uint64_t)16642385273732487288U,
+ (uint64_t)12173806747523009914U, (uint64_t)13142722756877876849U,
+ (uint64_t)8370377548305121979U, (uint64_t)17988526053752025426U, (uint64_t)4818750752684100334U,
+ (uint64_t)5669241919350361655U, (uint64_t)4964810303238518540U, (uint64_t)16709712747671533191U,
+ (uint64_t)4461414404267448242U, (uint64_t)3971798785139504238U, (uint64_t)6276818948740422136U,
+ (uint64_t)1426735892164275762U, (uint64_t)7943622674892418919U, (uint64_t)9864274225563929680U,
+ (uint64_t)57815533745003233U, (uint64_t)10893588105168960233U, (uint64_t)15739162732907069535U,
+ (uint64_t)3923866849462073470U, (uint64_t)12279826158399226875U, (uint64_t)1533015761334846582U,
+ (uint64_t)15860156818568437510U, (uint64_t)8252625373831297988U, (uint64_t)9666953804812706358U,
+ (uint64_t)8767785238646914634U, (uint64_t)14382179044941403551U,
+ (uint64_t)10401039907264254245U, (uint64_t)8584860003763157350U, (uint64_t)3120462679504470266U,
+ (uint64_t)8670255778748340069U, (uint64_t)5313789577940369984U, (uint64_t)16977072364454789224U,
+ (uint64_t)12199578693972188324U, (uint64_t)18211098771672599237U,
+ (uint64_t)12868831556008795030U, (uint64_t)5310155061431048194U,
+ (uint64_t)18114153238435112606U, (uint64_t)14482365809278304512U,
+ (uint64_t)12520721662723001511U, (uint64_t)405943624021143002U, (uint64_t)8146944101507657423U,
+ (uint64_t)181739317780393495U, (uint64_t)81743892273670099U, (uint64_t)14759561962550473930U,
+ (uint64_t)4592623849546992939U, (uint64_t)6916440441743449719U, (uint64_t)1304610503530809833U,
+ (uint64_t)5464930909232486441U, (uint64_t)15414883617496224671U, (uint64_t)8129283345256790U,
+ (uint64_t)18294252198413739489U, (uint64_t)17394115281884857288U,
+ (uint64_t)7808348415224731235U, (uint64_t)13195566655747230608U, (uint64_t)8568194219353949094U,
+ (uint64_t)15329813048672122440U, (uint64_t)9604275495885785744U, (uint64_t)1577712551205219835U,
+ (uint64_t)15964209008022052790U, (uint64_t)15087297920782098160U,
+ (uint64_t)3946031512438511898U, (uint64_t)10050061168984440631U,
+ (uint64_t)11382452014533138316U, (uint64_t)6313670788911952792U,
+ (uint64_t)12015989229696164014U, (uint64_t)5946702628076168852U, (uint64_t)5219995658774362841U,
+ (uint64_t)12230141881068377972U, (uint64_t)12361195202673441956U,
+ (uint64_t)4732862275653856711U, (uint64_t)17221430380805252370U,
+ (uint64_t)15397525953897375810U, (uint64_t)16557437297239563045U,
+ (uint64_t)10101683801868971351U, (uint64_t)1402611372245592868U, (uint64_t)1931806383735563658U,
+ (uint64_t)10991705207471512479U, (uint64_t)861333583207471392U, (uint64_t)15207766844626322355U,
+ (uint64_t)9224628129811432393U, (uint64_t)3497069567089055613U, (uint64_t)11956632757898590316U,
+ (uint64_t)8733729372586312960U, (uint64_t)18091521051714930927U, (uint64_t)77582787724373283U,
+ (uint64_t)9922437373519669237U, (uint64_t)3079321456325704615U, (uint64_t)12171198408512478457U,
+ (uint64_t)17179130884012147596U, (uint64_t)6839115479620367181U, (uint64_t)4421032569964105406U,
+ (uint64_t)10353331468657256053U, (uint64_t)17400988720335968824U,
+ (uint64_t)17138855889417480540U, (uint64_t)4507980080381370611U,
+ (uint64_t)10703175719793781886U, (uint64_t)12598516658725890426U,
+ (uint64_t)8353463412173898932U, (uint64_t)17703029389228422404U, (uint64_t)9313111267107226233U,
+ (uint64_t)5441322942995154196U, (uint64_t)8952817660034465484U, (uint64_t)17571113341183703118U,
+ (uint64_t)7375087953801067019U, (uint64_t)13381466302076453648U, (uint64_t)3218165271423914596U,
+ (uint64_t)16956372157249382685U, (uint64_t)509080090049418841U, (uint64_t)13374233893294084913U,
+ (uint64_t)2988537624204297086U, (uint64_t)4979195832939384620U, (uint64_t)3803931594068976394U,
+ (uint64_t)10731535883829627646U, (uint64_t)12954845047607194278U,
+ (uint64_t)10494298062560667399U, (uint64_t)4967351022190213065U,
+ (uint64_t)13391917938145756456U, (uint64_t)951370484866918160U, (uint64_t)13531334179067685307U,
+ (uint64_t)12868421357919390599U, (uint64_t)15918857042998130258U,
+ (uint64_t)17769743831936974016U, (uint64_t)7137921979260368809U,
+ (uint64_t)12461369180685892062U, (uint64_t)827476514081935199U, (uint64_t)15107282134224767230U,
+ (uint64_t)10084765752802805748U, (uint64_t)3303739059392464407U,
+ (uint64_t)17859532612136591428U, (uint64_t)10949414770405040164U,
+ (uint64_t)12838613589371008785U, (uint64_t)5554397169231540728U,
+ (uint64_t)18375114572169624408U, (uint64_t)15649286703242390139U,
+ (uint64_t)2957281557463706877U, (uint64_t)14000350446219393213U,
+ (uint64_t)14355199721749620351U, (uint64_t)2730856240099299695U,
+ (uint64_t)17528131000714705752U, (uint64_t)2537498525883536360U, (uint64_t)6121058967084509393U,
+ (uint64_t)16897667060435514221U, (uint64_t)12367869599571112440U,
+ (uint64_t)3388831797050807508U, (uint64_t)16791449724090982798U, (uint64_t)2673426123453294928U,
+ (uint64_t)11369313542384405846U, (uint64_t)15641960333586432634U,
+ (uint64_t)15080962589658958379U, (uint64_t)7747943772340226569U, (uint64_t)8075023376199159152U,
+ (uint64_t)8485093027378306528U, (uint64_t)13503706844122243648U, (uint64_t)8401961362938086226U,
+ (uint64_t)8125426002124226402U, (uint64_t)9005399361407785203U, (uint64_t)6847968030066906634U,
+ (uint64_t)11934937736309295197U, (uint64_t)5116750888594772351U, (uint64_t)2817039227179245227U,
+ (uint64_t)17724206901239332980U, (uint64_t)4985702708254058578U, (uint64_t)5786345435756642871U,
+ (uint64_t)17772527414940936938U, (uint64_t)1201320251272957006U,
+ (uint64_t)15787430120324348129U, (uint64_t)6305488781359965661U,
+ (uint64_t)12423900845502858433U, (uint64_t)17485949424202277720U,
+ (uint64_t)2062237315546855852U, (uint64_t)10353639467860902375U, (uint64_t)2315398490451287299U,
+ (uint64_t)15394572894814882621U, (uint64_t)232866113801165640U, (uint64_t)7413443736109338926U,
+ (uint64_t)902719806551551191U, (uint64_t)16568853118619045174U, (uint64_t)14202214862428279177U,
+ (uint64_t)11719595395278861192U, (uint64_t)5890053236389907647U, (uint64_t)9996196494965833627U,
+ (uint64_t)12967056942364782577U, (uint64_t)9034128755157395787U,
+ (uint64_t)17898204904710512655U, (uint64_t)8229373445062993977U,
+ (uint64_t)13580036169519833644U
+ };
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_P256_PrecompTable_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h
new file mode 100644
index 0000000000..ccd18281e4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h
@@ -0,0 +1,53 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Poly1305_128_H
+#define __internal_Hacl_Poly1305_128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Poly1305_128.h"
+#include "libintvector.h"
+/* Prototype only; the body is not in this header. NOTE(review): name suggests it loads input from b into the vec128 accumulator acc -- confirm. */
+void
+Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b);
+/* Prototype only; the body is not in this header. NOTE(review): presumably writes its result through out using the vec128 data at p -- confirm. */
+void
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(
+ Lib_IntVector_Intrinsics_vec128 *out,
+ Lib_IntVector_Intrinsics_vec128 *p);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Poly1305_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h
new file mode 100644
index 0000000000..b26f9dd67d
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h
@@ -0,0 +1,53 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Poly1305_256_H
+#define __internal_Hacl_Poly1305_256_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Poly1305_256.h"
+#include "libintvector.h"
+/* Prototype only; the body is not in this header. NOTE(review): name suggests it loads input from b into the vec256 accumulator acc -- confirm. */
+void
+Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b);
+/* Prototype only; the body is not in this header. NOTE(review): presumably writes its result through out using the vec256 data at p -- confirm. */
+void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(
+ Lib_IntVector_Intrinsics_vec256 *out,
+ Lib_IntVector_Intrinsics_vec256 *p);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Poly1305_256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Spec.h b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h
new file mode 100644
index 0000000000..f717563d04
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h
@@ -0,0 +1,55 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Hacl_Spec_H
+#define __internal_Hacl_Spec_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "../Hacl_Spec.h"
+/* Integer tags 0..2 with the Spec_Cipher_Expansion_ prefix; presumably the valid values of the uint8_t typedef Spec_Cipher_Expansion_impl below -- confirm. */
+#define Spec_Cipher_Expansion_Hacl_CHACHA20 0
+#define Spec_Cipher_Expansion_Vale_AES128 1
+#define Spec_Cipher_Expansion_Vale_AES256 2
+
+typedef uint8_t Spec_Cipher_Expansion_impl;
+/* Integer tags 0..1 with the Spec_Frodo_Params_ prefix; presumably the valid values of the uint8_t typedef Spec_Frodo_Params_frodo_gen_a below -- confirm. */
+#define Spec_Frodo_Params_SHAKE128 0
+#define Spec_Frodo_Params_AES128 1
+
+typedef uint8_t Spec_Frodo_Params_frodo_gen_a;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Hacl_Spec_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/internal/Vale.h b/security/nss/lib/freebl/verified/internal/Vale.h
new file mode 100644
index 0000000000..17af6593c9
--- /dev/null
+++ b/security/nss/lib/freebl/verified/internal/Vale.h
@@ -0,0 +1,185 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2022 INRIA, CMU and Microsoft Corporation
+ * Copyright (c) 2022-2023 HACL* Contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __internal_Vale_H
+#define __internal_Vale_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+/* Externally defined routines (no bodies in this header); each returns uint64_t. NOTE(review): the meaning of the return value and of x0..x3 is not visible here -- check the implementation. */
+extern uint64_t add_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2);
+
+extern uint64_t fadd_e(uint64_t *x0, uint64_t *x1, uint64_t *x2);
+
+extern uint64_t sha256_update(uint32_t *x0, uint8_t *x1, uint64_t x2, uint32_t *x3);
+
+extern uint64_t x64_poly1305(uint8_t *x0, uint8_t *x1, uint64_t x2, uint64_t x3);
+/* Argument-less externs returning uint64_t. NOTE(review): names suggest CPU/OS feature probes (AES-NI, SHA, ADX/BMI2, AVX, ...), with nonzero presumably meaning "supported" -- confirm. */
+extern uint64_t check_aesni(void);
+
+extern uint64_t check_sha(void);
+
+extern uint64_t check_adx_bmi2(void);
+
+extern uint64_t check_avx(void);
+
+extern uint64_t check_avx2(void);
+
+extern uint64_t check_movbe(void);
+
+extern uint64_t check_sse(void);
+
+extern uint64_t check_rdrand(void);
+
+extern uint64_t check_avx512(void);
+
+extern uint64_t check_osxsave(void);
+
+extern uint64_t check_avx_xcr0(void);
+
+extern uint64_t check_avx512_xcr0(void);
+/* gcm128_decrypt_opt and gcm256_decrypt_opt share one 17-argument signature (x0..x16). NOTE(review): the role of each argument is not documented here -- confirm against callers. */
+extern uint64_t
+gcm128_decrypt_opt(
+ uint8_t *x0,
+ uint64_t x1,
+ uint64_t x2,
+ uint8_t *x3,
+ uint8_t *x4,
+ uint8_t *x5,
+ uint8_t *x6,
+ uint8_t *x7,
+ uint8_t *x8,
+ uint64_t x9,
+ uint8_t *x10,
+ uint8_t *x11,
+ uint64_t x12,
+ uint8_t *x13,
+ uint64_t x14,
+ uint8_t *x15,
+ uint8_t *x16);
+
+extern uint64_t
+gcm256_decrypt_opt(
+ uint8_t *x0,
+ uint64_t x1,
+ uint64_t x2,
+ uint8_t *x3,
+ uint8_t *x4,
+ uint8_t *x5,
+ uint8_t *x6,
+ uint8_t *x7,
+ uint8_t *x8,
+ uint64_t x9,
+ uint8_t *x10,
+ uint8_t *x11,
+ uint64_t x12,
+ uint8_t *x13,
+ uint64_t x14,
+ uint8_t *x15,
+ uint8_t *x16);
+/* Externally defined; each returns uint64_t. NOTE(review): presumably x0 is the input key and x1 the expanded-key output for the *_key_expansion routines -- confirm. */
+extern uint64_t aes128_key_expansion(uint8_t *x0, uint8_t *x1);
+
+extern uint64_t aes256_key_expansion(uint8_t *x0, uint8_t *x1);
+
+extern uint64_t
+compute_iv_stdcall(
+ uint8_t *x0,
+ uint64_t x1,
+ uint64_t x2,
+ uint8_t *x3,
+ uint8_t *x4,
+ uint8_t *x5);
+/* gcm128_encrypt_opt and gcm256_encrypt_opt share one 17-argument signature (x0..x16), identical in types to the *_decrypt_opt declarations. NOTE(review): argument roles are not documented here. */
+extern uint64_t
+gcm128_encrypt_opt(
+ uint8_t *x0,
+ uint64_t x1,
+ uint64_t x2,
+ uint8_t *x3,
+ uint8_t *x4,
+ uint8_t *x5,
+ uint8_t *x6,
+ uint8_t *x7,
+ uint8_t *x8,
+ uint64_t x9,
+ uint8_t *x10,
+ uint8_t *x11,
+ uint64_t x12,
+ uint8_t *x13,
+ uint64_t x14,
+ uint8_t *x15,
+ uint8_t *x16);
+
+extern uint64_t
+gcm256_encrypt_opt(
+ uint8_t *x0,
+ uint64_t x1,
+ uint64_t x2,
+ uint8_t *x3,
+ uint8_t *x4,
+ uint8_t *x5,
+ uint8_t *x6,
+ uint8_t *x7,
+ uint8_t *x8,
+ uint64_t x9,
+ uint8_t *x10,
+ uint8_t *x11,
+ uint64_t x12,
+ uint8_t *x13,
+ uint64_t x14,
+ uint8_t *x15,
+ uint8_t *x16);
+/* Externally defined; each returns uint64_t. NOTE(review): the f*_e / cswap2_e names suggest field-arithmetic helpers operating on uint64_t limb arrays -- confirm operand order against the implementation. */
+extern uint64_t aes128_keyhash_init(uint8_t *x0, uint8_t *x1);
+
+extern uint64_t aes256_keyhash_init(uint8_t *x0, uint8_t *x1);
+
+extern uint64_t cswap2_e(uint64_t x0, uint64_t *x1, uint64_t *x2);
+
+extern uint64_t fsqr_e(uint64_t *x0, uint64_t *x1, uint64_t *x2);
+
+extern uint64_t fsqr2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2);
+
+extern uint64_t fmul_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3);
+
+extern uint64_t fmul2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3);
+
+extern uint64_t fmul_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2);
+
+extern uint64_t fsub_e(uint64_t *x0, uint64_t *x1, uint64_t *x2);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __internal_Vale_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h
new file mode 100644
index 0000000000..21d7e1b4f9
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h
@@ -0,0 +1,13 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KRML_ENDIAN_H
+#define __KRML_ENDIAN_H
+
+#ifdef __GNUC__
+#warning "c_endianness.h is deprecated, include lowstar_endianness.h instead"
+#endif
+
+#include "lowstar_endianness.h"
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h
new file mode 100644
index 0000000000..c7a5afb50a
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h
@@ -0,0 +1,89 @@
+#ifndef __FSTAR_INT_H
+#define __FSTAR_INT_H
+
+#include "internal/types.h"
+
+/*
+ * Arithmetic Shift Right operator
+ *
+ * In all C standards, a >> b is implementation-defined when a has a signed
+ * type and a negative value. See e.g. 6.5.7 in
+ * http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf
+ *
+ * GCC, MSVC, and Clang implement a >> b as an arithmetic shift.
+ *
+ * GCC: https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Integers-implementation.html#Integers-implementation
+ * MSVC: https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=vs-2019#right-shifts
+ * Clang: tested that Clang 7, 8 and 9 compile this to an arithmetic shift
+ *
+ * We implement arithmetic shift right simply as >> in these compilers
+ * and bail out in others.
+ */
+
+#if !(defined(_MSC_VER) || defined(__GNUC__) || (defined(__clang__) && (__clang_major__ >= 7)))
+
+/* Fallback for unrecognized compilers: no shift is performed; reports the problem and calls KRML_HOST_EXIT(255). */
+static inline int8_t FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b)
+{
+ (void)a; (void)b; /* operands are intentionally unused in this fallback */
+ KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n");
+ KRML_HOST_EXIT(255);
+ return 0; /* gives a defined result if KRML_HOST_EXIT was overridden and returns */
+}
+
+/* Fallback for unrecognized compilers: no shift is performed; reports the problem and calls KRML_HOST_EXIT(255). */
+static inline int16_t FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b)
+{
+ (void)a; (void)b; /* operands are intentionally unused in this fallback */
+ KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n");
+ KRML_HOST_EXIT(255);
+ return 0; /* gives a defined result if KRML_HOST_EXIT was overridden and returns */
+}
+
+/* Fallback for unrecognized compilers: no shift is performed; reports the problem and calls KRML_HOST_EXIT(255). */
+static inline int32_t FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b)
+{
+ (void)a; (void)b; /* operands are intentionally unused in this fallback */
+ KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n");
+ KRML_HOST_EXIT(255);
+ return 0; /* gives a defined result if KRML_HOST_EXIT was overridden and returns */
+}
+
+/* Fallback for unrecognized compilers: no shift is performed; reports the problem and calls KRML_HOST_EXIT(255). */
+static inline int64_t FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b)
+{
+ (void)a; (void)b; /* operands are intentionally unused in this fallback */
+ KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n");
+ KRML_HOST_EXIT(255);
+ return 0; /* gives a defined result if KRML_HOST_EXIT was overridden and returns */
+}
+
+#else
+
+/* Shift a right by b bits with the compiler's signed >>, which the supported compilers implement as an arithmetic shift. */
+static inline int8_t FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b)
+{
+ return (int8_t)(a >> b);
+}
+
+/* Shift a right by b bits with the compiler's signed >>, which the supported compilers implement as an arithmetic shift. */
+static inline int16_t FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b)
+{
+ return (int16_t)(a >> b);
+}
+
+/* Shift a right by b bits with the compiler's signed >>, which the supported compilers implement as an arithmetic shift. */
+static inline int32_t FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b)
+{
+ return (int32_t)(a >> b);
+}
+
+/* Shift a right by b bits with the compiler's signed >>, which the supported compilers implement as an arithmetic shift. */
+static inline int64_t FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b)
+{
+ return (int64_t)(a >> b);
+}
+
+#endif /* !(defined(_MSC_VER) ... ) */
+
+#endif /* __FSTAR_INT_H */
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h
new file mode 100644
index 0000000000..f55e5f824e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h
@@ -0,0 +1,16 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KRML_BUILTIN_H
+#define __KRML_BUILTIN_H
+
+/* For alloca, when using KaRaMeL's -falloca */
+#if (defined(_WIN32) || defined(_WIN64))
+#include <malloc.h>
+#endif
+
+/* If some globals need to be initialized before the main, then karamel will
+ * generate and try to link last a function with this type: */
+void krmlinit_globals(void);
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h
new file mode 100644
index 0000000000..aeca0ba715
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h
@@ -0,0 +1,27 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KRML_CALLCONV_H
+#define __KRML_CALLCONV_H
+
+/******************************************************************************/
+/* Some macros to ease compatibility (TODO: move to miTLS) */
+/******************************************************************************/
+
+/* We want to generate __cdecl safely without worrying about it being undefined.
+ * When using MSVC, these are always defined. When using MinGW, these are
+ * defined too. They have no meaning for other platforms, so we define them to
+ * be empty macros in other situations. */
+#ifndef _MSC_VER
+#ifndef __cdecl
+#define __cdecl
+#endif
+#ifndef __stdcall
+#define __stdcall
+#endif
+#ifndef __fastcall
+#define __fastcall
+#endif
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h
new file mode 100644
index 0000000000..964d1c52aa
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h
@@ -0,0 +1,32 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_COMPAT_H
+#define KRML_COMPAT_H
+
+#include <inttypes.h>
+
+/* A series of macros that define C implementations of types that are not Low*,
+ * to facilitate porting programs to Low*. */
+
+typedef struct { /* C stand-in for a non-Low* bytes value: a 32-bit length paired with a read-only char pointer */
+ uint32_t length; /* presumably the number of bytes reachable through data -- confirm against users */
+ const char *data; /* NOTE(review): ownership and NUL-termination are not specified in this header */
+} FStar_Bytes_bytes;
+
+typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int,
+ krml_checked_int_t; /* all five names alias int32_t; RETURN_OR below range-checks against INT32_MIN/INT32_MAX before narrowing */
+
+/* Evaluates x as int64_t and returns it as int32_t; an out-of-range value prints a diagnostic and calls KRML_HOST_EXIT(252). */
+#define RETURN_OR(x) \
+ do { \
+ const int64_t krml_wide_ = (x); \
+ if (krml_wide_ > INT32_MAX || krml_wide_ < INT32_MIN) { \
+ KRML_HOST_PRINTF("Prims.{int,nat,pos} integer overflow at %s:%d\n", \
+ __FILE__, __LINE__); \
+ KRML_HOST_EXIT(252); \
+ } \
+ return (int32_t)krml_wide_; \
+ } while (0)
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h
new file mode 100644
index 0000000000..f70006bd3f
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h
@@ -0,0 +1,57 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KRML_DEBUG_H
+#define __KRML_DEBUG_H
+
+#include <inttypes.h>
+
+#include "krml/internal/target.h"
+
+/******************************************************************************/
+/* Debugging helpers - intended only for KaRaMeL developers */
+/******************************************************************************/
+
+/* In support of "-wasm -d force-c": we might need this function to be
+ * forward-declared, because the dependency on WasmSupport appears very late,
+ * after SimplifyWasm, and sadly, after the topological order has been done. */
+void WasmSupport_check_buffer_size(uint32_t s);
+
+/* A series of GCC atrocities to trace function calls (karamel's [-d c-calls]
+ * option). Useful when trying to debug, say, Wasm, to compare traces. */
+/* clang-format off */
+#ifdef __GNUC__ /* KRML_FORMAT(X): selects a printf format string from the static type of X; non-integer types fall back to "%s" */
+#define KRML_FORMAT(X) _Generic((X), \
+ uint8_t : "0x%08" PRIx8, \
+ uint16_t: "0x%08" PRIx16, \
+ uint32_t: "0x%08" PRIx32, \
+ uint64_t: "0x%08" PRIx64, \
+ int8_t : "0x%08" PRIx8, \
+ int16_t : "0x%08" PRIx16, \
+ int32_t : "0x%08" PRIx32, \
+ int64_t : "0x%08" PRIx64, \
+ default : "%s")
+/* KRML_FORMAT_ARG(X): yields X itself for the eight fixed-width integer types, otherwise the string "unknown" (matching KRML_FORMAT's "%s" default). */
+#define KRML_FORMAT_ARG(X) _Generic((X), \
+ uint8_t : X, \
+ uint16_t: X, \
+ uint32_t: X, \
+ uint64_t: X, \
+ int8_t : X, \
+ int16_t : X, \
+ int32_t : X, \
+ int64_t : X, \
+ default : "unknown")
+/* clang-format on */
+/* KRML_DEBUG_RETURN(X): evaluates X once into _ret, prints "returning: " and the formatted value via KRML_HOST_PRINTF, then yields _ret. */
+#define KRML_DEBUG_RETURN(X) \
+ ({ \
+ __auto_type _ret = (X); \
+ KRML_HOST_PRINTF("returning: "); \
+ KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \
+ KRML_HOST_PRINTF(" \n"); \
+ _ret; \
+ })
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h
new file mode 100644
index 0000000000..b63967f480
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h
@@ -0,0 +1,375 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KRML_TARGET_H
+#define __KRML_TARGET_H
+
+#include <assert.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Since KaRaMeL emits the inline keyword unconditionally, we follow the
+ * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this
+ * __inline__ to ensure the code compiles with -std=c90 and earlier. */
+#ifdef __GNUC__
+#define inline __inline__
+#endif
+
+/******************************************************************************/
+/* Macros that KaRaMeL will generate. */
+/******************************************************************************/
+
+/* For "bare" targets that do not have a C stdlib, the user might want to use
+ * [-add-early-include '"mydefinitions.h"'] and override these. */
+#ifndef KRML_HOST_PRINTF
+#define KRML_HOST_PRINTF printf
+#endif
+
+#if ( \
+ (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+ (!(defined KRML_HOST_EPRINTF)))
+#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__)
+#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER)
+#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__)
+#endif
+
+#ifndef KRML_HOST_EXIT
+#define KRML_HOST_EXIT exit
+#endif
+
+#ifndef KRML_HOST_MALLOC
+#define KRML_HOST_MALLOC malloc
+#endif
+
+#ifndef KRML_HOST_CALLOC
+#define KRML_HOST_CALLOC calloc
+#endif
+
+#ifndef KRML_HOST_FREE
+#define KRML_HOST_FREE free
+#endif
+
+#ifndef KRML_HOST_IGNORE
+#define KRML_HOST_IGNORE(x) (void)(x)
+#endif
+
+#ifndef KRML_NOINLINE
+#if defined(_MSC_VER)
+#define KRML_NOINLINE __declspec(noinline)
+#elif defined(__GNUC__)
+#define KRML_NOINLINE __attribute__((noinline, unused))
+#else
+#define KRML_NOINLINE
+#warning "The KRML_NOINLINE macro is not defined for this toolchain!"
+#warning "The compiler may defeat side-channel resistance with optimizations."
+#warning "Please locate target.h and try to fill it out with a suitable definition for this compiler."
+#endif
+#endif
+
+#ifndef KRML_PRE_ALIGN
+#ifdef _MSC_VER
+#define KRML_PRE_ALIGN(X) __declspec(align(X))
+#else
+#define KRML_PRE_ALIGN(X)
+#endif
+#endif
+
+#ifndef KRML_POST_ALIGN
+#ifdef _MSC_VER
+#define KRML_POST_ALIGN(X)
+#else
+#define KRML_POST_ALIGN(X) __attribute__((aligned(X)))
+#endif
+#endif
+
+/* MinGW-W64 does not support C11 aligned_alloc, but it supports
+ * MSVC's _aligned_malloc.
+ */
+#ifndef KRML_ALIGNED_MALLOC
+#ifdef __MINGW32__
+#include <_mingw.h> /* presumably what defines __MINGW64_VERSION_MAJOR - confirm */
+#endif /* __MINGW32__ */
+#if ( \
+ defined(_MSC_VER) || \
+ (defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)))
+#define KRML_ALIGNED_MALLOC(X, Y) _aligned_malloc(Y, X) /* X: alignment, Y: size; argument order is swapped */
+#else
+#define KRML_ALIGNED_MALLOC(X, Y) aligned_alloc(X, Y) /* X: alignment, Y: size */
+#endif /* MSVC or MinGW-W64 */
+#endif /* KRML_ALIGNED_MALLOC */
+
+/* Since aligned allocations with MinGW-W64 are done with
+ * _aligned_malloc (see above), such pointers must be freed with
+ * _aligned_free.
+ */
+#ifndef KRML_ALIGNED_FREE
+#ifdef __MINGW32__
+#include <_mingw.h>
+#endif
+#if ( \
+ defined(_MSC_VER) || \
+ (defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)))
+#define KRML_ALIGNED_FREE(X) _aligned_free(X)
+#else
+#define KRML_ALIGNED_FREE(X) free(X)
+#endif
+#endif
+
+#ifndef KRML_HOST_TIME
+
+#include <time.h>
+
+/* Prims_nat not yet in scope */
+inline static int32_t krml_time(void) /* time(NULL) narrowed to int32_t */
+{
+    time_t now = time(NULL);
+    return (int32_t)now;
+}
+
+#define KRML_HOST_TIME krml_time
+#endif
+
+/* In statement position, exiting is easy. */
+#define KRML_EXIT \
+ do { \
+ KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
+ KRML_HOST_EXIT(254); \
+ } while (0)
+
+/* In expression position, use the comma-operator and a malloc to return an
+ * expression of the right size. KaRaMeL passes t as the parameter to the macro.
+ */
+#define KRML_EABORT(t, msg) \
+ (KRML_HOST_PRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \
+ KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t))))
+
+/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of
+ * *elements*. Do an ugly, run-time check (some of which KaRaMeL can eliminate).
+ */
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
+#define _KRML_CHECK_SIZE_PRAGMA \
+ _Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
+#else
+#define _KRML_CHECK_SIZE_PRAGMA
+#endif
+
+#define KRML_CHECK_SIZE(size_elt, sz) \
+ do { \
+ _KRML_CHECK_SIZE_PRAGMA \
+ if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \
+ KRML_HOST_PRINTF( \
+ "Maximum allocatable size exceeded, aborting before overflow at " \
+ "%s:%d\n", \
+ __FILE__, __LINE__); \
+ KRML_HOST_EXIT(253); \
+ } \
+ } while (0)
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) \
+ _snprintf_s(buf, sz, _TRUNCATE, fmt, arg)
+#else
+#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg)
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
+#define KRML_DEPRECATED(x) __attribute__((deprecated(x)))
+#elif defined(_MSC_VER) && !defined(__GNUC__)
+#define KRML_DEPRECATED(x) __declspec(deprecated(x))
+#else
+/* GCC < 4.5 lacks deprecated(msg); any other compiler also gets a no-op. */
+#define KRML_DEPRECATED(x)
+#endif
+
+/* Macros for prettier unrolling of loops */
+#define KRML_LOOP1(i, n, x) \
+ { \
+ x \
+ i += n; \
+ (void)i; \
+ }
+
+#define KRML_LOOP2(i, n, x) \
+ KRML_LOOP1(i, n, x) \
+ KRML_LOOP1(i, n, x)
+
+#define KRML_LOOP3(i, n, x) \
+ KRML_LOOP2(i, n, x) \
+ KRML_LOOP1(i, n, x)
+
+#define KRML_LOOP4(i, n, x) \
+ KRML_LOOP2(i, n, x) \
+ KRML_LOOP2(i, n, x)
+
+#define KRML_LOOP5(i, n, x) \
+ KRML_LOOP4(i, n, x) \
+ KRML_LOOP1(i, n, x)
+
+#define KRML_LOOP6(i, n, x) \
+ KRML_LOOP4(i, n, x) \
+ KRML_LOOP2(i, n, x)
+
+#define KRML_LOOP7(i, n, x) \
+ KRML_LOOP4(i, n, x) \
+ KRML_LOOP3(i, n, x)
+
+#define KRML_LOOP8(i, n, x) \
+ KRML_LOOP4(i, n, x) \
+ KRML_LOOP4(i, n, x)
+
+#define KRML_LOOP9(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP1(i, n, x)
+
+#define KRML_LOOP10(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP2(i, n, x)
+
+#define KRML_LOOP11(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP3(i, n, x)
+
+#define KRML_LOOP12(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP4(i, n, x)
+
+#define KRML_LOOP13(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP5(i, n, x)
+
+#define KRML_LOOP14(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP6(i, n, x)
+
+#define KRML_LOOP15(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP7(i, n, x)
+
+#define KRML_LOOP16(i, n, x) \
+ KRML_LOOP8(i, n, x) \
+ KRML_LOOP8(i, n, x)
+
+#define KRML_UNROLL_FOR(i, z, n, k, x) \
+ do { \
+ uint32_t i = z; \
+ KRML_LOOP##n(i, k, x) \
+ } while (0)
+
+#define KRML_ACTUAL_FOR(i, z, n, k, x) \
+ do { \
+ for (uint32_t i = z; i < n; i += k) { \
+ x \
+ } \
+ } while (0)
+
+#ifndef KRML_UNROLL_MAX
+#define KRML_UNROLL_MAX 16
+#endif
+
+/* The N in KRML_MAYBE_FORN is the number of loop iterations, i.e. (n - z)/k as evaluated by krml */
+#if 0 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR0(i, z, n, k, x)
+#else
+#define KRML_MAYBE_FOR0(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 1 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 1, k, x)
+#else
+#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 2 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 2, k, x)
+#else
+#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 3 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 3, k, x)
+#else
+#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 4 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 4, k, x)
+#else
+#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 5 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 5, k, x)
+#else
+#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 6 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 6, k, x)
+#else
+#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 7 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 7, k, x)
+#else
+#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 8 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 8, k, x)
+#else
+#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 9 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 9, k, x)
+#else
+#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 10 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 10, k, x)
+#else
+#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 11 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 11, k, x)
+#else
+#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 12 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 12, k, x)
+#else
+#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 13 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 13, k, x)
+#else
+#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 14 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 14, k, x)
+#else
+#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 15 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 15, k, x)
+#else
+#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+
+#if 16 <= KRML_UNROLL_MAX
+#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 16, k, x)
+#else
+#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x)
+#endif
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h
new file mode 100644
index 0000000000..2cf1887adf
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h
@@ -0,0 +1,105 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_TYPES_H
+#define KRML_TYPES_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Types which are either abstract, meaning that have to be implemented in C, or
+ * which are models, meaning that they are swapped out at compile-time for
+ * hand-written C types (in which case they're marked as noextract). */
+
+typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_;
+typedef int64_t FStar_Int64_t, FStar_Int64_t_;
+typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_;
+typedef int32_t FStar_Int32_t, FStar_Int32_t_;
+typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_;
+typedef int16_t FStar_Int16_t, FStar_Int16_t_;
+typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_;
+typedef int8_t FStar_Int8_t, FStar_Int8_t_;
+
+/* Only useful when building krmllib, because it's in the dependency graph of
+ * FStar.Int.Cast. */
+typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_;
+typedef int64_t FStar_Int63_t, FStar_Int63_t_;
+
+typedef double FStar_Float_float;
+typedef uint32_t FStar_Char_char;
+typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write;
+
+typedef void *FStar_Dyn_dyn;
+
+typedef const char *C_String_t, *C_String_t_, *C_Compat_String_t, *C_Compat_String_t_;
+
+typedef int exit_code;
+typedef FILE *channel;
+
+typedef unsigned long long TestLib_cycles;
+
+typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan;
+
+/* Now Prims.string is no longer illegal with the new model in LowStar.Printf;
+ * it's operations that produce Prims_string which are illegal. Bring the
+ * definition into scope by default. */
+typedef const char *Prims_string;
+
+#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__))
+#define IS_MSVC64 1
+#endif
+
+/* This code makes a number of assumptions and should be refined. In particular,
+ * it assumes that: any non-MSVC amd64 compiler supports int128. Maybe it would
+ * be easier to just test for defined(__SIZEOF_INT128__) only? */
+#if (defined(__x86_64__) || \
+ defined(__x86_64) || \
+ defined(__aarch64__) || \
+ (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \
+ defined(__s390x__) || \
+ (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \
+ (defined(__mips__) && defined(__LP64__)) || \
+ (defined(__riscv) && __riscv_xlen == 64) || \
+ defined(__SIZEOF_INT128__))
+#define HAS_INT128 1
+#endif
+
+/* The uint128 type is a special case since we offer several implementations of
+ * it, depending on the compiler and whether the user wants the verified
+ * implementation or not. */
+#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64)
+#include <emmintrin.h>
+typedef __m128i FStar_UInt128_uint128;
+#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128)
+typedef unsigned __int128 FStar_UInt128_uint128;
+#else
+typedef struct FStar_UInt128_uint128_s {
+ uint64_t low;
+ uint64_t high;
+} FStar_UInt128_uint128;
+#endif
+
+/* The former is defined once, here (otherwise, conflicts arise for test-c89).
+ * The latter is for internal use. */
+typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t;
+
+#include "krml/lowstar_endianness.h"
+
+#endif
+
+/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64,
+ * then don't bring the uint128 definitions into scope. */
+#ifndef __FStar_UInt_8_16_32_64_H
+
+#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64)
+#include "fstar_uint128_msvc.h" /* goes with the __m128i typedef above */
+#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128)
+#include "fstar_uint128_gcc64.h" /* goes with the unsigned __int128 typedef above */
+#else
+#include "FStar_UInt128_Verified.h" /* goes with the two-limb struct typedef above */
+#include "fstar_uint128_struct_endianness.h"
+#endif /* uint128 backend selection */
+
+#endif /* !__FStar_UInt_8_16_32_64_H */
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h
new file mode 100644
index 0000000000..b44fa3f75d
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h
@@ -0,0 +1,5 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+/* This file is automatically included when compiling with -wasm -d force-c */
+#define WasmSupport_check_buffer_size(X)
diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h
new file mode 100644
index 0000000000..fa66dc8e81
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h
@@ -0,0 +1,243 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __LOWSTAR_ENDIANNESS_H
+#define __LOWSTAR_ENDIANNESS_H
+
+#include <string.h>
+#include <inttypes.h>
+
+/******************************************************************************/
+/* Implementing C.fst (part 2: endian-ness macros) */
+/******************************************************************************/
+
+/* ... for Linux */
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) || defined(__GLIBC__)
+#include <endian.h>
+
+/* ... for OSX */
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+
+/* ... for Solaris */
+#elif defined(__sun__)
+#include <sys/byteorder.h>
+#define htole64(x) LE_64(x)
+#define le64toh(x) LE_64(x)
+#define htobe64(x) BE_64(x)
+#define be64toh(x) BE_64(x)
+
+#define htole16(x) LE_16(x)
+#define le16toh(x) LE_16(x)
+#define htobe16(x) BE_16(x)
+#define be16toh(x) BE_16(x)
+
+#define htole32(x) LE_32(x)
+#define le32toh(x) LE_32(x)
+#define htobe32(x) BE_32(x)
+#define be32toh(x) BE_32(x)
+
+/* ... for the BSDs */
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
+#include <sys/endian.h>
+#elif defined(__OpenBSD__)
+#include <endian.h>
+
+/* ... for Windows (MSVC)... not targeting XBOX 360! */
+#elif defined(_MSC_VER)
+
+#include <stdlib.h>
+#define htobe16(x) _byteswap_ushort(x)
+#define htole16(x) (x)
+#define be16toh(x) _byteswap_ushort(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) _byteswap_ulong(x)
+#define htole32(x) (x)
+#define be32toh(x) _byteswap_ulong(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) _byteswap_uint64(x)
+#define htole64(x) (x)
+#define be64toh(x) _byteswap_uint64(x)
+#define le64toh(x) (x)
+
+/* ... for Windows (GCC-like, e.g. mingw or clang) */
+#elif (defined(_WIN32) || defined(_WIN64) || defined(__EMSCRIPTEN__)) && \
+ (defined(__GNUC__) || defined(__clang__))
+
+#define htobe16(x) __builtin_bswap16(x)
+#define htole16(x) (x)
+#define be16toh(x) __builtin_bswap16(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) __builtin_bswap32(x)
+#define htole32(x) (x)
+#define be32toh(x) __builtin_bswap32(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) __builtin_bswap64(x)
+#define htole64(x) (x)
+#define be64toh(x) __builtin_bswap64(x)
+#define le64toh(x) (x)
+
+/* ... generic big-endian fallback code */
+/* ... AIX doesn't have __BYTE_ORDER__ (with XLC compiler) & is always big-endian */
+#elif (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || defined(_AIX)
+
+/* byte swapping code inspired by:
+ * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h
+ * */
+
+#define htobe32(x) (x)
+#define be32toh(x) (x)
+#define htole32(x) \
+ (__extension__({ \
+ uint32_t _temp = (x); \
+ ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \
+ ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \
+ }))
+#define le32toh(x) (htole32((x)))
+
+#define htobe64(x) (x)
+#define be64toh(x) (x)
+/* Big-endian host: reverse all 8 bytes by byte-swapping each 32-bit half with
+ * htole32 (htobe32 is the identity here) and exchanging the two halves. */
+#define htole64(x) (__extension__({ \
+ uint64_t __temp = (x); \
+ uint32_t __low = htole32((uint32_t)__temp); \
+ uint32_t __high = htole32((uint32_t)(__temp >> 32)); \
+ (((uint64_t)__low) << 32) | __high; }))
+#define le64toh(x) (htole64((x)))
+
+/* ... generic little-endian fallback code */
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+#define htole32(x) (x)
+#define le32toh(x) (x)
+#define htobe32(x) \
+ (__extension__({ \
+ uint32_t _temp = (x); \
+ ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \
+ ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \
+ }))
+#define be32toh(x) (htobe32((x)))
+
+#define htole64(x) (x)
+#define le64toh(x) (x)
+#define htobe64(x) \
+ (__extension__({ \
+ uint64_t __temp = (x); \
+ uint32_t __low = htobe32((uint32_t)__temp); \
+ uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \
+ (((uint64_t)__low) << 32) | __high; \
+ }))
+#define be64toh(x) (htobe64((x)))
+
+/* ... couldn't determine endian-ness of the target platform */
+#else
+#error "Please define __BYTE_ORDER__!"
+
+#endif /* defined(__linux__) || ... */
+
+/* Loads and stores. These avoid undefined behavior due to unaligned memory
+ * accesses, via memcpy. */
+
+/* Reads 2 bytes at b in host byte order; memcpy keeps unaligned b legal. */
+inline static uint16_t load16(uint8_t *b)
+{
+    uint16_t value;
+    memcpy(&value, b, sizeof value);
+    return value;
+}
+
+/* Reads 4 bytes at b in host byte order; memcpy keeps unaligned b legal. */
+inline static uint32_t load32(uint8_t *b)
+{
+    uint32_t value;
+    memcpy(&value, b, sizeof value);
+    return value;
+}
+
+/* Reads 8 bytes at b in host byte order; memcpy keeps unaligned b legal. */
+inline static uint64_t load64(uint8_t *b)
+{
+    uint64_t value;
+    memcpy(&value, b, sizeof value);
+    return value;
+}
+
+/* Writes the 2 bytes of i to b in host byte order; b may be unaligned. */
+inline static void store16(uint8_t *b, uint16_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Writes the 4 bytes of i to b in host byte order; b may be unaligned. */
+inline static void store32(uint8_t *b, uint32_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Writes the 8 bytes of i to b in host byte order; b may be unaligned. */
+inline static void store64(uint8_t *b, uint64_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Legacy accessors so that this header can serve as an implementation of
+ * C.Endianness */
+#define load16_le(b) (le16toh(load16(b)))
+#define store16_le(b, i) (store16(b, htole16(i)))
+#define load16_be(b) (be16toh(load16(b)))
+#define store16_be(b, i) (store16(b, htobe16(i)))
+
+#define load32_le(b) (le32toh(load32(b)))
+#define store32_le(b, i) (store32(b, htole32(i)))
+#define load32_be(b) (be32toh(load32(b)))
+#define store32_be(b, i) (store32(b, htobe32(i)))
+
+#define load64_le(b) (le64toh(load64(b)))
+#define store64_le(b, i) (store64(b, htole64(i)))
+#define load64_be(b) (be64toh(load64(b)))
+#define store64_be(b, i) (store64(b, htobe64(i)))
+
+/* Co-existence of LowStar.Endianness and FStar.Endianness generates name
+ * conflicts, because of course both insist on having no prefixes. Until a
+ * prefix is added, or until we truly retire FStar.Endianness, solve this issue
+ * in an elegant way. */
+#define load16_le0 load16_le
+#define store16_le0 store16_le
+#define load16_be0 load16_be
+#define store16_be0 store16_be
+
+#define load32_le0 load32_le
+#define store32_le0 store32_le
+#define load32_be0 load32_be
+#define store32_be0 store32_be
+
+#define load64_le0 load64_le
+#define store64_le0 store64_le
+#define load64_be0 load64_be
+#define store64_be0 store64_be
+
+#define load128_le0 load128_le
+#define store128_le0 store128_le
+#define load128_be0 load128_be
+#define store128_be0 store128_be
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/include/krmllib.h b/security/nss/lib/freebl/verified/karamel/include/krmllib.h
new file mode 100644
index 0000000000..1f461f351c
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/include/krmllib.h
@@ -0,0 +1,28 @@
+#ifndef __KRMLLIB_H
+#define __KRMLLIB_H
+
+/******************************************************************************/
+/* The all-in-one krmllib.h header */
+/******************************************************************************/
+
+/* This is a meta-header that is included by default in KaRaMeL generated
+ * programs. If you wish to have a more lightweight set of headers, or are
+ * targeting an environment where controlling these macros yourself is
+ * important, consider using:
+ *
+ * krml -minimal
+ *
+ * to disable the inclusion of this file (note: this also disables the default
+ * argument "-bundle FStar.*"). You can then include the headers of your choice
+ * one by one, using -add-early-include. */
+
+#include "krml/internal/target.h"
+#include "krml/internal/callconv.h"
+#include "krml/internal/builtin.h"
+#include "krml/internal/debug.h"
+#include "krml/internal/types.h"
+
+#include "krml/lowstar_endianness.h"
+#include "krml/fstar_int.h"
+
+#endif /* __KRMLLIB_H */
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h
new file mode 100644
index 0000000000..b9dcb38027
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#ifndef __FStar_UInt128_H
+#define __FStar_UInt128_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "krml/internal/compat.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/types.h"
+#include "krml/internal/target.h"
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a);
+
+static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+#define __FStar_UInt128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h
new file mode 100644
index 0000000000..e6d872c889
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h
@@ -0,0 +1,328 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#ifndef __FStar_UInt128_Verified_H
+#define __FStar_UInt128_Verified_H
+
+#include "FStar_UInt_8_16_32_64.h"
+#include <inttypes.h>
+#include <stdbool.h>
+#include "krml/internal/types.h"
+#include "krml/internal/target.h"
+
+static inline uint64_t
+FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{ /* Branch-free comparison: evaluates to 1 when a < b (unsigned), else 0. */
+    return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U;
+}
+
+/* Carry/borrow bit for the limb arithmetic below: 1 when a < b, else 0. */
+static inline uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+    return FStar_UInt128_constant_time_carry(a, b);
+}
+
+/* Limb-wise a + b; the carry out of the low limb is added to the high limb. */
+static inline FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    sum.low = a.low + b.low;
+    sum.high = a.high + b.high + FStar_UInt128_carry(sum.low, b.low);
+    return sum;
+}
+
+/* Same computation as FStar_UInt128_add: limb-wise sum with carry, wrapping. */
+static inline FStar_UInt128_uint128 FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    sum.low = a.low + b.low;
+    sum.high = a.high + b.high + FStar_UInt128_carry(sum.low, b.low);
+    return sum;
+}
+
+/* a + b with unsigned wrap-around: limb-wise sum, low carry fed to high. */
+static inline FStar_UInt128_uint128 FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    sum.low = a.low + b.low;
+    sum.high = a.high + b.high + FStar_UInt128_carry(sum.low, b.low);
+    return sum;
+}
+
+/* Limb-wise a - b; a borrow out of the low limb is taken from the high limb. */
+static inline FStar_UInt128_uint128 FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    diff.low = a.low - b.low;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, diff.low);
+    return diff;
+}
+
+/* Same computation as FStar_UInt128_sub: limb-wise difference with borrow. */
+static inline FStar_UInt128_uint128 FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    diff.low = a.low - b.low;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, diff.low);
+    return diff;
+}
+
+/* a - b with unsigned wrap-around: limb-wise difference, borrow from high. */
+static inline FStar_UInt128_uint128 FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    diff.low = a.low - b.low;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, diff.low);
+    return diff;
+}
+
+/* Public entry point for wrapping subtraction; defers to the _impl helper. */
+static inline FStar_UInt128_uint128 FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return FStar_UInt128_sub_mod_impl(a, b);
+}
+
+/* Bitwise AND, applied independently to the high and low limbs. */
+static inline FStar_UInt128_uint128 FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 out;
+    out.high = a.high & b.high;
+    out.low = a.low & b.low;
+    return out;
+}
+
+/* Bitwise XOR, applied independently to the high and low limbs. */
+static inline FStar_UInt128_uint128 FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 out;
+    out.high = a.high ^ b.high;
+    out.low = a.low ^ b.low;
+    return out;
+}
+
+/* Bitwise OR, applied independently to the high and low limbs. */
+static inline FStar_UInt128_uint128 FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 out;
+    out.high = a.high | b.high;
+    out.low = a.low | b.low;
+    return out;
+}
+
+/* Bitwise complement of both limbs. */
+static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+    FStar_UInt128_uint128 out;
+    out.high = ~a.high;
+    out.low = ~a.low;
+    return out;
+}
+
+static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; /* bit width of one uint64_t limb; split point for the shifts below */
+
+/* hi << s plus the top s bits of lo; both shift counts stay below 64 only for 0 < s < 64. */
+static inline uint64_t FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return (lo >> (FStar_UInt128_u32_64 - s)) + (hi << s);
+}
+
+/* Forwards unchanged to FStar_UInt128_add_u64_shift_left. */
+static inline uint64_t FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return FStar_UInt128_add_u64_shift_left(hi, lo, s);
+}
+
+/* Left shift for s < 64. s == 0 returns a as-is, so the helper never sees a
+ * complementary shift count of 64. */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 out;
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    out.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s);
+    out.low = a.low << s;
+    return out;
+}
+
+/* Left shift for s >= 64: the low limb moves into the high limb, low becomes 0. */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 out;
+    out.high = a.low << (s - FStar_UInt128_u32_64);
+    out.low = (uint64_t)0U;
+    return out;
+}
+
+/* Shifts a left by s bits, dispatching on whether s stays within one 64-bit
+ * limb (small) or crosses into the other limb (large). */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s >= FStar_UInt128_u32_64) {
+        return FStar_UInt128_shift_left_large(a, s);
+    }
+    return FStar_UInt128_shift_left_small(a, s);
+}
+
+/* lo >> s plus the bottom s bits of hi moved to the top; both shift counts stay below 64 only for 0 < s < 64. */
+static inline uint64_t FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return (hi << (FStar_UInt128_u32_64 - s)) + (lo >> s);
+}
+
+/* Forwards unchanged to FStar_UInt128_add_u64_shift_right. */
+static inline uint64_t FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return FStar_UInt128_add_u64_shift_right(hi, lo, s);
+}
+
+/* Right shift for s < 64. s == 0 returns a as-is, so the helper never sees a
+ * complementary shift count of 64. */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 out;
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    out.high = a.high >> s;
+    out.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s);
+    return out;
+}
+
+/* Right shift for s >= 64: the high limb moves into the low limb, high becomes 0. */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 out;
+    out.high = (uint64_t)0U;
+    out.low = a.high >> (s - FStar_UInt128_u32_64);
+    return out;
+}
+
+/* Shifts a right by s bits, dispatching on whether s stays within one 64-bit
+ * limb (small) or crosses into the other limb (large). */
+static inline FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s >= FStar_UInt128_u32_64) {
+        return FStar_UInt128_shift_right_large(a, s);
+    }
+    return FStar_UInt128_shift_right_small(a, s);
+}
+
+/* True when both 64-bit words of a and b match. */
+static inline bool
+FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.low != b.low) {
+        return false;
+    }
+    return a.high == b.high;
+}
+
+/* a > b: the high words decide unless they are equal, in which case the low
+ * words are compared. */
+static inline bool
+FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low > b.low;
+}
+
+/* a < b: the high words decide unless they are equal, in which case the low
+ * words are compared. */
+static inline bool
+FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low < b.low;
+}
+
+/* a >= b: the high words decide unless they are equal, in which case the low
+ * words are compared with >=. */
+static inline bool
+FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low >= b.low;
+}
+
+/* a <= b: the high words decide unless they are equal, in which case the low
+ * words are compared with <=. */
+static inline bool
+FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low <= b.low;
+}
+
+/* Equality mask: ANDs the per-word FStar_UInt64_eq_mask results and writes
+ * the same 64-bit mask into both words of the result. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    uint64_t lo_same = FStar_UInt64_eq_mask(a.low, b.low);
+    uint64_t hi_same = FStar_UInt64_eq_mask(a.high, b.high);
+    uint64_t both = lo_same & hi_same;
+    FStar_UInt128_uint128 mask;
+
+    mask.low = both;
+    mask.high = both;
+    return mask;
+}
+
+/* Greater-or-equal mask built from the 64-bit mask helpers:
+ * (high >= and high not equal) OR (high equal and low >=). The same 64-bit
+ * mask is written into both words of the result. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    uint64_t hi_ge = FStar_UInt64_gte_mask(a.high, b.high);
+    uint64_t hi_eq = FStar_UInt64_eq_mask(a.high, b.high);
+    uint64_t lo_ge = FStar_UInt64_gte_mask(a.low, b.low);
+    uint64_t combined = (hi_ge & ~hi_eq) | (hi_eq & lo_ge);
+    FStar_UInt128_uint128 mask;
+
+    mask.low = combined;
+    mask.high = combined;
+    return mask;
+}
+
+/* Widens a 64-bit value: it becomes the low word, the high word is zero. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+    FStar_UInt128_uint128 widened;
+
+    widened.high = (uint64_t)0U;
+    widened.low = a;
+    return widened;
+}
+
+/* Narrows to 64 bits by returning the low word; the high word is dropped. */
+static inline uint64_t
+FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    uint64_t low_word = a.low;
+    return low_word;
+}
+
+/* Keeps only the low 32 bits of a. */
+static inline uint64_t
+FStar_UInt128_u64_mod_32(uint64_t a)
+{
+    const uint64_t low32 = (uint64_t)0xffffffffU;
+    return a & low32;
+}
+
+static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
+
+/* Returns lo + (hi << 32), with the usual uint64_t wrap-around. */
+static inline uint64_t
+FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* 64-bit by 32-bit multiplication into a 128-bit result, computed from the
+ * two 32-bit halves of x so that no intermediate product needs more than
+ * 64 bits. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+    uint64_t y64 = (uint64_t)y;
+    /* low half of x times y */
+    uint64_t lo_prod = FStar_UInt128_u64_mod_32(x) * y64;
+    /* high half of x times y, plus what spilled out of lo_prod */
+    uint64_t mid = (x >> FStar_UInt128_u32_32) * y64 + (lo_prod >> FStar_UInt128_u32_32);
+    FStar_UInt128_uint128 product;
+
+    product.low = FStar_UInt128_u32_combine(mid, FStar_UInt128_u64_mod_32(lo_prod));
+    product.high = mid >> FStar_UInt128_u32_32;
+    return product;
+}
+
+/* Returns lo + (hi << FStar_UInt128_u32_32), with uint64_t wrap-around. */
+static inline uint64_t
+FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* Full 64x64 -> 128-bit multiplication built from four 32x32 partial
+ * products, so every intermediate value fits in a uint64_t. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+    uint64_t x_lo = FStar_UInt128_u64_mod_32(x);
+    uint64_t x_hi = x >> FStar_UInt128_u32_32;
+    uint64_t y_lo = FStar_UInt128_u64_mod_32(y);
+    uint64_t y_hi = y >> FStar_UInt128_u32_32;
+    uint64_t lo_lo = x_lo * y_lo;
+    /* x_hi*y_lo plus the upper half of lo_lo */
+    uint64_t cross_a = x_hi * y_lo + (lo_lo >> FStar_UInt128_u32_32);
+    /* x_lo*y_hi plus the lower half of cross_a */
+    uint64_t cross_b = x_lo * y_hi + FStar_UInt128_u64_mod_32(cross_a);
+    FStar_UInt128_uint128 product;
+
+    product.low = FStar_UInt128_u32_combine_(cross_b, FStar_UInt128_u64_mod_32(lo_lo));
+    product.high = x_hi * y_hi +
+                   (cross_a >> FStar_UInt128_u32_32) +
+                   (cross_b >> FStar_UInt128_u32_32);
+    return product;
+}
+
+#define __FStar_UInt128_Verified_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h
new file mode 100644
index 0000000000..876cf84226
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h
@@ -0,0 +1,219 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#ifndef __FStar_UInt_8_16_32_64_H
+#define __FStar_UInt_8_16_32_64_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "krml/internal/compat.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/types.h"
+#include "krml/internal/target.h"
+
+extern krml_checked_int_t FStar_UInt64_n;
+
+extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee);
+
+extern krml_checked_int_t FStar_UInt64___proj__Mk__item__v(uint64_t projectee);
+
+extern krml_checked_int_t FStar_UInt64_v(uint64_t x);
+
+extern uint64_t FStar_UInt64_uint_to_t(krml_checked_int_t x);
+
+extern uint64_t FStar_UInt64_zero;
+
+extern uint64_t FStar_UInt64_one;
+
+extern uint64_t FStar_UInt64_minus(uint64_t a);
+
+extern uint32_t FStar_UInt64_n_minus_one;
+
+/* Branch-free equality mask: yields all ones when a == b and 0 otherwise. */
+static KRML_NOINLINE uint64_t
+FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a ^ b; /* zero exactly when a == b */
+ uint64_t minus_x = ~x + (uint64_t)1U; /* two's-complement negation of x */
+ uint64_t x_or_minus_x = x | minus_x; /* top bit set iff x != 0 */
+ uint64_t xnx = x_or_minus_x >> (uint32_t)63U; /* 1 if a != b, 0 if a == b */
+ return xnx - (uint64_t)1U; /* 0 - 1 wraps to all ones; 1 - 1 is 0 */
+}
+
+/* Branch-free comparison mask: yields all ones when a >= b and 0 when
+ * a < b. */
+static KRML_NOINLINE uint64_t
+FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a;
+ uint64_t y = b;
+ uint64_t x_xor_y = x ^ y;
+ uint64_t x_sub_y = x - y; /* wraps when x < y */
+ uint64_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint64_t q = x_xor_y | x_sub_y_xor_y;
+ uint64_t x_xor_q = x ^ q; /* top bit is the borrow of x - y: set iff x < y */
+ uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; /* 1 if a < b, else 0 */
+ return x_xor_q_ - (uint64_t)1U; /* 0 - 1 wraps to all ones; 1 - 1 is 0 */
+}
+
+extern Prims_string FStar_UInt64_to_string(uint64_t uu___);
+
+extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu___);
+
+extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu___);
+
+extern uint64_t FStar_UInt64_of_string(Prims_string uu___);
+
+extern krml_checked_int_t FStar_UInt32_n;
+
+extern bool FStar_UInt32_uu___is_Mk(uint32_t projectee);
+
+extern krml_checked_int_t FStar_UInt32___proj__Mk__item__v(uint32_t projectee);
+
+extern krml_checked_int_t FStar_UInt32_v(uint32_t x);
+
+extern uint32_t FStar_UInt32_uint_to_t(krml_checked_int_t x);
+
+extern uint32_t FStar_UInt32_zero;
+
+extern uint32_t FStar_UInt32_one;
+
+extern uint32_t FStar_UInt32_minus(uint32_t a);
+
+extern uint32_t FStar_UInt32_n_minus_one;
+
+/* Branch-free equality mask: yields all ones when a == b and 0 otherwise. */
+static KRML_NOINLINE uint32_t
+FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
+{
+ uint32_t x = a ^ b; /* zero exactly when a == b */
+ uint32_t minus_x = ~x + (uint32_t)1U; /* two's-complement negation of x */
+ uint32_t x_or_minus_x = x | minus_x; /* top bit set iff x != 0 */
+ uint32_t xnx = x_or_minus_x >> (uint32_t)31U; /* 1 if a != b, 0 if a == b */
+ return xnx - (uint32_t)1U; /* 0 - 1 wraps to all ones; 1 - 1 is 0 */
+}
+
+/* Branch-free comparison mask: yields all ones when a >= b and 0 when
+ * a < b. */
+static KRML_NOINLINE uint32_t
+FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
+{
+ uint32_t x = a;
+ uint32_t y = b;
+ uint32_t x_xor_y = x ^ y;
+ uint32_t x_sub_y = x - y; /* wraps when x < y */
+ uint32_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint32_t q = x_xor_y | x_sub_y_xor_y;
+ uint32_t x_xor_q = x ^ q; /* top bit is the borrow of x - y: set iff x < y */
+ uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; /* 1 if a < b, else 0 */
+ return x_xor_q_ - (uint32_t)1U; /* 0 - 1 wraps to all ones; 1 - 1 is 0 */
+}
+
+extern Prims_string FStar_UInt32_to_string(uint32_t uu___);
+
+extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu___);
+
+extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu___);
+
+extern uint32_t FStar_UInt32_of_string(Prims_string uu___);
+
+extern krml_checked_int_t FStar_UInt16_n;
+
+extern bool FStar_UInt16_uu___is_Mk(uint16_t projectee);
+
+extern krml_checked_int_t FStar_UInt16___proj__Mk__item__v(uint16_t projectee);
+
+extern krml_checked_int_t FStar_UInt16_v(uint16_t x);
+
+extern uint16_t FStar_UInt16_uint_to_t(krml_checked_int_t x);
+
+extern uint16_t FStar_UInt16_zero;
+
+extern uint16_t FStar_UInt16_one;
+
+extern uint16_t FStar_UInt16_minus(uint16_t a);
+
+extern uint32_t FStar_UInt16_n_minus_one;
+
+/* Branch-free equality mask: yields 0xFFFF when a == b and 0 otherwise.
+ * Each step is stored back into a uint16_t, which truncates the result of
+ * the integer-promoted arithmetic to 16 bits before the next step. */
+static KRML_NOINLINE uint16_t
+FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
+{
+ uint16_t x = a ^ b; /* zero exactly when a == b */
+ uint16_t minus_x = ~x + (uint16_t)1U; /* 16-bit two's-complement negation */
+ uint16_t x_or_minus_x = x | minus_x; /* bit 15 set iff x != 0 */
+ uint16_t xnx = x_or_minus_x >> (uint32_t)15U; /* 1 if a != b, 0 if a == b */
+ return xnx - (uint16_t)1U; /* truncated on return: 0xFFFF or 0 */
+}
+
+/* Branch-free comparison mask: yields 0xFFFF when a >= b and 0 when a < b.
+ * Each step is stored back into a uint16_t, which truncates the result of
+ * the integer-promoted arithmetic to 16 bits before the next step. */
+static KRML_NOINLINE uint16_t
+FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
+{
+ uint16_t x = a;
+ uint16_t y = b;
+ uint16_t x_xor_y = x ^ y;
+ uint16_t x_sub_y = x - y; /* truncated to 16 bits, so it wraps when x < y */
+ uint16_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint16_t q = x_xor_y | x_sub_y_xor_y;
+ uint16_t x_xor_q = x ^ q; /* bit 15 is the borrow of x - y: set iff x < y */
+ uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; /* 1 if a < b, else 0 */
+ return x_xor_q_ - (uint16_t)1U; /* truncated on return: 0xFFFF or 0 */
+}
+
+extern Prims_string FStar_UInt16_to_string(uint16_t uu___);
+
+extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu___);
+
+extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu___);
+
+extern uint16_t FStar_UInt16_of_string(Prims_string uu___);
+
+extern krml_checked_int_t FStar_UInt8_n;
+
+extern bool FStar_UInt8_uu___is_Mk(uint8_t projectee);
+
+extern krml_checked_int_t FStar_UInt8___proj__Mk__item__v(uint8_t projectee);
+
+extern krml_checked_int_t FStar_UInt8_v(uint8_t x);
+
+extern uint8_t FStar_UInt8_uint_to_t(krml_checked_int_t x);
+
+extern uint8_t FStar_UInt8_zero;
+
+extern uint8_t FStar_UInt8_one;
+
+extern uint8_t FStar_UInt8_minus(uint8_t a);
+
+extern uint32_t FStar_UInt8_n_minus_one;
+
+/* Branch-free equality mask: yields 0xFF when a == b and 0 otherwise.
+ * Each step is stored back into a uint8_t, which truncates the result of
+ * the integer-promoted arithmetic to 8 bits before the next step. */
+static KRML_NOINLINE uint8_t
+FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
+{
+ uint8_t x = a ^ b; /* zero exactly when a == b */
+ uint8_t minus_x = ~x + (uint8_t)1U; /* 8-bit two's-complement negation */
+ uint8_t x_or_minus_x = x | minus_x; /* bit 7 set iff x != 0 */
+ uint8_t xnx = x_or_minus_x >> (uint32_t)7U; /* 1 if a != b, 0 if a == b */
+ return xnx - (uint8_t)1U; /* truncated on return: 0xFF or 0 */
+}
+
+/* Branch-free comparison mask: yields 0xFF when a >= b and 0 when a < b.
+ * Each step is stored back into a uint8_t, which truncates the result of
+ * the integer-promoted arithmetic to 8 bits before the next step. */
+static KRML_NOINLINE uint8_t
+FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
+{
+ uint8_t x = a;
+ uint8_t y = b;
+ uint8_t x_xor_y = x ^ y;
+ uint8_t x_sub_y = x - y; /* truncated to 8 bits, so it wraps when x < y */
+ uint8_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint8_t q = x_xor_y | x_sub_y_xor_y;
+ uint8_t x_xor_q = x ^ q; /* bit 7 is the borrow of x - y: set iff x < y */
+ uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; /* 1 if a < b, else 0 */
+ return x_xor_q_ - (uint8_t)1U; /* truncated on return: 0xFF or 0 */
+}
+
+extern Prims_string FStar_UInt8_to_string(uint8_t uu___);
+
+extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu___);
+
+extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu___);
+
+extern uint8_t FStar_UInt8_of_string(Prims_string uu___);
+
+typedef uint8_t FStar_UInt8_byte;
+
+#define __FStar_UInt_8_16_32_64_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h
new file mode 100644
index 0000000000..a710d23dc0
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#ifndef __LowStar_Endianness_H
+#define __LowStar_Endianness_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "krml/internal/compat.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/types.h"
+#include "krml/internal/target.h"
+
+/* Forward declarations only: this header supplies no bodies for the 128-bit
+ * load/store helpers. Bodies with these names appear in
+ * fstar_uint128_gcc64.h, fstar_uint128_msvc.h and
+ * fstar_uint128_struct_endianness.h. */
+static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1);
+
+static inline FStar_UInt128_uint128 load128_le(uint8_t *x0);
+
+static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1);
+
+static inline FStar_UInt128_uint128 load128_be(uint8_t *x0);
+
+#define __LowStar_Endianness_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic
new file mode 100644
index 0000000000..d7a1fdfd70
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic
@@ -0,0 +1,56 @@
+# A basic Makefile that KaRaMeL copies in the output directory; this is not
+# guaranteed to work and will only work well for very simple projects. This
+# Makefile uses:
+# - the custom C files passed to your krml invocation
+# - the custom C flags passed to your krml invocation
+# - the -o option passed to your krml invocation
+
+# Pulls in USER_TARGET, USER_CFLAGS, USER_C_FILES, ALL_C_FILES and ALL_H_FILES.
+include Makefile.include
+
+# Abort early when KRML_HOME is empty: the include paths and the .exe link
+# line below are built from it.
+ifeq (,$(KRML_HOME))
+ $(error please define KRML_HOME to point to the root of your KaRaMeL git checkout)
+endif
+
+CFLAGS += -I. -I $(KRML_HOME)/include -I $(KRML_HOME)/krmllib/dist/minimal
+CFLAGS += -Wall -Wextra -Werror -std=c11 -Wno-unused-variable \
+ -Wno-unknown-warning-option -Wno-unused-but-set-variable -Wno-unused-function \
+ -Wno-unused-parameter -Wno-infinite-recursion \
+ -g -fwrapv -D_BSD_SOURCE -D_DEFAULT_SOURCE
+ifeq ($(OS),Windows_NT)
+CFLAGS += -D__USE_MINGW_ANSI_STDIO
+else
+CFLAGS += -fPIC
+endif
+# User flags are appended last.
+CFLAGS += $(USER_CFLAGS)
+
+SOURCES += $(ALL_C_FILES) $(USER_C_FILES)
+# Anything listed in BLACKLIST is dropped from the source list.
+ifneq (,$(BLACKLIST))
+ SOURCES := $(filter-out $(BLACKLIST),$(SOURCES))
+endif
+OBJS += $(patsubst %.c,%.o,$(SOURCES))
+
+all: $(USER_TARGET)
+
+# Only adds the objects as prerequisites; the recipe comes from whichever
+# pattern rule below (%.a, %.exe or %.so) matches the target name.
+$(USER_TARGET): $(OBJS)
+
+AR ?= ar
+
+%.a:
+ $(AR) cr $@ $^
+
+# Executables are additionally linked against the generic libkrmllib.a.
+%.exe:
+ $(CC) $(CFLAGS) -o $@ $^ $(KRML_HOME)/krmllib/dist/generic/libkrmllib.a
+
+%.so:
+ $(CC) $(CFLAGS) -shared -o $@ $^
+
+# Writes a dependency file per source from the compiler's -MM -MG output; the
+# sed step rewrites "name.o:" so that both the object and the .d file itself
+# are listed as targets of the generated rule.
+%.d: %.c
+ @set -e; rm -f $@; \
+ $(CC) -MM -MG $(CFLAGS) $< > $@.$$$$; \
+ sed 's,\($(notdir $*)\)\.o[ :]*,$(dir $@)\1.o $@ : ,g' < $@.$$$$ > $@; \
+ rm -f $@.$$$$
+
+include $(patsubst %.c,%.d,$(SOURCES))
+
+clean:
+ rm -rf *.o *.d $(USER_TARGET)
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include
new file mode 100644
index 0000000000..ad53217184
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include
@@ -0,0 +1,5 @@
+# Settings read by Makefile.basic, which includes this file.
+# USER_TARGET is what the `all` rule builds.
+USER_TARGET=libkrmllib.a
+# Appended to CFLAGS after the built-in flags.
+USER_CFLAGS=
+# USER_C_FILES and ALL_C_FILES together form SOURCES.
+USER_C_FILES=fstar_uint128.c
+ALL_C_FILES=
+ALL_H_FILES=FStar_UInt128.h FStar_UInt_8_16_32_64.h LowStar_Endianness.h
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h
new file mode 100644
index 0000000000..33cff6b6d4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h
@@ -0,0 +1,225 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+/******************************************************************************/
+/* Machine integers (128-bit arithmetic) */
+/******************************************************************************/
+
+/* This header contains two things.
+ *
+ * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and
+ * Clang, i.e. all the operations from FStar.UInt128.
+ *
+ * Second, 128-bit operations from C.Endianness (or LowStar.Endianness),
+ * suitable for any compiler and platform (via a series of ifdefs). This second
+ * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to
+ * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code.
+ * */
+
+/* This file is used for both the minimal and generic krmllib distributions. As
+ * such, it assumes that the machine integers have been bundled the exact same
+ * way in both cases. */
+
+#ifndef FSTAR_UINT128_GCC64
+#define FSTAR_UINT128_GCC64
+
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+#include "LowStar_Endianness.h"
+
+/* GCC + using native unsigned __int128 support */
+
+/* Builds a 128-bit value from 16 bytes at b: load64_le(b) supplies bits
+ * 0..63 and load64_le(b + 8) supplies bits 64..127. */
+inline static uint128_t
+load128_le(uint8_t *b)
+{
+    uint128_t upper = (uint128_t)load64_le(b + 8);
+    uint128_t lower = (uint128_t)load64_le(b);
+    return (upper << 64) | lower;
+}
+
+/* Writes n to 16 bytes at b: bits 0..63 go through store64_le at b, bits
+ * 64..127 through store64_le at b + 8. */
+inline static void
+store128_le(uint8_t *b, uint128_t n)
+{
+    uint64_t lower = (uint64_t)n;
+    uint64_t upper = (uint64_t)(n >> 64);
+
+    store64_le(b, lower);
+    store64_le(b + 8, upper);
+}
+
+/* Builds a 128-bit value from 16 bytes at b: load64_be(b) supplies bits
+ * 64..127 and load64_be(b + 8) supplies bits 0..63. */
+inline static uint128_t
+load128_be(uint8_t *b)
+{
+    uint128_t lower = (uint128_t)load64_be(b + 8);
+    uint128_t upper = (uint128_t)load64_be(b);
+    return (upper << 64) | lower;
+}
+
+/* Writes n to 16 bytes at b: bits 64..127 go through store64_be at b, bits
+ * 0..63 through store64_be at b + 8. */
+inline static void
+store128_be(uint8_t *b, uint128_t n)
+{
+    uint64_t upper = (uint64_t)(n >> 64);
+    uint64_t lower = (uint64_t)n;
+
+    store64_be(b, upper);
+    store64_be(b + 8, lower);
+}
+
+/* Addition via the native `+` on uint128_t. */
+inline static uint128_t
+FStar_UInt128_add(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Multiplication via the native `*` on uint128_t. */
+inline static uint128_t
+FStar_UInt128_mul(uint128_t x, uint128_t y)
+{
+    uint128_t product = x * y;
+    return product;
+}
+
+/* Addition via the native `+` on uint128_t; same expression as
+ * FStar_UInt128_add. */
+inline static uint128_t
+FStar_UInt128_add_mod(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Subtraction via the native `-` on uint128_t. */
+inline static uint128_t
+FStar_UInt128_sub(uint128_t x, uint128_t y)
+{
+    uint128_t difference = x - y;
+    return difference;
+}
+
+/* Subtraction via the native `-` on uint128_t; same expression as
+ * FStar_UInt128_sub. */
+inline static uint128_t
+FStar_UInt128_sub_mod(uint128_t x, uint128_t y)
+{
+    uint128_t difference = x - y;
+    return difference;
+}
+
+/* Bitwise AND of the two operands. */
+inline static uint128_t
+FStar_UInt128_logand(uint128_t x, uint128_t y)
+{
+    uint128_t bits = x & y;
+    return bits;
+}
+
+/* Bitwise OR of the two operands. */
+inline static uint128_t
+FStar_UInt128_logor(uint128_t x, uint128_t y)
+{
+    uint128_t bits = x | y;
+    return bits;
+}
+
+/* Bitwise XOR of the two operands. */
+inline static uint128_t
+FStar_UInt128_logxor(uint128_t x, uint128_t y)
+{
+    uint128_t bits = x ^ y;
+    return bits;
+}
+
+/* Bitwise complement of x. */
+inline static uint128_t
+FStar_UInt128_lognot(uint128_t x)
+{
+    uint128_t flipped = ~x;
+    return flipped;
+}
+
+/* Left shift by y bits using the native `<<` on uint128_t. */
+inline static uint128_t
+FStar_UInt128_shift_left(uint128_t x, uint32_t y)
+{
+    uint128_t shifted = x << y;
+    return shifted;
+}
+
+/* Right shift by y bits using the native `>>` on uint128_t. */
+inline static uint128_t
+FStar_UInt128_shift_right(uint128_t x, uint32_t y)
+{
+    uint128_t shifted = x >> y;
+    return shifted;
+}
+
+/* Widens a 64-bit value to uint128_t with a cast. */
+inline static uint128_t
+FStar_UInt128_uint64_to_uint128(uint64_t x)
+{
+    uint128_t widened = (uint128_t)x;
+    return widened;
+}
+
+/* Narrows to 64 bits with a cast, keeping the low 64 bits. */
+inline static uint64_t
+FStar_UInt128_uint128_to_uint64(uint128_t x)
+{
+    uint64_t narrowed = (uint64_t)x;
+    return narrowed;
+}
+
+/* 64x64 multiplication: x is widened to uint128_t first, so the product is
+ * computed in 128 bits. */
+inline static uint128_t
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+    uint128_t wide_x = (uint128_t)x;
+    return wide_x * y;
+}
+
+/* Equality mask: ANDs FStar_UInt64_eq_mask over the upper and lower 64-bit
+ * halves, then replicates that 64-bit mask into both halves of the result. */
+inline static uint128_t
+FStar_UInt128_eq_mask(uint128_t x, uint128_t y)
+{
+    uint64_t upper_same = FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64));
+    /* passing uint128_t here truncates to the low 64 bits */
+    uint64_t lower_same = FStar_UInt64_eq_mask(x, y);
+    uint64_t mask = upper_same & lower_same;
+    return ((uint128_t)mask) << 64 | mask;
+}
+
+/* Greater-or-equal mask from the 64-bit mask helpers:
+ * (upper >= and upper not equal) OR (upper equal and lower >=), replicated
+ * into both halves of the result. */
+inline static uint128_t
+FStar_UInt128_gte_mask(uint128_t x, uint128_t y)
+{
+    uint64_t upper_ge = FStar_UInt64_gte_mask(x >> 64, y >> 64);
+    uint64_t upper_eq = FStar_UInt64_eq_mask(x >> 64, y >> 64);
+    /* passing uint128_t here truncates to the low 64 bits */
+    uint64_t lower_ge = FStar_UInt64_gte_mask(x, y);
+    uint64_t mask = (upper_ge & ~upper_eq) | (upper_eq & lower_ge);
+    return ((uint128_t)mask) << 64 | mask;
+}
+
+/* Returns the low 64 bits of x. */
+inline static uint64_t
+FStar_UInt128___proj__Mkuint128__item__low(uint128_t x)
+{
+    uint64_t lower = (uint64_t)x;
+    return lower;
+}
+
+/* Returns bits 64..127 of x. */
+inline static uint64_t
+FStar_UInt128___proj__Mkuint128__item__high(uint128_t x)
+{
+    uint128_t upper = x >> 64;
+    return (uint64_t)upper;
+}
+
+/* Addition via the native `+` on uint128_t; same expression as
+ * FStar_UInt128_add. */
+inline static uint128_t
+FStar_UInt128_add_underspec(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Subtraction via the native `-` on uint128_t; same expression as
+ * FStar_UInt128_sub. */
+inline static uint128_t
+FStar_UInt128_sub_underspec(uint128_t x, uint128_t y)
+{
+    uint128_t difference = x - y;
+    return difference;
+}
+
+/* True when x equals y. */
+inline static bool
+FStar_UInt128_eq(uint128_t x, uint128_t y)
+{
+    bool same = (x == y);
+    return same;
+}
+
+/* True when x is strictly greater than y. */
+inline static bool
+FStar_UInt128_gt(uint128_t x, uint128_t y)
+{
+    bool greater = (x > y);
+    return greater;
+}
+
+/* True when x is strictly less than y. */
+inline static bool
+FStar_UInt128_lt(uint128_t x, uint128_t y)
+{
+    bool less = (x < y);
+    return less;
+}
+
+/* True when x is greater than or equal to y. */
+inline static bool
+FStar_UInt128_gte(uint128_t x, uint128_t y)
+{
+    bool at_least = (x >= y);
+    return at_least;
+}
+
+/* True when x is less than or equal to y. */
+inline static bool
+FStar_UInt128_lte(uint128_t x, uint128_t y)
+{
+    bool at_most = (x <= y);
+    return at_most;
+}
+
+/* 64-bit by 32-bit multiplication: both operands are widened to uint128_t
+ * before multiplying. */
+inline static uint128_t
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+    uint128_t wide_x = (uint128_t)x;
+    uint128_t wide_y = (uint128_t)y;
+    return wide_x * wide_y;
+}
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h
new file mode 100644
index 0000000000..e9b366e259
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h
@@ -0,0 +1,571 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KaRaMeL <https://github.com/FStarLang/karamel>
+ * then hand-edited to use MSVC intrinsics. KaRaMeL invocation:
+ * C:\users\barrybo\mitls2c\karamel\_build\src\Karamel.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "krmllib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims
+ * F* version: 15104ff8
+ * KaRaMeL version: 318b7fa8
+ */
+
+#ifndef FSTAR_UINT128_MSVC
+#define FSTAR_UINT128_MSVC
+
+#include "krml/internal/types.h"
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+
+#ifndef _MSC_VER
+#error This file only works with the MSVC compiler
+#endif
+
+/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now
+ * done properly and only include this file when we know for sure we are on
+ * 64-bit MSVC. */
+
+/* HAS_OPTIMIZED selects the intrinsic-based code paths throughout this
+ * header: 1 when _M_X64 is defined and KRML_VERIFIED_UINT128 is not,
+ * 0 otherwise. */
+#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128)
+#define HAS_OPTIMIZED 1
+#else
+#define HAS_OPTIMIZED 0
+#endif
+
+// Define .low and .high in terms of the __m128i fields, to reduce
+// the amount of churn in this file.
+// Both macros are #undef'd again at the end of this header.
+#if HAS_OPTIMIZED
+#include <intrin.h>
+#include <immintrin.h>
+#define low m128i_u64[0]
+#define high m128i_u64[1]
+#endif
+
+/* Reads 16 bytes at b into a 128-bit value. The intrinsic path is a single
+ * unaligned 128-bit load; the fallback reads b as the low word and b + 8 as
+ * the high word with load64_le. */
+inline static FStar_UInt128_uint128
+load128_le(uint8_t *b)
+{
+#if HAS_OPTIMIZED
+    __m128i *src = (__m128i *)b;
+    return _mm_loadu_si128(src);
+#else
+    FStar_UInt128_uint128 value;
+    value.high = load64_le(b + 8);
+    value.low = load64_le(b);
+    return value;
+#endif
+}
+
+/* Writes n to 16 bytes at b: the low word via store64_le at b, the high word
+ * via store64_le at b + 8. */
+inline static void
+store128_le(uint8_t *b, FStar_UInt128_uint128 n)
+{
+    uint8_t *upper_bytes = b + 8;
+
+    store64_le(b, n.low);
+    store64_le(upper_bytes, n.high);
+}
+
+/* Reads 16 bytes at b into a 128-bit value: load64_be(b) is the high word,
+ * load64_be(b + 8) the low word. */
+inline static FStar_UInt128_uint128
+load128_be(uint8_t *b)
+{
+    uint64_t upper = load64_be(b);
+    uint64_t lower = load64_be(b + 8);
+#if HAS_OPTIMIZED
+    return _mm_set_epi64x(upper, lower);
+#else
+    FStar_UInt128_uint128 value;
+    value.high = upper;
+    value.low = lower;
+    return value;
+#endif
+}
+
+/* Writes n to 16 bytes at b: the high word via store64_be at b, the low word
+ * via store64_be at b + 8. */
+inline static void
+store128_be(uint8_t *b, uint128_t n)
+{
+    uint8_t *lower_bytes = b + 8;
+
+    store64_be(b, n.high);
+    store64_be(lower_bytes, n.low);
+}
+
+/* Branch-free "a < b": returns 1 when a is below b and 0 otherwise, read
+ * from the top bit of a ^ ((a ^ b) | ((a - b) ^ b)). */
+inline static uint64_t
+FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{
+    uint64_t differing = a ^ b;
+    uint64_t wrapped = (a - b) ^ b;
+    uint64_t borrow_bits = a ^ (differing | wrapped);
+    return borrow_bits >> (uint32_t)63U;
+}
+
+/* Forwarding wrapper around FStar_UInt128_constant_time_carry(a, b). */
+inline static uint64_t
+FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+    uint64_t carry_bit = FStar_UInt128_constant_time_carry(a, b);
+    return carry_bit;
+}
+
+/* 128-bit addition. The intrinsic path chains two _addcarry_u64 calls so the
+ * carry out of the low words feeds the high words; the fallback obtains that
+ * carry from FStar_UInt128_carry(low sum, b.low). */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    uint64_t sum_lo, sum_hi;
+    unsigned char cf = _addcarry_u64(0, a.low, b.low, &sum_lo);
+
+    _addcarry_u64(cf, a.high, b.high, &sum_hi);
+    return _mm_set_epi64x(sum_hi, sum_lo);
+#else
+    FStar_UInt128_uint128 total;
+    uint64_t lo_sum = a.low + b.low;
+    total.low = lo_sum;
+    total.high = a.high + b.high + FStar_UInt128_carry(lo_sum, b.low);
+    return total;
+#endif
+}
+
+/* 128-bit addition, same result as FStar_UInt128_add. The intrinsic build
+ * delegates to FStar_UInt128_add; the fallback adds the two words separately
+ * and propagates the carry out of the low word, obtained from
+ * FStar_UInt128_carry(low sum, b.low).
+ *
+ * Fix: the fallback's FStar_UInt128_carry call was missing its closing
+ * parenthesis, so this function did not compile when HAS_OPTIMIZED is 0. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+ return FStar_UInt128_add(a, b);
+#else
+ FStar_UInt128_uint128 lit;
+ lit.low = a.low + b.low;
+ lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low);
+ return lit;
+#endif
+}
+
+/* 128-bit addition: delegates to FStar_UInt128_add in the intrinsic build,
+ * otherwise adds word by word and propagates the low-word carry. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return FStar_UInt128_add(a, b);
+#else
+    FStar_UInt128_uint128 total;
+    uint64_t lo_sum = a.low + b.low;
+    uint64_t carry_out = FStar_UInt128_carry(lo_sum, b.low);
+    total.low = lo_sum;
+    total.high = a.high + b.high + carry_out;
+    return total;
+#endif
+}
+
+/* 128-bit subtraction. The intrinsic path chains two _subborrow_u64 calls so
+ * the borrow out of the low words feeds the high words; the fallback obtains
+ * that borrow from FStar_UInt128_carry(a.low, low difference). */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    uint64_t diff_lo, diff_hi;
+    unsigned char bf = _subborrow_u64(0, a.low, b.low, &diff_lo);
+
+    _subborrow_u64(bf, a.high, b.high, &diff_hi);
+    return _mm_set_epi64x(diff_hi, diff_lo);
+#else
+    FStar_UInt128_uint128 result;
+    uint64_t lo_diff = a.low - b.low;
+    result.low = lo_diff;
+    result.high = a.high - b.high - FStar_UInt128_carry(a.low, lo_diff);
+    return result;
+#endif
+}
+
+/* 128-bit subtraction: delegates to FStar_UInt128_sub in the intrinsic build,
+ * otherwise subtracts word by word and propagates the low-word borrow. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return FStar_UInt128_sub(a, b);
+#else
+    FStar_UInt128_uint128 result;
+    uint64_t lo_diff = a.low - b.low;
+    uint64_t borrow_out = FStar_UInt128_carry(a.low, lo_diff);
+    result.low = lo_diff;
+    result.high = a.high - b.high - borrow_out;
+    return result;
+#endif
+}
+
+/* Word-by-word 128-bit subtraction with the low-word borrow taken from
+ * FStar_UInt128_carry(a.low, low difference). */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    uint64_t lo_diff = a.low - b.low;
+    uint64_t borrow_out = FStar_UInt128_carry(a.low, lo_diff);
+    FStar_UInt128_uint128 result;
+
+    result.low = lo_diff;
+    result.high = a.high - b.high - borrow_out;
+    return result;
+}
+
+/* 128-bit subtraction: FStar_UInt128_sub in the intrinsic build,
+ * FStar_UInt128_sub_mod_impl otherwise. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    FStar_UInt128_uint128 difference = FStar_UInt128_sub(a, b);
+#else
+    FStar_UInt128_uint128 difference = FStar_UInt128_sub_mod_impl(a, b);
+#endif
+    return difference;
+}
+
+/* Bitwise AND: one _mm_and_si128 in the intrinsic build, otherwise the two
+ * words are ANDed separately. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_and_si128(a, b);
+#else
+    FStar_UInt128_uint128 bits;
+    bits.high = a.high & b.high;
+    bits.low = a.low & b.low;
+    return bits;
+#endif
+}
+
+/* Bitwise XOR: one _mm_xor_si128 in the intrinsic build, otherwise the two
+ * words are XORed separately. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_xor_si128(a, b);
+#else
+    FStar_UInt128_uint128 bits;
+    bits.high = a.high ^ b.high;
+    bits.low = a.low ^ b.low;
+    return bits;
+#endif
+}
+
+/* Bitwise OR: one _mm_or_si128 in the intrinsic build, otherwise the two
+ * words are ORed separately. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_or_si128(a, b);
+#else
+    FStar_UInt128_uint128 bits;
+    bits.high = a.high | b.high;
+    bits.low = a.low | b.low;
+    return bits;
+#endif
+}
+
+/* Bitwise complement of a.
+ *
+ * Fix: _mm_andnot_si128(x, y) computes (~x) & y, so the previous
+ * _mm_andnot_si128(a, a) was (~a) & a and always produced zero. The second
+ * operand must be all ones; _mm_cmpeq_epi32(a, a) yields that, since every
+ * lane compares equal to itself. The fallback branch is unchanged. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+#if HAS_OPTIMIZED
+ return _mm_andnot_si128(a, _mm_cmpeq_epi32(a, a));
+#else
+ FStar_UInt128_uint128 lit;
+ lit.low = ~a.low;
+ lit.high = ~a.high;
+ return lit;
+#endif
+}
+
+static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U;
+
+/* High word of a left shift by s: hi shifted up by s, plus the bits of lo
+ * moved down by (64 - s). The only caller in this file,
+ * FStar_UInt128_shift_left_small, filters out s == 0 first. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint32_t complement = FStar_UInt128_u32_64 - s;
+    uint64_t kept = hi << s;
+    uint64_t carried_in = lo >> complement;
+    return kept + carried_in;
+}
+
+/* Forwarding wrapper around FStar_UInt128_add_u64_shift_left(hi, lo, s). */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t shifted_hi = FStar_UInt128_add_u64_shift_left(hi, lo, s);
+    return shifted_hi;
+}
+
+/* Left shift for s below 64 (as dispatched by the fallback branch of
+ * FStar_UInt128_shift_left). A zero shift hands `a` back untouched. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    shifted.low = a.low << s;
+    shifted.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s);
+    return shifted;
+}
+
+/* Left shift for s of 64 or more (as dispatched by the fallback branch of
+ * FStar_UInt128_shift_left): the low word becomes zero and the high word is
+ * a.low shifted by (s - 64). */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    uint32_t excess = s - FStar_UInt128_u32_64;
+    FStar_UInt128_uint128 shifted;
+
+    shifted.high = a.low << excess;
+    shifted.low = (uint64_t)0U;
+    return shifted;
+}
+
+/* 128-bit left shift by s bits. The intrinsic build handles s == 0, s < 64
+ * (via __shiftleft128) and s >= 64 inline; the fallback dispatches to the
+ * _small / _large helpers. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+#if HAS_OPTIMIZED
+    if (s == 0) {
+        return a;
+    }
+    if (s >= FStar_UInt128_u32_64) {
+        /* everything moves into the high word */
+        return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0);
+    }
+    {
+        uint64_t new_lo = a.low << s;
+        uint64_t new_hi = __shiftleft128(a.low, a.high, (unsigned char)s);
+        return _mm_set_epi64x(new_hi, new_lo);
+    }
+#else
+    return (s < FStar_UInt128_u32_64)
+               ? FStar_UInt128_shift_left_small(a, s)
+               : FStar_UInt128_shift_left_large(a, s);
+#endif
+}
+
+/* Low word of a right shift by s: lo shifted down by s, plus the bits of hi
+ * moved up by (FStar_UInt128_u32_64 - s). The only caller in this file,
+ * FStar_UInt128_shift_right_small, filters out s == 0 first. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint32_t complement = FStar_UInt128_u32_64 - s;
+    uint64_t kept = lo >> s;
+    uint64_t carried_in = hi << complement;
+    return kept + carried_in;
+}
+
+/* Forwarding wrapper around FStar_UInt128_add_u64_shift_right(hi, lo, s). */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t shifted_lo = FStar_UInt128_add_u64_shift_right(hi, lo, s);
+    return shifted_lo;
+}
+
+/* Right shift for s below FStar_UInt128_u32_64 (as dispatched by the
+ * fallback branch of FStar_UInt128_shift_right). A zero shift hands `a` back
+ * untouched. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    shifted.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s);
+    shifted.high = a.high >> s;
+    return shifted;
+}
+
+/* Right shift for s of FStar_UInt128_u32_64 or more (as dispatched by the
+ * fallback branch of FStar_UInt128_shift_right): the high word becomes zero
+ * and the low word is a.high shifted by the excess. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    uint32_t excess = s - FStar_UInt128_u32_64;
+    FStar_UInt128_uint128 shifted;
+
+    shifted.high = (uint64_t)0U;
+    shifted.low = a.high >> excess;
+    return shifted;
+}
+
+/* 128-bit right shift by s bits. The intrinsic build handles s == 0,
+ * s < FStar_UInt128_u32_64 (via __shiftright128) and larger shifts inline;
+ * the fallback dispatches to the _small / _large helpers. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+#if HAS_OPTIMIZED
+    if (s == 0) {
+        return a;
+    }
+    if (s >= FStar_UInt128_u32_64) {
+        /* everything moves into the low word */
+        return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64));
+    }
+    {
+        uint64_t new_lo = __shiftright128(a.low, a.high, (unsigned char)s);
+        uint64_t new_hi = a.high >> s;
+        return _mm_set_epi64x(new_hi, new_lo);
+    }
+#else
+    return (s < FStar_UInt128_u32_64)
+               ? FStar_UInt128_shift_right_small(a, s)
+               : FStar_UInt128_shift_right_large(a, s);
+#endif
+}
+
+/* True when both 64-bit words of a and b match. */
+inline static bool
+FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.low != b.low) {
+        return false;
+    }
+    return a.high == b.high;
+}
+
+/* a > b: the high words decide unless they are equal, in which case the low
+ * words are compared. */
+inline static bool
+FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low > b.low;
+}
+
+/* a < b: the high words decide unless they are equal, in which case the low
+ * words are compared. */
+inline static bool
+FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low < b.low;
+}
+
+/* a >= b: the high words decide unless they are equal, in which case the low
+ * words are compared with >=. */
+inline static bool
+FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low >= b.low;
+}
+
+/* a <= b: the high words decide unless they are equal, in which case the low
+ * words are compared with <=. */
+inline static bool
+FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low <= b.low;
+}
+
+/* Equality mask: every bit of the result is set when a == b, and the result
+ * is zero otherwise. The intrinsic path compares 32-bit lanes and then ANDs
+ * the lane results together across the whole vector; the fallback ANDs the
+ * two FStar_UInt64_eq_mask results and stores them in both words. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+ // PCMPEQD (_mm_cmpeq_epi32) to produce 4 32-bit values, all either 0x0 or 0xffffffff
+ __m128i r32 = _mm_cmpeq_epi32(a, b);
+ // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half)
+ __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1));
+ // Bitwise and to compute (3&2),(2&3),(1&0),(0&1)
+ __m128i ret64 = _mm_and_si128(r32, s32);
+ // Swap the two 64-bit values to form s64
+ __m128i s64 =
+ _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2
+ // And them together
+ return _mm_and_si128(ret64, s64);
+#else
+ FStar_UInt128_uint128 lit;
+ lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high);
+ lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high);
+ return lit;
+#endif
+}
+
+/* Greater-or-equal mask built from the 64-bit mask helpers:
+ * (high >= and high not equal) OR (high equal and low >=), stored in both
+ * words of the result.
+ *
+ * The SIMD variant below is compiled out by the `&& 0` in its guard, so the
+ * scalar branch is what every build uses.
+ * NOTE(review): the disabled variant relies on _mm_cmpgt_epi32, which is a
+ * signed 32-bit comparison; presumably that is why it is switched off --
+ * confirm before re-enabling it. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED && 0
+ // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each
+ // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each
+ // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3)
+ // then splat slot 0 to 3,2,1,0
+ __m128i gt = _mm_cmpgt_epi32(a, b);
+ __m128i eq = _mm_cmpeq_epi32(a, b);
+ __m128i ge = _mm_or_si128(gt, eq);
+ __m128i ge0 = ge;
+ __m128i eq0 = eq;
+ __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1
+ __m128i t1 = _mm_and_si128(eq0, ge1);
+ __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0
+ __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1
+ __m128i ge2 =
+ _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2
+ __m128i t2 =
+ _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2)
+ ret = _mm_or_si128(ret, t2);
+ __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3
+ __m128i ge3 =
+ _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3
+ __m128i t3 = _mm_and_si128(
+ eq0, _mm_and_si128(
+ eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3)
+ ret = _mm_or_si128(ret, t3);
+ return _mm_shuffle_epi32(
+ ret,
+ _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords.
+#else
+ FStar_UInt128_uint128 lit;
+ lit.low = FStar_UInt64_gte_mask(a.high, b.high) &
+ ~FStar_UInt64_eq_mask(a.high, b.high) |
+ FStar_UInt64_eq_mask(a.high, b.high) &
+ FStar_UInt64_gte_mask(a.low, b.low);
+ lit.high = FStar_UInt64_gte_mask(a.high, b.high) &
+ ~FStar_UInt64_eq_mask(a.high, b.high) |
+ FStar_UInt64_eq_mask(a.high, b.high) &
+ FStar_UInt64_gte_mask(a.low, b.low);
+ return lit;
+#endif
+}
+
+/* Widens a 64-bit value: it becomes the low word, the high word is zero. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+#if HAS_OPTIMIZED
+    return _mm_set_epi64x(0, a);
+#else
+    FStar_UInt128_uint128 widened;
+    widened.high = (uint64_t)0U;
+    widened.low = a;
+    return widened;
+#endif
+}
+
+/* Narrows to 64 bits by returning the low word; the high word is dropped. */
+inline static uint64_t
+FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    uint64_t low_word = a.low;
+    return low_word;
+}
+
+/* Keeps only the low 32 bits of a. */
+inline static uint64_t
+FStar_UInt128_u64_mod_32(uint64_t a)
+{
+    const uint64_t low32 = (uint64_t)0xffffffffU;
+    return a & low32;
+}
+
+static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
+
+/* Returns lo + (hi << 32), with the usual uint64_t wrap-around. */
+inline static uint64_t
+FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* 64-bit by 32-bit multiplication into a 128-bit result. The intrinsic build
+ * uses _umul128; the fallback works on the two 32-bit halves of x so that no
+ * intermediate product needs more than 64 bits. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+#if HAS_OPTIMIZED
+    uint64_t prod_hi;
+    uint64_t prod_lo = _umul128(x, (uint64_t)y, &prod_hi);
+    return _mm_set_epi64x(prod_hi, prod_lo);
+#else
+    uint64_t y64 = (uint64_t)y;
+    /* low half of x times y */
+    uint64_t lo_prod = FStar_UInt128_u64_mod_32(x) * y64;
+    /* high half of x times y, plus what spilled out of lo_prod */
+    uint64_t mid = (x >> FStar_UInt128_u32_32) * y64 + (lo_prod >> FStar_UInt128_u32_32);
+    FStar_UInt128_uint128 product;
+    product.low = FStar_UInt128_u32_combine(mid, FStar_UInt128_u64_mod_32(lo_prod));
+    product.high = mid >> FStar_UInt128_u32_32;
+    return product;
+#endif
+}
+
+/* Note: static headers bring scope collision issues when they define types!
+ * Because now client (karamel-generated) code will include this header and
+ * there might be type collisions if the client code uses quadruples of uint64s.
+ * So, we cannot use the karamel-generated name. */
+/* Field contents below are as filled in by FStar_UInt128_mul_wide_impl_t_(x, y). */
+typedef struct K_quad_s {
+ uint64_t fst; /* low 32 bits of x */
+ uint64_t snd; /* low 32 bits of (low half of x) * (low half of y) */
+ uint64_t thd; /* x >> 32 */
+ uint64_t f3; /* (x >> 32) * (low half of y) + upper half of the low*low product */
+} K_quad;
+
+/* Computes the partial values that FStar_UInt128_mul_wide_impl consumes:
+ * the low half of x, the low 32 bits of the low*low product, the high half
+ * of x, and the first cross term including the spill of the low*low
+ * product. */
+inline static K_quad
+FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y)
+{
+    uint64_t x_lo = FStar_UInt128_u64_mod_32(x);
+    uint64_t x_hi = x >> FStar_UInt128_u32_32;
+    uint64_t y_lo = FStar_UInt128_u64_mod_32(y);
+    uint64_t lo_lo = x_lo * y_lo;
+    K_quad parts;
+
+    parts.fst = x_lo;
+    parts.snd = FStar_UInt128_u64_mod_32(lo_lo);
+    parts.thd = x_hi;
+    parts.f3 = x_hi * y_lo + (lo_lo >> FStar_UInt128_u32_32);
+    return parts;
+}
+
+/* Returns lo + (hi << FStar_UInt128_u32_32), with uint64_t wrap-around. */
+static uint64_t
+FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* Portable 64x64 -> 128-bit multiplication: takes the partial values from
+ * FStar_UInt128_mul_wide_impl_t_ and adds the remaining cross term with the
+ * high half of y. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y)
+{
+    K_quad parts = FStar_UInt128_mul_wide_impl_t_(x, y);
+    uint64_t x_lo = parts.fst;
+    uint64_t lo_lo32 = parts.snd;
+    uint64_t x_hi = parts.thd;
+    uint64_t cross_a = parts.f3;
+    uint64_t y_hi = y >> FStar_UInt128_u32_32;
+    /* x_lo*y_hi plus the lower half of cross_a */
+    uint64_t cross_b = x_lo * y_hi + FStar_UInt128_u64_mod_32(cross_a);
+    FStar_UInt128_uint128 product;
+
+    product.low = FStar_UInt128_u32_combine_(cross_b, lo_lo32);
+    product.high = x_hi * y_hi +
+                   (cross_a >> FStar_UInt128_u32_32) +
+                   (cross_b >> FStar_UInt128_u32_32);
+    return product;
+}
+
+/* 64x64 -> 128-bit multiplication: _umul128 in the intrinsic build,
+ * FStar_UInt128_mul_wide_impl otherwise. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+#if HAS_OPTIMIZED
+    uint64_t prod_hi;
+    uint64_t prod_lo = _umul128(x, y, &prod_hi);
+    return _mm_set_epi64x(prod_hi, prod_lo);
+#else
+    FStar_UInt128_uint128 product = FStar_UInt128_mul_wide_impl(x, y);
+    return product;
+#endif
+}
+
+#undef low
+#undef high
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h
new file mode 100644
index 0000000000..61fe85c49e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h
@@ -0,0 +1,84 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H
+#define FSTAR_UINT128_STRUCT_ENDIANNESS_H
+
+/* Hand-written implementation of endianness-related uint128 functions
+ * for the extracted uint128 implementation */
+
+/* Access 64-bit fields within the int128. */
+#define HIGH64_OF(x) ((x)->high)
+#define LOW64_OF(x) ((x)->low)
+
+/* A series of definitions written using pointers. */
+
+inline static void
+load128_le_(uint8_t *b, uint128_t *r)
+{
+    LOW64_OF(r) = load64_le(&b[0]);  /* low word comes from b[0..7] via load64_le */
+    HIGH64_OF(r) = load64_le(&b[8]); /* high word comes from b[8..15] */
+}
+
+inline static void
+store128_le_(uint8_t *b, uint128_t *n)
+{
+    store64_le(&b[0], LOW64_OF(n));  /* low word goes to b[0..7] via store64_le */
+    store64_le(&b[8], HIGH64_OF(n)); /* high word goes to b[8..15] */
+}
+
+inline static void
+load128_be_(uint8_t *b, uint128_t *r)
+{
+    HIGH64_OF(r) = load64_be(&b[0]); /* high word comes from b[0..7] via load64_be */
+    LOW64_OF(r) = load64_be(&b[8]);  /* low word comes from b[8..15] */
+}
+
+inline static void
+store128_be_(uint8_t *b, uint128_t *n)
+{
+    store64_be(&b[0], HIGH64_OF(n)); /* high word goes to b[0..7] via store64_be */
+    store64_be(&b[8], LOW64_OF(n));  /* low word goes to b[8..15] */
+}
+
+#ifndef KRML_NOSTRUCT_PASSING
+
+inline static uint128_t
+load128_le(uint8_t *b)
+{
+    uint128_t value; /* both fields are written by load128_le_ below */
+    load128_le_(b, &value);
+    return value; /* by-value wrapper around the pointer-based loader */
+}
+
+inline static void
+store128_le(uint8_t *b, uint128_t n)
+{
+    store128_le_(b, &n); /* by-value wrapper: forwards the address of the local copy of n */
+}
+
+inline static uint128_t
+load128_be(uint8_t *b)
+{
+    uint128_t value; /* both fields are written by load128_be_ below */
+    load128_be_(b, &value);
+    return value; /* by-value wrapper around the pointer-based loader */
+}
+
+inline static void
+store128_be(uint8_t *b, uint128_t n)
+{
+    store128_be_(b, &n); /* by-value wrapper: forwards the address of the local copy of n */
+}
+
+#else /* defined(KRML_NOSTRUCT_PASSING) */
+
+#define print128 print128_
+#define load128_le load128_le_
+#define store128_le store128_le_
+#define load128_be load128_be_
+#define store128_be store128_be_
+
+#endif /* KRML_NOSTRUCT_PASSING */
+
+#endif
diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def
new file mode 100644
index 0000000000..c4ab8e38ed
--- /dev/null
+++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def
@@ -0,0 +1,11 @@
+LIBRARY libkrmllib
+
+EXPORTS
+ FStar_UInt64_eq_mask
+ FStar_UInt64_gte_mask
+ FStar_UInt32_eq_mask
+ FStar_UInt32_gte_mask
+ FStar_UInt16_eq_mask
+ FStar_UInt16_gte_mask
+ FStar_UInt8_eq_mask
+ FStar_UInt8_gte_mask
diff --git a/security/nss/lib/freebl/verified/lib_intrinsics.h b/security/nss/lib/freebl/verified/lib_intrinsics.h
new file mode 100644
index 0000000000..1206367d77
--- /dev/null
+++ b/security/nss/lib/freebl/verified/lib_intrinsics.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <sys/types.h>
+
+#if defined(__has_include)
+#if __has_include("config.h")
+#include "config.h"
+#endif
+#endif
+
+/*
+ GCC versions prior to 5.5 incorrectly optimize certain intrinsics.
+
+ See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81300
+
+ CLANG versions prior to 5 crash on certain intrinsics.
+
+ See https://bugs.llvm.org/show_bug.cgi?id=24943
+*/
+
+#if !defined(HACL_CAN_COMPILE_INTRINSICS) || \
+ (defined(__clang__) && (__clang_major__ < 5)) || \
+ (defined(__GNUC__) && !defined(__clang__) && \
+ (__GNUC__ < 5 || (__GNUC__ == 5 && (__GNUC_MINOR__ < 5))))
+
+#include "Hacl_IntTypes_Intrinsics.h"
+
+#if defined(HACL_CAN_COMPILE_UINT128)
+
+#include "Hacl_IntTypes_Intrinsics_128.h"
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_128_add_carry_u64(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_128_sub_borrow_u64(x1, x2, x3, x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4))
+
+#endif // defined(HACL_CAN_COMPILE_UINT128)
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+ (Hacl_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4))
+
+#else // !defined(HACL_CAN_COMPILE_INTRINSICS)
+
+#if defined(_MSC_VER)
+#include <immintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+
+#define Lib_IntTypes_Intrinsics_add_carry_u32(x1, x2, x3, x4) \
+ (_addcarry_u32(x1, x2, x3, (unsigned int *)x4))
+
+#define Lib_IntTypes_Intrinsics_add_carry_u64(x1, x2, x3, x4) \
+ (_addcarry_u64(x1, x2, x3, (long long unsigned int *)x4))
+
+/*
+ GCC versions prior to 7.2 pass arguments to _subborrow_u{32,64}
+ in an incorrect order.
+
+ See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81294
+*/
+#if defined(__GNUC__) && !defined(__clang__) && \
+ (__GNUC__ < 7 || (__GNUC__ == 7 && (__GNUC_MINOR__ < 2)))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+ (_subborrow_u32(x1, x3, x2, (unsigned int *)x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+ (_subborrow_u64(x1, x3, x2, (long long unsigned int *)x4))
+
+#else
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u32(x1, x2, x3, x4) \
+ (_subborrow_u32(x1, x2, x3, (unsigned int *)x4))
+
+#define Lib_IntTypes_Intrinsics_sub_borrow_u64(x1, x2, x3, x4) \
+ (_subborrow_u64(x1, x2, x3, (long long unsigned int *)x4))
+
+#endif // GCC < 7.2
+
+#endif // !HACL_CAN_COMPILE_INTRINSICS
diff --git a/security/nss/lib/freebl/verified/libintvector.h b/security/nss/lib/freebl/verified/libintvector.h
new file mode 100644
index 0000000000..e987c1948e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/libintvector.h
@@ -0,0 +1,914 @@
+#ifndef __Vec_Intrin_H
+#define __Vec_Intrin_H
+
+#include <sys/types.h>
+
+/* We include config.h here to ensure that the various feature-flags are
+ * properly brought into scope. Users can either run the configure script, or
+ * write a config.h themselves and put it under version control. */
+#if defined(__has_include)
+#if __has_include("config.h")
+#include "config.h"
+#endif
+#endif
+
+/* # DEBUGGING:
+ * ============
+ * It is possible to debug the current definitions by using libintvector_debug.h
+ * See the include at the bottom of the file. */
+
+#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1)
+
+#if defined(__x86_64__) || defined(_M_X64)
+
+#if defined(HACL_CAN_COMPILE_VEC128)
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#include <smmintrin.h>
+
+typedef __m128i Lib_IntVector_Intrinsics_vec128;
+
+#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \
+ (_mm_aesenc_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \
+ (_mm_aesenclast_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \
+ (_mm_aeskeygenassist_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \
+ (_mm_clmulepi64_si128(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+ (_mm_xor_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+ (_mm_cmpeq_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ (_mm_cmpeq_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+ (_mm_cmpgt_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ (_mm_cmpgt_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ (_mm_or_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ (_mm_and_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ (_mm_xor_si128(x0, _mm_set1_epi32(-1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
+ (_mm_slli_si128(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
+ (_mm_srli_si128(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ (_mm_slli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ (_mm_srli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
+ (_mm_slli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
+ (_mm_srli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1)))))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4)))
+
+#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
+ (_mm_loadu_si128((__m128i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
+ (_mm_loadu_si128((__m128i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))
+
+#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))
+
+#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
+
+#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))
+
+#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))
+
+#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
+ (_mm_insert_epi8(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ (_mm_insert_epi32(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ (_mm_insert_epi64(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
+ (_mm_extract_epi8(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ (_mm_extract_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ (_mm_extract_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ (_mm_setzero_si128())
+
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ (_mm_add_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ (_mm_sub_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ (_mm_mul_epu32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ (_mm_mul_epu32(x0, _mm_set1_epi64x(x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ (_mm_add_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
+ (_mm_sub_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
+ (_mm_mullo_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
+ (_mm_mullo_epi32(x0, _mm_set1_epi32(x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_load128(x) \
+ ((__m128i)x)
+
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+ (_mm_set1_epi64x(x)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \
+ (_mm_set_epi64x(x1, x0)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+ (_mm_set1_epi32(x))
+
+#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
+ (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
+ (_mm_unpacklo_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
+ (_mm_unpackhi_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
+ (_mm_unpacklo_epi64(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
+ (_mm_unpackhi_epi64(x1, x2))
+
+#endif /* HACL_CAN_COMPILE_VEC128 */
+
+#if defined(HACL_CAN_COMPILE_VEC256)
+
+#include <immintrin.h>
+
+typedef __m256i Lib_IntVector_Intrinsics_vec256;
+
+#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \
+ (_mm256_cmpeq_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \
+ (_mm256_cmpeq_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \
+ (_mm256_cmpgt_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \
+ (_mm256_cmpgt_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \
+ (_mm256_xor_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \
+ (_mm256_or_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \
+ (_mm256_and_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \
+ (_mm256_xor_si256(x0, _mm256_set1_epi32(-1)))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \
+ (_mm256_slli_si256(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \
+ (_mm256_srli_si256(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \
+ (_mm256_slli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \
+ (_mm256_srli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \
+ (_mm256_slli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \
+ (_mm256_srli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \
+    (((x1) == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1))))))) /* x1 parenthesized in every comparison, matching the vec128 variant */
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \
+    (((x1) == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : ((x1) == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : ((x1) == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : ((x1) == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : ((x1) == 56 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1)))))))))))) /* x1 parenthesized in every comparison so expression arguments keep their meaning */
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \
+ (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
+ (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \
+    (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(((x1) + 7) % 8, ((x1) + 6) % 8, ((x1) + 5) % 8, ((x1) + 4) % 8, ((x1) + 3) % 8, ((x1) + 2) % 8, ((x1) + 1) % 8, (x1) % 8))) /* lane i of the result takes source lane (x1 + i) mod 8; the +3 term previously read (x1 + 3 % 8) */
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \
+ (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4)))
+
+#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \
+ (_mm256_loadu_si256((__m256i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \
+ (_mm256_loadu_si256((__m256i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \
+ (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \
+ (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))
+
+#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))
+
+#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))
+
+#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \
+ (_mm256_insert_epi8(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \
+ (_mm256_insert_epi32(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \
+ (_mm256_insert_epi64(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \
+ (_mm256_extract_epi8(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \
+ (_mm256_extract_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \
+ (_mm256_extract_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_zero \
+ (_mm256_setzero_si256())
+
+#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \
+ (_mm256_add_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \
+ (_mm256_sub_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \
+ (_mm256_mul_epu32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \
+ (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \
+ (_mm256_add_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \
+ (_mm256_sub_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \
+ (_mm256_mullo_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \
+ (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_load64(x1) \
+ (_mm256_set1_epi64x(x1)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \
+ (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec256_load32(x) \
+ (_mm256_set1_epi32(x))
+
+#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \
+ (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec256_load128(x) \
+    (_mm256_broadcastsi128_si256((__m128i)(x))) /* was a one-argument call to the two-argument _mm256_set_m128i; presumably meant to replicate x into both halves like the other load<N> macros - TODO confirm */
+
+#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \
+ (_mm256_set_m128i((__m128i)x1, (__m128i)x0))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \
+ (_mm256_unpacklo_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \
+ (_mm256_unpackhi_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \
+ (_mm256_unpacklo_epi64(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \
+ (_mm256_unpackhi_epi64(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \
+ (_mm256_permute2x128_si256(x1, x2, 0x20))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \
+ (_mm256_permute2x128_si256(x1, x2, 0x31))
+
+#endif /* HACL_CAN_COMPILE_VEC256 */
+
+#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE)
+
+#if defined(HACL_CAN_COMPILE_VEC128)
+
+#include <arm_neon.h>
+
+typedef uint32x4_t Lib_IntVector_Intrinsics_vec128;
+
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+ (veorq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+    (vreinterpretq_u32_u64(vceqq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) /* compare whole 64-bit lanes; vceqq_u32 produced independent masks per 32-bit half */
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ (vceqq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ (vcgtq_u32(x0, x1))
+
+#define high32(x0) \
+ (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32)))
+
+#define low32(x0) \
+ (vmovn_u64(vreinterpretq_u64_u32(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+    (vreinterpretq_u32_u64(vcgtq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) /* unsigned 64-bit compare with a full-width lane mask; the vmovl_u32 form zero-extended the mask to 0x00000000FFFFFFFF */
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ (vorrq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ (vandq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ (vmvnq_u32(x0))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
+    (vreinterpretq_u32_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u32(x0), 16 - (x1) / 8))) /* whole-vector shift by (x1)/8 bytes toward the high end, zero-filling the low bytes; x1 must be 8..128 so the vextq_u8 index stays within 0..15 */
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
+    (vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(x0), vdupq_n_u8(0), (x1) / 8))) /* whole-vector shift by (x1)/8 bytes toward the low end, zero-filling the high bytes; x1 must be 0..120 so the vextq_u8 index stays within 0..15 */
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
+ (vshlq_n_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
+ (vshrq_n_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \
+ (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \
+ (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+ (vextq_u32(x0, x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
+ (vextq_u64(x0, x0, x1))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4)))
+
+#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2)))
+*/
+
+#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \
+ (vld1q_u32((const uint32_t*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \
+ (vld1q_u32((const uint32_t*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
+ (vst1q_u32((uint32_t*)(x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
+ (vst1q_u32((uint32_t*)(x0), (x1)))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
+ ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0)));
+
+*/
+
+#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
+ (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
+ (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0))))))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
+*/
+
+#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
+ (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1))))))
+
+#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
+ (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1))))))
+
+#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
+ (vsetq_lane_u8(x1, x0, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ (vsetq_lane_u32(x1, x0, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2)))
+
+#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
+ (vgetq_lane_u8(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ (vgetq_lane_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ (vdupq_n_u32(0))
+
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1)))))
+
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ (vaddq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
+ (vsubq_u32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
+    (vmulq_u32(x0, x1)) /* lane-wise 32-bit multiply; vmulq_lane_u32 needs (uint32x4_t, uint32x2_t, lane) and did not fit this two-vector call */
+
+#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
+    (vmulq_u32(x0, vdupq_n_u32(x1))) /* multiply every 32-bit lane by the scalar x1; vmulq_lane_u32 takes three arguments and a 64-bit-wide second operand */
+
+#define Lib_IntVector_Intrinsics_vec128_load128(x) \
+ ((uint32x4_t)(x))
+
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+ (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */
+
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+ (vdupq_n_u32(x)) /* hi lo */
+
+static inline Lib_IntVector_Intrinsics_vec128
+Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2)
+{
+    const uint64_t lanes[2] = { x1, x2 }; /* x1 is element 0, x2 is element 1 */
+    return vreinterpretq_u32_u64(vld1q_u64(&lanes[0]));
+}
+
+static inline Lib_IntVector_Intrinsics_vec128
+Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4)
+{
+    const uint32_t lanes[4] = { x1, x2, x3, x4 }; /* x1 is element 0 ... x4 is element 3 */
+    return vld1q_u32(&lanes[0]);
+}
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
+ (vzip1q_u32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
+ (vzip2q_u32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
+ (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
+ (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))
+
+#endif /* HACL_CAN_COMPILE_VEC128 */
+
+/* IBM z architecture */
+#elif defined(__s390x__) /* this flag is for GCC only */
+
+#if defined(HACL_CAN_COMPILE_VEC128)
+
+#include <stdint.h>
+#include <vecintrin.h>
+
+/* The main vector 128 type
+ * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char,
+ * unsigned int, unsigned long long: the compiler complains that the parameter
+ * combination is invalid. */
+typedef unsigned char vector128_8 __attribute__((vector_size(16)));
+typedef unsigned int vector128_32 __attribute__((vector_size(16)));
+typedef unsigned long long vector128_64 __attribute__((vector_size(16)));
+
+typedef vector128_8 Lib_IntVector_Intrinsics_vec128;
+typedef vector128_8 vector128;
+
+#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \
+ (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x))))
+
+#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \
+ (vector128)(*((vector128_32*)(const uint8_t*)(x)))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \
+ (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x))))
+
+static inline void
+Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1)
+{
+ *((vector128_32 *)x0) = vec_revb((vector128_32)x1);
+}
+
+static inline void
+Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1)
+{
+ *((vector128_32 *)x0) = (vector128_32)x1;
+}
+
+static inline void
+Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1)
+{
+ *((vector128_64 *)x0) = vec_revb((vector128_64)x1);
+}
+
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) /* + applied to the unsigned int lane view, result cast back to vector128 */
+
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) /* + applied to the unsigned long long lane view */
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) /* vec_and on the byte view */
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) /* vec_cmpeq on the 32-bit view; the comparison result is cast to vector128 */
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+ ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) /* vec_cmpeq on the 64-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ ((unsigned int)(vec_extract((vector128_32)(x0), x1))) /* element x1 of the 32-bit view, as unsigned int */
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) /* element x1 of the 64-bit view, as unsigned long long */
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) /* > applied to the unsigned 32-bit view; the comparison result is cast to vector128 */
+
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+ ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) /* > applied to the unsigned 64-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) /* vec_insert of scalar x1 into the 32-bit view of x0 at index x2 */
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) /* vec_insert of scalar x1 into the 64-bit view of x0 at index x2 */
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \
+ ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) /* "high" is implemented with vec_mergel on the 32-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \
+ ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) /* vec_mergel on the 64-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \
+ ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) /* "low" is implemented with vec_mergeh on the 32-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \
+ ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) /* vec_mergeh on the 64-bit view */
+
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+ ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \
+ (unsigned int)(x), (unsigned int)(x) })) /* broadcasts x to all four 32-bit lanes; x is evaluated four times */
+
+#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
+ ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) /* builds a vector from four scalars, in argument order */
+
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+ ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x)))) /* broadcasts x to both 64-bit lanes via vec_load_pair; x is evaluated twice */
+
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) /* bitwise NOT, written as XOR with a splat of -1 */
+
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ ((vector128)(vec_mulo((vector128_32)(x0), \
+ (vector128_32)(x1)))) /* vec_mulo on the 32-bit views; NOTE(review): presumably yields two 64-bit products, confirm against the vec_mulo documentation */
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) /* vec_or on the byte view */
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) /* vec_rli on the 32-bit view with count x1 */
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) /* right rotation expressed as a left rotation by 32 - x1 */
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+ ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((x1) % 4) * 4))) /* vec_sld of x0 with itself by ((x1) % 4) * 4 bytes; x1 is parenthesized so an argument like a + 1 is reduced modulo 4 as a whole */
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \
+ ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) }))) /* rotate left by x1, then AND with an all-ones mask shifted left by x1; x1 is evaluated three times */
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \
+ ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) /* rotate left by 64 - x1, then AND with an all-ones mask shifted right by x1 */
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
+ (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \
+ ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \
+ 0xffffffff >> (x1), 0xffffffff >> (x1) }))) /* 32-bit variant: rotate left by 32 - x1, then mask with 0xffffffff >> x1 in every lane */
+
+/* Multiply by a scalar: x1 is broadcast with a compound literal and passed to mul64. Doesn't work with vec_splat_u64 */
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) }))))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) /* - applied to the unsigned long long lane view */
+
+static inline vector128
+Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1)
+{ /* Bitwise XOR of two vectors; the parameters are already vector128, so no operand casts are needed. */
+    return (vector128)vec_xor(x0, x1);
+}
+
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ ((vector128){}) /* compound literal with an empty initializer list (a GNU C extension before C23), i.e. a zero-initialized vector128 */
+
+#endif /* HACL_CAN_COMPILE_VEC128 */
+
+#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only
+
+#if defined(HACL_CAN_COMPILE_VEC128)
+
+#include <altivec.h>
+#include <string.h> // for memcpy
+#include <stdint.h>
+
+// The main vector 128 type, plus the 8-, 32- and 64-bit lane views of it.
+// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char,
+// unsigned int, unsigned long long: the compiler complains that the parameter
+// combination is invalid.
+typedef vector unsigned char vector128_8; // byte view
+typedef vector unsigned int vector128_32; // unsigned int lane view
+typedef vector unsigned long long vector128_64; // unsigned long long lane view
+
+typedef vector128_8 Lib_IntVector_Intrinsics_vec128; // public name; the byte view is the canonical representation
+typedef vector128_8 vector128; // short alias for the same byte-view type
+
+#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \
+ ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) // vec_xl at offset 0 from x; no byte reversal is applied (NOTE(review): presumably relies on a little-endian target, confirm)
+
+#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \
+ ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) // 64-bit-lane variant of the load above
+
+#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \
+ (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) // vec_xst of x1 (32-bit view) at offset 0 from x0
+
+#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \
+ (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) // vec_xst of x1 (64-bit view) at offset 0 from x0
+
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) // + applied to the unsigned int lane view, result cast back to vector128
+
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) // + applied to the unsigned long long lane view
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) // vec_and on the byte view
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) // vec_cmpeq on the 32-bit view; the comparison result is cast to vector128
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+ ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) // vec_cmpeq on the 64-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ ((unsigned int)(vec_extract((vector128_32)(x0), x1))) // element x1 of the 32-bit view, as unsigned int
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) // element x1 of the 64-bit view, as unsigned long long
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) // > applied to the unsigned 32-bit view; the comparison result is cast to vector128
+
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+ ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) // > applied to the unsigned 64-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) // vec_insert of scalar x1 into the 32-bit view of x0 at index x2
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) // vec_insert of scalar x1 into the 64-bit view of x0 at index x2
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \
+ ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) // "high" is implemented with vec_mergel on the 32-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \
+ ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) // vec_mergel on the 64-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \
+ ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) // "low" is implemented with vec_mergeh on the 32-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \
+ ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) // vec_mergeh on the 64-bit view
+
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+ ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \
+ (unsigned int)(x), (unsigned int)(x) })) // broadcasts x to all four 32-bit lanes; x is evaluated four times
+
+#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
+ ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) // builds a vector from four scalars, in argument order
+
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+ ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) // broadcasts x to both 64-bit lanes; x is evaluated twice
+
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) // bitwise NOT, written as XOR with a splat of -1
+
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ ((vector128)(vec_mule((vector128_32)(x0), \
+ (vector128_32)(x1)))) // vec_mule on the 32-bit views; NOTE(review): presumably yields two 64-bit products, confirm against the vec_mule documentation
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) // vec_or on the byte view
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) // vec_rl with the count x1 replicated into every lane; x1 is evaluated four times
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) // right rotation expressed as a left rotation by 32 - x1
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+ ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) // vec_sld of x0 with itself by ((4 - x1) % 4) * 4 bytes
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) // vec_sl on the 64-bit view with the count x1 replicated into both lanes
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) // vec_sr on the 64-bit view with the count x1 replicated into both lanes
+
+// Multiply by a scalar: x1 is broadcast with a compound literal and passed to mul64. Doesn't work with vec_splat_u64
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) }))))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) // - applied to the unsigned long long lane view
+
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+ ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) // vec_xor on the byte view
+
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ ((vector128){}) // compound literal with an empty initializer list (a GNU C extension before C23), i.e. a zero-initialized vector128
+
+#endif /* HACL_CAN_COMPILE_VEC128 */
+
+#endif // PowerPC64
+
+// DEBUGGING:
+// If libintvector_debug.h exists, use it to debug the current implementations.
+// Note that some flags must be enabled for the debugging to be effective:
+// see libintvector_debug.h for more details.
+#if defined(__has_include)
+#if __has_include("libintvector_debug.h")
+#include "libintvector_debug.h"
+#endif
+#endif
+
+#endif // __Vec_Intrin_H
diff --git a/security/nss/lib/freebl/win_rand.c b/security/nss/lib/freebl/win_rand.c
new file mode 100644
index 0000000000..b863776d21
--- /dev/null
+++ b/security/nss/lib/freebl/win_rand.c
@@ -0,0 +1,161 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "secrng.h"
+
+#ifdef XP_WIN
+#include <windows.h>
+#include <time.h>
+
+static BOOL
+CurrentClockTickTime(LPDWORD lpdwHigh, LPDWORD lpdwLow)
+{
+    LARGE_INTEGER ticks;
+    /* Split the performance counter into DWORD halves; outputs stay unwritten on failure. */
+    if (QueryPerformanceCounter(&ticks)) {
+        *lpdwHigh = ticks.u.HighPart;
+        *lpdwLow = ticks.u.LowPart;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+size_t
+RNG_GetNoise(void *buf, size_t maxbuf)
+{
+    /* Copies up to maxbuf bytes of timing noise into buf and returns the number
+     * of bytes written.  Sources, in order: low then high DWORD of the clock
+     * tick counter, GetTickCount(), time(); each is cut to the space left. */
+    DWORD dwHigh = 0, dwLow = 0, dwVal; /* zeroed: the tick query below can fail */
+    char *out = (char *)buf;
+    size_t n = 0;
+    size_t nBytes;
+    time_t sTime;
+
+    if (maxbuf == 0)
+        return 0;
+
+    (void)CurrentClockTickTime(&dwHigh, &dwLow);
+
+    // get the maximally changing bits first
+    nBytes = sizeof(dwLow) > maxbuf ? maxbuf : sizeof(dwLow);
+    memcpy(out + n, &dwLow, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+    if (maxbuf == 0)
+        return n;
+
+    nBytes = sizeof(dwHigh) > maxbuf ? maxbuf : sizeof(dwHigh);
+    memcpy(out + n, &dwHigh, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+    if (maxbuf == 0)
+        return n;
+
+    // get the number of milliseconds that have elapsed since Windows started
+    dwVal = GetTickCount();
+    nBytes = sizeof(dwVal) > maxbuf ? maxbuf : sizeof(dwVal);
+    memcpy(out + n, &dwVal, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+    if (maxbuf == 0)
+        return n;
+
+    // get the time in seconds since midnight Jan 1, 1970
+    time(&sTime);
+    nBytes = sizeof(sTime) > maxbuf ? maxbuf : sizeof(sTime);
+    memcpy(out + n, &sTime, nBytes);
+    n += nBytes;
+
+    return n;
+}
+
+void
+RNG_SystemInfoForRNG(void)
+{
+    /* Feeds assorted system state to RNG_RandomUpdate(): timing noise,
+     * memory status, the logical-drive bitmap, the computer name, process
+     * and thread identifiers, volume information and disk geometry.
+     * Individual queries that fail are skipped or contribute zeros. */
+    DWORD dwVal;
+    char buffer[256];
+    size_t nBytes;
+    MEMORYSTATUS sMem;
+    HANDLE hVal;
+    /* Zeroed so that a failed GetVolumeInformation() call cannot make the
+     * updates below read indeterminate values. */
+    DWORD dwSerialNum = 0, dwComponentLen = 0, dwSysFlags = 0;
+    char volName[128];
+    DWORD dwSectors, dwBytes, dwFreeClusters, dwNumClusters;
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+
+    sMem.dwLength = sizeof(sMem);
+    GlobalMemoryStatus(&sMem); // assorted memory stats
+    RNG_RandomUpdate(&sMem, sizeof(sMem));
+
+    dwVal = GetLogicalDrives();
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal)); // bitfields in bits 0-25
+
+    dwVal = sizeof(buffer);
+    if (GetComputerName(buffer, &dwVal))
+        RNG_RandomUpdate(buffer, dwVal);
+
+    hVal = GetCurrentProcess(); // 4 or 8 byte pseudo handle (a
+                                // constant!) of current process
+    RNG_RandomUpdate(&hVal, sizeof(hVal));
+
+    dwVal = GetCurrentProcessId(); // process ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    dwVal = GetCurrentThreadId(); // thread ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    /* Both strings start out empty so strlen() below is safe on failure. */
+    volName[0] = '\0';
+    buffer[0] = '\0';
+    GetVolumeInformation(NULL, volName, sizeof(volName),
+                         &dwSerialNum, &dwComponentLen, &dwSysFlags,
+                         buffer, sizeof(buffer));
+
+    RNG_RandomUpdate(volName, strlen(volName));
+    RNG_RandomUpdate(&dwSerialNum, sizeof(dwSerialNum));
+    RNG_RandomUpdate(&dwComponentLen, sizeof(dwComponentLen));
+    RNG_RandomUpdate(&dwSysFlags, sizeof(dwSysFlags));
+    RNG_RandomUpdate(buffer, strlen(buffer));
+
+    if (GetDiskFreeSpace(NULL, &dwSectors, &dwBytes, &dwFreeClusters,
+                         &dwNumClusters)) {
+        RNG_RandomUpdate(&dwSectors, sizeof(dwSectors));
+        RNG_RandomUpdate(&dwBytes, sizeof(dwBytes));
+        RNG_RandomUpdate(&dwFreeClusters, sizeof(dwFreeClusters));
+        RNG_RandomUpdate(&dwNumClusters, sizeof(dwNumClusters));
+    }
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+/*
+ * The RtlGenRandom function is declared in <ntsecapi.h>, but the
+ * declaration is missing a calling convention specifier. So we
+ * declare it manually here.
+ */
+#define RtlGenRandom SystemFunction036 /* the prototype below therefore declares SystemFunction036 */
+DECLSPEC_IMPORT BOOLEAN WINAPI RtlGenRandom(
+ PVOID RandomBuffer,
+ ULONG RandomBufferLength); /* the length parameter is a ULONG, not a size_t */
+
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{ /* Fills dest with maxLen bytes from RtlGenRandom; returns maxLen on success, 0 on any failure. */
+    size_t done = 0, n; /* RtlGenRandom takes a ULONG length, so a larger size_t request is split into chunks */
+    for (; done < maxLen; done += n) {
+        n = (maxLen - done > 0x7fffffffUL) ? 0x7fffffffUL : maxLen - done;
+        if (!RtlGenRandom((unsigned char *)dest + done, (ULONG)n)) return 0;
+    }
+    return done;
+}
+#endif /* is XP_WIN */