From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- security/nss/lib/freebl/mpi/README | 646 + security/nss/lib/freebl/mpi/doc/LICENSE | 11 + security/nss/lib/freebl/mpi/doc/LICENSE-MPL | 3 + security/nss/lib/freebl/mpi/doc/basecvt.pod | 65 + security/nss/lib/freebl/mpi/doc/build | 30 + security/nss/lib/freebl/mpi/doc/div.txt | 64 + security/nss/lib/freebl/mpi/doc/expt.txt | 94 + security/nss/lib/freebl/mpi/doc/gcd.pod | 28 + security/nss/lib/freebl/mpi/doc/invmod.pod | 34 + security/nss/lib/freebl/mpi/doc/isprime.pod | 63 + security/nss/lib/freebl/mpi/doc/lap.pod | 36 + security/nss/lib/freebl/mpi/doc/mpi-test.pod | 51 + security/nss/lib/freebl/mpi/doc/mul.txt | 77 + security/nss/lib/freebl/mpi/doc/pi.txt | 53 + security/nss/lib/freebl/mpi/doc/prime.txt | 6542 ++++++++ security/nss/lib/freebl/mpi/doc/prng.pod | 38 + security/nss/lib/freebl/mpi/doc/redux.txt | 86 + security/nss/lib/freebl/mpi/doc/sqrt.txt | 50 + security/nss/lib/freebl/mpi/doc/square.txt | 72 + security/nss/lib/freebl/mpi/doc/timing.txt | 213 + security/nss/lib/freebl/mpi/hpma512.s | 615 + security/nss/lib/freebl/mpi/hppa20.s | 904 ++ security/nss/lib/freebl/mpi/logtab.h | 28 + security/nss/lib/freebl/mpi/montmulf.c | 286 + security/nss/lib/freebl/mpi/montmulf.h | 65 + security/nss/lib/freebl/mpi/montmulf.il | 108 + security/nss/lib/freebl/mpi/montmulf.s | 1938 +++ security/nss/lib/freebl/mpi/montmulfv8.il | 108 + security/nss/lib/freebl/mpi/montmulfv8.s | 1818 +++ security/nss/lib/freebl/mpi/montmulfv9.il | 93 + security/nss/lib/freebl/mpi/montmulfv9.s | 2346 +++ security/nss/lib/freebl/mpi/mp_comba.c | 3235 ++++ .../nss/lib/freebl/mpi/mp_comba_amd64_masm.asm | 13066 +++++++++++++++ security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s | 16097 +++++++++++++++++++ security/nss/lib/freebl/mpi/mp_gf2m-priv.h | 73 + security/nss/lib/freebl/mpi/mp_gf2m.c | 677 + security/nss/lib/freebl/mpi/mp_gf2m.h | 28 + security/nss/lib/freebl/mpi/mpcpucache.c | 788 + security/nss/lib/freebl/mpi/mpcpucache_amd64.s | 861 + security/nss/lib/freebl/mpi/mpcpucache_x86.s | 902 ++ security/nss/lib/freebl/mpi/mpi-config.h | 56 + security/nss/lib/freebl/mpi/mpi-priv.h | 246 + security/nss/lib/freebl/mpi/mpi.c | 5241 ++++++ security/nss/lib/freebl/mpi/mpi.h | 363 + security/nss/lib/freebl/mpi/mpi_amd64.c | 32 + security/nss/lib/freebl/mpi/mpi_amd64_common.S | 409 + security/nss/lib/freebl/mpi/mpi_amd64_masm.asm | 388 + security/nss/lib/freebl/mpi/mpi_amd64_sun.s | 385 + security/nss/lib/freebl/mpi/mpi_arm.c | 175 + security/nss/lib/freebl/mpi/mpi_hp.c | 81 + security/nss/lib/freebl/mpi/mpi_i86pc.s | 313 + security/nss/lib/freebl/mpi/mpi_mips.s | 472 + security/nss/lib/freebl/mpi/mpi_sparc.c | 226 + security/nss/lib/freebl/mpi/mpi_sse2.s | 294 + security/nss/lib/freebl/mpi/mpi_x86.s | 541 + security/nss/lib/freebl/mpi/mpi_x86_asm.c | 531 + security/nss/lib/freebl/mpi/mpi_x86_os2.s | 538 + security/nss/lib/freebl/mpi/mplogic.c | 460 + security/nss/lib/freebl/mpi/mplogic.h | 55 + security/nss/lib/freebl/mpi/mpmontg.c | 1160 ++ security/nss/lib/freebl/mpi/mpprime.c | 610 + security/nss/lib/freebl/mpi/mpprime.h | 48 + security/nss/lib/freebl/mpi/mpv_sparc.c | 221 + security/nss/lib/freebl/mpi/mpv_sparcv8.s | 1607 ++ security/nss/lib/freebl/mpi/mpv_sparcv9.s | 1645 ++ security/nss/lib/freebl/mpi/mpvalpha.c | 183 + security/nss/lib/freebl/mpi/mulsqr.c | 84 + security/nss/lib/freebl/mpi/primes.c | 841 + 
security/nss/lib/freebl/mpi/vis_32.il | 1291 ++ security/nss/lib/freebl/mpi/vis_64.il | 997 ++ security/nss/lib/freebl/mpi/vis_proto.h | 234 + 71 files changed, 72019 insertions(+) create mode 100644 security/nss/lib/freebl/mpi/README create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE-MPL create mode 100644 security/nss/lib/freebl/mpi/doc/basecvt.pod create mode 100755 security/nss/lib/freebl/mpi/doc/build create mode 100644 security/nss/lib/freebl/mpi/doc/div.txt create mode 100644 security/nss/lib/freebl/mpi/doc/expt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/gcd.pod create mode 100644 security/nss/lib/freebl/mpi/doc/invmod.pod create mode 100644 security/nss/lib/freebl/mpi/doc/isprime.pod create mode 100644 security/nss/lib/freebl/mpi/doc/lap.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mpi-test.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mul.txt create mode 100644 security/nss/lib/freebl/mpi/doc/pi.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prime.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prng.pod create mode 100644 security/nss/lib/freebl/mpi/doc/redux.txt create mode 100644 security/nss/lib/freebl/mpi/doc/sqrt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/square.txt create mode 100644 security/nss/lib/freebl/mpi/doc/timing.txt create mode 100644 security/nss/lib/freebl/mpi/hpma512.s create mode 100644 security/nss/lib/freebl/mpi/hppa20.s create mode 100644 security/nss/lib/freebl/mpi/logtab.h create mode 100644 security/nss/lib/freebl/mpi/montmulf.c create mode 100644 security/nss/lib/freebl/mpi/montmulf.h create mode 100644 security/nss/lib/freebl/mpi/montmulf.il create mode 100644 security/nss/lib/freebl/mpi/montmulf.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.s create mode 100644 security/nss/lib/freebl/mpi/mp_comba.c create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m-priv.h create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.c create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.h create mode 100644 security/nss/lib/freebl/mpi/mpcpucache.c create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_amd64.s create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi-config.h create mode 100644 security/nss/lib/freebl/mpi/mpi-priv.h create mode 100644 security/nss/lib/freebl/mpi/mpi.c create mode 100644 security/nss/lib/freebl/mpi/mpi.h create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64.c create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_common.S create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_masm.asm create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_sun.s create mode 100644 security/nss/lib/freebl/mpi/mpi_arm.c create mode 100644 security/nss/lib/freebl/mpi/mpi_hp.c create mode 100644 security/nss/lib/freebl/mpi/mpi_i86pc.s create mode 100644 security/nss/lib/freebl/mpi/mpi_mips.s create mode 100644 security/nss/lib/freebl/mpi/mpi_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpi_sse2.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_asm.c create mode 100644 
security/nss/lib/freebl/mpi/mpi_x86_os2.s create mode 100644 security/nss/lib/freebl/mpi/mplogic.c create mode 100644 security/nss/lib/freebl/mpi/mplogic.h create mode 100644 security/nss/lib/freebl/mpi/mpmontg.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.h create mode 100644 security/nss/lib/freebl/mpi/mpv_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv8.s create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv9.s create mode 100644 security/nss/lib/freebl/mpi/mpvalpha.c create mode 100644 security/nss/lib/freebl/mpi/mulsqr.c create mode 100644 security/nss/lib/freebl/mpi/primes.c create mode 100644 security/nss/lib/freebl/mpi/vis_32.il create mode 100644 security/nss/lib/freebl/mpi/vis_64.il create mode 100644 security/nss/lib/freebl/mpi/vis_proto.h (limited to 'security/nss/lib/freebl/mpi') diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README new file mode 100644 index 0000000000..a49aa9d8d7 --- /dev/null +++ b/security/nss/lib/freebl/mpi/README @@ -0,0 +1,646 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +About the MPI Library +--------------------- + +The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision +signed integer arithmetic package. The implementation is not the most +efficient possible, but the code is small and should be fairly easily +portable to just about any machine that supports an ANSI C compiler, +as long as it is capable of at least 16-bit arithmetic (but also see +below for more on this). + +This library was written with an eye to cryptographic applications; +thus, some care is taken to make sure that temporary values are not +left lying around in memory when they are no longer in use. This adds +some overhead for zeroing buffers before they are released back into +the free pool; however, it gives you the assurance that there is only +one copy of your important values residing in your process's address +space at a time. Obviously, it is difficult to guarantee anything, in +a pre-emptive multitasking environment, but this at least helps you +keep a lid on the more obvious ways your data can get spread around in +memory. + + +Using the Library +----------------- + +To use the MPI library in your program, you must include the header: + +#include "mpi.h" + +This header provides all the type and function declarations you'll +need to use the library. Almost all the names defined by the library +begin with the prefix 'mp_', so it should be easy to keep them from +clashing with your program's namespace (he says, glibly, knowing full +well there are always pathological cases). + +There are a few things you may want to configure about the library. +By default, the MPI library uses an unsigned short for its digit type, +and an unsigned int for its word type. The word type must be big +enough to contain at least two digits, for the primitive arithmetic to +work out. On my machine, a short is 2 bytes and an int is 4 bytes -- +but if you have 64-bit ints, you might want to use a 4-byte digit and +an 8-byte word. I have tested the library using 1-byte digits and +2-byte words, as well. Whatever you choose to do, the things you need +to change are: + +(1) The type definitions for mp_digit and mp_word. + +(2) The macro DIGIT_FMT which tells mp_print() how to display a + single digit. 
This is just a printf() format string, so you
+    can adjust it appropriately.
+
+(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
+    largest value expressible in an mp_digit and an mp_word,
+    respectively.
+
+Both the mp_digit and mp_word should be UNSIGNED integer types. The
+code relies on having the full positive precision of the type used for
+digits and words.
+
+The remaining type definitions should be left alone, for the most
+part. The code in the library does not make any significant
+assumptions about the sizes of things, but there is little if any
+reason to change the other parameters, so I would recommend you leave
+them as you found them.
+
+
+Conventions
+-----------
+
+Most functions in the library return a value of type mp_err. This
+permits the library to communicate success or various kinds of failure
+to the calling program. The return values currently defined are:
+
+  MP_OKAY    - okay, operation succeeded, all's well
+  MP_YES     - okay, the answer is yes (same as MP_OKAY)
+  MP_NO      - okay, but answer is no (not MP_OKAY)
+  MP_MEM     - operation ran out of memory
+  MP_RANGE   - input parameter was out of range
+  MP_BADARG  - an invalid input parameter was provided
+  MP_UNDEF   - no output value is defined for this input
+
+The only function which currently uses MP_UNDEF is mp_invmod().
+Division by zero is undefined, but the division functions will return
+MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
+bogus mp_int structure to the function. MP_YES and MP_NO are not used
+by the library itself; they're defined so you can use them in your own
+extensions.
+
+If you need a readable interpretation of these error codes in your
+program, you may also use the mp_strerror() function. This function
+takes an mp_err as input, and returns a pointer to a human-readable
+string describing the meaning of the error. These strings are stored
+as constants within the library, so the caller should not attempt to
+modify or free the memory associated with these strings.
+
+The library represents values in signed-magnitude format. Values
+strictly less than zero are negative, all others are considered
+positive (zero is positive by fiat). You can access the 'sign' member
+of the mp_int structure directly, but better is to use the mp_cmp_z()
+function, to find out which side of zero the value lies on.
+
+Most arithmetic functions have a single-digit variant, as well as the
+full arbitrary-precision version. An mp_digit is an unsigned value
+between 0 and DIGIT_MAX inclusive. The radix is available as RADIX.
+The number of bits in a given digit is given as DIGIT_BIT.
+
+Generally, input parameters are given before output parameters.
+Unless otherwise specified, any input parameter can be re-used as an
+output parameter, without confusing anything.
+
+The basic numeric type defined by the library is an mp_int. Virtually
+all the functions in the library take a pointer to an mp_int as one of
+their parameters. An explanation of how to create and use these
+structures follows. And so, without further ado...
+
+
+Initialization and Cleanup
+--------------------------
+
+The basic numeric type defined by the library is an 'mp_int'.
+However, it is not sufficient to simply declare a variable of type
+mp_int in your program. These variables also need to be initialized
+before they can be used, to allocate the internal storage they require
+for computation.
+ +This is done using one of the following functions: + + mp_init(mp_int *mp); + mp_init_copy(mp_int *mp, mp_int *from); + mp_init_size(mp_int *mp, mp_size p); + +Each of these requires a pointer to a structure of type mp_int. The +basic mp_init() simply initializes the mp_int to a default size, and +sets its value to zero. If you would like to initialize a copy of an +existing mp_int, use mp_init_copy(), where the 'from' parameter is the +mp_int you'd like to make a copy of. The third function, +mp_init_size(), permits you to specify how many digits of precision +should be preallocated for your mp_int. This can help the library +avoid unnecessary re-allocations later on. + +The default precision used by mp_init() can be retrieved using: + + precision = mp_get_prec(); + +This returns the number of digits that will be allocated. You can +change this value by using: + + mp_set_prec(unsigned int prec); + +Any positive value is acceptable -- if you pass zero, the default +precision will be re-set to the compiled-in library default (this is +specified in the header file 'mpi-config.h', and typically defaults to +8 or 16). + +Just as you must allocate an mp_int before you can use it, you must +clean up the structure when you are done with it. This is performed +using the mp_clear() function. Remember that any mp_int that you +create as a local variable in a function must be mp_clear()'d before +that function exits, or else the memory allocated to that mp_int will +be orphaned and unrecoverable. + +To set an mp_int to a given value, the following functions are given: + + mp_set(mp_int *mp, mp_digit d); + mp_set_int(mp_int *mp, long z); + mp_set_ulong(mp_int *mp, unsigned long z); + +The mp_set() function sets the mp_int to a single digit value, while +mp_set_int() sets the mp_int to a signed long integer value. + +To set an mp_int to zero, use: + + mp_zero(mp_int *mp); + + +Copying and Moving +------------------ + +If you have two initialized mp_int's, and you want to copy the value +of one into the other, use: + + mp_copy(from, to) + +This takes care of clearing the old value of 'to', and copies the new +value into it. If 'to' is not yet initialized, use mp_init_copy() +instead (see above). + +Note: The library tries, whenever possible, to avoid allocating +---- new memory. Thus, mp_copy() tries first to satisfy the needs + of the copy by re-using the memory already allocated to 'to'. + Only if this proves insufficient will mp_copy() actually + allocate new memory. + + For this reason, if you know a priori that 'to' has enough + available space to hold 'from', you don't need to check the + return value of mp_copy() for memory failure. The USED() + macro tells you how many digits are used by an mp_int, and + the ALLOC() macro tells you how many are allocated. + +If you have two initialized mp_int's, and you want to exchange their +values, use: + + mp_exch(a, b) + +This is better than using mp_copy() with a temporary, since it will +not (ever) touch the memory allocator -- it just swaps the exact +contents of the two structures. The mp_exch() function cannot fail; +if you pass it an invalid structure, it just ignores it, and does +nothing. + + +Basic Arithmetic +---------------- + +Once you have initialized your integers, you can operate on them. 
The
+basic arithmetic functions on full mp_int values are:
+
+mp_add(a, b, c)       - computes c = a + b
+mp_sub(a, b, c)       - computes c = a - b
+mp_mul(a, b, c)       - computes c = a * b
+mp_sqr(a, b)          - computes b = a * a
+mp_div(a, b, q, r)    - computes q, r such that a = bq + r
+mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
+mp_expt(a, b, c)      - computes c = a ** b
+mp_2expt(a, k)        - computes a = 2^k
+
+The mp_div_2d() function efficiently computes division by powers of
+two. Either the q or r parameter may be NULL, in which case that
+portion of the computation will be discarded.
+
+The algorithms used for some of the computations here are described in
+the following files which are included with this distribution:
+
+mul.txt     Describes the multiplication algorithm
+div.txt     Describes the division algorithm
+expt.txt    Describes the exponentiation algorithm
+sqrt.txt    Describes the square-root algorithm
+square.txt  Describes the squaring algorithm
+
+There are single-digit versions of most of these routines, as well.
+In the following prototypes, 'd' is a single mp_digit:
+
+mp_add_d(a, d, c)     - computes c = a + d
+mp_sub_d(a, d, c)     - computes c = a - d
+mp_mul_d(a, d, c)     - computes c = a * d
+mp_mul_2(a, c)        - computes c = a * 2
+mp_div_d(a, d, q, r)  - computes q, r such that a = dq + r
+mp_div_2(a, c)        - computes c = a / 2
+mp_expt_d(a, d, c)    - computes c = a ** d
+
+The mp_mul_2() and mp_div_2() functions take advantage of the internal
+representation of an mp_int to do multiplication by two more quickly
+than mp_mul_d() would. Other basic functions of an arithmetic variety
+include:
+
+mp_zero(a)    - assign 0 to a
+mp_neg(a, c)  - negate a: c = -a
+mp_abs(a, c)  - absolute value: c = |a|
+
+
+Comparisons
+-----------
+
+Several comparison functions are provided. Each of these, unless
+otherwise specified, returns zero if the comparands are equal, < 0 if
+the first is less than the second, and > 0 if the first is greater
+than the second:
+
+mp_cmp_z(a)       - compare a <=> 0
+mp_cmp_d(a, d)    - compare a <=> d, d is a single digit
+mp_cmp(a, b)      - compare a <=> b
+mp_cmp_mag(a, b)  - compare |a| <=> |b|
+mp_isodd(a)       - return nonzero if odd, zero otherwise
+mp_iseven(a)      - return nonzero if even, zero otherwise
+
+
+Modular Arithmetic
+------------------
+
+Modular variations of the basic arithmetic functions are also
+supported. These are available if the MP_MODARITH parameter in
+mpi-config.h is turned on (it is by default). The modular arithmetic
+functions are:
+
+mp_mod(a, m, c)         - compute c = a (mod m), 0 <= c < m
+mp_mod_d(a, d, c)       - compute c = a (mod d), 0 <= c < d (see below)
+mp_addmod(a, b, m, c)   - compute c = (a + b) mod m
+mp_submod(a, b, m, c)   - compute c = (a - b) mod m
+mp_mulmod(a, b, m, c)   - compute c = (a * b) mod m
+mp_sqrmod(a, m, c)      - compute c = (a * a) mod m
+mp_exptmod(a, b, m, c)  - compute c = (a ** b) mod m
+mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
+
+The mp_sqr() function squares its input argument. A call to mp_sqr(a,
+c) is identical in meaning to mp_mul(a, a, c); however, if the
+MP_SQUARE variable is set true in mpi-config.h (see below), then it
+will be implemented with a different algorithm, that is supposed to
+take advantage of the redundant computation that takes place during
+squaring. Unfortunately, some compilers produce worse performance
+on this code, so you can change the behaviour at will. There is a
+utility program "mulsqr.c" that lets you test which does better on
+your system.
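+
+To make the calling conventions above concrete, here is a small
+sketch (not part of the original distribution; the operand values are
+illustrative, and per-call error checking is abbreviated to comments)
+that computes a modular exponentiation and prints the result:
+
+    #include <stdio.h>
+    #include "mpi.h"
+
+    int main(void)
+    {
+        mp_int a, b, m, c;
+        char   buf[64];
+
+        /* Initialize before use; each call returns an mp_err to check. */
+        mp_init(&a); mp_init(&b); mp_init(&m); mp_init(&c);
+
+        mp_read_radix(&a, "2381283", 10);  /* a = 2381283        */
+        mp_read_radix(&b, "235", 10);      /* b = 235            */
+        mp_read_radix(&m, "1000003", 10);  /* m = 1000003        */
+
+        mp_exptmod(&a, &b, &m, &c);        /* c = (a ** b) mod m */
+
+        mp_toradix(&c, buf, 10);           /* NUL-terminated string */
+        printf("%s\n", buf);
+
+        /* Clear when done, so no copies of the values linger in memory. */
+        mp_clear(&a); mp_clear(&b); mp_clear(&m); mp_clear(&c);
+        return 0;
+    }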
+
+The mp_sqrmod() function is analogous to the mp_sqr() function; it
+uses the mp_sqr() function rather than mp_mul(), and then performs the
+modular reduction. This probably won't help much unless you are doing
+a lot of them.
+
+See the file 'square.txt' for a synopsis of the algorithm used.
+
+Note: The mp_mod_d() function computes a modular reduction around
+----  a single digit d. The result is a single digit c.
+
+Because an inverse is defined for a (mod m) if and only if (a, m) = 1
+(that is, if a and m are relatively prime), mp_invmod() may not be
+able to compute an inverse for the arguments. In this case, it
+returns the value MP_UNDEF, and does not modify c. If an inverse is
+defined, however, it returns MP_OKAY, and sets c to the value of the
+inverse (mod m).
+
+See the file 'redux.txt' for a description of the modular reduction
+algorithm used by mp_exptmod().
+
+
+Greatest Common Divisor
+-----------------------
+
+The greatest common divisor of two values can be found using one of the
+following functions:
+
+mp_gcd(a, b, c)        - compute c = (a, b) using binary algorithm
+mp_lcm(a, b, c)        - compute c = [a, b] = ab / (a, b)
+mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
+
+Also provided is a function to compute modular inverses, if they
+exist:
+
+mp_invmod(a, m, c)     - compute c = a^-1 (mod m), if it exists
+
+The function mp_xgcd() computes the greatest common divisor, and also
+returns values of x and y satisfying Bezout's identity. This is used
+by mp_invmod() to find modular inverses. However, if you do not need
+these values, you will find that mp_gcd() is MUCH more efficient,
+since it doesn't need all the intermediate values that mp_xgcd()
+requires in order to compute x and y.
+
+The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
+algorithm due to Josef Stein.
+
+
+Input & Output Functions
+------------------------
+
+The following basic I/O routines are provided. These are present at
+all times:
+
+mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
+mp_read_raw(mp, s, len)   - convert a string of bytes to an mp_int
+mp_radix_size(mp, r)      - return length of buffer needed by mp_toradix()
+mp_raw_size(mp)           - return length of buffer needed by mp_toraw()
+mp_toradix(mp, str, r)    - convert an mp_int to a string of radix r
+                            digits
+mp_toraw(mp, str)         - convert an mp_int to a string of bytes
+mp_tovalue(ch, r)         - convert ch to its value when taken as
+                            a radix r digit, or -1 if invalid
+mp_strerror(err)          - get a string describing mp_err value 'err'
+
+If you compile the MPI library with MP_IOFUNC defined, you will also
+have access to the following additional I/O function:
+
+mp_print(mp, ofp)         - print an mp_int as text to output stream ofp
+
+Note that mp_radix_size() returns a size in bytes guaranteed to be AT
+LEAST big enough for the digits output by mp_toradix(). Because it
+uses an approximation technique to figure out how many digits will be
+needed, it may return a figure which is larger than necessary. Thus,
+the caller should not rely on the value to determine how many bytes
+will actually be written by mp_toradix(). The string mp_toradix()
+creates will be NUL terminated, so the standard C library function
+strlen() should be able to ascertain this for you, if you need it.
+
+The mp_read_radix() and mp_toradix() functions support bases from 2 to
+64 inclusive. If you require more general radix conversion facilities
+than this, you will need to write them yourself (that's why mp_div_d()
+is provided, after all).
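+
+As a small sketch of how these calls fit together (an illustration
+added here, not from the original text; it assumes an initialized
+mp_int supplied by the caller), printing a value in a given radix
+might look like:
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include "mpi.h"
+
+    /* Print an initialized mp_int in the given radix (2 to 64). */
+    void print_radix(mp_int *value, int radix)
+    {
+        int   len = mp_radix_size(value, radix); /* generous bound, see above */
+        char *str;
+
+        if (len <= 0 || (str = malloc(len)) == NULL)
+            return;
+        if (mp_toradix(value, str, radix) == MP_OKAY)
+            puts(str);                           /* NUL-terminated string */
+        free(str);
+    }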
+
+Note: mp_read_radix() will accept as digits either capital or
+----  lower-case letters. However, the current implementation of
+      mp_toradix() only outputs upper-case letters, when writing
+      bases between 10 and 36. The underlying code supports using
+      lower-case letters, but the interface stub does not have a
+      selector for it. You can add one yourself if you think it
+      is worthwhile -- I do not. Bases from 36 to 64 use lower-
+      case letters as distinct from upper-case. Bases 63 and
+      64 use the characters '+' and '/' as digits.
+
+      Note also that compiling with MP_IOFUNC defined will cause
+      inclusion of <stdio.h>, so if you are trying to write code
+      which does not depend on the standard C library, you will
+      probably want to avoid this option. This is needed because
+      the mp_print() function takes a standard library FILE * as
+      one of its parameters, and uses the fprintf() function.
+
+The mp_toraw() function converts the integer to a sequence of bytes,
+in big-endian ordering (most-significant byte first). Assuming your
+bytes are 8 bits wide, this corresponds to base 256. The sign is
+encoded as a single leading byte, whose value is 0 for zero or
+positive values, or 1 for negative values. The mp_read_raw() function
+reverses this process -- it takes a buffer of bytes, interprets the
+first as a sign indicator (0 = zero/positive, nonzero = negative), and
+the rest as a sequence of 1-byte digits in big-endian ordering.
+
+The mp_raw_size() function returns the exact number of bytes required
+to store the given integer in "raw" format (as described in the
+previous paragraph). Zero is returned in case of error; a valid
+integer will require at least three bytes of storage.
+
+In previous versions of the MPI library, an "external representation
+format" was supported. This was removed, however, because I found I
+was never using it, it was not as portable as I would have liked, and
+I decided it was a waste of space.
+
+
+Other Functions
+---------------
+
+The files 'mpprime.h' and 'mpprime.c' define some routines which are
+useful for divisibility testing and probabilistic primality testing.
+The routines defined are:
+
+mpp_divis(a, b)          - is a divisible by b?
+mpp_divis_d(a, d)        - is a divisible by digit d?
+mpp_random(a)            - set a to random value at current precision
+mpp_random_size(a, prec) - set a to random value at given precision
+
+Note: The mpp_random() and mpp_random_size() functions use the C
+----  library's rand() function to generate random values. It is
+      up to the caller to seed this generator before it is called.
+      These functions are not suitable for generating quantities
+      requiring cryptographic-quality randomness; they are intended
+      primarily for use in primality testing.
+
+      Note too that the MPI library does not call srand(), so your
+      application should do this, if you ever want the sequence
+      to change.
+
+mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
+                               in v? If so, let w be the index of
+                               that digit
+
+mpp_divis_primes(a, np)  - is a divisible by any of the first np
+                           primes? If so, set np to the prime
+                           which divided a.
+
+mpp_fermat(a, w)         - test if w^a = w (mod a). If so,
+                           returns MP_YES, otherwise MP_NO.
+
+mpp_pprime(a, nt)        - perform nt iterations of the Rabin-
+                           Miller probabilistic primality test
+                           on a. Returns MP_YES if all tests
+                           passed, or MP_NO if any test fails.
+
+The mpp_fermat() function works based on Fermat's little theorem, a
+consequence of which is that if p is a prime, and (w, p) = 1, then:
+
+   w^p = w (mod p)
+
+Put another way, if w^p != w (mod p), then p is not prime. The test
+is expensive to compute, but it helps to quickly eliminate an enormous
+class of composite numbers prior to Rabin-Miller testing.
+
+Building the Library
+--------------------
+
+The MPI library is designed to be as self-contained as possible. You
+should be able to compile it with your favourite ANSI C compiler, and
+link it into your program directly. If you are on a Unix system using
+the GNU C compiler (gcc), the following should work:
+
+% gcc -ansi -pedantic -Wall -O2 -c mpi.c
+
+The file 'mpi-config.h' defines several configurable parameters for
+the library, which you can adjust to suit your application. At the
+time of this writing, the available options are:
+
+MP_IOFUNC    - Define true to include the mp_print() function,
+               which is moderately useful for debugging. This
+               implicitly includes <stdio.h>.
+
+MP_MODARITH  - Define true to include the modular arithmetic
+               functions. If you don't need modular arithmetic
+               in your application, you can set this to zero to
+               leave out all the modular routines.
+
+MP_LOGTAB    - If true, the file "logtab.h" is included, which
+               is basically a static table of base 2 logarithms.
+               These are used to compute how big the buffers for
+               radix conversion need to be. If you set this false,
+               the library includes <math.h> and uses log(). This
+               typically forces you to link against math libraries.
+
+
+MP_ARGCHK    - Set to 0, 1, or 2. This defines how the argument
+               checking macro, ARGCHK(), gets expanded. If this
+               is set to zero, ARGCHK() expands to nothing; no
+               argument checks are performed. If this is 1, the
+               ARGCHK() macro expands to code that returns MP_BADARG
+               or similar at runtime. If it is 2, ARGCHK() expands
+               to an assert() call that aborts the program on a
+               bad input.
+
+MP_DEBUG     - Turns on debugging output. This is probably not at
+               all useful unless you are debugging the library. It
+               tends to spit out a LOT of output.
+
+MP_DEFPREC   - The default precision of a newly-created mp_int, in
+               digits. The precision can be changed at runtime by
+               the mp_set_prec() function, but this is its initial
+               value.
+
+MP_SQUARE    - If this is set to a nonzero value, the mp_sqr()
+               function will use an alternate algorithm that takes
+               advantage of the redundant inner product computation
+               when both multiplicands are identical. Unfortunately,
+               with some compilers this is actually SLOWER than just
+               calling mp_mul() with the same argument twice. So
+               if you set MP_SQUARE to zero, mp_sqr() will be expan-
+               ded into a call to mp_mul(). This applies to all
+               the uses of mp_sqr(), including mp_sqrmod() and the
+               internal calls to s_mp_sqr() inside mpi.c
+
+               The program 'mulsqr' (mulsqr.c) can be used to test
+               which works best for your configuration. Set up the
+               CC and CFLAGS variables in the Makefile, then type:
+
+                   make mulsqr
+
+               Invoke it with arguments similar to the following:
+
+                   mulsqr 25000 1024
+
+               That is, 25000 products computed on 1024-bit values.
+               The output will compare the two timings, and recommend
+               a setting for MP_SQUARE. It is off by default.
+
+If you would like to use the mp_print() function (see above), be sure
+to define MP_IOFUNC in mpi-config.h.
Many of the test drivers in the
+'tests' subdirectory expect this to be defined (although the test
+driver 'mpi-test' doesn't need it).
+
+The Makefile which comes with the library should take care of building
+the library for you, if you have set the CC and CFLAGS variables at
+the top of the file appropriately. By default, they are set up to
+use the GNU C compiler:
+
+CC=gcc
+CFLAGS=-ansi -pedantic -Wall -O2
+
+If all goes well, the library should compile without warnings using
+this combination. You should, of course, make whatever adjustments
+you find necessary.
+
+The MPI library distribution comes with several additional programs
+which are intended to demonstrate the use of the library, and provide
+a framework for testing it. There are a handful of test driver
+programs, in the files named 'mptest-X.c', where X is a digit. Also,
+there are some simple command-line utilities (in the 'utils'
+directory) for manipulating large numbers. These include:
+
+basecvt.c   A radix-conversion program, supporting bases from
+            2 to 64 inclusive.
+
+bbsrand.c   A BBS (quadratic residue) pseudo-random number
+            generator. The file 'bbsrand.c' is just the driver
+            for the program; the real code lives in the files
+            'bbs_rand.h' and 'bbs_rand.c'
+
+dec2hex.c   Converts decimal to hexadecimal
+
+gcd.c       Computes the greatest common divisor of two values.
+            If invoked as 'xgcd', also computes constants x and
+            y such that (a, b) = ax + by, in accordance with
+            Bezout's identity.
+
+hex2dec.c   Converts hexadecimal to decimal
+
+invmod.c    Computes modular inverses
+
+isprime.c   Performs the Rabin-Miller probabilistic primality
+            test on a number. Values which fail this test are
+            definitely composite, and those which pass are very
+            likely to be prime (although there are no guarantees)
+
+lap.c       Computes the order (least annihilating power) of
+            a value v modulo m. Very dumb algorithm.
+
+primegen.c  Generates large (probable) primes.
+
+prng.c      A pseudo-random number generator based on the
+            BBS generator code in 'bbs_rand.c'
+
+sieve.c     Implements the Sieve of Eratosthenes, using a big
+            bitmap, to generate a list of prime numbers.
+
+fact.c      Computes the factorial of an arbitrary precision
+            integer (iterative).
+
+exptmod.c   Computes arbitrary precision modular exponentiation
+            from the command line (exptmod a b m -> a^b (mod m))
+
+Most of these can be built from the Makefile that comes with the
+library. Try 'make tools', if your environment supports it.
+
+
+Acknowledgements:
+----------------
+
+The algorithms used in this library were drawn primarily from Volume
+2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
+"Seminumerical Algorithms". Barrett's algorithm for modular reduction
+came from Menezes, van Oorschot, and Vanstone's _Handbook of Applied
+Cryptography_, Chapter 14.
+
+Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
+bug in mp_read_raw() that made things break on platforms which use
+signed chars.
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger.
You can contact
+the author as follows:
+
+E-mail:
+
+Postal:  8000 Cummings Hall, Thayer School of Engineering
+         Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
+         9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
+
+Last updated: 16-Jan-2000
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE
new file mode 100644
index 0000000000..35cca68ce9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE
@@ -0,0 +1,11 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+basecvt.pod
+gcd.pod
+invmod.pod
+isprime.pod
+lap.pod
+mpi-test.pod
+prime.txt
+prng.pod
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
new file mode 100644
index 0000000000..41dc2327f1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod
new file mode 100644
index 0000000000..c3d87fbc7e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ basecvt - radix conversion for arbitrary precision integers
+
+=head1 SYNOPSIS
+
+ basecvt <ibase> <obase> [values]
+
+=head1 DESCRIPTION
+
+The B<basecvt> program is a command-line tool for converting integers
+of arbitrary precision from one radix to another. The current version
+supports radix values from 2 (binary) to 64, inclusive. The first two
+command line arguments specify the input and output radix, in base 10.
+Any further arguments are taken to be integers notated in the input
+radix, and these are converted to the output radix. The output is
+written, one integer per line, to standard output.
+
+When reading integers, only digits considered "valid" for the input
+radix are considered. Processing of an integer terminates when an
+invalid input digit is encountered. So, for example, if you set the
+input radix to 10 and enter '10ACF', B<basecvt> would assume that you
+had entered '10' and ignore the rest of the string.
+
+If no values are provided, no output is written, but the program
+simply terminates with a zero exit status. Error diagnostics are
+written to standard error in the event of out-of-range radix
+specifications. Regardless of the actual values of the input and
+output radix, the radix arguments are taken to be in base 10 (decimal)
+notation.
+
+=head1 DIGITS
+
+For radices from 2-10, standard ASCII decimal digits 0-9 are used for
+both input and output. For radices from 11-36, the ASCII letters A-Z
+are also included, following the convention used in hexadecimal. In
+this range, input is accepted in either upper or lower case, although
+on output only lower-case letters are used.
+
+For radices from 37-62, the output includes both upper- and lower-case
+ASCII letters, and case matters. In this range, case is distinguished
+both for input and for output values.
+
+For radices 63 and 64, the characters '+' (plus) and '/' (forward
+solidus) are also used.
These are derived from the MIME base64
+encoding scheme. The overall encoding is not the same as base64,
+because the ASCII digits are used for the bottom of the range, and the
+letters are shifted upward; however, the output will consist of the
+same character set.
+
+This input and output behaviour is inherited from the MPI library used
+by B<basecvt>, and so is not configurable at runtime.
+
+=head1 SEE ALSO
+
+ dec2hex(1), hex2dec(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build
new file mode 100755
index 0000000000..4d75b1e5a2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/build
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+VERS="1.7p6"
+SECT="1"
+NAME="MPI Tools"
+
+echo "Building manual pages ..."
+case $# in
+  0)
+    files=`ls *.pod`
+    ;;
+  *)
+    files=$*
+    ;;
+esac
+
+for name in $files
+do
+  echo -n "$name ... "
+#  sname=`noext $name`
+  sname=`basename $name .pod`
+  pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
+  echo "(done)"
+done
+
+echo "Finished building."
+
diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt
new file mode 100644
index 0000000000..c13fb6ef18
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/div.txt
@@ -0,0 +1,64 @@
+Division
+
+This describes the division algorithm used by the MPI library.
+
+Input:   a, b; a > b
+Compute: Q, R; a = Qb + R
+
+The input numbers are normalized so that the high-order digit of b is
+at least half the radix. This guarantees that we have a reasonable
+way to guess at the digits of the quotient (this method was taken from
+Knuth, vol. 2, with adaptations).
+
+To normalize, test the high-order digit of b. If it is less than half
+the radix, multiply both a and b by d, where:
+
+         radix - 1
+    d = -----------
+         bmax + 1
+
+...where bmax is the high-order digit of b. Otherwise, set d = 1.
+
+Given normalized values for a and b, let the notation a[n] denote the
+nth digit of a. Let #a be the number of significant figures of a (not
+including any leading zeroes).
+
+    Let R = 0
+    Let p = #a - 1
+
+    while(p >= 0)
+      do
+        R = (R * radix) + a[p]
+        p = p - 1
+      while(R < b and p >= 0)
+
+      if(R < b)
+        break
+
+      q = (R[#R - 1] * radix) + R[#R - 2]
+      q = q / b[#b - 1]
+
+      T = b * q
+
+      while(T > R)
+        q = q - 1
+        T = T - b
+      endwhile
+
+      R = R - T
+
+      Q = (Q * radix) + q
+
+    endwhile
+
+At this point, Q is the quotient, and R is the normalized remainder.
+To denormalize R, compute:
+
+    R = (R / d)
+
+At this point, you are finished.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt
new file mode 100644
index 0000000000..bd9d6f1960
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/expt.txt
@@ -0,0 +1,94 @@
+Exponentiation
+
+For exponentiation, the MPI library uses a simple and fairly standard
+square-and-multiply method.
The algorithm is this:
+Input: a, b
+Output: a ** b
+
+  s = 1
+
+  while(b != 0)
+    if(b is odd)
+      s = s * a
+    endif
+
+    b = b / 2
+
+    a = a * a
+  endwhile
+
+  return s
+
+The modular exponentiation is done the same way, except replacing:
+
+  s = s * a
+
+with
+  s = (s * a) mod m
+
+and replacing
+
+  a = a * a
+
+with
+
+  a = (a * a) mod m
+
+Here is a sample exponentiation using the MPI library, as compared to
+the same problem solved by the Unix 'bc' program on my system:
+
+Computation of 2,381,283 ** 235
+
+'bc' says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+MPI says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+Diff says:
+% diff bc.txt mp.txt
+%
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
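+
+A small C rendering of the same square-and-multiply loop, for single
+machine words rather than mp_ints (a sketch only, not part of the
+original file: the product s * a overflows for moduli of 32 bits or
+more):
+
+    #include <stdint.h>
+
+    /* s accumulates a factor for each set bit of b, while a is
+       squared at each step, exactly as in the pseudocode above. */
+    uint64_t exptmod(uint64_t a, uint64_t b, uint64_t m)
+    {
+        uint64_t s = 1;
+
+        a %= m;
+        while (b != 0) {
+            if (b & 1)               /* b is odd */
+                s = (s * a) % m;
+            b >>= 1;                 /* b = b / 2 */
+            a = (a * a) % m;         /* a = a * a, reduced mod m */
+        }
+        return s;
+    }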
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod
new file mode 100644
index 0000000000..b5b8fa34fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/gcd.pod
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ gcd - compute greatest common divisor of two integers
+
+=head1 SYNOPSIS
+
+ gcd <a> <b>
+
+=head1 DESCRIPTION
+
+The B<gcd> program computes the greatest common divisor of two
+arbitrary-precision integers I<a> and I<b>. The result is written in
+standard decimal notation to the standard output.
+
+If I<b> is zero, B<gcd> will print an error message and exit.
+
+=head1 SEE ALSO
+
+invmod(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod
new file mode 100644
index 0000000000..0194f44884
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/invmod.pod
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ invmod - compute modular inverse of an integer
+
+=head1 SYNOPSIS
+
+ invmod <a> <m>
+
+=head1 DESCRIPTION
+
+The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
+that inverse exists. Both I<a> and I<m> are arbitrary-precision
+integers in decimal notation. The result is written in standard
+decimal notation to the standard output.
+
+If there is no inverse, the message:
+
+ No inverse
+
+...will be printed to the standard output (an inverse exists if and
+only if the greatest common divisor of I<a> and I<m> is 1).
+
+=head1 SEE ALSO
+
+gcd(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod
new file mode 100644
index 0000000000..a8ec1f7ee3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/isprime.pod
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ isprime - probabilistic primality testing
+
+=head1 SYNOPSIS
+
+ isprime <a>
+
+=head1 DESCRIPTION
+
+The B<isprime> program attempts to determine whether the arbitrary
+precision integer I<a> is prime. It first tests I<a> for divisibility
+by the first 170 or so small primes, and assuming I<a> is not
+divisible by any of these, applies 15 iterations of the Rabin-Miller
+probabilistic primality test.
+
+If the program discovers that the number is composite, it will print:
+
+ Not prime (reason)
+
+Where I<reason> is either:
+
+ divisible by small prime x
+
+Or:
+
+ failed nth pseudoprime test
+
+In the first case, I<x> indicates the first small prime factor that
+was found. In the second case, I<n> indicates which of the
+pseudoprime tests failed (numbered from 1).
+
+If this happens, the number is definitely not prime.
However, if the
+number succeeds, this message results:
+
+ Probably prime, 1 in 4^15 chance of false positive
+
+If this happens, the number is prime with very high probability, but
+its primality has not been absolutely proven, only demonstrated to a
+very convincing degree.
+
+The value I<a> can be input in standard decimal notation, or, if it is
+prefixed with I<0x>, it will be read as hexadecimal.
+
+=head1 ENVIRONMENT
+
+You can control how many iterations of Rabin-Miller are performed on
+the candidate number by setting the I<RM_TESTS> environment variable
+to an integer value before starting up B<isprime>. This will change
+the output slightly if the number passes all the tests.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod
new file mode 100644
index 0000000000..47539fbbf9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/lap.pod
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ lap - compute least annihilating power of a number
+
+=head1 SYNOPSIS
+
+ lap <a> <m>
+
+=head1 DESCRIPTION
+
+The B<lap> program computes the order of I<a> modulo I<m>, for
+arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
+modulo I<m> is defined as the smallest positive value I<n> for which
+I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
+order may not exist, if I<m> is composite.
+
+=head1 RESTRICTIONS
+
+This program is very slow, especially for large moduli. It is
+intended as a way to help find primitive elements in a modular field,
+but it does not do so in a particularly efficient manner. It was
+written simply to help verify that a particular candidate does not
+have an obviously short cycle mod I<m>.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), isprime(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
new file mode 100644
index 0000000000..b05f866e5e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ mpi-test - automated test program for MPI library
+
+=head1 SYNOPSIS
+
+ mpi-test <suite> [quiet]
+ mpi-test list
+ mpi-test help
+
+=head1 DESCRIPTION
+
+The B<mpi-test> program is a general unit test driver for the MPI
+library. It is used to verify that the library works as it is
+supposed to on your architecture. As with most such things, passing
+all the tests in B<mpi-test> does not guarantee the code is correct,
+but if any of them fail, there are certainly problems.
+
+Each major function of the library can be tested individually. For a
+list of the test suites understood by B<mpi-test>, run it with the
+I<list> command line option:
+
+ mpi-test list
+
+This will display a list of the available test suites and a brief
+synopsis of what each one does. For a brief overview of this
+document, run B<mpi-test> I<help>.
+
+B<mpi-test> exits with a zero status if the selected test succeeds, or
+a nonzero status if it fails.
If a I<suite> which is not
+understood by B<mpi-test> is given, a diagnostic is printed to the
+standard error, and the program exits with a result code of 2. If a
+test fails, the result code will be 1, and a diagnostic is ordinarily
+printed to the standard error. However, if the I<quiet> option is
+provided, these diagnostics will be suppressed.
+
+=head1 RESTRICTIONS
+
+Only a few canned test cases are provided. The solutions have been
+verified using the GNU bc(1) program, so bugs there may cause problems
+here; however, this is very unlikely, so if a test fails, it is almost
+certainly my fault, not bc(1)'s.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt
new file mode 100644
index 0000000000..975f56ddbe
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mul.txt
@@ -0,0 +1,77 @@
+Multiplication
+
+This describes the multiplication algorithm used by the MPI library.
+
+This is basically a standard "schoolbook" algorithm. It is slow --
+O(mn) for m = #a, n = #b -- but easy to implement and verify.
+Basically, we run two nested loops, as illustrated here (R is the
+radix):
+
+k = 0
+for j <- 0 to (#b - 1)
+  for i <- 0 to (#a - 1)
+    w = (a[i] * b[j]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k;
+  k = 0;
+endfor
+
+It is necessary that 'w' have room for at least two radix R digits.
+The product of any two digits in radix R is at most:
+
+  (R - 1)(R - 1) = R^2 - 2R + 1
+
+Since a two-digit radix-R number can hold R^2 - 1 distinct values,
+this insures that the product will fit into the two-digit register.
+
+To insure that two digits is enough for w, we must also show that
+there is room for the carry-in from the previous multiplication, and
+the current value of the product digit that is being recomputed.
+Assuming each of these may be as big as R - 1 (and no larger,
+certainly), two digits will be enough if and only if:
+
+  (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
+
+Solving this equation shows that, indeed, this is the case:
+
+  R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
+
+  R^2 - 1 <= R^2 - 1
+
+This suggests that a good radix would be one more than the largest
+value that can be held in half a machine word -- so, for example, as
+in this implementation, where we used a radix of 65536 on a machine
+with 4-byte words. Another advantage of a radix of this sort is that
+binary-level operations are easy on numbers in this representation.
+
+Here's an example multiplication worked out longhand in radix-10,
+using the above algorithm:
+
+   a =     999
+   b =   x 999
+   -------------
+   p =   98001
+
+w = (a[jx] * b[ix]) + kin + c[ix + jx]
+c[ix+jx] = w % RADIX
+k = w / RADIX
+                                         product
+ix jx a[jx] b[ix] kin w      c[i+j] kout 000000
+0  0  9     9     0   81+0+0 1      8    000001
+0  1  9     9     8   81+8+0 9      8    000091
+0  2  9     9     8   81+8+0 9      8    000991
+                                8   0    008991
+1  0  9     9     0   81+0+9 0      9    008901
+1  1  9     9     9   81+9+9 9      9    008901
+1  2  9     9     9   81+9+8 8      9    008901
+                                9   0    098901
+2  0  9     9     0   81+0+9 0      9    098001
+2  1  9     9     9   81+9+8 8      9    098001
+2  2  9     9     9   81+9+9 9      9    098001
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
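+
+A compact C rendering of the same schoolbook loop, using 16-bit digits
+and a 32-bit word for w as discussed above (a sketch only, not part of
+the original file: c must have room for na + nb digits and start out
+zeroed):
+
+    #include <stdint.h>
+
+    void mul(const uint16_t *a, int na, const uint16_t *b, int nb,
+             uint16_t *c)
+    {
+        for (int j = 0; j < nb; j++) {
+            uint32_t k = 0;                          /* carry */
+            for (int i = 0; i < na; i++) {
+                uint32_t w = (uint32_t)a[i] * b[j] + k + c[i + j];
+                c[i + j] = (uint16_t)(w % 65536);    /* w mod R */
+                k = w / 65536;                       /* w div R */
+            }
+            c[na + j] = (uint16_t)k;                 /* row's final carry */
+        }
+    }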
diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt
new file mode 100644
index 0000000000..a6ef91137f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/pi.txt
@@ -0,0 +1,53 @@
+This file describes how pi is computed by the program in 'pi.c' (see
+the utils subdirectory).
+
+Basically, we use Machin's formula, which is what everyone in the
+world uses as a simple method for computing approximations to pi.
+This works for up to a few thousand digits without too much effort.
+Beyond that, though, it gets too slow.
+
+Machin's formula states:
+
+  pi := 16 * arctan(1/5) - 4 * arctan(1/239)
+
+We compute this in integer arithmetic by first multiplying everything
+through by 10^d, where 'd' is the number of digits of pi we wanted to
+compute. It turns out, the last few digits will be wrong, but the
+number of digits that are wrong is usually very small (ordinarily only
+2-3). Having done this, we compute the arctan() function using the
+formula:
+
+                  1     1       1       1       1
+  arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
+                  x    3 x^3   5 x^5   7 x^7   9 x^9
+
+This is done iteratively by computing the first term manually, and
+then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
+current figure. This is then added to (or subtracted from) a running
+sum, as appropriate. The iteration continues until we overflow our
+available precision and the current figure goes to zero under integer
+division. At that point, we're finished.
+
+Actually, we get a couple extra bits of precision out of the fact that
+we know we're computing y * arctan(1/x), by setting up the multiplier
+as:
+
+  y * 10^d
+
+... instead of just 10^d. There is also a bit of cleverness in how
+the loop is constructed, to avoid special-casing the first term.
+Check out the code for arctan() in 'pi.c', if you are interested in
+seeing how it is set up.
+
+Thanks to Jason P. for this algorithm, which I assembled from notes
+and programs found on his cool "Pile of Pi Programs" page, at:
+
+  http://www.isr.umd.edu/~jasonp/pipage.html
+
+Thanks also to Henrik Johansson , from
+whose pi program I borrowed the clever idea of pre-multiplying by x in
+order to avoid a special case on the loop iteration.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
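+
+A sketch of the arctan() loop described above, written against the
+calls documented in the MPI README (this is not the actual code from
+'pi.c'; error checking is omitted, and both x*x and k are assumed to
+fit in an mp_digit):
+
+    #include "mpi.h"
+
+    /* Compute sum = mult * arctan(1/x), where mult = y * 10^d has
+       already been set up by the caller. Pre-multiplying t by x
+       makes the first pass of the loop produce the first term,
+       avoiding a special case. */
+    void arctan(mp_digit x, mp_int *mult, mp_int *sum)
+    {
+        mp_int   t, q;
+        mp_digit k = 1;
+        int      sign = 1;
+
+        mp_init_copy(&t, mult);
+        mp_mul_d(&t, x, &t);                         /* t = mult * x */
+        mp_init(&q);
+        mp_zero(sum);
+
+        while (mp_cmp_z(&t) != 0) {
+            mp_div_d(&t, (mp_digit)(x * x), &t, NULL); /* t = t / x^2 */
+            mp_div_d(&t, k, &q, NULL);                 /* q = t / k   */
+            if (sign > 0)
+                mp_add(sum, &q, sum);                  /* sum += q */
+            else
+                mp_sub(sum, &q, sum);                  /* sum -= q */
+            sign = -sign;
+            k += 2;                                    /* k = 3, 5, 7, ... */
+        }
+        mp_clear(&t);
+        mp_clear(&q);
+    }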
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt new file mode 100644 index 0000000000..694797d5f3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/prime.txt @@ -0,0 +1,6542 @@ +2 +3 +5 +7 +11 +13 +17 +19 +23 +29 +31 +37 +41 +43 +47 +53 +59 +61 +67 +71 +73 +79 +83 +89 +97 +101 +103 +107 +109 +113 +127 +131 +137 +139 +149 +151 +157 +163 +167 +173 +179 +181 +191 +193 +197 +199 +211 +223 +227 +229 +233 +239 +241 +251 +257 +263 +269 +271 +277 +281 +283 +293 +307 +311 +313 +317 +331 +337 +347 +349 +353 +359 +367 +373 +379 +383 +389 +397 +401 +409 +419 +421 +431 +433 +439 +443 +449 +457 +461 +463 +467 +479 +487 +491 +499 +503 +509 +521 +523 +541 +547 +557 +563 +569 +571 +577 +587 +593 +599 +601 +607 +613 +617 +619 +631 +641 +643 +647 +653 +659 +661 +673 +677 +683 +691 +701 +709 +719 +727 +733 +739 +743 +751 +757 +761 +769 +773 +787 +797 +809 +811 +821 +823 +827 +829 +839 +853 +857 +859 +863 +877 +881 +883 +887 +907 +911 +919 +929 +937 +941 +947 +953 +967 +971 +977 +983 +991 +997 +1009 +1013 +1019 +1021 +1031 +1033 +1039 +1049 +1051 +1061 +1063 +1069 +1087 +1091 +1093 +1097 +1103 +1109 +1117 +1123 +1129 +1151 +1153 +1163 +1171 +1181 +1187 +1193 +1201 +1213 +1217 +1223 +1229 +1231 +1237 +1249 +1259 +1277 +1279 +1283 +1289 +1291 +1297 +1301 +1303 +1307 +1319 +1321 +1327 +1361 +1367 +1373 +1381 +1399 +1409 +1423 +1427 +1429 +1433 +1439 +1447 +1451 +1453 +1459 +1471 +1481 +1483 +1487 +1489 +1493 +1499 +1511 +1523 +1531 +1543 +1549 +1553 +1559 +1567 +1571 +1579 +1583 +1597 +1601 +1607 +1609 +1613 +1619 +1621 +1627 +1637 +1657 +1663 +1667 +1669 +1693 +1697 +1699 +1709 +1721 +1723 +1733 +1741 +1747 +1753 +1759 +1777 +1783 +1787 +1789 +1801 +1811 +1823 +1831 +1847 +1861 +1867 +1871 +1873 +1877 +1879 +1889 +1901 +1907 +1913 +1931 +1933 +1949 +1951 +1973 +1979 +1987 +1993 +1997 +1999 +2003 +2011 +2017 +2027 +2029 +2039 +2053 +2063 +2069 +2081 +2083 +2087 +2089 +2099 +2111 +2113 +2129 +2131 +2137 +2141 +2143 +2153 +2161 +2179 +2203 +2207 +2213 +2221 +2237 +2239 +2243 +2251 +2267 +2269 +2273 +2281 +2287 +2293 +2297 +2309 +2311 +2333 +2339 +2341 +2347 +2351 +2357 +2371 +2377 +2381 +2383 +2389 +2393 +2399 +2411 +2417 +2423 +2437 +2441 +2447 +2459 +2467 +2473 +2477 +2503 +2521 +2531 +2539 +2543 +2549 +2551 +2557 +2579 +2591 +2593 +2609 +2617 +2621 +2633 +2647 +2657 +2659 +2663 +2671 +2677 +2683 +2687 +2689 +2693 +2699 +2707 +2711 +2713 +2719 +2729 +2731 +2741 +2749 +2753 +2767 +2777 +2789 +2791 +2797 +2801 +2803 +2819 +2833 +2837 +2843 +2851 +2857 +2861 +2879 +2887 +2897 +2903 +2909 +2917 +2927 +2939 +2953 +2957 +2963 +2969 +2971 +2999 +3001 +3011 +3019 +3023 +3037 +3041 +3049 +3061 +3067 +3079 +3083 +3089 +3109 +3119 +3121 +3137 +3163 +3167 +3169 +3181 +3187 +3191 +3203 +3209 +3217 +3221 +3229 +3251 +3253 +3257 +3259 +3271 +3299 +3301 +3307 +3313 +3319 +3323 +3329 +3331 +3343 +3347 +3359 +3361 +3371 +3373 +3389 +3391 +3407 +3413 +3433 +3449 +3457 +3461 +3463 +3467 +3469 +3491 +3499 +3511 +3517 +3527 +3529 +3533 +3539 +3541 +3547 +3557 +3559 +3571 +3581 +3583 +3593 +3607 +3613 +3617 +3623 +3631 +3637 +3643 +3659 +3671 +3673 +3677 +3691 +3697 +3701 +3709 +3719 +3727 +3733 +3739 +3761 +3767 +3769 +3779 +3793 +3797 +3803 +3821 +3823 +3833 +3847 +3851 +3853 +3863 +3877 +3881 +3889 +3907 +3911 +3917 +3919 +3923 +3929 +3931 +3943 +3947 +3967 +3989 +4001 +4003 +4007 +4013 +4019 +4021 +4027 +4049 +4051 +4057 +4073 +4079 +4091 +4093 +4099 +4111 +4127 +4129 +4133 +4139 +4153 +4157 +4159 +4177 +4201 +4211 +4217 +4219 +4229 +4231 +4241 +4243 +4253 +4259 +4261 +4271 +4273 
+4283 +4289 +4297 +4327 +4337 +4339 +4349 +4357 +4363 +4373 +4391 +4397 +4409 +4421 +4423 +4441 +4447 +4451 +4457 +4463 +4481 +4483 +4493 +4507 +4513 +4517 +4519 +4523 +4547 +4549 +4561 +4567 +4583 +4591 +4597 +4603 +4621 +4637 +4639 +4643 +4649 +4651 +4657 +4663 +4673 +4679 +4691 +4703 +4721 +4723 +4729 +4733 +4751 +4759 +4783 +4787 +4789 +4793 +4799 +4801 +4813 +4817 +4831 +4861 +4871 +4877 +4889 +4903 +4909 +4919 +4931 +4933 +4937 +4943 +4951 +4957 +4967 +4969 +4973 +4987 +4993 +4999 +5003 +5009 +5011 +5021 +5023 +5039 +5051 +5059 +5077 +5081 +5087 +5099 +5101 +5107 +5113 +5119 +5147 +5153 +5167 +5171 +5179 +5189 +5197 +5209 +5227 +5231 +5233 +5237 +5261 +5273 +5279 +5281 +5297 +5303 +5309 +5323 +5333 +5347 +5351 +5381 +5387 +5393 +5399 +5407 +5413 +5417 +5419 +5431 +5437 +5441 +5443 +5449 +5471 +5477 +5479 +5483 +5501 +5503 +5507 +5519 +5521 +5527 +5531 +5557 +5563 +5569 +5573 +5581 +5591 +5623 +5639 +5641 +5647 +5651 +5653 +5657 +5659 +5669 +5683 +5689 +5693 +5701 +5711 +5717 +5737 +5741 +5743 +5749 +5779 +5783 +5791 +5801 +5807 +5813 +5821 +5827 +5839 +5843 +5849 +5851 +5857 +5861 +5867 +5869 +5879 +5881 +5897 +5903 +5923 +5927 +5939 +5953 +5981 +5987 +6007 +6011 +6029 +6037 +6043 +6047 +6053 +6067 +6073 +6079 +6089 +6091 +6101 +6113 +6121 +6131 +6133 +6143 +6151 +6163 +6173 +6197 +6199 +6203 +6211 +6217 +6221 +6229 +6247 +6257 +6263 +6269 +6271 +6277 +6287 +6299 +6301 +6311 +6317 +6323 +6329 +6337 +6343 +6353 +6359 +6361 +6367 +6373 +6379 +6389 +6397 +6421 +6427 +6449 +6451 +6469 +6473 +6481 +6491 +6521 +6529 +6547 +6551 +6553 +6563 +6569 +6571 +6577 +6581 +6599 +6607 +6619 +6637 +6653 +6659 +6661 +6673 +6679 +6689 +6691 +6701 +6703 +6709 +6719 +6733 +6737 +6761 +6763 +6779 +6781 +6791 +6793 +6803 +6823 +6827 +6829 +6833 +6841 +6857 +6863 +6869 +6871 +6883 +6899 +6907 +6911 +6917 +6947 +6949 +6959 +6961 +6967 +6971 +6977 +6983 +6991 +6997 +7001 +7013 +7019 +7027 +7039 +7043 +7057 +7069 +7079 +7103 +7109 +7121 +7127 +7129 +7151 +7159 +7177 +7187 +7193 +7207 +7211 +7213 +7219 +7229 +7237 +7243 +7247 +7253 +7283 +7297 +7307 +7309 +7321 +7331 +7333 +7349 +7351 +7369 +7393 +7411 +7417 +7433 +7451 +7457 +7459 +7477 +7481 +7487 +7489 +7499 +7507 +7517 +7523 +7529 +7537 +7541 +7547 +7549 +7559 +7561 +7573 +7577 +7583 +7589 +7591 +7603 +7607 +7621 +7639 +7643 +7649 +7669 +7673 +7681 +7687 +7691 +7699 +7703 +7717 +7723 +7727 +7741 +7753 +7757 +7759 +7789 +7793 +7817 +7823 +7829 +7841 +7853 +7867 +7873 +7877 +7879 +7883 +7901 +7907 +7919 +7927 +7933 +7937 +7949 +7951 +7963 +7993 +8009 +8011 +8017 +8039 +8053 +8059 +8069 +8081 +8087 +8089 +8093 +8101 +8111 +8117 +8123 +8147 +8161 +8167 +8171 +8179 +8191 +8209 +8219 +8221 +8231 +8233 +8237 +8243 +8263 +8269 +8273 +8287 +8291 +8293 +8297 +8311 +8317 +8329 +8353 +8363 +8369 +8377 +8387 +8389 +8419 +8423 +8429 +8431 +8443 +8447 +8461 +8467 +8501 +8513 +8521 +8527 +8537 +8539 +8543 +8563 +8573 +8581 +8597 +8599 +8609 +8623 +8627 +8629 +8641 +8647 +8663 +8669 +8677 +8681 +8689 +8693 +8699 +8707 +8713 +8719 +8731 +8737 +8741 +8747 +8753 +8761 +8779 +8783 +8803 +8807 +8819 +8821 +8831 +8837 +8839 +8849 +8861 +8863 +8867 +8887 +8893 +8923 +8929 +8933 +8941 +8951 +8963 +8969 +8971 +8999 +9001 +9007 +9011 +9013 +9029 +9041 +9043 +9049 +9059 +9067 +9091 +9103 +9109 +9127 +9133 +9137 +9151 +9157 +9161 +9173 +9181 +9187 +9199 +9203 +9209 +9221 +9227 +9239 +9241 +9257 +9277 +9281 +9283 +9293 +9311 +9319 +9323 +9337 +9341 +9343 +9349 +9371 +9377 +9391 +9397 +9403 +9413 +9419 +9421 +9431 +9433 +9437 +9439 +9461 +9463 +9467 +9473 +9479 +9491 +9497 +9511 +9521 
+9533 +9539 +9547 +9551 +9587 +9601 +9613 +9619 +9623 +9629 +9631 +9643 +9649 +9661 +9677 +9679 +9689 +9697 +9719 +9721 +9733 +9739 +9743 +9749 +9767 +9769 +9781 +9787 +9791 +9803 +9811 +9817 +9829 +9833 +9839 +9851 +9857 +9859 +9871 +9883 +9887 +9901 +9907 +9923 +9929 +9931 +9941 +9949 +9967 +9973 +10007 +10009 +10037 +10039 +10061 +10067 +10069 +10079 +10091 +10093 +10099 +10103 +10111 +10133 +10139 +10141 +10151 +10159 +10163 +10169 +10177 +10181 +10193 +10211 +10223 +10243 +10247 +10253 +10259 +10267 +10271 +10273 +10289 +10301 +10303 +10313 +10321 +10331 +10333 +10337 +10343 +10357 +10369 +10391 +10399 +10427 +10429 +10433 +10453 +10457 +10459 +10463 +10477 +10487 +10499 +10501 +10513 +10529 +10531 +10559 +10567 +10589 +10597 +10601 +10607 +10613 +10627 +10631 +10639 +10651 +10657 +10663 +10667 +10687 +10691 +10709 +10711 +10723 +10729 +10733 +10739 +10753 +10771 +10781 +10789 +10799 +10831 +10837 +10847 +10853 +10859 +10861 +10867 +10883 +10889 +10891 +10903 +10909 +10937 +10939 +10949 +10957 +10973 +10979 +10987 +10993 +11003 +11027 +11047 +11057 +11059 +11069 +11071 +11083 +11087 +11093 +11113 +11117 +11119 +11131 +11149 +11159 +11161 +11171 +11173 +11177 +11197 +11213 +11239 +11243 +11251 +11257 +11261 +11273 +11279 +11287 +11299 +11311 +11317 +11321 +11329 +11351 +11353 +11369 +11383 +11393 +11399 +11411 +11423 +11437 +11443 +11447 +11467 +11471 +11483 +11489 +11491 +11497 +11503 +11519 +11527 +11549 +11551 +11579 +11587 +11593 +11597 +11617 +11621 +11633 +11657 +11677 +11681 +11689 +11699 +11701 +11717 +11719 +11731 +11743 +11777 +11779 +11783 +11789 +11801 +11807 +11813 +11821 +11827 +11831 +11833 +11839 +11863 +11867 +11887 +11897 +11903 +11909 +11923 +11927 +11933 +11939 +11941 +11953 +11959 +11969 +11971 +11981 +11987 +12007 +12011 +12037 +12041 +12043 +12049 +12071 +12073 +12097 +12101 +12107 +12109 +12113 +12119 +12143 +12149 +12157 +12161 +12163 +12197 +12203 +12211 +12227 +12239 +12241 +12251 +12253 +12263 +12269 +12277 +12281 +12289 +12301 +12323 +12329 +12343 +12347 +12373 +12377 +12379 +12391 +12401 +12409 +12413 +12421 +12433 +12437 +12451 +12457 +12473 +12479 +12487 +12491 +12497 +12503 +12511 +12517 +12527 +12539 +12541 +12547 +12553 +12569 +12577 +12583 +12589 +12601 +12611 +12613 +12619 +12637 +12641 +12647 +12653 +12659 +12671 +12689 +12697 +12703 +12713 +12721 +12739 +12743 +12757 +12763 +12781 +12791 +12799 +12809 +12821 +12823 +12829 +12841 +12853 +12889 +12893 +12899 +12907 +12911 +12917 +12919 +12923 +12941 +12953 +12959 +12967 +12973 +12979 +12983 +13001 +13003 +13007 +13009 +13033 +13037 +13043 +13049 +13063 +13093 +13099 +13103 +13109 +13121 +13127 +13147 +13151 +13159 +13163 +13171 +13177 +13183 +13187 +13217 +13219 +13229 +13241 +13249 +13259 +13267 +13291 +13297 +13309 +13313 +13327 +13331 +13337 +13339 +13367 +13381 +13397 +13399 +13411 +13417 +13421 +13441 +13451 +13457 +13463 +13469 +13477 +13487 +13499 +13513 +13523 +13537 +13553 +13567 +13577 +13591 +13597 +13613 +13619 +13627 +13633 +13649 +13669 +13679 +13681 +13687 +13691 +13693 +13697 +13709 +13711 +13721 +13723 +13729 +13751 +13757 +13759 +13763 +13781 +13789 +13799 +13807 +13829 +13831 +13841 +13859 +13873 +13877 +13879 +13883 +13901 +13903 +13907 +13913 +13921 +13931 +13933 +13963 +13967 +13997 +13999 +14009 +14011 +14029 +14033 +14051 +14057 +14071 +14081 +14083 +14087 +14107 +14143 +14149 +14153 +14159 +14173 +14177 +14197 +14207 +14221 +14243 +14249 +14251 +14281 +14293 +14303 +14321 +14323 +14327 +14341 +14347 +14369 +14387 +14389 +14401 +14407 +14411 +14419 +14423 +14431 +14437 +14447 
+14449 +14461 +14479 +14489 +14503 +14519 +14533 +14537 +14543 +14549 +14551 +14557 +14561 +14563 +14591 +14593 +14621 +14627 +14629 +14633 +14639 +14653 +14657 +14669 +14683 +14699 +14713 +14717 +14723 +14731 +14737 +14741 +14747 +14753 +14759 +14767 +14771 +14779 +14783 +14797 +14813 +14821 +14827 +14831 +14843 +14851 +14867 +14869 +14879 +14887 +14891 +14897 +14923 +14929 +14939 +14947 +14951 +14957 +14969 +14983 +15013 +15017 +15031 +15053 +15061 +15073 +15077 +15083 +15091 +15101 +15107 +15121 +15131 +15137 +15139 +15149 +15161 +15173 +15187 +15193 +15199 +15217 +15227 +15233 +15241 +15259 +15263 +15269 +15271 +15277 +15287 +15289 +15299 +15307 +15313 +15319 +15329 +15331 +15349 +15359 +15361 +15373 +15377 +15383 +15391 +15401 +15413 +15427 +15439 +15443 +15451 +15461 +15467 +15473 +15493 +15497 +15511 +15527 +15541 +15551 +15559 +15569 +15581 +15583 +15601 +15607 +15619 +15629 +15641 +15643 +15647 +15649 +15661 +15667 +15671 +15679 +15683 +15727 +15731 +15733 +15737 +15739 +15749 +15761 +15767 +15773 +15787 +15791 +15797 +15803 +15809 +15817 +15823 +15859 +15877 +15881 +15887 +15889 +15901 +15907 +15913 +15919 +15923 +15937 +15959 +15971 +15973 +15991 +16001 +16007 +16033 +16057 +16061 +16063 +16067 +16069 +16073 +16087 +16091 +16097 +16103 +16111 +16127 +16139 +16141 +16183 +16187 +16189 +16193 +16217 +16223 +16229 +16231 +16249 +16253 +16267 +16273 +16301 +16319 +16333 +16339 +16349 +16361 +16363 +16369 +16381 +16411 +16417 +16421 +16427 +16433 +16447 +16451 +16453 +16477 +16481 +16487 +16493 +16519 +16529 +16547 +16553 +16561 +16567 +16573 +16603 +16607 +16619 +16631 +16633 +16649 +16651 +16657 +16661 +16673 +16691 +16693 +16699 +16703 +16729 +16741 +16747 +16759 +16763 +16787 +16811 +16823 +16829 +16831 +16843 +16871 +16879 +16883 +16889 +16901 +16903 +16921 +16927 +16931 +16937 +16943 +16963 +16979 +16981 +16987 +16993 +17011 +17021 +17027 +17029 +17033 +17041 +17047 +17053 +17077 +17093 +17099 +17107 +17117 +17123 +17137 +17159 +17167 +17183 +17189 +17191 +17203 +17207 +17209 +17231 +17239 +17257 +17291 +17293 +17299 +17317 +17321 +17327 +17333 +17341 +17351 +17359 +17377 +17383 +17387 +17389 +17393 +17401 +17417 +17419 +17431 +17443 +17449 +17467 +17471 +17477 +17483 +17489 +17491 +17497 +17509 +17519 +17539 +17551 +17569 +17573 +17579 +17581 +17597 +17599 +17609 +17623 +17627 +17657 +17659 +17669 +17681 +17683 +17707 +17713 +17729 +17737 +17747 +17749 +17761 +17783 +17789 +17791 +17807 +17827 +17837 +17839 +17851 +17863 +17881 +17891 +17903 +17909 +17911 +17921 +17923 +17929 +17939 +17957 +17959 +17971 +17977 +17981 +17987 +17989 +18013 +18041 +18043 +18047 +18049 +18059 +18061 +18077 +18089 +18097 +18119 +18121 +18127 +18131 +18133 +18143 +18149 +18169 +18181 +18191 +18199 +18211 +18217 +18223 +18229 +18233 +18251 +18253 +18257 +18269 +18287 +18289 +18301 +18307 +18311 +18313 +18329 +18341 +18353 +18367 +18371 +18379 +18397 +18401 +18413 +18427 +18433 +18439 +18443 +18451 +18457 +18461 +18481 +18493 +18503 +18517 +18521 +18523 +18539 +18541 +18553 +18583 +18587 +18593 +18617 +18637 +18661 +18671 +18679 +18691 +18701 +18713 +18719 +18731 +18743 +18749 +18757 +18773 +18787 +18793 +18797 +18803 +18839 +18859 +18869 +18899 +18911 +18913 +18917 +18919 +18947 +18959 +18973 +18979 +19001 +19009 +19013 +19031 +19037 +19051 +19069 +19073 +19079 +19081 +19087 +19121 +19139 +19141 +19157 +19163 +19181 +19183 +19207 +19211 +19213 +19219 +19231 +19237 +19249 +19259 +19267 +19273 +19289 +19301 +19309 +19319 +19333 +19373 +19379 +19381 +19387 +19391 +19403 +19417 +19421 +19423 +19427 
+19429 +19433 +19441 +19447 +19457 +19463 +19469 +19471 +19477 +19483 +19489 +19501 +19507 +19531 +19541 +19543 +19553 +19559 +19571 +19577 +19583 +19597 +19603 +19609 +19661 +19681 +19687 +19697 +19699 +19709 +19717 +19727 +19739 +19751 +19753 +19759 +19763 +19777 +19793 +19801 +19813 +19819 +19841 +19843 +19853 +19861 +19867 +19889 +19891 +19913 +19919 +19927 +19937 +19949 +19961 +19963 +19973 +19979 +19991 +19993 +19997 +20011 +20021 +20023 +20029 +20047 +20051 +20063 +20071 +20089 +20101 +20107 +20113 +20117 +20123 +20129 +20143 +20147 +20149 +20161 +20173 +20177 +20183 +20201 +20219 +20231 +20233 +20249 +20261 +20269 +20287 +20297 +20323 +20327 +20333 +20341 +20347 +20353 +20357 +20359 +20369 +20389 +20393 +20399 +20407 +20411 +20431 +20441 +20443 +20477 +20479 +20483 +20507 +20509 +20521 +20533 +20543 +20549 +20551 +20563 +20593 +20599 +20611 +20627 +20639 +20641 +20663 +20681 +20693 +20707 +20717 +20719 +20731 +20743 +20747 +20749 +20753 +20759 +20771 +20773 +20789 +20807 +20809 +20849 +20857 +20873 +20879 +20887 +20897 +20899 +20903 +20921 +20929 +20939 +20947 +20959 +20963 +20981 +20983 +21001 +21011 +21013 +21017 +21019 +21023 +21031 +21059 +21061 +21067 +21089 +21101 +21107 +21121 +21139 +21143 +21149 +21157 +21163 +21169 +21179 +21187 +21191 +21193 +21211 +21221 +21227 +21247 +21269 +21277 +21283 +21313 +21317 +21319 +21323 +21341 +21347 +21377 +21379 +21383 +21391 +21397 +21401 +21407 +21419 +21433 +21467 +21481 +21487 +21491 +21493 +21499 +21503 +21517 +21521 +21523 +21529 +21557 +21559 +21563 +21569 +21577 +21587 +21589 +21599 +21601 +21611 +21613 +21617 +21647 +21649 +21661 +21673 +21683 +21701 +21713 +21727 +21737 +21739 +21751 +21757 +21767 +21773 +21787 +21799 +21803 +21817 +21821 +21839 +21841 +21851 +21859 +21863 +21871 +21881 +21893 +21911 +21929 +21937 +21943 +21961 +21977 +21991 +21997 +22003 +22013 +22027 +22031 +22037 +22039 +22051 +22063 +22067 +22073 +22079 +22091 +22093 +22109 +22111 +22123 +22129 +22133 +22147 +22153 +22157 +22159 +22171 +22189 +22193 +22229 +22247 +22259 +22271 +22273 +22277 +22279 +22283 +22291 +22303 +22307 +22343 +22349 +22367 +22369 +22381 +22391 +22397 +22409 +22433 +22441 +22447 +22453 +22469 +22481 +22483 +22501 +22511 +22531 +22541 +22543 +22549 +22567 +22571 +22573 +22613 +22619 +22621 +22637 +22639 +22643 +22651 +22669 +22679 +22691 +22697 +22699 +22709 +22717 +22721 +22727 +22739 +22741 +22751 +22769 +22777 +22783 +22787 +22807 +22811 +22817 +22853 +22859 +22861 +22871 +22877 +22901 +22907 +22921 +22937 +22943 +22961 +22963 +22973 +22993 +23003 +23011 +23017 +23021 +23027 +23029 +23039 +23041 +23053 +23057 +23059 +23063 +23071 +23081 +23087 +23099 +23117 +23131 +23143 +23159 +23167 +23173 +23189 +23197 +23201 +23203 +23209 +23227 +23251 +23269 +23279 +23291 +23293 +23297 +23311 +23321 +23327 +23333 +23339 +23357 +23369 +23371 +23399 +23417 +23431 +23447 +23459 +23473 +23497 +23509 +23531 +23537 +23539 +23549 +23557 +23561 +23563 +23567 +23581 +23593 +23599 +23603 +23609 +23623 +23627 +23629 +23633 +23663 +23669 +23671 +23677 +23687 +23689 +23719 +23741 +23743 +23747 +23753 +23761 +23767 +23773 +23789 +23801 +23813 +23819 +23827 +23831 +23833 +23857 +23869 +23873 +23879 +23887 +23893 +23899 +23909 +23911 +23917 +23929 +23957 +23971 +23977 +23981 +23993 +24001 +24007 +24019 +24023 +24029 +24043 +24049 +24061 +24071 +24077 +24083 +24091 +24097 +24103 +24107 +24109 +24113 +24121 +24133 +24137 +24151 +24169 +24179 +24181 +24197 +24203 +24223 +24229 +24239 +24247 +24251 +24281 +24317 +24329 +24337 +24359 +24371 +24373 +24379 +24391 
+24407 +24413 +24419 +24421 +24439 +24443 +24469 +24473 +24481 +24499 +24509 +24517 +24527 +24533 +24547 +24551 +24571 +24593 +24611 +24623 +24631 +24659 +24671 +24677 +24683 +24691 +24697 +24709 +24733 +24749 +24763 +24767 +24781 +24793 +24799 +24809 +24821 +24841 +24847 +24851 +24859 +24877 +24889 +24907 +24917 +24919 +24923 +24943 +24953 +24967 +24971 +24977 +24979 +24989 +25013 +25031 +25033 +25037 +25057 +25073 +25087 +25097 +25111 +25117 +25121 +25127 +25147 +25153 +25163 +25169 +25171 +25183 +25189 +25219 +25229 +25237 +25243 +25247 +25253 +25261 +25301 +25303 +25307 +25309 +25321 +25339 +25343 +25349 +25357 +25367 +25373 +25391 +25409 +25411 +25423 +25439 +25447 +25453 +25457 +25463 +25469 +25471 +25523 +25537 +25541 +25561 +25577 +25579 +25583 +25589 +25601 +25603 +25609 +25621 +25633 +25639 +25643 +25657 +25667 +25673 +25679 +25693 +25703 +25717 +25733 +25741 +25747 +25759 +25763 +25771 +25793 +25799 +25801 +25819 +25841 +25847 +25849 +25867 +25873 +25889 +25903 +25913 +25919 +25931 +25933 +25939 +25943 +25951 +25969 +25981 +25997 +25999 +26003 +26017 +26021 +26029 +26041 +26053 +26083 +26099 +26107 +26111 +26113 +26119 +26141 +26153 +26161 +26171 +26177 +26183 +26189 +26203 +26209 +26227 +26237 +26249 +26251 +26261 +26263 +26267 +26293 +26297 +26309 +26317 +26321 +26339 +26347 +26357 +26371 +26387 +26393 +26399 +26407 +26417 +26423 +26431 +26437 +26449 +26459 +26479 +26489 +26497 +26501 +26513 +26539 +26557 +26561 +26573 +26591 +26597 +26627 +26633 +26641 +26647 +26669 +26681 +26683 +26687 +26693 +26699 +26701 +26711 +26713 +26717 +26723 +26729 +26731 +26737 +26759 +26777 +26783 +26801 +26813 +26821 +26833 +26839 +26849 +26861 +26863 +26879 +26881 +26891 +26893 +26903 +26921 +26927 +26947 +26951 +26953 +26959 +26981 +26987 +26993 +27011 +27017 +27031 +27043 +27059 +27061 +27067 +27073 +27077 +27091 +27103 +27107 +27109 +27127 +27143 +27179 +27191 +27197 +27211 +27239 +27241 +27253 +27259 +27271 +27277 +27281 +27283 +27299 +27329 +27337 +27361 +27367 +27397 +27407 +27409 +27427 +27431 +27437 +27449 +27457 +27479 +27481 +27487 +27509 +27527 +27529 +27539 +27541 +27551 +27581 +27583 +27611 +27617 +27631 +27647 +27653 +27673 +27689 +27691 +27697 +27701 +27733 +27737 +27739 +27743 +27749 +27751 +27763 +27767 +27773 +27779 +27791 +27793 +27799 +27803 +27809 +27817 +27823 +27827 +27847 +27851 +27883 +27893 +27901 +27917 +27919 +27941 +27943 +27947 +27953 +27961 +27967 +27983 +27997 +28001 +28019 +28027 +28031 +28051 +28057 +28069 +28081 +28087 +28097 +28099 +28109 +28111 +28123 +28151 +28163 +28181 +28183 +28201 +28211 +28219 +28229 +28277 +28279 +28283 +28289 +28297 +28307 +28309 +28319 +28349 +28351 +28387 +28393 +28403 +28409 +28411 +28429 +28433 +28439 +28447 +28463 +28477 +28493 +28499 +28513 +28517 +28537 +28541 +28547 +28549 +28559 +28571 +28573 +28579 +28591 +28597 +28603 +28607 +28619 +28621 +28627 +28631 +28643 +28649 +28657 +28661 +28663 +28669 +28687 +28697 +28703 +28711 +28723 +28729 +28751 +28753 +28759 +28771 +28789 +28793 +28807 +28813 +28817 +28837 +28843 +28859 +28867 +28871 +28879 +28901 +28909 +28921 +28927 +28933 +28949 +28961 +28979 +29009 +29017 +29021 +29023 +29027 +29033 +29059 +29063 +29077 +29101 +29123 +29129 +29131 +29137 +29147 +29153 +29167 +29173 +29179 +29191 +29201 +29207 +29209 +29221 +29231 +29243 +29251 +29269 +29287 +29297 +29303 +29311 +29327 +29333 +29339 +29347 +29363 +29383 +29387 +29389 +29399 +29401 +29411 +29423 +29429 +29437 +29443 +29453 +29473 +29483 +29501 +29527 +29531 +29537 +29567 +29569 +29573 +29581 +29587 +29599 +29611 +29629 
+29633 +29641 +29663 +29669 +29671 +29683 +29717 +29723 +29741 +29753 +29759 +29761 +29789 +29803 +29819 +29833 +29837 +29851 +29863 +29867 +29873 +29879 +29881 +29917 +29921 +29927 +29947 +29959 +29983 +29989 +30011 +30013 +30029 +30047 +30059 +30071 +30089 +30091 +30097 +30103 +30109 +30113 +30119 +30133 +30137 +30139 +30161 +30169 +30181 +30187 +30197 +30203 +30211 +30223 +30241 +30253 +30259 +30269 +30271 +30293 +30307 +30313 +30319 +30323 +30341 +30347 +30367 +30389 +30391 +30403 +30427 +30431 +30449 +30467 +30469 +30491 +30493 +30497 +30509 +30517 +30529 +30539 +30553 +30557 +30559 +30577 +30593 +30631 +30637 +30643 +30649 +30661 +30671 +30677 +30689 +30697 +30703 +30707 +30713 +30727 +30757 +30763 +30773 +30781 +30803 +30809 +30817 +30829 +30839 +30841 +30851 +30853 +30859 +30869 +30871 +30881 +30893 +30911 +30931 +30937 +30941 +30949 +30971 +30977 +30983 +31013 +31019 +31033 +31039 +31051 +31063 +31069 +31079 +31081 +31091 +31121 +31123 +31139 +31147 +31151 +31153 +31159 +31177 +31181 +31183 +31189 +31193 +31219 +31223 +31231 +31237 +31247 +31249 +31253 +31259 +31267 +31271 +31277 +31307 +31319 +31321 +31327 +31333 +31337 +31357 +31379 +31387 +31391 +31393 +31397 +31469 +31477 +31481 +31489 +31511 +31513 +31517 +31531 +31541 +31543 +31547 +31567 +31573 +31583 +31601 +31607 +31627 +31643 +31649 +31657 +31663 +31667 +31687 +31699 +31721 +31723 +31727 +31729 +31741 +31751 +31769 +31771 +31793 +31799 +31817 +31847 +31849 +31859 +31873 +31883 +31891 +31907 +31957 +31963 +31973 +31981 +31991 +32003 +32009 +32027 +32029 +32051 +32057 +32059 +32063 +32069 +32077 +32083 +32089 +32099 +32117 +32119 +32141 +32143 +32159 +32173 +32183 +32189 +32191 +32203 +32213 +32233 +32237 +32251 +32257 +32261 +32297 +32299 +32303 +32309 +32321 +32323 +32327 +32341 +32353 +32359 +32363 +32369 +32371 +32377 +32381 +32401 +32411 +32413 +32423 +32429 +32441 +32443 +32467 +32479 +32491 +32497 +32503 +32507 +32531 +32533 +32537 +32561 +32563 +32569 +32573 +32579 +32587 +32603 +32609 +32611 +32621 +32633 +32647 +32653 +32687 +32693 +32707 +32713 +32717 +32719 +32749 +32771 +32779 +32783 +32789 +32797 +32801 +32803 +32831 +32833 +32839 +32843 +32869 +32887 +32909 +32911 +32917 +32933 +32939 +32941 +32957 +32969 +32971 +32983 +32987 +32993 +32999 +33013 +33023 +33029 +33037 +33049 +33053 +33071 +33073 +33083 +33091 +33107 +33113 +33119 +33149 +33151 +33161 +33179 +33181 +33191 +33199 +33203 +33211 +33223 +33247 +33287 +33289 +33301 +33311 +33317 +33329 +33331 +33343 +33347 +33349 +33353 +33359 +33377 +33391 +33403 +33409 +33413 +33427 +33457 +33461 +33469 +33479 +33487 +33493 +33503 +33521 +33529 +33533 +33547 +33563 +33569 +33577 +33581 +33587 +33589 +33599 +33601 +33613 +33617 +33619 +33623 +33629 +33637 +33641 +33647 +33679 +33703 +33713 +33721 +33739 +33749 +33751 +33757 +33767 +33769 +33773 +33791 +33797 +33809 +33811 +33827 +33829 +33851 +33857 +33863 +33871 +33889 +33893 +33911 +33923 +33931 +33937 +33941 +33961 +33967 +33997 +34019 +34031 +34033 +34039 +34057 +34061 +34123 +34127 +34129 +34141 +34147 +34157 +34159 +34171 +34183 +34211 +34213 +34217 +34231 +34253 +34259 +34261 +34267 +34273 +34283 +34297 +34301 +34303 +34313 +34319 +34327 +34337 +34351 +34361 +34367 +34369 +34381 +34403 +34421 +34429 +34439 +34457 +34469 +34471 +34483 +34487 +34499 +34501 +34511 +34513 +34519 +34537 +34543 +34549 +34583 +34589 +34591 +34603 +34607 +34613 +34631 +34649 +34651 +34667 +34673 +34679 +34687 +34693 +34703 +34721 +34729 +34739 +34747 +34757 +34759 +34763 +34781 +34807 +34819 +34841 +34843 +34847 +34849 +34871 
+34877 +34883 +34897 +34913 +34919 +34939 +34949 +34961 +34963 +34981 +35023 +35027 +35051 +35053 +35059 +35069 +35081 +35083 +35089 +35099 +35107 +35111 +35117 +35129 +35141 +35149 +35153 +35159 +35171 +35201 +35221 +35227 +35251 +35257 +35267 +35279 +35281 +35291 +35311 +35317 +35323 +35327 +35339 +35353 +35363 +35381 +35393 +35401 +35407 +35419 +35423 +35437 +35447 +35449 +35461 +35491 +35507 +35509 +35521 +35527 +35531 +35533 +35537 +35543 +35569 +35573 +35591 +35593 +35597 +35603 +35617 +35671 +35677 +35729 +35731 +35747 +35753 +35759 +35771 +35797 +35801 +35803 +35809 +35831 +35837 +35839 +35851 +35863 +35869 +35879 +35897 +35899 +35911 +35923 +35933 +35951 +35963 +35969 +35977 +35983 +35993 +35999 +36007 +36011 +36013 +36017 +36037 +36061 +36067 +36073 +36083 +36097 +36107 +36109 +36131 +36137 +36151 +36161 +36187 +36191 +36209 +36217 +36229 +36241 +36251 +36263 +36269 +36277 +36293 +36299 +36307 +36313 +36319 +36341 +36343 +36353 +36373 +36383 +36389 +36433 +36451 +36457 +36467 +36469 +36473 +36479 +36493 +36497 +36523 +36527 +36529 +36541 +36551 +36559 +36563 +36571 +36583 +36587 +36599 +36607 +36629 +36637 +36643 +36653 +36671 +36677 +36683 +36691 +36697 +36709 +36713 +36721 +36739 +36749 +36761 +36767 +36779 +36781 +36787 +36791 +36793 +36809 +36821 +36833 +36847 +36857 +36871 +36877 +36887 +36899 +36901 +36913 +36919 +36923 +36929 +36931 +36943 +36947 +36973 +36979 +36997 +37003 +37013 +37019 +37021 +37039 +37049 +37057 +37061 +37087 +37097 +37117 +37123 +37139 +37159 +37171 +37181 +37189 +37199 +37201 +37217 +37223 +37243 +37253 +37273 +37277 +37307 +37309 +37313 +37321 +37337 +37339 +37357 +37361 +37363 +37369 +37379 +37397 +37409 +37423 +37441 +37447 +37463 +37483 +37489 +37493 +37501 +37507 +37511 +37517 +37529 +37537 +37547 +37549 +37561 +37567 +37571 +37573 +37579 +37589 +37591 +37607 +37619 +37633 +37643 +37649 +37657 +37663 +37691 +37693 +37699 +37717 +37747 +37781 +37783 +37799 +37811 +37813 +37831 +37847 +37853 +37861 +37871 +37879 +37889 +37897 +37907 +37951 +37957 +37963 +37967 +37987 +37991 +37993 +37997 +38011 +38039 +38047 +38053 +38069 +38083 +38113 +38119 +38149 +38153 +38167 +38177 +38183 +38189 +38197 +38201 +38219 +38231 +38237 +38239 +38261 +38273 +38281 +38287 +38299 +38303 +38317 +38321 +38327 +38329 +38333 +38351 +38371 +38377 +38393 +38431 +38447 +38449 +38453 +38459 +38461 +38501 +38543 +38557 +38561 +38567 +38569 +38593 +38603 +38609 +38611 +38629 +38639 +38651 +38653 +38669 +38671 +38677 +38693 +38699 +38707 +38711 +38713 +38723 +38729 +38737 +38747 +38749 +38767 +38783 +38791 +38803 +38821 +38833 +38839 +38851 +38861 +38867 +38873 +38891 +38903 +38917 +38921 +38923 +38933 +38953 +38959 +38971 +38977 +38993 +39019 +39023 +39041 +39043 +39047 +39079 +39089 +39097 +39103 +39107 +39113 +39119 +39133 +39139 +39157 +39161 +39163 +39181 +39191 +39199 +39209 +39217 +39227 +39229 +39233 +39239 +39241 +39251 +39293 +39301 +39313 +39317 +39323 +39341 +39343 +39359 +39367 +39371 +39373 +39383 +39397 +39409 +39419 +39439 +39443 +39451 +39461 +39499 +39503 +39509 +39511 +39521 +39541 +39551 +39563 +39569 +39581 +39607 +39619 +39623 +39631 +39659 +39667 +39671 +39679 +39703 +39709 +39719 +39727 +39733 +39749 +39761 +39769 +39779 +39791 +39799 +39821 +39827 +39829 +39839 +39841 +39847 +39857 +39863 +39869 +39877 +39883 +39887 +39901 +39929 +39937 +39953 +39971 +39979 +39983 +39989 +40009 +40013 +40031 +40037 +40039 +40063 +40087 +40093 +40099 +40111 +40123 +40127 +40129 +40151 +40153 +40163 +40169 +40177 +40189 +40193 +40213 +40231 +40237 +40241 +40253 +40277 
+40283 +40289 +40343 +40351 +40357 +40361 +40387 +40423 +40427 +40429 +40433 +40459 +40471 +40483 +40487 +40493 +40499 +40507 +40519 +40529 +40531 +40543 +40559 +40577 +40583 +40591 +40597 +40609 +40627 +40637 +40639 +40693 +40697 +40699 +40709 +40739 +40751 +40759 +40763 +40771 +40787 +40801 +40813 +40819 +40823 +40829 +40841 +40847 +40849 +40853 +40867 +40879 +40883 +40897 +40903 +40927 +40933 +40939 +40949 +40961 +40973 +40993 +41011 +41017 +41023 +41039 +41047 +41051 +41057 +41077 +41081 +41113 +41117 +41131 +41141 +41143 +41149 +41161 +41177 +41179 +41183 +41189 +41201 +41203 +41213 +41221 +41227 +41231 +41233 +41243 +41257 +41263 +41269 +41281 +41299 +41333 +41341 +41351 +41357 +41381 +41387 +41389 +41399 +41411 +41413 +41443 +41453 +41467 +41479 +41491 +41507 +41513 +41519 +41521 +41539 +41543 +41549 +41579 +41593 +41597 +41603 +41609 +41611 +41617 +41621 +41627 +41641 +41647 +41651 +41659 +41669 +41681 +41687 +41719 +41729 +41737 +41759 +41761 +41771 +41777 +41801 +41809 +41813 +41843 +41849 +41851 +41863 +41879 +41887 +41893 +41897 +41903 +41911 +41927 +41941 +41947 +41953 +41957 +41959 +41969 +41981 +41983 +41999 +42013 +42017 +42019 +42023 +42043 +42061 +42071 +42073 +42083 +42089 +42101 +42131 +42139 +42157 +42169 +42179 +42181 +42187 +42193 +42197 +42209 +42221 +42223 +42227 +42239 +42257 +42281 +42283 +42293 +42299 +42307 +42323 +42331 +42337 +42349 +42359 +42373 +42379 +42391 +42397 +42403 +42407 +42409 +42433 +42437 +42443 +42451 +42457 +42461 +42463 +42467 +42473 +42487 +42491 +42499 +42509 +42533 +42557 +42569 +42571 +42577 +42589 +42611 +42641 +42643 +42649 +42667 +42677 +42683 +42689 +42697 +42701 +42703 +42709 +42719 +42727 +42737 +42743 +42751 +42767 +42773 +42787 +42793 +42797 +42821 +42829 +42839 +42841 +42853 +42859 +42863 +42899 +42901 +42923 +42929 +42937 +42943 +42953 +42961 +42967 +42979 +42989 +43003 +43013 +43019 +43037 +43049 +43051 +43063 +43067 +43093 +43103 +43117 +43133 +43151 +43159 +43177 +43189 +43201 +43207 +43223 +43237 +43261 +43271 +43283 +43291 +43313 +43319 +43321 +43331 +43391 +43397 +43399 +43403 +43411 +43427 +43441 +43451 +43457 +43481 +43487 +43499 +43517 +43541 +43543 +43573 +43577 +43579 +43591 +43597 +43607 +43609 +43613 +43627 +43633 +43649 +43651 +43661 +43669 +43691 +43711 +43717 +43721 +43753 +43759 +43777 +43781 +43783 +43787 +43789 +43793 +43801 +43853 +43867 +43889 +43891 +43913 +43933 +43943 +43951 +43961 +43963 +43969 +43973 +43987 +43991 +43997 +44017 +44021 +44027 +44029 +44041 +44053 +44059 +44071 +44087 +44089 +44101 +44111 +44119 +44123 +44129 +44131 +44159 +44171 +44179 +44189 +44201 +44203 +44207 +44221 +44249 +44257 +44263 +44267 +44269 +44273 +44279 +44281 +44293 +44351 +44357 +44371 +44381 +44383 +44389 +44417 +44449 +44453 +44483 +44491 +44497 +44501 +44507 +44519 +44531 +44533 +44537 +44543 +44549 +44563 +44579 +44587 +44617 +44621 +44623 +44633 +44641 +44647 +44651 +44657 +44683 +44687 +44699 +44701 +44711 +44729 +44741 +44753 +44771 +44773 +44777 +44789 +44797 +44809 +44819 +44839 +44843 +44851 +44867 +44879 +44887 +44893 +44909 +44917 +44927 +44939 +44953 +44959 +44963 +44971 +44983 +44987 +45007 +45013 +45053 +45061 +45077 +45083 +45119 +45121 +45127 +45131 +45137 +45139 +45161 +45179 +45181 +45191 +45197 +45233 +45247 +45259 +45263 +45281 +45289 +45293 +45307 +45317 +45319 +45329 +45337 +45341 +45343 +45361 +45377 +45389 +45403 +45413 +45427 +45433 +45439 +45481 +45491 +45497 +45503 +45523 +45533 +45541 +45553 +45557 +45569 +45587 +45589 +45599 +45613 +45631 +45641 +45659 +45667 +45673 +45677 +45691 +45697 
+45707 +45737 +45751 +45757 +45763 +45767 +45779 +45817 +45821 +45823 +45827 +45833 +45841 +45853 +45863 +45869 +45887 +45893 +45943 +45949 +45953 +45959 +45971 +45979 +45989 +46021 +46027 +46049 +46051 +46061 +46073 +46091 +46093 +46099 +46103 +46133 +46141 +46147 +46153 +46171 +46181 +46183 +46187 +46199 +46219 +46229 +46237 +46261 +46271 +46273 +46279 +46301 +46307 +46309 +46327 +46337 +46349 +46351 +46381 +46399 +46411 +46439 +46441 +46447 +46451 +46457 +46471 +46477 +46489 +46499 +46507 +46511 +46523 +46549 +46559 +46567 +46573 +46589 +46591 +46601 +46619 +46633 +46639 +46643 +46649 +46663 +46679 +46681 +46687 +46691 +46703 +46723 +46727 +46747 +46751 +46757 +46769 +46771 +46807 +46811 +46817 +46819 +46829 +46831 +46853 +46861 +46867 +46877 +46889 +46901 +46919 +46933 +46957 +46993 +46997 +47017 +47041 +47051 +47057 +47059 +47087 +47093 +47111 +47119 +47123 +47129 +47137 +47143 +47147 +47149 +47161 +47189 +47207 +47221 +47237 +47251 +47269 +47279 +47287 +47293 +47297 +47303 +47309 +47317 +47339 +47351 +47353 +47363 +47381 +47387 +47389 +47407 +47417 +47419 +47431 +47441 +47459 +47491 +47497 +47501 +47507 +47513 +47521 +47527 +47533 +47543 +47563 +47569 +47581 +47591 +47599 +47609 +47623 +47629 +47639 +47653 +47657 +47659 +47681 +47699 +47701 +47711 +47713 +47717 +47737 +47741 +47743 +47777 +47779 +47791 +47797 +47807 +47809 +47819 +47837 +47843 +47857 +47869 +47881 +47903 +47911 +47917 +47933 +47939 +47947 +47951 +47963 +47969 +47977 +47981 +48017 +48023 +48029 +48049 +48073 +48079 +48091 +48109 +48119 +48121 +48131 +48157 +48163 +48179 +48187 +48193 +48197 +48221 +48239 +48247 +48259 +48271 +48281 +48299 +48311 +48313 +48337 +48341 +48353 +48371 +48383 +48397 +48407 +48409 +48413 +48437 +48449 +48463 +48473 +48479 +48481 +48487 +48491 +48497 +48523 +48527 +48533 +48539 +48541 +48563 +48571 +48589 +48593 +48611 +48619 +48623 +48647 +48649 +48661 +48673 +48677 +48679 +48731 +48733 +48751 +48757 +48761 +48767 +48779 +48781 +48787 +48799 +48809 +48817 +48821 +48823 +48847 +48857 +48859 +48869 +48871 +48883 +48889 +48907 +48947 +48953 +48973 +48989 +48991 +49003 +49009 +49019 +49031 +49033 +49037 +49043 +49057 +49069 +49081 +49103 +49109 +49117 +49121 +49123 +49139 +49157 +49169 +49171 +49177 +49193 +49199 +49201 +49207 +49211 +49223 +49253 +49261 +49277 +49279 +49297 +49307 +49331 +49333 +49339 +49363 +49367 +49369 +49391 +49393 +49409 +49411 +49417 +49429 +49433 +49451 +49459 +49463 +49477 +49481 +49499 +49523 +49529 +49531 +49537 +49547 +49549 +49559 +49597 +49603 +49613 +49627 +49633 +49639 +49663 +49667 +49669 +49681 +49697 +49711 +49727 +49739 +49741 +49747 +49757 +49783 +49787 +49789 +49801 +49807 +49811 +49823 +49831 +49843 +49853 +49871 +49877 +49891 +49919 +49921 +49927 +49937 +49939 +49943 +49957 +49991 +49993 +49999 +50021 +50023 +50033 +50047 +50051 +50053 +50069 +50077 +50087 +50093 +50101 +50111 +50119 +50123 +50129 +50131 +50147 +50153 +50159 +50177 +50207 +50221 +50227 +50231 +50261 +50263 +50273 +50287 +50291 +50311 +50321 +50329 +50333 +50341 +50359 +50363 +50377 +50383 +50387 +50411 +50417 +50423 +50441 +50459 +50461 +50497 +50503 +50513 +50527 +50539 +50543 +50549 +50551 +50581 +50587 +50591 +50593 +50599 +50627 +50647 +50651 +50671 +50683 +50707 +50723 +50741 +50753 +50767 +50773 +50777 +50789 +50821 +50833 +50839 +50849 +50857 +50867 +50873 +50891 +50893 +50909 +50923 +50929 +50951 +50957 +50969 +50971 +50989 +50993 +51001 +51031 +51043 +51047 +51059 +51061 +51071 +51109 +51131 +51133 +51137 +51151 +51157 +51169 +51193 +51197 +51199 +51203 +51217 +51229 +51239 
+51241 +51257 +51263 +51283 +51287 +51307 +51329 +51341 +51343 +51347 +51349 +51361 +51383 +51407 +51413 +51419 +51421 +51427 +51431 +51437 +51439 +51449 +51461 +51473 +51479 +51481 +51487 +51503 +51511 +51517 +51521 +51539 +51551 +51563 +51577 +51581 +51593 +51599 +51607 +51613 +51631 +51637 +51647 +51659 +51673 +51679 +51683 +51691 +51713 +51719 +51721 +51749 +51767 +51769 +51787 +51797 +51803 +51817 +51827 +51829 +51839 +51853 +51859 +51869 +51871 +51893 +51899 +51907 +51913 +51929 +51941 +51949 +51971 +51973 +51977 +51991 +52009 +52021 +52027 +52051 +52057 +52067 +52069 +52081 +52103 +52121 +52127 +52147 +52153 +52163 +52177 +52181 +52183 +52189 +52201 +52223 +52237 +52249 +52253 +52259 +52267 +52289 +52291 +52301 +52313 +52321 +52361 +52363 +52369 +52379 +52387 +52391 +52433 +52453 +52457 +52489 +52501 +52511 +52517 +52529 +52541 +52543 +52553 +52561 +52567 +52571 +52579 +52583 +52609 +52627 +52631 +52639 +52667 +52673 +52691 +52697 +52709 +52711 +52721 +52727 +52733 +52747 +52757 +52769 +52783 +52807 +52813 +52817 +52837 +52859 +52861 +52879 +52883 +52889 +52901 +52903 +52919 +52937 +52951 +52957 +52963 +52967 +52973 +52981 +52999 +53003 +53017 +53047 +53051 +53069 +53077 +53087 +53089 +53093 +53101 +53113 +53117 +53129 +53147 +53149 +53161 +53171 +53173 +53189 +53197 +53201 +53231 +53233 +53239 +53267 +53269 +53279 +53281 +53299 +53309 +53323 +53327 +53353 +53359 +53377 +53381 +53401 +53407 +53411 +53419 +53437 +53441 +53453 +53479 +53503 +53507 +53527 +53549 +53551 +53569 +53591 +53593 +53597 +53609 +53611 +53617 +53623 +53629 +53633 +53639 +53653 +53657 +53681 +53693 +53699 +53717 +53719 +53731 +53759 +53773 +53777 +53783 +53791 +53813 +53819 +53831 +53849 +53857 +53861 +53881 +53887 +53891 +53897 +53899 +53917 +53923 +53927 +53939 +53951 +53959 +53987 +53993 +54001 +54011 +54013 +54037 +54049 +54059 +54083 +54091 +54101 +54121 +54133 +54139 +54151 +54163 +54167 +54181 +54193 +54217 +54251 +54269 +54277 +54287 +54293 +54311 +54319 +54323 +54331 +54347 +54361 +54367 +54371 +54377 +54401 +54403 +54409 +54413 +54419 +54421 +54437 +54443 +54449 +54469 +54493 +54497 +54499 +54503 +54517 +54521 +54539 +54541 +54547 +54559 +54563 +54577 +54581 +54583 +54601 +54617 +54623 +54629 +54631 +54647 +54667 +54673 +54679 +54709 +54713 +54721 +54727 +54751 +54767 +54773 +54779 +54787 +54799 +54829 +54833 +54851 +54869 +54877 +54881 +54907 +54917 +54919 +54941 +54949 +54959 +54973 +54979 +54983 +55001 +55009 +55021 +55049 +55051 +55057 +55061 +55073 +55079 +55103 +55109 +55117 +55127 +55147 +55163 +55171 +55201 +55207 +55213 +55217 +55219 +55229 +55243 +55249 +55259 +55291 +55313 +55331 +55333 +55337 +55339 +55343 +55351 +55373 +55381 +55399 +55411 +55439 +55441 +55457 +55469 +55487 +55501 +55511 +55529 +55541 +55547 +55579 +55589 +55603 +55609 +55619 +55621 +55631 +55633 +55639 +55661 +55663 +55667 +55673 +55681 +55691 +55697 +55711 +55717 +55721 +55733 +55763 +55787 +55793 +55799 +55807 +55813 +55817 +55819 +55823 +55829 +55837 +55843 +55849 +55871 +55889 +55897 +55901 +55903 +55921 +55927 +55931 +55933 +55949 +55967 +55987 +55997 +56003 +56009 +56039 +56041 +56053 +56081 +56087 +56093 +56099 +56101 +56113 +56123 +56131 +56149 +56167 +56171 +56179 +56197 +56207 +56209 +56237 +56239 +56249 +56263 +56267 +56269 +56299 +56311 +56333 +56359 +56369 +56377 +56383 +56393 +56401 +56417 +56431 +56437 +56443 +56453 +56467 +56473 +56477 +56479 +56489 +56501 +56503 +56509 +56519 +56527 +56531 +56533 +56543 +56569 +56591 +56597 +56599 +56611 +56629 +56633 +56659 +56663 +56671 +56681 +56687 +56701 +56711 
+56713 +56731 +56737 +56747 +56767 +56773 +56779 +56783 +56807 +56809 +56813 +56821 +56827 +56843 +56857 +56873 +56891 +56893 +56897 +56909 +56911 +56921 +56923 +56929 +56941 +56951 +56957 +56963 +56983 +56989 +56993 +56999 +57037 +57041 +57047 +57059 +57073 +57077 +57089 +57097 +57107 +57119 +57131 +57139 +57143 +57149 +57163 +57173 +57179 +57191 +57193 +57203 +57221 +57223 +57241 +57251 +57259 +57269 +57271 +57283 +57287 +57301 +57329 +57331 +57347 +57349 +57367 +57373 +57383 +57389 +57397 +57413 +57427 +57457 +57467 +57487 +57493 +57503 +57527 +57529 +57557 +57559 +57571 +57587 +57593 +57601 +57637 +57641 +57649 +57653 +57667 +57679 +57689 +57697 +57709 +57713 +57719 +57727 +57731 +57737 +57751 +57773 +57781 +57787 +57791 +57793 +57803 +57809 +57829 +57839 +57847 +57853 +57859 +57881 +57899 +57901 +57917 +57923 +57943 +57947 +57973 +57977 +57991 +58013 +58027 +58031 +58043 +58049 +58057 +58061 +58067 +58073 +58099 +58109 +58111 +58129 +58147 +58151 +58153 +58169 +58171 +58189 +58193 +58199 +58207 +58211 +58217 +58229 +58231 +58237 +58243 +58271 +58309 +58313 +58321 +58337 +58363 +58367 +58369 +58379 +58391 +58393 +58403 +58411 +58417 +58427 +58439 +58441 +58451 +58453 +58477 +58481 +58511 +58537 +58543 +58549 +58567 +58573 +58579 +58601 +58603 +58613 +58631 +58657 +58661 +58679 +58687 +58693 +58699 +58711 +58727 +58733 +58741 +58757 +58763 +58771 +58787 +58789 +58831 +58889 +58897 +58901 +58907 +58909 +58913 +58921 +58937 +58943 +58963 +58967 +58979 +58991 +58997 +59009 +59011 +59021 +59023 +59029 +59051 +59053 +59063 +59069 +59077 +59083 +59093 +59107 +59113 +59119 +59123 +59141 +59149 +59159 +59167 +59183 +59197 +59207 +59209 +59219 +59221 +59233 +59239 +59243 +59263 +59273 +59281 +59333 +59341 +59351 +59357 +59359 +59369 +59377 +59387 +59393 +59399 +59407 +59417 +59419 +59441 +59443 +59447 +59453 +59467 +59471 +59473 +59497 +59509 +59513 +59539 +59557 +59561 +59567 +59581 +59611 +59617 +59621 +59627 +59629 +59651 +59659 +59663 +59669 +59671 +59693 +59699 +59707 +59723 +59729 +59743 +59747 +59753 +59771 +59779 +59791 +59797 +59809 +59833 +59863 +59879 +59887 +59921 +59929 +59951 +59957 +59971 +59981 +59999 +60013 +60017 +60029 +60037 +60041 +60077 +60083 +60089 +60091 +60101 +60103 +60107 +60127 +60133 +60139 +60149 +60161 +60167 +60169 +60209 +60217 +60223 +60251 +60257 +60259 +60271 +60289 +60293 +60317 +60331 +60337 +60343 +60353 +60373 +60383 +60397 +60413 +60427 +60443 +60449 +60457 +60493 +60497 +60509 +60521 +60527 +60539 +60589 +60601 +60607 +60611 +60617 +60623 +60631 +60637 +60647 +60649 +60659 +60661 +60679 +60689 +60703 +60719 +60727 +60733 +60737 +60757 +60761 +60763 +60773 +60779 +60793 +60811 +60821 +60859 +60869 +60887 +60889 +60899 +60901 +60913 +60917 +60919 +60923 +60937 +60943 +60953 +60961 +61001 +61007 +61027 +61031 +61043 +61051 +61057 +61091 +61099 +61121 +61129 +61141 +61151 +61153 +61169 +61211 +61223 +61231 +61253 +61261 +61283 +61291 +61297 +61331 +61333 +61339 +61343 +61357 +61363 +61379 +61381 +61403 +61409 +61417 +61441 +61463 +61469 +61471 +61483 +61487 +61493 +61507 +61511 +61519 +61543 +61547 +61553 +61559 +61561 +61583 +61603 +61609 +61613 +61627 +61631 +61637 +61643 +61651 +61657 +61667 +61673 +61681 +61687 +61703 +61717 +61723 +61729 +61751 +61757 +61781 +61813 +61819 +61837 +61843 +61861 +61871 +61879 +61909 +61927 +61933 +61949 +61961 +61967 +61979 +61981 +61987 +61991 +62003 +62011 +62017 +62039 +62047 +62053 +62057 +62071 +62081 +62099 +62119 +62129 +62131 +62137 +62141 +62143 +62171 +62189 +62191 +62201 +62207 +62213 +62219 +62233 +62273 
+62297
+62299
+62303
+62311
+62323
+62327
+62347
+62351
+62383
+62401
+62417
+62423
+62459
+62467
+62473
+62477
+62483
+62497
+62501
+62507
+62533
+62539
+62549
+62563
+62581
+62591
+62597
+62603
+62617
+62627
+62633
+62639
+62653
+62659
+62683
+62687
+62701
+62723
+62731
+62743
+62753
+62761
+62773
+62791
+62801
+62819
+62827
+62851
+62861
+62869
+62873
+62897
+62903
+62921
+62927
+62929
+62939
+62969
+62971
+62981
+62983
+62987
+62989
+63029
+63031
+63059
+63067
+63073
+63079
+63097
+63103
+63113
+63127
+63131
+63149
+63179
+63197
+63199
+63211
+63241
+63247
+63277
+63281
+63299
+63311
+63313
+63317
+63331
+63337
+63347
+63353
+63361
+63367
+63377
+63389
+63391
+63397
+63409
+63419
+63421
+63439
+63443
+63463
+63467
+63473
+63487
+63493
+63499
+63521
+63527
+63533
+63541
+63559
+63577
+63587
+63589
+63599
+63601
+63607
+63611
+63617
+63629
+63647
+63649
+63659
+63667
+63671
+63689
+63691
+63697
+63703
+63709
+63719
+63727
+63737
+63743
+63761
+63773
+63781
+63793
+63799
+63803
+63809
+63823
+63839
+63841
+63853
+63857
+63863
+63901
+63907
+63913
+63929
+63949
+63977
+63997
+64007
+64013
+64019
+64033
+64037
+64063
+64067
+64081
+64091
+64109
+64123
+64151
+64153
+64157
+64171
+64187
+64189
+64217
+64223
+64231
+64237
+64271
+64279
+64283
+64301
+64303
+64319
+64327
+64333
+64373
+64381
+64399
+64403
+64433
+64439
+64451
+64453
+64483
+64489
+64499
+64513
+64553
+64567
+64577
+64579
+64591
+64601
+64609
+64613
+64621
+64627
+64633
+64661
+64663
+64667
+64679
+64693
+64709
+64717
+64747
+64763
+64781
+64783
+64793
+64811
+64817
+64849
+64853
+64871
+64877
+64879
+64891
+64901
+64919
+64921
+64927
+64937
+64951
+64969
+64997
+65003
+65011
+65027
+65029
+65033
+65053
+65063
+65071
+65089
+65099
+65101
+65111
+65119
+65123
+65129
+65141
+65147
+65167
+65171
+65173
+65179
+65183
+65203
+65213
+65239
+65257
+65267
+65269
+65287
+65293
+65309
+65323
+65327
+65353
+65357
+65371
+65381
+65393
+65407
+65413
+65419
+65423
+65437
+65447
+65449
+65479
+65497
+65519
+65521
diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod
new file mode 100644
index 0000000000..6da4d4a9c4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prng.pod
@@ -0,0 +1,38 @@
+=head1 NAME
+
+ prng - pseudo-random number generator
+
+=head1 SYNOPSIS
+
+ prng [count]
+
+=head1 DESCRIPTION
+
+B<prng> generates 32-bit pseudo-random integers using the
+Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
+the standard C library's rand() function, which is itself seeded from
+the system clock and the process ID number. Thus, the values generated
+are not particularly useful for cryptographic applications, but they
+are in general much better than the typical output of the usual
+multiplicative congruential generator used by most runtime libraries.
+
+You may optionally specify how many random values should be generated
+by giving a I<count> argument on the command line. If you do not
+specify a count, only one random value will be generated. The results
+are output to the standard output in decimal notation, one value per
+line.
+
+=head1 RESTRICTIONS
+
+As stated above, B<prng> uses the C library's rand() function to seed
+the generator, so it is not terribly suitable for cryptographic
+applications. Also note that each time you run the program, a new
+seed is generated, so it is better to run it once with a I<count>
+parameter than it is to run it multiple times to generate several
+values.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger
+ Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
+ Thayer School of Engineering, Dartmouth College, Hanover, NH USA
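For reference, the generator described above can be sketched in a few
lines of C. This is not the prng utility itself (which uses the MPI
library and much larger values); the toy primes p and q below, both
congruent to 3 mod 4, and the rand()-based seeding are illustrative
assumptions only.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static uint64_t n;   /* Blum modulus n = p * q, p = q = 3 (mod 4) */
    static uint64_t x;   /* current quadratic residue */

    static uint32_t bbs_next32(void)
    {
        uint32_t out = 0;
        for (int i = 0; i < 32; i++) {
            x = (x * x) % n;          /* x < n < 2^31, so x*x fits in 64 bits */
            out = (out << 1) | (uint32_t)(x & 1);  /* low bit of each residue */
        }
        return out;
    }

    int main(int argc, char **argv)
    {
        int count = (argc > 1) ? atoi(argv[1]) : 1;  /* default: one value */
        uint64_t p = 30011, q = 40039;  /* toy primes, both 3 mod 4 */
        n = p * q;

        srand((unsigned)time(NULL));    /* non-cryptographic seeding, as in prng */
        x = (uint64_t)rand() % (n - 2) + 2;  /* seed in [2, n-1]; assumes gcd(x, n) = 1 */

        while (count-- > 0)
            printf("%u\n", (unsigned)bbs_next32());
        return 0;
    }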
diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt
new file mode 100644
index 0000000000..0df0f0390a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/redux.txt
@@ -0,0 +1,86 @@
+Modular Reduction
+
+Usually, modular reduction is accomplished by long division, using the
+mp_div() or mp_mod() functions. However, when performing modular
+exponentiation, you spend a lot of time reducing by the same modulus
+again and again. For this purpose, doing a full division for each
+multiplication is quite inefficient.
+
+For this reason, the mp_exptmod() function does not perform modular
+reductions in the usual way, but instead takes advantage of an
+algorithm due to Barrett, as described by Menezes, van Oorschot, and
+Vanstone in their book _Handbook of Applied Cryptography_, published
+by the CRC Press (see Chapter 14 for details). This method reduces
+most of the computation of reduction to efficient shifting and masking
+operations, and avoids the multiple-precision division entirely.
+
+Here is a brief synopsis of Barrett reduction, as it is implemented in
+this library.
+
+Let b denote the radix of the computation (one more than the maximum
+value that can be denoted by an mp_digit). Let m be the modulus, and
+let k be the number of significant digits of m. Let x be the value to
+be reduced modulo m. By the Division Theorem, there exist unique
+integers Q and R such that:
+
+  x = Qm + R, 0 <= R < m
+
+Barrett reduction takes advantage of the fact that you can easily
+approximate Q to within two, given a value M such that:
+
+               2k
+              b
+  M = floor( ---- )
+               m
+
+Computation of M requires a full-precision division step, so if you
+are only doing a single reduction by m, you gain no advantage.
+However, when multiple reductions by the same m are required, this
+division need only be done once, beforehand. Using this, we can use
+the following equation to compute Q', an approximation of Q:
+
+                      x
+              floor( ---- ) M
+                      k-1
+                     b
+  Q' = floor( --------------- )
+                    k+1
+                   b
+
+The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
+efficiently implemented with shifts and masks, leaving only a single
+multiplication to be performed to get this approximation. It can be
+shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
+two additional subtractions to bring the value into line with the
+actual value of Q.
+
+Once we've got Q', we basically multiply that by m and subtract from
+x, yielding:
+
+  x - Q'm = Qm + R - Q'm
+
+Since we know the constraint on Q', this is one of:
+
+  R
+  R + m
+  R + 2m
+
+Since R < m by the Division Theorem, we can simply subtract off m
+until we get a value in the correct range, which will happen with no
+more than 2 subtractions:
+
+  v = x - Q'm
+
+  while(v >= m)
+    v = v - m
+  endwhile
+
+In performance trials on random inputs, modular exponentiation using
+this method of reduction gave around a 40% speedup over using the
+division for reduction.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
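The whole scheme is easy to exercise with machine integers. The
following sketch fixes k = 1 digit of radix b = 2^16, so b^(k-1) = 1
and b^(k+1) = 2^32, and the shifts become literal; the modulus and the
value reduced are arbitrary demonstration numbers, not anything from
the library.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t b2k = (uint64_t)1 << 32;  /* b^(2k), b = 2^16, k = 1 */
        uint64_t m = 40961;                      /* modulus, one digit wide */
        uint64_t M = b2k / m;                    /* one division, done beforehand */

        uint64_t x = 3735928559u;                /* value to reduce; x < b^(2k) */
        uint64_t q = (x * M) >> 32;   /* Q' = floor(floor(x / b^(k-1)) * M / b^(k+1)) */
        uint64_t r = x - q * m;       /* equals R, R + m, or R + 2m */
        while (r >= m)                /* at most two subtractions, by Q - 2 <= Q' <= Q */
            r -= m;

        printf("%llu mod %llu = %llu (direct: %llu)\n",
               (unsigned long long)x, (unsigned long long)m,
               (unsigned long long)r, (unsigned long long)(x % m));
        return 0;
    }

Note that after M is in hand, each reduction costs one multiplication,
two shifts, and at most two subtractions -- no division at all, which
is the entire point.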
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt
new file mode 100644
index 0000000000..4529cbfc46
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt
@@ -0,0 +1,50 @@
+Square Root
+
+A simple iterative algorithm is used to compute the greatest integer
+less than or equal to the square root. Essentially, this is Newton's
+linear approximation, computed by finding successive values of the
+equation:
+
+                  x[k]^2 - V
+  x[k+1] = x[k] - -----------
+                    2 x[k]
+
+...where V is the value for which the square root is being sought. In
+essence, what is happening here is that we guess a value for the
+square root, then figure out how far off we were by squaring our guess
+and subtracting the target. Using this value, we compute a linear
+approximation for the error, and adjust the "guess". We keep doing
+this until the precision gets low enough that the above equation
+yields a quotient of zero. At this point, our last guess is either
+the square root we're seeking or one greater than it, so it is
+corrected by a final comparison.
+
+The initial guess is computed by dividing V by 4, which is a heuristic
+I have found to be fairly good on average. This also has the
+advantage of being very easy to compute efficiently, even for large
+values.
+
+So, the resulting algorithm works as follows:
+
+  x = V / 4            /* compute initial guess */
+
+  loop
+    t = (x * x) - V    /* Compute absolute error */
+    u = 2 * x          /* Adjust by tangent slope */
+    t = t / u
+
+    /* Loop is done if error is zero */
+    if(t == 0)
+      break
+
+    /* Adjust guess by error term */
+    x = x - t
+  end
+
+  if(x * x > V)        /* final guess may be one too high */
+    x = x - 1
+
+The result of the computation is the value of x.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
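A 64-bit rendition of the same iteration is shown below. It is a
sketch, not the library's mp_sqrt: it assumes V < 2^32 so the
intermediate squares fit in 64 bits, and it clamps the initial guess
upward for tiny V, where V/4 would start below the root.

    #include <stdio.h>

    static unsigned long long isqrt(unsigned long long V)
    {
        if (V < 2)
            return V;                 /* 0 and 1 are their own roots */
        unsigned long long x = V / 4; /* initial guess */
        if (x * x < V)
            x = V;                    /* keep the guess at or above sqrt(V) */
        for (;;) {
            unsigned long long t = (x * x - V) / (2 * x); /* error / slope */
            if (t == 0)
                break;                /* quotient went to zero: done */
            x -= t;                   /* adjust guess by the error term */
        }
        if (x * x > V)
            x -= 1;                   /* final guess may be one too high */
        return x;
    }

    int main(void)
    {
        unsigned long long tests[] = { 0, 1, 2, 8, 16, 17, 24, 25, 998001 };
        for (int i = 0; i < 9; i++)
            printf("isqrt(%llu) = %llu\n", tests[i], isqrt(tests[i]));
        return 0;
    }

Because the guess always stays at or above the true root, the error
term never goes negative, and the loop terminates with the guess equal
to floor(sqrt(V)) or floor(sqrt(V)) + 1, which the final comparison
settles.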
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt
new file mode 100644
index 0000000000..edbb97882c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/square.txt
@@ -0,0 +1,72 @@
+Squaring Algorithm
+
+When you are squaring a value, you can take advantage of the fact that
+half the multiplications performed by the more general multiplication
+algorithm (see 'mul.txt' for a description) are redundant when the
+multiplicand equals the multiplier.
+
+In particular, the modified algorithm is:
+
+k = 0
+for j <- 0 to (#a - 1)
+  w = c[2*j] + (a[j] ^ 2)
+  c[2*j] = w mod R
+  k = w div R
+
+  for i <- j+1 to (#a - 1)
+    w = (2 * a[j] * a[i]) + k + c[i+j]
+    c[i+j] = w mod R
+    k = w div R
+  endfor
+  c[i+j] = k;
+  k = 0;
+endfor
+
+On the surface, this looks identical to the multiplication algorithm;
+however, note the following differences:
+
+ - precomputation of the leading term in the outer loop
+
+ - i runs from j+1 instead of from zero
+
+ - doubling of a[i] * a[j] in the inner product
+
+Unfortunately, the construction of the inner product is such that we
+need more than two digits to represent the inner product, in some
+cases. In a C implementation, this means that some gymnastics must be
+performed in order to handle overflow, for which C has no direct
+abstraction. We do this by observing the following:
+
+If we have multiplied a[i] and a[j], and the product is more than half
+the maximum value expressible in two digits, then doubling this result
+will overflow into a third digit. If this occurs, we take note of the
+overflow, and double it anyway -- unsigned arithmetic in C wraps
+around on overflow, so the two digits we get back should still be
+valid, modulo the overflow.
+
+Having doubled this value, we now have to add in the remainders and
+the digits already computed by earlier steps. If we did not overflow
+in the previous step, we might still cause an overflow here. That
+will happen whenever the maximum value expressible in two digits, less
+the amount we have to add, is less than the result of the previous
+step. Thus, the overflow computation is (writing R^2 - 1 for the
+maximum value expressible in two digits):
+
+  u = 0
+  w = a[i] * a[j]
+
+  if(w > (R^2 - 1) / 2)
+    u = 1
+
+  w = w * 2
+  v = c[i + j] + k
+
+  if(u == 0 && (R^2 - 1 - v) < w)
+    u = 1
+
+If there is an overflow, u will be 1, otherwise u will be 0. The rest
+of the parameters are the same as they are in the above description.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
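One inner step of that bookkeeping can be traced with concrete values.
The sketch below uses 16-bit digits and 32-bit words (so R = 2^16 and
R^2 - 1 = UINT32_MAX); the digit and carry values are worst-case
examples chosen for the demonstration, and the variable names are
illustrative rather than the library's own.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t R = 1u << 16;        /* digit radix */
        uint16_t ai = 0xFFFF, aj = 0xFFFF;  /* worst-case digits */
        uint32_t k = 0x1FFFE;               /* carry from the previous column */
        uint16_t cij = 0xFFFF;              /* digit already stored at c[i+j] */

        uint32_t u = 0;                     /* third-digit overflow flag */
        uint32_t w = (uint32_t)ai * aj;     /* product fits in two digits */

        if (w > UINT32_MAX / 2)             /* doubling would spill over */
            u = 1;
        w = w * 2;                          /* wraps mod 2^32; valid modulo the overflow */

        uint32_t v = (uint32_t)cij + k;     /* the amount we still have to add */
        if (u == 0 && (UINT32_MAX - v) < w) /* the addition itself may overflow */
            u = 1;
        w = w + v;

        uint32_t digit = w % R;             /* written back to c[i+j] */
        uint32_t carry = w / R + u * R;     /* u is worth 2^32 / R == R columns up */

        printf("digit=%04x carry=%05x u=%u\n",
               (unsigned)digit, (unsigned)carry, (unsigned)u);
        return 0;
    }

With the values above this prints digit=ffff carry=1fffe u=1, which
matches the exact three-digit total 0x1FFFEFFFF.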
diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt
new file mode 100644
index 0000000000..58f37c9dff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/timing.txt
@@ -0,0 +1,213 @@
+MPI Library Timing Tests
+
+Hardware/OS
+(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
+(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
+(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
+(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
+(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
+(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
+
+Compiler
+(1) MIPSpro C 7.2.1 -O3 optimizations
+(2) GCC 2.95.1 -O3 optimizations
+(3) IBM AIX xlc -O3 optimizations (version unknown)
+(4) EGCS 2.91.66 -O3 optimizations
+(5) Metrowerks CodeWarrior 5.0 C, all optimizations
+(6) MIPSpro C 7.30 -O3 optimizations
+(7) same as (6), with optimized libmalloc.so
+
+Timings are given in seconds, computed using the C library's clock()
+function. The first column gives the hardware and compiler
+configuration used for the test. The second column indicates the
+number of tests that were aggregated to get the statistics for that
+size. These were compiled using 16-bit digits.
+
+Source data were generated randomly using a fixed seed, so they should
+be internally consistent, but may vary on different systems depending
+on the C library. Also, since the resolution of the timer accessed by
+clock() varies, there may be some variance in the precision of these
+measurements.
+
+Prime Generation (primegen)
+
+128 bits:
+A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
+A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
+B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
+C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
+D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
+A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
+A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
+
+192 bits:
+A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
+A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
+B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
+C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
+D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
+A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
+A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
+
+256 bits:
+A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
+A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
+B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
+C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
+D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
+A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
+A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
+
+320 bits:
+A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
+A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
+B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
+C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
+D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
+A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
+A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
+
+384 bits:
+A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
+A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
+B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
+C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
+D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
+A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
+A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
+
+448 bits:
+A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
+A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
+B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
+C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
+D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
+A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
+A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
+
+512 bits:
+A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
+A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
+B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
+C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
+D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
+A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
+A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
+
+Modular Exponentiation (metime)
+
+The following results are aggregated from 200 pseudo-randomly
+generated tests, based on a fixed seed.
+
+        base, exponent, and modulus size (bits)
+P/C     128   192   256   320   384   448   512   640   768   896   1024
+------- -----------------------------------------------------------------
+A1      0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
+A2      0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
+B3      0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
+C4      0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
+D4      0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
+E5      0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
+A6      0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
+A7      0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
+
+Multiplication and Squaring tests (mulsqr)
+
+The following results are aggregated from 500000 pseudo-randomly
+generated tests, based on a per-run wall-clock seed.
Times are given +in seconds, except where indicated in microseconds (us). + +(A1) + +bits multiply square ad percent time/mult time/square +64 9.33 9.15 > 1.9 18.7us 18.3us +128 10.88 10.44 > 4.0 21.8us 20.9us +192 13.30 11.89 > 10.6 26.7us 23.8us +256 14.88 12.64 > 15.1 29.8us 25.3us +320 18.64 15.01 > 19.5 37.3us 30.0us +384 23.11 17.70 > 23.4 46.2us 35.4us +448 28.28 20.88 > 26.2 56.6us 41.8us +512 34.09 24.51 > 28.1 68.2us 49.0us +640 47.86 33.25 > 30.5 95.7us 66.5us +768 64.91 43.54 > 32.9 129.8us 87.1us +896 84.49 55.48 > 34.3 169.0us 111.0us +1024 107.25 69.21 > 35.5 214.5us 138.4us +1536 227.97 141.91 > 37.8 456.0us 283.8us +2048 394.05 242.15 > 38.5 788.1us 484.3us + +(A2) + +bits multiply square ad percent time/mult time/square +64 7.87 7.95 < 1.0 15.7us 15.9us +128 9.40 9.19 > 2.2 18.8us 18.4us +192 11.15 10.59 > 5.0 22.3us 21.2us +256 12.02 11.16 > 7.2 24.0us 22.3us +320 14.62 13.43 > 8.1 29.2us 26.9us +384 17.72 15.80 > 10.8 35.4us 31.6us +448 21.24 18.51 > 12.9 42.5us 37.0us +512 25.36 21.78 > 14.1 50.7us 43.6us +640 34.57 29.00 > 16.1 69.1us 58.0us +768 46.10 37.60 > 18.4 92.2us 75.2us +896 58.94 47.72 > 19.0 117.9us 95.4us +1024 73.76 59.12 > 19.8 147.5us 118.2us +1536 152.00 118.80 > 21.8 304.0us 237.6us +2048 259.41 199.57 > 23.1 518.8us 399.1us + +(B3) + +bits multiply square ad percent time/mult time/square +64 2.60 2.47 > 5.0 5.20us 4.94us +128 4.43 4.06 > 8.4 8.86us 8.12us +192 7.03 6.10 > 13.2 14.1us 12.2us +256 10.44 8.59 > 17.7 20.9us 17.2us +320 14.44 11.64 > 19.4 28.9us 23.3us +384 19.12 15.08 > 21.1 38.2us 30.2us +448 24.55 19.09 > 22.2 49.1us 38.2us +512 31.03 23.53 > 24.2 62.1us 47.1us +640 45.05 33.80 > 25.0 90.1us 67.6us +768 63.02 46.05 > 26.9 126.0us 92.1us +896 83.74 60.29 > 28.0 167.5us 120.6us +1024 106.73 76.65 > 28.2 213.5us 153.3us +1536 228.94 160.98 > 29.7 457.9us 322.0us +2048 398.08 275.93 > 30.7 796.2us 551.9us + +(C4) + +bits multiply square ad percent time/mult time/square +64 1.34 1.28 > 4.5 2.68us 2.56us +128 2.76 2.59 > 6.2 5.52us 5.18us +192 4.52 4.16 > 8.0 9.04us 8.32us +256 6.64 5.99 > 9.8 13.3us 12.0us +320 9.20 8.13 > 11.6 18.4us 16.3us +384 12.01 10.58 > 11.9 24.0us 21.2us +448 15.24 13.33 > 12.5 30.5us 26.7us +512 19.02 16.46 > 13.5 38.0us 32.9us +640 27.56 23.54 > 14.6 55.1us 47.1us +768 37.89 31.78 > 16.1 75.8us 63.6us +896 49.24 41.42 > 15.9 98.5us 82.8us +1024 62.59 52.18 > 16.6 125.2us 104.3us +1536 131.66 107.72 > 18.2 263.3us 215.4us +2048 226.45 182.95 > 19.2 453.0us 365.9us + +(A7) + +bits multiply square ad percent time/mult time/square +64 1.74 1.71 > 1.7 3.48us 3.42us +128 3.48 2.96 > 14.9 6.96us 5.92us +192 5.74 4.60 > 19.9 11.5us 9.20us +256 8.75 6.61 > 24.5 17.5us 13.2us +320 12.5 8.99 > 28.1 25.0us 18.0us +384 16.9 11.9 > 29.6 33.8us 23.8us +448 22.2 15.2 > 31.7 44.4us 30.4us +512 28.3 19.0 > 32.7 56.6us 38.0us +640 42.4 28.0 > 34.0 84.8us 56.0us +768 59.4 38.5 > 35.2 118.8us 77.0us +896 79.5 51.2 > 35.6 159.0us 102.4us +1024 102.6 65.5 > 36.2 205.2us 131.0us +1536 224.3 140.6 > 37.3 448.6us 281.2us +2048 393.4 244.3 > 37.9 786.8us 488.6us + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
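+
+For reference (this sketch is not part of the distribution; it assumes
+the mp_mul() and mp_sqr() interfaces declared in mpi.h, and omits
+operand setup and error checking), a mulsqr-style comparison has this
+shape, with the advantage percentage computed as in the tables above:
+
+    #include <stdio.h>
+    #include <time.h>
+    #include "mpi.h"
+
+    /* Time 'reps' multiplies against 'reps' squarings of operand a. */
+    void mul_vs_sqr(const mp_int *a, int reps)
+    {
+        mp_int c;
+        clock_t t0, t1, t2;
+        double tm, ts;
+        int i;
+
+        mp_init(&c);
+        t0 = clock();
+        for (i = 0; i < reps; i++)
+            mp_mul(a, a, &c);       /* general multiply: c = a * a */
+        t1 = clock();
+        for (i = 0; i < reps; i++)
+            mp_sqr(a, &c);          /* squaring path:    c = a * a */
+        t2 = clock();
+
+        tm = (double)(t1 - t0) / CLOCKS_PER_SEC;
+        ts = (double)(t2 - t1) / CLOCKS_PER_SEC;
+        printf("multiply=%.2fs square=%.2fs advantage=%.1f%%\n",
+               tm, ts, 100.0 * (tm - ts) / tm);
+        mp_clear(&c);
+    }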
diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s
new file mode 100644
index 0000000000..ae9da630d1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hpma512.s
@@ -0,0 +1,615 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ *
+ * This PA-RISC 2.0 function computes the product of two unsigned integers,
+ * and adds the result to a previously computed integer. The multiplicand
+ * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
+ * memory in little-double-wordian order. The multiplier is an unsigned
+ * 64-bit integer. The previously computed integer to which the product is
+ * added is located in the result ("res") area, and is assumed to be a
+ * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
+ * in little-double-wordian order. This value normally will be a
+ * previously computed nine-doubleword result. It is not necessary
+ * to pad the multiplicand with an additional 64-bit zero doubleword.
+ *
+ * Multiplicand, multiplier, and addend ideally should be aligned at
+ * 16-byte boundaries for best performance. The code will function
+ * correctly for alignment at eight-byte boundaries which are not 16-byte
+ * boundaries, but the execution may be slightly slower due to even/odd
+ * bank conflicts on PA-RISC 8000 processors.
+ *
+ * This function is designed to accept the same calling sequence as Bill
+ * Ackerman's "maxpy_little" function. The carry from the ninth doubleword
+ * of the result is written to the tenth word of the result, as is done by
+ * Bill Ackerman's function. The final carry also is returned as an
+ * integer, which may be ignored. The function prototype may be either
+ * of the following:
+ *
+ * void multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ * or
+ * int multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *
+ * where: "l" originally denoted vector lengths. This parameter is
+ * ignored. This function always assumes a multiplicand length of
+ * 512 bits (eight doublewords), and addend and result lengths of
+ * 576 bits (nine doublewords).
+ *
+ * "m" is a pointer to the doubleword multiplier, ideally aligned
+ * on a 16-byte boundary.
+ *
+ * "a" is a pointer to the eight-doubleword multiplicand, stored
+ * in little-double-wordian order, and ideally aligned on a 16-byte
+ * boundary.
+ *
+ * "res" is a pointer to the nine doubleword addend, and to the
+ * nine-doubleword product computed by this function. The result
+ * also is stored in little-double-wordian order, and ideally is
+ * aligned on a 16-byte boundary. It is expected that the alignment
+ * of the "res" area may alternate between even/odd doubleword
+ * boundaries for successive calls for 512-bit x 512-bit
+ * multiplications.
+ *
+ * The code for this function has been scheduled to use the parallelism
+ * of the PA-RISC 8000 series microprocessors as well as the author was
+ * able. Comments and/or suggestions for improvement are welcomed.
+ *
+ * The code is "64-bit safe". This means it may be called in either
+ * the ILP32 context or the LP64 context. All 64 bits of the registers
+ * are saved and restored.
+ *
+ * This code is self-contained. It requires no other header files in order
+ * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
+ * definitions for registers and stack offsets are included within this
+ * one source file.
+ *
+ * This is a leaf routine. As such, minimal use is made of the stack area.
+ * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
+ * general registers, and 128 bytes are used to move intermediate products
+ * from the floating-point registers to the general registers. Stack
+ * protocols assure proper alignment of these areas.
+ *
+ */
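+
+/* Illustration (not part of the original source): one way to drive this
+ * routine from C for a full 512-bit x 512-bit product, following the
+ * calling sequence described above.  The names and sizes here are
+ * assumptions: "chunk" is a 64-bit unsigned type, all arrays are
+ * little-double-wordian, and the result area is cleared by the caller
+ * before the first call.
+ *
+ *     chunk a[8], b[8], res[17];   // res[] zeroed by the caller
+ *     int i;
+ *
+ *     for (i = 0; i < 8; i++)
+ *         multacc512(8, &b[i], a, &res[i]);   // res += (a * b[i]) << 64*i
+ */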
+
+
+/* ====================================================================*/
+/* symbolic definitions for PA-RISC registers */
+/* in the MIPS style, avoids lots of case shifts */
+/* assignments (except t4) preserve register number parity */
+/* ====================================================================*/
+
+#define zero %r0 /* permanent zero */
+#define t5 %r1 /* temp register, altered by addil */
+
+#define rp %r2 /* return pointer */
+
+#define s1 %r3 /* callee saves register*/
+#define s0 %r4 /* callee saves register*/
+#define s3 %r5 /* callee saves register*/
+#define s2 %r6 /* callee saves register*/
+#define s5 %r7 /* callee saves register*/
+#define s4 %r8 /* callee saves register*/
+#define s7 %r9 /* callee saves register*/
+#define s6 %r10 /* callee saves register*/
+
+#define t1 %r19 /* caller saves register*/
+#define t0 %r20 /* caller saves register*/
+#define t3 %r21 /* caller saves register*/
+#define t2 %r22 /* caller saves register*/
+
+#define a3 %r23 /* fourth argument register, high word */
+#define a2 %r24 /* third argument register, low word*/
+#define a1 %r25 /* second argument register, high word*/
+#define a0 %r26 /* first argument register, low word*/
+
+#define v0 %r28 /* high order return value*/
+#define v1 %r29 /* low order return value*/
+
+#define sp %r30 /* stack pointer*/
+#define t4 %r31 /* temporary register */
+
+#define fa0 %fr4 /* first argument register*/
+#define fa1 %fr5 /* second argument register*/
+#define fa2 %fr6 /* third argument register*/
+#define fa3 %fr7 /* fourth argument register*/
+
+#define fa0r %fr4R /* first argument register*/
+#define fa1r %fr5R /* second argument register*/
+#define fa2r %fr6R /* third argument register*/
+#define fa3r %fr7R /* fourth argument register*/
+
+#define ft0 %fr8 /* caller saves register*/
+#define ft1 %fr9 /* caller saves register*/
+#define ft2 %fr10 /* caller saves register*/
+#define ft3 %fr11 /* caller saves register*/
+
+#define ft0r %fr8R /* caller saves register*/
+#define ft1r %fr9R /* caller saves register*/
+#define ft2r %fr10R /* caller saves register*/
+#define ft3r %fr11R /* caller saves register*/
+
+#define ft4 %fr22 /* caller saves register*/
+#define ft5 %fr23 /* caller saves register*/
+#define ft6 %fr24 /* caller saves register*/
+#define ft7 %fr25 /* caller saves register*/
+#define ft8 %fr26 /* caller saves register*/
+#define ft9 %fr27 /* caller saves register*/
+#define ft10 %fr28 /* caller saves register*/
+#define ft11 %fr29 /* caller saves register*/
+#define ft12 %fr30 /* caller saves register*/
+#define ft13 %fr31 /* caller saves register*/
+
+#define ft4r %fr22R /* caller saves register*/
+#define ft5r %fr23R /* caller saves register*/
+#define ft6r %fr24R /* caller saves register*/
+#define ft7r %fr25R /* caller saves register*/
+#define ft8r %fr26R /* caller saves register*/
+#define ft9r %fr27R /* caller saves register*/
+#define ft10r %fr28R /* caller saves register*/
+#define ft11r %fr29R /* caller saves register*/
+#define ft12r %fr30R /* caller saves register*/
+#define ft13r %fr31R /* caller saves register*/
+
+
+
+/* ================================================================== */
+/* functional definitions for PA-RISC registers */
+/* ================================================================== */
+
+/* general registers */
+
+#define T1 a0 /* temp, (length parameter ignored) */
+
+#define pM a1 /* -> 64-bit multiplier */
+#define T2 a1 /* temp, (after fetching multiplier) */
+
+#define pA a2 /* -> multiplicand vector (8 64-bit words) */
+#define T3 a2 /* temp, (after fetching multiplicand) */
+
+#define pR a3 /* -> addend vector (9 64-bit doublewords),
+ result vector (9 64-bit words) */
+
+#define S0 s0 /* callee saves summand registers */
+#define S1 s1
+#define S2 s2
+#define S3 s3
+#define S4 s4
+#define S5 s5
+#define S6 s6
+#define S7 s7
+
+#define S8 v0 /* caller saves summand registers */
+#define S9 v1
+#define S10 t0
+#define S11 t1
+#define S12 t2
+#define S13 t3
+#define S14 t4
+#define S15 t5
+
+
+
+/* floating-point registers */
+
+#define M fa0 /* multiplier double word */
+#define MR fa0r /* low order half of multiplier double word */
+#define ML fa0 /* high order half of multiplier double word */
+
+#define A0 fa2 /* multiplicand double word 0 */
+#define A0R fa2r /* low order half of multiplicand double word */
+#define A0L fa2 /* high order half of multiplicand double word */
+
+#define A1 fa3 /* multiplicand double word 1 */
+#define A1R fa3r /* low order half of multiplicand double word */
+#define A1L fa3 /* high order half of multiplicand double word */
+
+#define A2 ft0 /* multiplicand double word 2 */
+#define A2R ft0r /* low order half of multiplicand double word */
+#define A2L ft0 /* high order half of multiplicand double word */
+
+#define A3 ft1 /* multiplicand double word 3 */
+#define A3R ft1r /* low order half of multiplicand double word */
+#define A3L ft1 /* high order half of multiplicand double word */
+
+#define A4 ft2 /* multiplicand double word 4 */
+#define A4R ft2r /* low order half of multiplicand double word */
+#define A4L ft2 /* high order half of multiplicand double word */
+
+#define A5 ft3 /* multiplicand double word 5 */
+#define A5R ft3r /* low order half of multiplicand double word */
+#define A5L ft3 /* high order half of multiplicand double word */
+
+#define A6 ft4 /* multiplicand double word 6 */
+#define A6R ft4r /* low order half of multiplicand double word */
+#define A6L ft4 /* high order half of multiplicand double word */
+
+#define A7 ft5 /* multiplicand double word 7 */
+#define A7R ft5r /* low order half of multiplicand double word */
+#define A7L ft5 /* high order half of multiplicand double word */
+
+#define P0 ft6 /* product word 0 */
+#define P1 ft7 /* product word 1 */
+#define P2 ft8 /* product word 2 */
+#define P3 ft9 /* product word 3 */
+#define P4 ft10 /* product word 4 */
+#define P5 ft11 /* product word 5 */
+#define P6 ft12 /* product word 6 */
+#define P7 ft13 /* product word 7 */
+
+
+
+
+/* ====================================================================== */
+/* symbolic definitions for HP-UX stack offsets */
+/* symbolic definitions for memory NOPs */
+/* ====================================================================== */
+
+#define ST_SZ 192 /* stack area total size */
+
+#define SV0 -192(sp) /* general register save area */
+#define SV1 -184(sp)
+#define SV2 -176(sp)
+#define SV3 -168(sp)
+#define SV4 -160(sp)
+#define SV5 -152(sp)
+#define SV6 -144(sp)
+#define SV7 -136(sp)
+
+#define XF0 -128(sp) /* data transfer area */
+#define XF1 -120(sp) /* for floating-pt to 
integer regs */ +#define XF2 -112(sp) +#define XF3 -104(sp) +#define XF4 -96(sp) +#define XF5 -88(sp) +#define XF6 -80(sp) +#define XF7 -72(sp) +#define XF8 -64(sp) +#define XF9 -56(sp) +#define XF10 -48(sp) +#define XF11 -40(sp) +#define XF12 -32(sp) +#define XF13 -24(sp) +#define XF14 -16(sp) +#define XF15 -8(sp) + +#define mnop proberi (sp),3,zero /* memory NOP */ + + + + +/* ====================================================================== */ +/* assembler formalities */ +/* ====================================================================== */ + +#ifdef __LP64__ + .level 2.0W +#else + .level 2.0 +#endif + .space $TEXT$ + .subspa $CODE$ + .align 16 + +/* ====================================================================== */ +/* here to compute 64-bit x 512-bit product + 512-bit addend */ +/* ====================================================================== */ + +multacc512 + .PROC + .CALLINFO + .ENTRY + fldd 0(pM),M ; multiplier double word + ldo ST_SZ(sp),sp ; push stack + + fldd 0(pA),A0 ; multiplicand double word 0 + std S1,SV1 ; save s1 + + fldd 16(pA),A2 ; multiplicand double word 2 + std S3,SV3 ; save s3 + + fldd 32(pA),A4 ; multiplicand double word 4 + std S5,SV5 ; save s5 + + fldd 48(pA),A6 ; multiplicand double word 6 + std S7,SV7 ; save s7 + + + std S0,SV0 ; save s0 + fldd 8(pA),A1 ; multiplicand double word 1 + xmpyu MR,A0L,P0 ; A0 cross 32-bit word products + xmpyu ML,A0R,P2 + + std S2,SV2 ; save s2 + fldd 24(pA),A3 ; multiplicand double word 3 + xmpyu MR,A2L,P4 ; A2 cross 32-bit word products + xmpyu ML,A2R,P6 + + std S4,SV4 ; save s4 + fldd 40(pA),A5 ; multiplicand double word 5 + + std S6,SV6 ; save s6 + fldd 56(pA),A7 ; multiplicand double word 7 + + + fstd P0,XF0 ; MR * A0L + xmpyu MR,A0R,P0 ; A0 right 32-bit word product + xmpyu MR,A1L,P1 ; A1 cross 32-bit word product + + fstd P2,XF2 ; ML * A0R + xmpyu ML,A0L,P2 ; A0 left 32-bit word product + xmpyu ML,A1R,P3 ; A1 cross 32-bit word product + + fstd P4,XF4 ; MR * A2L + xmpyu MR,A2R,P4 ; A2 right 32-bit word product + xmpyu MR,A3L,P5 ; A3 cross 32-bit word product + + fstd P6,XF6 ; ML * A2R + xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product + xmpyu ML,A3R,P7 ; A3 cross 32-bit word product + + + ldd XF0,S0 ; MR * A0L + fstd P1,XF1 ; MR * A1L + + ldd XF2,S2 ; ML * A0R + fstd P3,XF3 ; ML * A1R + + ldd XF4,S4 ; MR * A2L + fstd P5,XF5 ; MR * A3L + xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products + xmpyu ML,A1L,P3 + + ldd XF6,S6 ; ML * A2R + fstd P7,XF7 ; ML * A3R + xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products + xmpyu ML,A3L,P7 + + + fstd P0,XF0 ; MR * A0R + ldd XF1,S1 ; MR * A1L + nop + add S0,S2,T1 ; A0 cross product sum + + fstd P2,XF2 ; ML * A0L + ldd XF3,S3 ; ML * A1R + add,dc zero,zero,S0 ; A0 cross product sum carry + depd,z T1,31,32,S2 ; A0 cross product sum << 32 + + fstd P4,XF4 ; MR * A2R + ldd XF5,S5 ; MR * A3L + shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32 + add S4,S6,T3 ; A2 cross product sum + + fstd P6,XF6 ; ML * A2L + ldd XF7,S7 ; ML * A3R + add,dc zero,zero,S4 ; A2 cross product sum carry + depd,z T3,31,32,S6 ; A2 cross product sum << 32 + + + ldd XF0,S8 ; MR * A0R + fstd P1,XF1 ; MR * A1R + xmpyu MR,A4L,P0 ; A4 cross 32-bit word product + xmpyu MR,A5L,P1 ; A5 cross 32-bit word product + + ldd XF2,S10 ; ML * A0L + fstd P3,XF3 ; ML * A1L + xmpyu ML,A4R,P2 ; A4 cross 32-bit word product + xmpyu ML,A5R,P3 ; A5 cross 32-bit word product + + ldd XF4,S12 ; MR * A2R + fstd P5,XF5 ; MR * A3L + xmpyu MR,A6L,P4 ; A6 cross 32-bit word product + xmpyu MR,A7L,P5 ; A7 cross 32-bit word 
product + + ldd XF6,S14 ; ML * A2L + fstd P7,XF7 ; ML * A3L + xmpyu ML,A6R,P6 ; A6 cross 32-bit word product + xmpyu ML,A7R,P7 ; A7 cross 32-bit word product + + + fstd P0,XF0 ; MR * A4L + ldd XF1,S9 ; MR * A1R + shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32 + add S1,S3,T1 ; A1 cross product sum + + fstd P2,XF2 ; ML * A4R + ldd XF3,S11 ; ML * A1L + add,dc zero,zero,S1 ; A1 cross product sum carry + depd,z T1,31,32,S3 ; A1 cross product sum << 32 + + fstd P4,XF4 ; MR * A6L + ldd XF5,S13 ; MR * A3R + shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32 + add S5,S7,T3 ; A3 cross product sum + + fstd P6,XF6 ; ML * A6R + ldd XF7,S15 ; ML * A3L + add,dc zero,zero,S5 ; A3 cross product sum carry + depd,z T3,31,32,S7 ; A3 cross product sum << 32 + + + shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword + + add,dc S0,S10,S10 ; M * A0 left doubleword + add S3,S9,S9 ; M * A1 right doubleword + + add,dc S1,S11,S11 ; M * A1 left doubleword + add S6,S12,S12 ; M * A2 right doubleword + + + ldd 24(pR),S3 ; Addend word 3 + fstd P1,XF1 ; MR * A5L + add,dc S4,S14,S14 ; M * A2 left doubleword + xmpyu MR,A5R,P1 ; A5 right 32-bit word product + + ldd 8(pR),S1 ; Addend word 1 + fstd P3,XF3 ; ML * A5R + add S7,S13,S13 ; M * A3 right doubleword + xmpyu ML,A5L,P3 ; A5 left 32-bit word product + + ldd 0(pR),S7 ; Addend word 0 + fstd P5,XF5 ; MR * A7L + add,dc S5,S15,S15 ; M * A3 left doubleword + xmpyu MR,A7R,P5 ; A7 right 32-bit word product + + ldd 16(pR),S5 ; Addend word 2 + fstd P7,XF7 ; ML * A7R + add S10,S9,S9 ; P1 doubleword + xmpyu ML,A7L,P7 ; A7 left 32-bit word products + + + ldd XF0,S0 ; MR * A4L + fstd P1,XF9 ; MR * A5R + add,dc S11,S12,S12 ; P2 doubleword + xmpyu MR,A4R,P0 ; A4 right 32-bit word product + + ldd XF2,S2 ; ML * A4R + fstd P3,XF11 ; ML * A5L + add,dc S14,S13,S13 ; P3 doubleword + xmpyu ML,A4L,P2 ; A4 left 32-bit word product + + ldd XF6,S6 ; ML * A6R + fstd P5,XF13 ; MR * A7R + add,dc zero,S15,T2 ; P4 partial doubleword + xmpyu MR,A6R,P4 ; A6 right 32-bit word product + + ldd XF4,S4 ; MR * A6L + fstd P7,XF15 ; ML * A7L + add S7,S8,S8 ; R0 + P0, new R0 doubleword + xmpyu ML,A6L,P6 ; A6 left 32-bit word product + + + fstd P0,XF0 ; MR * A4R + ldd XF7,S7 ; ML * A7R + add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword + + fstd P2,XF2 ; ML * A4L + ldd XF1,S1 ; MR * A5L + add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword + + fstd P4,XF4 ; MR * A6R + ldd XF5,S5 ; MR * A7L + add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword + + fstd P6,XF6 ; ML * A6L + ldd XF3,S3 ; ML * A5R + add,dc zero,T2,T2 ; c + partial P4 + add S0,S2,T1 ; A4 cross product sum + + + std S8,0(pR) ; save R0 + add,dc zero,zero,S0 ; A4 cross product sum carry + depd,z T1,31,32,S2 ; A4 cross product sum << 32 + + std S9,8(pR) ; save R1 + shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32 + add S4,S6,T3 ; A6 cross product sum + + std S12,16(pR) ; save R2 + add,dc zero,zero,S4 ; A6 cross product sum carry + depd,z T3,31,32,S6 ; A6 cross product sum << 32 + + + std S13,24(pR) ; save R3 + shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32 + add S1,S3,T1 ; A5 cross product sum + + ldd XF0,S8 ; MR * A4R + add,dc zero,zero,S1 ; A5 cross product sum carry + depd,z T1,31,32,S3 ; A5 cross product sum << 32 + + ldd XF2,S10 ; ML * A4L + ldd XF9,S9 ; MR * A5R + shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32 + add S5,S7,T3 ; A7 cross product sum + + ldd XF4,S12 ; MR * A6R + ldd XF11,S11 ; ML * A5L + add,dc zero,zero,S5 ; A7 cross product sum carry + depd,z 
T3,31,32,S7 ; A7 cross product sum << 32 + + ldd XF6,S14 ; ML * A6L + ldd XF13,S13 ; MR * A7R + shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A4 right doubleword + + + ldd XF15,S15 ; ML * A7L + add,dc S0,S10,S10 ; M * A4 left doubleword + add S3,S9,S9 ; M * A5 right doubleword + + add,dc S1,S11,S11 ; M * A5 left doubleword + add S6,S12,S12 ; M * A6 right doubleword + + ldd 32(pR),S0 ; Addend word 4 + ldd 40(pR),S1 ; Addend word 5 + add,dc S4,S14,S14 ; M * A6 left doubleword + add S7,S13,S13 ; M * A7 right doubleword + + ldd 48(pR),S2 ; Addend word 6 + ldd 56(pR),S3 ; Addend word 7 + add,dc S5,S15,S15 ; M * A7 left doubleword + add S8,T2,S8 ; P4 doubleword + + ldd 64(pR),S4 ; Addend word 8 + ldd SV5,s5 ; restore s5 + add,dc S10,S9,S9 ; P5 doubleword + add,dc S11,S12,S12 ; P6 doubleword + + + ldd SV6,s6 ; restore s6 + ldd SV7,s7 ; restore s7 + add,dc S14,S13,S13 ; P7 doubleword + add,dc zero,S15,S15 ; P8 doubleword + + add S0,S8,S8 ; new R4 doubleword + + ldd SV0,s0 ; restore s0 + std S8,32(pR) ; save R4 + add,dc S1,S9,S9 ; new R5 doubleword + + ldd SV1,s1 ; restore s1 + std S9,40(pR) ; save R5 + add,dc S2,S12,S12 ; new R6 doubleword + + ldd SV2,s2 ; restore s2 + std S12,48(pR) ; save R6 + add,dc S3,S13,S13 ; new R7 doubleword + + ldd SV3,s3 ; restore s3 + std S13,56(pR) ; save R7 + add,dc S4,S15,S15 ; new R8 doubleword + + ldd SV4,s4 ; restore s4 + std S15,64(pR) ; save result[8] + add,dc zero,zero,v0 ; return carry from R8 + + CMPIB,*= 0,v0,$L0 ; if no overflow, exit + LDO 8(pR),pR + +$FINAL1 ; Final carry propagation + LDD 64(pR),v0 + LDO 8(pR),pR + ADDI 1,v0,v0 + CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry. + STD v0,56(pR) +$L0 + bv zero(rp) ; -> caller + ldo -ST_SZ(sp),sp ; pop stack + +/* ====================================================================== */ +/* end of module */ +/* ====================================================================== */ + + + bve (rp) + .EXIT + nop + .PROCEND + .SPACE $TEXT$ + .SUBSPA $CODE$ + .EXPORT multacc512,ENTRY + + .end diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s new file mode 100644 index 0000000000..c72de8a12b --- /dev/null +++ b/security/nss/lib/freebl/mpi/hppa20.s @@ -0,0 +1,904 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef __LP64__ + .LEVEL 2.0W +#else +; .LEVEL 1.1 +; .ALLOW 2.0N + .LEVEL 2.0 +#endif + .SPACE $TEXT$,SORT=8 + .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24 + +; *************************************************************** +; +; maxpy_[little/big] +; +; *************************************************************** + +; There is no default -- you must specify one or the other. +#define LITTLE_WORDIAN 1 + +#ifdef LITTLE_WORDIAN +#define EIGHT 8 +#define SIXTEEN 16 +#define THIRTY_TWO 32 +#define UN_EIGHT -8 +#define UN_SIXTEEN -16 +#define UN_TWENTY_FOUR -24 +#endif + +#ifdef BIG_WORDIAN +#define EIGHT -8 +#define SIXTEEN -16 +#define THIRTY_TWO -32 +#define UN_EIGHT 8 +#define UN_SIXTEEN 16 +#define UN_TWENTY_FOUR 24 +#endif + +; This performs a multiple-precision integer version of "daxpy", +; Using the selected addressing direction. "Little-wordian" means that +; the least significant word of a number is stored at the lowest address. +; "Big-wordian" means that the most significant word is at the lowest +; address. 
Either way, the incoming address of the vector is that +; of the least significant word. That means that, for little-wordian +; addressing, we move the address upward as we propagate carries +; from the least significant word to the most significant. For +; big-wordian we move the address downward. + +; We use the following registers: +; +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = address of scalar +; r24 = arg3 = multiplicand vector +; r23 = arg4 = result vector +; +; fr9 = scalar loaded once only from r25 + +; The cycle counts shown in the bodies below are simply the result of a +; scheduling by hand. The actual PCX-U hardware does it differently. +; The intention is that the overall speed is the same. + +; The pipeline startup and shutdown code is constructed in the usual way, +; by taking the loop bodies and removing unnecessary instructions. +; We have left the comments describing cycle numbers in the code. +; These are intended for reference when comparing with the main loop, +; and have no particular relationship to actual cycle numbers. + +#ifdef LITTLE_WORDIAN +maxpy_little +#else +maxpy_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately. + FLDD 0(%r25),%fr9 ; fr9 = scalar + +; First startup + + FLDD 0(%r24),%fr24 ; Cycle 1 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3 + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + FSTD %fr24,-96(%sp) + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Second startup + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + FSTD %fr26,-88(%sp) ; Cycle 2 + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + LDD -104(%sp),%r31 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) +; MFCTL %cr16,%r21 ; for timing +; STD %r21,-112(%sp) + +; Here is the loop. 
+ +$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + ADD %r3,%r1,%r1 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + FSTD %fr29,-72(%sp) + LDD -96(%sp),%r3 + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + ADD %r21,%r3,%r3 + LDD -56(%sp),%r20 + STD %r1,UN_SIXTEEN(%r23) + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + FLDD EIGHT(%r24),%fr28 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + STD %r28,UN_EIGHT(%r23) + LDD -72(%sp),%r29 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + FSTD %fr25,-80(%sp) + LDD 0(%r23),%r3 + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r22,%r1,%r1 + ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) + +$ENDLOOP + +; Shutdown code, first stage. + +; MFCTL %cr16,%r21 ; for timing +; STD %r21,UN_SIXTEEN(%r23) +; LDD -112(%sp),%r21 +; STD %r21,UN_EIGHT(%r23) + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + CMPIB,= 0,%r26,$ONEMORE + FSTD %fr30,-56(%sp) + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + ADD %r3,%r1,%r1 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + FSTD %fr29,-72(%sp) + STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9 + LDD -96(%sp),%r3 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN4 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + ADD %r21,%r3,%r3 ; Cycle 6 + LDD -56(%sp),%r20 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + LDD -72(%sp),%r29 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, second stage. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 + + STD %r28,UN_EIGHT(%r23) ; Cycle 9 + + LDD 0(%r23),%r3 ; Cycle 11 + +; Shutdown code, third stage. + + LDO SIXTEEN(%r23),%r23 + ADD %r3,%r22,%r1 +$JOIN1 ADD,DC %r0,%r0,%r21 + CMPIB,*= 0,%r21,$L0 ; if no overflow, exit + STD %r1,UN_SIXTEEN(%r23) + +; Final carry propagation + +$FINAL1 LDO EIGHT(%r23),%r23 + LDD UN_SIXTEEN(%r23),%r21 + ADDI 1,%r21,%r21 + CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry. + STD %r21,UN_SIXTEEN(%r23) + B $L0 + NOP + +; Here is the code that handles the difficult cases N=1, N=2, and N=3. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. 
+ +$N_IS_SMALL + CMPIB,= 0,%r26,$N_IS_ONE + FSTD %fr24,-96(%sp) ; Cycle 10 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + FSTD %fr31,-64(%sp) ; Cycle 12 + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + CMPIB,= 2,%r26,$N_IS_THREE + FSTD %fr30,-56(%sp) + +; N = 2 + FSTD %fr26,-88(%sp) ; Cycle 2 + FSTD %fr28,-104(%sp) ; Cycle 3 + LDD -96(%sp),%r3 ; Cycle 4 + FSTD %fr29,-72(%sp) + B $JOIN4 + ADD %r0,%r0,%r22 + +$N_IS_THREE + FLDD SIXTEEN(%r24),%fr24 + FSTD %fr26,-88(%sp) ; Cycle 2 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + B $JOIN3 + ADD %r0,%r0,%r22 + +$N_IS_ONE + FSTD %fr25,-80(%sp) + FSTD %fr27,-48(%sp) + FSTD %fr26,-88(%sp) ; Cycle 2 + B $JOIN5 + ADD %r0,%r0,%r22 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is quite tricky, because of the way the +; carry chains and SHRPD chains have been chopped up. + +$ONEMORE + + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + ADD %r3,%r1,%r1 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 +$JOIN3 + XMPYU %fr9L,%fr24R,%fr24 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + LDD -104(%sp),%r31 ; Cycle 8 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + FSTD %fr24,-96(%sp) + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + ADD %r22,%r1,%r1 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Shutdown code, stage 1-1/2. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN5 + LDD -96(%sp),%r3 ; moved from cycle 4 + LDD -80(%sp),%r21 + ADD %r21,%r3,%r3 ; Cycle 6 + ADD,DC %r0,%r0,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + SHRPD %r19,%r3,32,%r3 ; Cycle 8 + ADD %r21,%r1,%r1 ; Cycle 9 + ADD,DC %r3,%r4,%r4 ; Cycle 10 + LDD 0(%r23),%r3 ; Cycle 11 + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, stage 2-1/2. 
+ + ADD,DC %r0,%r4,%r4 ; Cycle 1 + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + STD %r1,UN_SIXTEEN(%r23) + ADD,DC %r21,%r4,%r1 + B $JOIN1 + LDO EIGHT(%r23),%r23 + +; exit + +$L0 + LDW -124(%sp),%r4 + BVE (%r2) + .EXIT + LDW,MB -128(%sp),%r3 + + .PROCEND + +; *************************************************************** +; +; add_diag_[little/big] +; +; *************************************************************** + +; The arguments are as follows: +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = vector to square +; r24 = arg3 = result vector + +#ifdef LITTLE_WORDIAN +add_diag_little +#else +add_diag_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately. + NOP + +; Startup code + + FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body) + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + FSTD %fr29,-88(%sp) + FSTD %fr27,-72(%sp) ; Cycle 7 + CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body) + FSTD %fr30,-96(%sp) + FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2 + LDD -88(%sp),%r22 ; Cycle 3 + LDD -72(%sp),%r31 ; Cycle 4 + XMPYU %fr7R,%fr7R,%fr28 + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -96(%sp),%r20 ; Cycle 6 + FSTD %fr28,-80(%sp) + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7 + FSTD %fr24,-64(%sp) + +; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body". + +$DIAGLOOP + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDO SIXTEEN(%r25),%r25 + LDD 0(%r24),%r1 + FSTD %fr31,-104(%sp) + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD,DC %r22,%r3,%r3 + FLDD UN_SIXTEEN(%r25),%fr7 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r1,%r3,%r3 + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + LDD -80(%sp),%r21 + STD %r3,0(%r24) + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDD -64(%sp),%r29 + LDD EIGHT(%r24),%r1 + ADD,DC %r4,%r20,%r20 ; Cycle 6 + LDD -104(%sp),%r19 + FSTD %fr29,-88(%sp) + ADD %r20,%r1,%r1 ; Cycle 7 + FSTD %fr27,-72(%sp) + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r28 + FSTD %fr30,-96(%sp) + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD,DC %r21,%r4,%r4 + FLDD UN_EIGHT(%r25),%fr7 + STD %r1,UN_TWENTY_FOUR(%r24) + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r28,%r4,%r4 + XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4 + LDD -88(%sp),%r22 + STD %r4,UN_SIXTEEN(%r24) + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -72(%sp),%r31 + LDD UN_EIGHT(%r24),%r28 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + LDD -96(%sp),%r20 + FSTD %fr28,-80(%sp) + ADD %r19,%r28,%r28 ; Cycle 7 + FSTD %fr24,-64(%sp) + ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +$ENDDIAGLOOP + + ADD,DC %r0,%r22,%r22 + CMPIB,= 0,%r26,$ONEMOREDIAG + SHRPD %r31,%r0,31,%r3 + +; Shutdown code, first stage. + + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + LDD EIGHT(%r24),%r1 ; Cycle 5 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + LDD -104(%sp),%r19 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, second stage. 
+ + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r4,%r1,%r4 + STD %r4,UN_SIXTEEN(%r24); Cycle 4 + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r0,%r22 ; Cycle 8 + CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit + STD %r28,UN_EIGHT(%r24) + +; Final carry propagation + +$FDIAG2 + LDO EIGHT(%r24),%r24 + LDD UN_EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry. + STD %r26,UN_EIGHT(%r24) + + B $Z0 + NOP + +; Here is the code that handles the difficult case N=1. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. + +$DIAG_N_IS_ONE + + LDD -88(%sp),%r22 + LDD -72(%sp),%r31 + B $JOINDIAG + LDD -96(%sp),%r20 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is the "alternate body". It will, of course, +; give us opposite registers from the other case, so we need +; completely different shutdown code. + +$ONEMOREDIAG + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + FLDD 0(%r25),%fr7 ; Cycle 2 + SHRPD %r0,%r31,31,%r4 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + XMPYU %fr7R,%fr7R,%fr29 + LDD EIGHT(%r24),%r1 ; Cycle 5 + XMPYU %fr7L,%fr7R,%fr27 + XMPYU %fr7L,%fr7L,%fr30 + LDD -104(%sp),%r19 ; Cycle 6 + FSTD %fr29,-88(%sp) + ADD,DC %r4,%r20,%r20 + FSTD %fr27,-72(%sp) ; Cycle 7 + ADD %r20,%r1,%r1 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, first stage. + + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + FSTD %fr30,-96(%sp) + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + LDD -88(%sp),%r22 + ADD %r4,%r1,%r4 + LDD -72(%sp),%r31 ; Cycle 4 + STD %r4,UN_SIXTEEN(%r24) + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + LDD -96(%sp),%r20 ; Cycle 6 + ADD,DC %r3,%r19,%r19 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r22,%r22 ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +; Shutdown code, second stage. + +$JOINDIAG + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r3,%r28,%r3 + STD %r3,0(%r24) ; Cycle 4 + LDD EIGHT(%r24),%r1 ; Cycle 5 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r0,%r21 ; Cycle 8 + CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit + STD %r1,EIGHT(%r24) + +; Final carry propagation + +$FDIAG1 + LDO EIGHT(%r24),%r24 + LDD EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry. 
+ STD %r26,EIGHT(%r24)
+
+$Z0
+ LDW -124(%sp),%r4
+ BVE (%r2)
+ .EXIT
+ LDW,MB -128(%sp),%r3
+ .PROCEND
+; .ALLOW
+
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+#ifdef LITTLE_WORDIAN
+#ifdef __GNUC__
+; GNU-as (as of 2.19) does not support LONG_RETURN
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+#else
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+#else
+ .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+ .END
+
+
+; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
+;
+; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
+; performs a 64-bit x any-size multiply, and adds the
+; result to an area of memory. That is, it performs
+; something like
+;
+;        A B C D
+;      *       Z
+;     __________
+;      P Q R S T
+;
+; and then adds the "PQRST" vector into an area of memory,
+; handling all carries.
+;
+; Digression on nomenclature and endian-ness:
+;
+; Each of the capital letters in the above represents a 64-bit
+; quantity. That is, you could think of the discussion as
+; being in terms of radix-16-quintillion arithmetic. The data
+; type being manipulated is "unsigned long long int". This
+; requires the 64-bit extension of the HP-UX C compiler,
+; available at release 10. You need these compiler flags to
+; enable these extensions:
+;
+;      -Aa +e +DA2.0 +DS2.0
+;
+; (The first specifies ANSI C, the second enables the
+; extensions, which are beyond ANSI C, and the third and
+; fourth tell the compiler to use whatever features of the
+; PA2.0 architecture it wishes, in order to make the code more
+; efficient. Since the presence of the assembly code will
+; make the program unable to run on anything less than PA2.0,
+; you might as well gain the performance enhancements in the C
+; code as well.)
+;
+; Questions of "endian-ness" often come up, usually in the
+; context of byte ordering in a word. These routines have a
+; similar issue, that could be called "wordian-ness".
+; Independent of byte ordering (PA is always big-endian), one
+; can make two choices when representing extremely large
+; numbers as arrays of 64-bit doublewords in memory.
+;
+; "Little-wordian" layout means that the least significant
+; word of a number is stored at the lowest address.
+;
+;   MSW                  LSW
+;    |                    |
+;    V                    V
+;
+;    A    B    C    D    E
+;
+;    ^              ^    ^
+;    |              |    |____ address 0
+;    |              |
+;    |              |_______address 8
+;    |
+;    address 32
+;
+; "Big-wordian" means that the most significant word is at the
+; lowest address.
+;
+;   MSW                  LSW
+;    |                    |
+;    V                    V
+;
+;    A    B    C    D    E
+;
+;    ^              ^    ^
+;    |              |    |____ address 32
+;    |              |
+;    |              |_______address 24
+;    |
+;    address 0
+;
+; When you compile the file, you must specify one or the other, with
+; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
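+;
+; As a small illustration of the two layouts (a hypothetical snippet,
+; not from the original package; "a" and "n" are placeholder names),
+; the least significant doubleword of an n-doubleword number a is:
+;
+;     unsigned long long int a[n];
+;
+;     a[0]        /* little-wordian: LSW at the lowest address  */
+;     a[n-1]      /* big-wordian:    LSW at the highest address */
+;
+; and a carry out of element i propagates into a[i+1] in the
+; little-wordian case, and into a[i-1] in the big-wordian case.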
+; My "makefile" for a superprecision arithmetic package has +; the following stuff: +; +; # definitions: +; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1 +; CFLAGS = +O3 +; LDFLAGS = -L /usr/lib -Wl,-aarchive +; +; # general build rule for ".s" files: +; .s.o: +; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN +; +; # Now any bind step that calls for pa20.o will assemble pa20.s +; +; End of digression, back to arithmetic: +; +; The way we multiply two huge numbers is, of course, to multiply +; the "ABCD" vector by each of the "WXYZ" doublewords, adding +; the result vectors with increasing offsets, the way we learned +; in school, back before we all used calculators: +; +; A B C D +; * W X Y Z +; __________ +; P Q R S T +; E F G H I +; M N O P Q +; + R S T U V +; _______________ +; F I N A L S U M +; +; So we call maxpy_PA20_big (in my case; my package is +; big-wordian) repeatedly, giving the W, X, Y, and Z arguments +; in turn as the "scalar", and giving the "ABCD" vector each +; time. We direct it to add its result into an area of memory +; that we have cleared at the start. We skew the exact +; location into that area with each call. +; +; The prototype for the function is +; +; extern void maxpy_PA20_big( +; int length, /* Number of doublewords in the multiplicand vector. */ +; const long long int *scalaraddr, /* Address to fetch the scalar. */ +; const long long int *multiplicand, /* The multiplicand vector. */ +; long long int *result); /* Where to accumulate the result. */ +; +; (You should place a copy of this prototype in an include file +; or in your C file.) +; +; Now, IN ALL CASES, the given address for the multiplicand or +; the result is that of the LEAST SIGNIFICANT DOUBLEWORD. +; That word is, of course, the word at which the routine +; starts processing. "maxpy_PA20_little" then increases the +; addresses as it computes. "maxpy_PA20_big" decreases them. +; +; In our example above, "length" would be 4 in each case. +; "multiplicand" would be the "ABCD" vector. Specifically, +; the address of the element "D". "scalaraddr" would be the +; address of "W", "X", "Y", or "Z" on the four calls that we +; would make. (The order doesn't matter, of course.) +; "result" would be the appropriate address in the result +; area. When multiplying by "Z", that would be the least +; significant word. When multiplying by "Y", it would be the +; next higher word (8 bytes higher if little-wordian; 8 bytes +; lower if big-wordian), and so on. The size of the result +; area must be the the sum of the sizes of the multiplicand +; and multiplier vectors, and must be initialized to zero +; before we start. +; +; Whenever the routine adds its partial product into the result +; vector, it follows carry chains as far as they need to go. +; +; Here is the super-precision multiply routine that I use for +; my package. The package is big-wordian. 
I have taken out +; handling of exponents (it's a floating point package): +; +; static void mul_PA20( +; int size, +; const long long int *arg1, +; const long long int *arg2, +; long long int *result) +; { +; int i; +; +; for (i=0 ; i<2*size ; i++) result[i] = 0ULL; +; +; for (i=0 ; i +#else +#define floor(d) ((double)((unsigned long long)(d))) +#endif + +static double +upper32(double x) +{ + return floor(x * TwoToMinus32); +} + +static double +lower32(double x, double y) +{ + return x - TwoTo32 * floor(x * TwoToMinus32); +} + +static double +mod(double x, double oneoverm, double m) +{ + return x - m * floor(x * oneoverm); +} + +#endif + +static void +cleanup(double *dt, int from, int tlen) +{ + int i; + double tmp, tmp1, x, x1; + + tmp = tmp1 = Zero; + /* original code ** + for(i=2*from;i<2*tlen-2;i++) + { + x=dt[i]; + dt[i]=lower32(x,Zero)+tmp1; + tmp1=tmp; + tmp=upper32(x); + } + dt[tlen-2]+=tmp1; + dt[tlen-1]+=tmp; + **end original code ***/ + /* new code ***/ + for (i = 2 * from; i < 2 * tlen; i += 2) { + x = dt[i]; + x1 = dt[i + 1]; + dt[i] = lower32(x, Zero) + tmp; + dt[i + 1] = lower32(x1, Zero) + tmp1; + tmp = upper32(x); + tmp1 = upper32(x1); + } + /** end new code **/ +} + +void +conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +{ + int i; + long long t, t1, a, b, c, d; + + t1 = 0; + a = (long long)d16[0]; + b = (long long)d16[1]; + for (i = 0; i < ilen - 1; i++) { + c = (long long)d16[2 * i + 2]; + t1 += (unsigned int)a; + t = (a >> 32); + d = (long long)d16[2 * i + 3]; + t1 += (b & 0xffff) << 16; + t += (b >> 16) + (t1 >> 32); + i32[i] = (unsigned int)t1; + t1 = t; + a = c; + b = d; + } + t1 += (unsigned int)a; + t = (a >> 32); + t1 += (b & 0xffff) << 16; + i32[i] = (unsigned int)t1; +} + +void +conv_i32_to_d32(double *d32, unsigned int *i32, int len) +{ + int i; + +#pragma pipeloop(0) + for (i = 0; i < len; i++) + d32[i] = (double)(i32[i]); +} + +void +conv_i32_to_d16(double *d16, unsigned int *i32, int len) +{ + int i; + unsigned int a; + +#pragma pipeloop(0) + for (i = 0; i < len; i++) { + a = i32[i]; + d16[2 * i] = (double)(a & 0xffff); + d16[2 * i + 1] = (double)(a >> 16); + } +} + +void +conv_i32_to_d32_and_d16(double *d32, double *d16, + unsigned int *i32, int len) +{ + int i = 0; + unsigned int a; + +#pragma pipeloop(0) +#ifdef RF_INLINE_MACROS + for (; i < len - 3; i += 4) { + i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, + &(d16[2 * i]), &(d32[i]), (float *)(&(i32[i]))); + } +#endif + for (; i < len; i++) { + a = i32[i]; + d32[i] = (double)(i32[i]); + d16[2 * i] = (double)(a & 0xffff); + d16[2 * i + 1] = (double)(a >> 16); + } +} + +void +adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +{ + long long acc; + int i; + + if (i32[len] > 0) + i = -1; + else { + for (i = len - 1; i >= 0; i--) { + if (i32[i] != nint[i]) + break; + } + } + if ((i < 0) || (i32[i] > nint[i])) { + acc = 0; + for (i = 0; i < len; i++) { + acc = acc + (unsigned long long)(i32[i]) - (unsigned long long)(nint[i]); + i32[i] = (unsigned int)acc; + acc = acc >> 32; + } + } +} + +/* +** the lengths of the input arrays should be at least the following: +** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +** all of them should be different from one another +** +*/ +void +mont_mulf_noconv(unsigned int *result, + double *dm1, double *dm2, double *dt, + double *dn, unsigned int *nint, + int nlen, double dn0) +{ + int i, j, jj; + int tmp; + double digit, m2j, nextm2j, a, b; + double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, 
pdm1_0;
+
+ pdm1 = &(dm1[0]);
+ pdm2 = &(dm2[0]);
+ pdn = &(dn[0]);
+ pdm2[2 * nlen] = Zero;
+
+ if (nlen != 16) {
+ for (i = 0; i < 4 * nlen + 2; i++)
+ dt[i] = Zero;
+
+ a = dt[0] = pdm1[0] * pdm2[0];
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+
+ pdtj = &(dt[0]);
+ for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn[0] * digit;
+ b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+#pragma pipeloop(0)
+ for (i = 1; i < nlen; i++) {
+ pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+ }
+ if ((jj == 30)) {
+ cleanup(dt, j / 2 + 1, 2 * nlen + 1);
+ jj = 0;
+ }
+
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ } else {
+ a = dt[0] = pdm1[0] * pdm2[0];
+
+ dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
+ dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] =
+ dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] =
+ dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] =
+ dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] =
+ dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
+ dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] =
+ dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] =
+ dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] =
+ dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] =
+ dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero;
+
+ pdn_0 = pdn[0];
+ pdm1_0 = pdm1[0];
+
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+ pdtj = &(dt[0]);
+
+ for (j = 0; j < 32; j++, pdtj++) {
+
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn_0 * digit;
+ b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+ /**** this loop will be fully unrolled:
+ for(i=1;i<16;i++)
+ {
+ pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+ }
+ *************************************/
+ pdtj[2] += pdm1[1] * m2j + pdn[1] * digit;
+ pdtj[4] += pdm1[2] * m2j + pdn[2] * digit;
+ pdtj[6] += pdm1[3] * m2j + pdn[3] * digit;
+ pdtj[8] += pdm1[4] * m2j + pdn[4] * digit;
+ pdtj[10] += pdm1[5] * m2j + pdn[5] * digit;
+ pdtj[12] += pdm1[6] * m2j + pdn[6] * digit;
+ pdtj[14] += pdm1[7] * m2j + pdn[7] * digit;
+ pdtj[16] += pdm1[8] * m2j + pdn[8] * digit;
+ pdtj[18] += pdm1[9] * m2j + pdn[9] * digit;
+ pdtj[20] += pdm1[10] * m2j + pdn[10] * digit;
+ pdtj[22] += pdm1[11] * m2j + pdn[11] * digit;
+ pdtj[24] += pdm1[12] * m2j + pdn[12] * digit;
+ pdtj[26] += pdm1[13] * m2j + pdn[13] * digit;
+ pdtj[28] += pdm1[14] * m2j + pdn[14] * digit;
+ pdtj[30] += pdm1[15] * m2j + pdn[15] * digit;
+ /* no need for cleanup, cannot overflow */
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ }
+
+ conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
+
+ adjust_montf_result(result, nint, nlen);
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h
new file mode 100644
index 0000000000..69bed4acb1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* The functions that are to be called from outside of the .s file have the
+ * following interfaces and array size requirements:
+ */
+
+void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
+
+/* Converts an array of int's to an array of doubles, so that each double
+ * corresponds to an int. len is the number of items converted.
+ * Does not allocate the output array.
+ * The pointers d32 and i32 should point to arrays of size at least len
+ * (doubles and unsigned ints, respectively)
+ */
+
+void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
+
+/* Converts an array of int's to an array of doubles so that each element
+ * of the int array is converted to a pair of doubles, the first one
+ * corresponding to the lower (least significant) 16 bits of the int and
+ * the second one corresponding to the upper (most significant) 16 bits of
+ * the 32-bit int. len is the number of ints converted.
+ * Does not allocate the output array.
+ * The pointer d16 should point to an array of doubles of size at least
+ * 2*len and i32 should point to an array of ints of size at least len
+ */
+
+void conv_i32_to_d32_and_d16(double *d32, double *d16,
+ unsigned int *i32, int len);
+
+/* Does the above two conversions together; it is much faster than doing
+ * both of those in succession
+ */
+
+void mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0);
+
+/* Does the Montgomery multiplication of the numbers stored in the arrays
+ * pointed to by dm1 and dm2, writing the result to the array pointed to by
+ * result. It uses the array pointed to by dt as a temporary work area.
+ * nint should point to the modulus in the array-of-integers representation,
+ * dn should point to its array-of-doubles as obtained as a result of the
+ * function call conv_i32_to_d32(dn, nint, nlen);
+ * nlen is the length of the array containing the modulus.
+ * The representation used for dm1 is the one that is a result of the function
+ * call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the
+ * result of the function call conv_i32_to_d16(dm2, m2, nlen).
+ * Note that m1 and m2 should both be of length nlen, so they should be
+ * padded with 0's if necessary before the conversion. The result comes in
+ * this form (int representation, padded with 0's).
+ * dn0 is the value of the 16 least significant bits of n0'.
+ * The function does not allocate memory for any of the arrays, so the
+ * pointers should point to arrays with the following minimal sizes:
+ * result - nlen+1
+ * dm1 - nlen
+ * dm2 - 2*nlen+1 (the +1 is necessary for technical reasons)
+ * dt - 4*nlen+2
+ * dn - nlen
+ * nint - nlen
+ * No two arrays should point to overlapping areas of memory.
+ */
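+
+/* For illustration only (this block is not part of the original header):
+ * the calling sequence implied by the rules above, with all sizes written
+ * in terms of a caller-chosen modulus length N.  m1, m2, nint, and dn0
+ * are assumed to be prepared by the caller.
+ *
+ *     unsigned int m1[N], m2[N], nint[N], result[N + 1];
+ *     double dm1[N], dm2[2 * N + 1], dn[N], dt[4 * N + 2];
+ *     double dn0;                        // low 16 bits of n0'
+ *
+ *     conv_i32_to_d32(dm1, m1, N);       // multiplicand, 32-bit form
+ *     conv_i32_to_d16(dm2, m2, N);       // multiplier, 16-bit form
+ *     conv_i32_to_d32(dn, nint, N);      // modulus, 32-bit form
+ *     mont_mulf_noconv(result, dm1, dm2, dt, dn, nint, N, dn0);
+ */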
diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il
new file mode 100644
index 0000000000..4952d0fb82
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+!
+ .inline upper32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+
+ fdtox %f10,%f10
+ fitod %f10,%f0
+ .end
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+!
+ .inline lower32,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f12
+
+ fdtox %f10,%f10
+ fmovs %f12,%f10
+ fxtod %f10,%f0
+ .end
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+!
+ .inline mod,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f2
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+
+ fmuld %f2,%f4,%f4
+ fdtox %f4,%f4
+ fxtod %f4,%f4
+ fmuld %f4,%f6,%f4
+ fsubd %f2,%f4,%f0
+ .end
+
+
+!
+! 
void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! double * /* 0 */, +! double * /*result16*/, double * /* result32 */ +! float * /*source - should be unsigned int* +! converted to float* */); +! + .inline i16_to_d16_and_d32x4,24 + ldd [%o0],%f2 ! 1/(2^16) + ldd [%o1],%f4 ! 2^16 + ldd [%o2],%f22 + + fmovd %f22,%f6 + ld [%o5],%f7 + fmovd %f22,%f10 + ld [%o5+4],%f11 + fmovd %f22,%f14 + ld [%o5+8],%f15 + fmovd %f22,%f18 + ld [%o5+12],%f19 + fxtod %f6,%f6 + std %f6,[%o4] + fxtod %f10,%f10 + std %f10,[%o4+8] + fxtod %f14,%f14 + std %f14,[%o4+16] + fxtod %f18,%f18 + std %f18,[%o4+24] + fmuld %f2,%f6,%f8 + fmuld %f2,%f10,%f12 + fmuld %f2,%f14,%f16 + fmuld %f2,%f18,%f20 + fdtox %f8,%f8 + fdtox %f12,%f12 + fdtox %f16,%f16 + fdtox %f20,%f20 + fxtod %f8,%f8 + std %f8,[%o3+8] + fxtod %f12,%f12 + std %f12,[%o3+24] + fxtod %f16,%f16 + std %f16,[%o3+40] + fxtod %f20,%f20 + std %f20,[%o3+56] + fmuld %f8,%f4,%f8 + fmuld %f12,%f4,%f12 + fmuld %f16,%f4,%f16 + fmuld %f20,%f4,%f20 + fsubd %f6,%f8,%f8 + std %f8,[%o3] + fsubd %f10,%f12,%f12 + std %f12,[%o3+16] + fsubd %f14,%f16,%f16 + std %f16,[%o3+32] + fsubd %f18,%f20,%f20 + std %f20,[%o3+48] + .end + + diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s new file mode 100644 index 0000000000..69d2a3c51b --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.s @@ -0,0 +1,1938 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr + .file "montmulf.c" + + .section ".data",#alloc,#write + .align 8 +TwoTo16: /* frequency 1.0 confidence 0.0 */ + .word 1089470464 + .word 0 + .type TwoTo16,#object + .size TwoTo16,8 +TwoToMinus16: /* frequency 1.0 confidence 0.0 */ + .word 1055916032 + .word 0 + .type TwoToMinus16,#object + .size TwoToMinus16,8 +Zero: /* frequency 1.0 confidence 0.0 */ + .word 0 + .word 0 + .type Zero,#object + .size Zero,8 +TwoTo32: /* frequency 1.0 confidence 0.0 */ + .word 1106247680 + .word 0 + .type TwoTo32,#object + .size TwoTo32,8 +TwoToMinus32: /* frequency 1.0 confidence 0.0 */ + .word 1039138816 + .word 0 + .type TwoToMinus32,#object + .size TwoToMinus32,8 + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE cleanup +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global cleanup + cleanup: /* frequency 1.0 confidence 0.0 */ +! FILE montmulf.c + +! 1 !#define RF_INLINE_MACROS +! 3 !static double TwoTo16=65536.0; +! 4 !static double TwoToMinus16=1.0/65536.0; +! 5 !static double Zero=0.0; +! 6 !static double TwoTo32=65536.0*65536.0; +! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0); +! 9 !#ifdef RF_INLINE_MACROS +! 11 !double upper32(double); +! 12 !double lower32(double, double); +! 13 !double mod(double, double, double); +! 15 !#else +! 17 !static double upper32(double x) +! 18 !{ +! 19 ! return floor(x*TwoToMinus32); +! 20 !} +! 22 !static double lower32(double x, double y) +! 23 !{ +! 24 ! return x-TwoTo32*floor(x*TwoToMinus32); +! 25 !} +! 27 !static double mod(double x, double oneoverm, double m) +! 28 !{ +! 29 ! return x-m*floor(x*oneoverm); +! 30 !} +! 32 !#endif +! 35 !void cleanup(double *dt, int from, int tlen) +! 36 !{ +! 37 ! int i; +! 38 ! double tmp,tmp1,x,x1; +! 40 ! tmp=tmp1=Zero; + +/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2 + +! 41 ! /* original code ** +! 42 ! for(i=2*from;i<2*tlen-2;i++) +! 43 ! { +! 
44 ! x=dt[i]; +! 45 ! dt[i]=lower32(x,Zero)+tmp1; +! 46 ! tmp1=tmp; +! 47 ! tmp=upper32(x); +! 48 ! } +! 49 ! dt[tlen-2]+=tmp1; +! 50 ! dt[tlen-1]+=tmp; +! 51 ! **end original code ***/ +! 52 ! /* new code ***/ +! 53 ! for(i=2*from;i<2*tlen;i+=2) + +/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3 +/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0 +/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2 +/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4 +/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1 +/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4 +/* 0x001c 53 ( 3 4) */ cmp %g4,%g3 +/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56 +/* 0x0024 ( 4 5) */ fmovd %f0,%f2 +/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1 +/* 0x002c ( 4 5) */ sub %g3,1,%g3 + +! 54 ! { +! 55 ! x=dt[i]; + +/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8 + .L900000114: /* frequency 6.4 confidence 0.0 */ +/* 0x0034 ( 0 3) */ fdtox %f8,%f6 + +! 56 ! x1=dt[i+1]; + +/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10 + +! 57 ! dt[i]=lower32(x,Zero)+tmp; +! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1; +! 59 ! tmp=upper32(x); +! 60 ! tmp1=upper32(x1); + +/* 0x003c 60 ( 0 1) */ add %g4,2,%g4 +/* 0x0040 ( 1 4) */ fdtox %f8,%f8 +/* 0x0044 ( 1 2) */ cmp %g4,%g3 +/* 0x0048 ( 5 6) */ fmovs %f0,%f6 +/* 0x004c ( 7 10) */ fxtod %f6,%f6 +/* 0x0050 ( 8 11) */ fdtox %f10,%f0 +/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2 +/* 0x0058 (10 11) */ std %f2,[%g1] +/* 0x005c (12 15) */ ldd [%g2],%f2 +/* 0x0060 (14 15) */ fmovs %f2,%f0 +/* 0x0064 (16 19) */ fxtod %f0,%f6 +/* 0x0068 (17 20) */ fdtox %f10,%f0 +/* 0x006c (18 21) */ fitod %f8,%f2 +/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4 +/* 0x0074 (19 20) */ std %f4,[%g1+8] +/* 0x0078 60 (19 20) */ add %g1,16,%g1 +/* 0x007c (20 23) */ fitod %f0,%f4 +/* 0x0080 (20 23) */ ldd [%g2],%f0 +/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86 +/* 0x0088 (21 24) */ ldd [%g1],%f8 + .L77000116: /* frequency 1.0 confidence 0.0 */ +/* 0x008c ( 0 2) */ retl ! Result = +/* 0x0090 ( 1 2) */ nop +/* 0x0094 0 ( 0 0) */ .type cleanup,2 +/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_d16_to_i32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_d16_to_i32 + conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-136,%sp + +! 61 ! } +! 62 ! /** end new code **/ +! 63 !} +! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +! 67 !{ +! 68 !int i; +! 69 !long long t, t1, a, b, c, d; +! 71 ! t1=0; +! 72 ! a=(long long)d16[0]; + +/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0 + +! 73 ! b=(long long)d16[1]; +! 74 ! for(i=0; i>32); +! 79 ! d=(long long)d16[2*i+3]; +! 80 ! t1+=(b&0xffff)<<16; + +/* 0x0070 80 (15 16) */ and %g1,%o1,%o0 + +! 81 ! t+=(b>>16)+(t1>>32); +! 82 ! i32[i]=t1&0xffffffff; +! 83 ! t1=t; +! 84 ! a=c; +! 85 ! 
b=d; + +/* 0x0074 85 (15 16) */ add %g2,16,%g2 +/* 0x0078 80 (16 17) */ sllx %o0,16,%g3 +/* 0x007c 77 (16 17) */ and %g4,%o3,%o0 +/* 0x0080 76 (17 20) */ fdtox %f0,%f0 +/* 0x0084 (17 18) */ std %f0,[%sp+104] +/* 0x0088 74 (17 18) */ add %o0,%g3,%o4 +/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2 +/* 0x0090 81 (18 19) */ srax %g1,16,%o0 +/* 0x0094 82 (18 19) */ and %o4,%o3,%o7 +/* 0x0098 81 (19 20) */ stx %o0,[%sp+112] +/* 0x009c (19 20) */ srax %o4,32,%o0 +/* 0x00a0 85 (19 20) */ add %g5,4,%o5 +/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120] +/* 0x00a8 78 (20 21) */ srax %g4,32,%o4 +/* 0x00ac 79 (20 23) */ fdtox %f2,%f0 +/* 0x00b0 (21 22) */ std %f0,[%sp+96] +/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0 +/* 0x00b8 (23 25) */ ldx [%sp+120],%g4 +/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3 +/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4 +/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1 +/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4 +/* 0x00cc 82 (27 28) */ st %o7,[%g5] +/* 0x00d0 (27 28) */ or %g0,1,%o7 +/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4 + .L900000209: /* frequency 64.0 confidence 0.0 */ +/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0 +/* 0x00dc 85 (17 18) */ add %o7,1,%o7 +/* 0x00e0 (17 18) */ add %o5,4,%o5 +/* 0x00e4 (18 18) */ cmp %o7,%o2 +/* 0x00e8 (18 19) */ add %g2,16,%g2 +/* 0x00ec 76 (19 22) */ fdtox %f0,%f0 +/* 0x00f0 (20 21) */ std %f0,[%sp+104] +/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0 +/* 0x00f8 (23 26) */ fdtox %f0,%f0 +/* 0x00fc (24 25) */ std %f0,[%sp+96] +/* 0x0100 80 (25 26) */ and %g1,%o1,%g3 +/* 0x0104 (26 27) */ sllx %g3,16,%g3 +/* 0x0108 ( 0 0) */ stx %g3,[%sp+120] +/* 0x010c 77 (26 27) */ and %g4,%o3,%g3 +/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128] +/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7 +/* 0x0118 (27 27) */ add %g3,%o7,%g3 +/* 0x011c ( 0 0) */ ldx [%sp+128],%o7 +/* 0x0120 81 (28 29) */ srax %g1,16,%g1 +/* 0x0124 74 (28 28) */ add %g3,%o4,%g3 +/* 0x0128 81 (29 30) */ srax %g3,32,%o4 +/* 0x012c ( 0 0) */ stx %o4,[%sp+112] +/* 0x0130 78 (30 31) */ srax %g4,32,%o4 +/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4 +/* 0x0138 (30 31) */ add %g1,%g4,%g4 +/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1 +/* 0x0140 81 (31 32) */ add %o4,%g4,%o4 +/* 0x0144 82 (32 33) */ and %g3,%o3,%g3 +/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4 +/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50 +/* 0x0150 (33 34) */ st %g3,[%o5-4] + .L900000212: /* frequency 8.0 confidence 0.0 */ +/* 0x0154 85 ( 0 1) */ ba .L900000214 ! 
tprob=1.00 +/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L77000134: /* frequency 0.7 confidence 0.0 */ + .L900000213: /* frequency 6.4 confidence 0.0 */ +/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0 +/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3 +/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0 +/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0 +/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104] +/* 0x0170 85 ( 1 2) */ add %o7,1,%o7 +/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4 +/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2 +/* 0x017c 85 ( 2 3) */ add %g2,16,%g2 +/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4 +/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128] +/* 0x0188 ( 4 5) */ srax %g1,16,%o0 +/* 0x018c ( 4 5) */ stx %o0,[%sp+112] +/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3 +/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0 +/* 0x0198 ( 5 6) */ stx %o0,[%sp+120] +/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0 +/* 0x01a0 ( 6 7) */ std %f0,[%sp+96] +/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4 +/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7 +/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4 +/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1 +/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4 +/* 0x01b8 (11 13) */ ldx [%sp+128],%o7 +/* 0x01bc (11 12) */ add %o4,%g4,%o4 +/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0 +/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4 +/* 0x01c8 82 (13 14) */ st %g3,[%o5] +/* 0x01cc 85 (13 14) */ add %o5,4,%o5 +/* 0x01d0 (13 14) */ cmp %o7,%o2 +/* 0x01d4 (14 15) */ or %g0,%o0,%g1 +/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86 +/* 0x01dc (14 17) */ ldd [%g2+16],%f0 + .L77000127: /* frequency 1.0 confidence 0.0 */ + +! 86 ! } +! 87 ! t1+=a&0xffffffff; +! 88 ! t=(a>>32); +! 89 ! t1+=(b&0xffff)<<16; +! 90 ! i32[i]=t1&0xffffffff; + +/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L900000214: /* frequency 1.0 confidence 0.0 */ +/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3 +/* 0x01e8 ( 0 1) */ add %g2,1023,%g2 +/* 0x01ec ( 1 2) */ srl %g3,0,%g3 +/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2 +/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4 +/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2 +/* 0x01fc ( 3 4) */ add %o4,%g4,%g4 +/* 0x0200 ( 4 5) */ add %g4,%g2,%g2 +/* 0x0204 ( 5 6) */ sll %o7,2,%g4 +/* 0x0208 ( 5 6) */ and %g2,%g3,%g2 +/* 0x020c ( 6 7) */ st %g2,[%g5+%g4] +/* 0x0210 ( 7 9) */ ret ! Result = +/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0 +/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2 +/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32 + conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ orcc %g0,%o2,%g1 + +! 92 !} +! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 95 !{ +! 96 !int i; +! 98 !#pragma pipeloop(0) +! 99 ! for(i=0;i>16); + +/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0 +/* 0x001c ( 3 4) */ add %o5,1,%g3 +/* 0x0020 ( 4 5) */ add %g2,1023,%o4 +/* 0x0024 109 ( 4 5) */ or %g0,0,%g1 +/* 0x0028 ( 5 6) */ cmp %g3,3 +/* 0x002c ( 5 6) */ or %g0,%i1,%o7 +/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3 +/* 0x0034 ( 6 7) */ or %g0,%i0,%g2 +/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! 
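The conversion routines in these listings come from a few lines of C, quoted piecemeal in the '!' source annotations; they are collected below for reference, with the loop headers inferred from the annotations and the generated code. conv_d16_to_i32 folds an array of radix-2^16 double digits back into 32-bit words, propagating carries in 64-bit integer arithmetic; its tmp argument is scratch space the portable version does not touch.

    void conv_d16_to_i32(unsigned int *i32, double *d16,
                         long long *tmp, int ilen)
    {
        int i;
        long long t, t1, a, b, c, d;

        t1 = 0;
        a = (long long)d16[0];
        b = (long long)d16[1];
        for (i = 0; i < ilen - 1; i++) {
            c   = (long long)d16[2*i + 2];
            t1 += a & 0xffffffff;
            t   = (a >> 32);
            d   = (long long)d16[2*i + 3];
            t1 += (b & 0xffff) << 16;
            t  += (b >> 16) + (t1 >> 32);
            i32[i] = t1 & 0xffffffff;
            t1 = t;
            a = c;
            b = d;
        }
        t1 += a & 0xffffffff;
        t   = (a >> 32);
        t1 += (b & 0xffff) << 16;
        i32[i] = t1 & 0xffffffff;
    }

    void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
    {
        int i;
        for (i = 0; i < len; i++)
            d32[i] = (double)(i32[i]);
    }

    void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
    {
        int i;
        unsigned int a;
        for (i = 0; i < len; i++) {
            a = i32[i];
            d16[2*i]     = (double)(a & 0xffff); /* low 16 bits  */
            d16[2*i + 1] = (double)(a >> 16);    /* high 16 bits */
        }
    }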
tprob=0.44 +/* 0x003c ( 7 8) */ add %o7,4,%o0 +/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0 +/* 0x0044 113 ( 7 8) */ or %g0,1,%g1 +/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1 +/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7 +/* 0x0050 112 (10 11) */ and %o1,%o4,%o0 + .L900000406: /* frequency 64.0 confidence 0.0 */ +/* 0x0054 112 (22 23) */ st %o0,[%sp+96] +/* 0x0058 113 (22 23) */ add %g1,1,%g1 +/* 0x005c (22 23) */ add %g2,16,%g2 +/* 0x0060 (23 23) */ cmp %g1,%o5 +/* 0x0064 (23 24) */ add %o7,4,%o7 +/* 0x0068 112 (29 31) */ ld [%sp+96],%f3 +/* 0x006c ( 0 0) */ fmovs %f0,%f2 +/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2 +/* 0x0074 113 (32 33) */ srl %o1,16,%o0 +/* 0x0078 112 (32 33) */ std %f2,[%g2-16] +/* 0x007c 113 (33 34) */ st %o0,[%sp+92] +/* 0x0080 (40 42) */ ld [%sp+92],%f3 +/* 0x0084 111 (41 43) */ ld [%o7-4],%o1 +/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2 +/* 0x008c (42 45) */ fsubd %f2,%f0,%f2 +/* 0x0090 112 (43 44) */ and %o1,%o4,%o0 +/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50 +/* 0x0098 (43 44) */ std %f2,[%g2-8] + .L900000409: /* frequency 8.0 confidence 0.0 */ +/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96] +/* 0x00a0 ( 0 1) */ fmovs %f0,%f2 +/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2 +/* 0x00a8 ( 1 2) */ srl %o1,16,%o0 +/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x00b4 ( 6 7) */ std %f2,[%g2-16] +/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x00bc (10 11) */ fmovs %f0,%f2 +/* 0x00c0 (11 14) */ ld [%sp+92],%f3 +/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x00c8 (13 14) */ std %f0,[%g2-8] +/* 0x00cc (14 16) */ ret ! Result = +/* 0x00d0 (16 17) */ restore %g0,%g0,%g0 + .L77000154: /* frequency 0.7 confidence 0.0 */ +/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0 + .L900000410: /* frequency 6.4 confidence 0.0 */ +/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1 +/* 0x00dc ( 0 1) */ st %o1,[%sp+96] +/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1 +/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0 +/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0 +/* 0x00ec ( 1 2) */ add %o7,4,%o7 +/* 0x00f0 ( 2 3) */ cmp %g1,%o5 +/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2 +/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x0100 ( 6 7) */ std %f2,[%g2] +/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x0108 (10 11) */ fmovs %f0,%f2 +/* 0x010c (11 14) */ ld [%sp+92],%f3 +/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x0114 (13 14) */ std %f0,[%g2+8] +/* 0x0118 (13 14) */ add %g2,16,%g2 +/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86 +/* 0x0120 (14 17) */ ld [%o7],%o0 + .L77000150: /* frequency 1.0 confidence 0.0 */ +/* 0x0124 ( 0 2) */ ret ! Result = +/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2 +/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-104,%sp +/* 0x0004 ( 1 2) */ or %g0,%i3,%i4 +/* 0x0008 ( 1 2) */ or %g0,%i2,%g1 + +! 114 ! } +! 115 !} +! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! 119 ! double * /* 0 */, +! 120 ! double * /*result16*/, double * /* result32 */, +! 121 ! float * /*source - should be unsigned int* +! 122 ! 
converted to float* */); +! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 127 ! unsigned int *i32, int len) +! 128 !{ +! 129 !int i; +! 130 !unsigned int a; +! 132 !#pragma pipeloop(0) +! 133 ! for(i=0;i>16); + +/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1 +/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0 +/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0 +/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3 +/* 0x0138 ( 2 3) */ sll %o7,2,%g2 +/* 0x013c ( 2 3) */ add %o0,1023,%o3 +/* 0x0140 ( 3 4) */ sll %o7,3,%g4 +/* 0x0144 ( 3 4) */ cmp %g3,3 +/* 0x0148 ( 4 5) */ add %g1,%g2,%o0 +/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2 +/* 0x0150 ( 5 6) */ add %i3,%g4,%o4 +/* 0x0154 ( 5 6) */ sub %i4,1,%o1 +/* 0x0158 ( 6 7) */ sll %o7,4,%g5 +/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44 +/* 0x0160 ( 7 8) */ add %i1,%g5,%o5 +/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3 +/* 0x0168 143 ( 7 8) */ add %o4,8,%o4 +/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1 +/* 0x0170 143 ( 8 9) */ add %o5,16,%o5 +/* 0x0174 ( 8 9) */ add %o7,1,%o7 +/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2 +/* 0x017c 143 ( 9 10) */ add %o0,4,%o0 +/* 0x0180 142 (10 11) */ and %g1,%o3,%g2 +/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x0188 (11 12) */ std %f2,[%o4-8] +/* 0x018c 143 (11 12) */ srl %g1,16,%g1 +/* 0x0190 142 (12 13) */ st %g2,[%sp+96] +/* 0x0194 (15 16) */ fmovs %f0,%f2 +/* 0x0198 (16 19) */ ld [%sp+96],%f3 +/* 0x019c (18 21) */ fsubd %f2,%f0,%f2 +/* 0x01a0 (18 19) */ std %f2,[%o5-16] +/* 0x01a4 143 (19 20) */ st %g1,[%sp+92] +/* 0x01a8 (22 23) */ fmovs %f0,%f2 +/* 0x01ac (23 26) */ ld [%sp+92],%f3 +/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2 +/* 0x01b4 (25 26) */ std %f2,[%o5-8] + .L900000509: /* frequency 64.0 confidence 0.0 */ +/* 0x01b8 141 (26 28) */ ld [%o0],%f3 +/* 0x01bc 143 (26 27) */ add %o7,2,%o7 +/* 0x01c0 (26 27) */ add %o5,32,%o5 +/* 0x01c4 140 (27 29) */ ld [%o0],%g1 +/* 0x01c8 143 (27 27) */ cmp %o7,%o1 +/* 0x01cc (27 28) */ add %o4,16,%o4 +/* 0x01d0 141 ( 0 0) */ fmovs %f0,%f2 +/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2 +/* 0x01d8 (29 30) */ std %f2,[%o4-16] +/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2 +/* 0x01e0 (30 31) */ st %g2,[%sp+96] +/* 0x01e4 (37 39) */ ld [%sp+96],%f3 +/* 0x01e8 ( 0 0) */ fmovs %f0,%f2 +/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2 +/* 0x01f0 143 (40 41) */ srl %g1,16,%g1 +/* 0x01f4 142 (40 41) */ std %f2,[%o5-32] +/* 0x01f8 143 (41 42) */ st %g1,[%sp+92] +/* 0x01fc (48 50) */ ld [%sp+92],%f3 +/* 0x0200 ( 0 0) */ fmovs %f0,%f2 +/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2 +/* 0x0208 (51 52) */ std %f2,[%o5-24] +/* 0x020c (51 52) */ add %o0,4,%o0 +/* 0x0210 141 (52 54) */ ld [%o0],%f3 +/* 0x0214 140 (53 55) */ ld [%o0],%g1 +/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2 +/* 0x021c (54 57) */ fsubd %f2,%f0,%f2 +/* 0x0220 (55 56) */ std %f2,[%o4-8] +/* 0x0224 142 (55 56) */ and %g1,%o3,%g2 +/* 0x0228 (56 57) */ st %g2,[%sp+96] +/* 0x022c (63 65) */ ld [%sp+96],%f3 +/* 0x0230 ( 0 0) */ fmovs %f0,%f2 +/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2 +/* 0x0238 143 (66 67) */ srl %g1,16,%g1 +/* 0x023c 142 (66 67) */ std %f2,[%o5-16] +/* 0x0240 143 (67 68) */ st %g1,[%sp+92] +/* 0x0244 (74 76) */ ld [%sp+92],%f3 +/* 0x0248 ( 0 0) */ fmovs %f0,%f2 +/* 0x024c (76 79) */ fsubd %f2,%f0,%f2 +/* 0x0250 (77 78) */ std %f2,[%o5-8] +/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50 +/* 0x0258 (77 78) */ add %o0,4,%o0 + .L900000512: /* frequency 8.0 confidence 0.0 */ +/* 0x025c 143 ( 0 1) */ cmp %o7,%i4 +/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! 
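conv_i32_to_d32_and_d16 fuses the two conversions above. In builds with RF_INLINE_MACROS it hands groups of four words to i16_to_d16_and_d32x4 and finishes any remainder with a scalar loop; the scalar form, per the annotations, is:

    void conv_i32_to_d32_and_d16(double *d32, double *d16,
                                 unsigned int *i32, int len)
    {
        int i;
        unsigned int a;

        for (i = 0; i < len; i++) {
            a = i32[i];
            d32[i]       = (double)(i32[i]);     /* one radix-2^32 digit */
            d16[2*i]     = (double)(a & 0xffff); /* low 16 bits          */
            d16[2*i + 1] = (double)(a >> 16);    /* high 16 bits         */
        }
    }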
tprob=0.14 +/* 0x0264 ( 0 1) */ nop + .L77000161: /* frequency 0.7 confidence 0.0 */ +/* 0x0268 141 ( 0 3) */ ld [%o0],%f3 + .L900000513: /* frequency 6.4 confidence 0.0 */ +/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0 +/* 0x0270 143 ( 0 1) */ add %o7,1,%o7 +/* 0x0274 140 ( 1 4) */ ld [%o0],%o1 +/* 0x0278 143 ( 1 2) */ add %o0,4,%o0 +/* 0x027c ( 1 2) */ cmp %o7,%i4 +/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2 +/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1 +/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2 +/* 0x028c ( 4 5) */ std %f2,[%o4] +/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1 +/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96] +/* 0x0298 143 ( 5 6) */ add %o4,8,%o4 +/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2 +/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3 +/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x02a8 (11 12) */ std %f2,[%o5] +/* 0x02ac 143 (12 13) */ st %o1,[%sp+92] +/* 0x02b0 (15 16) */ fmovs %f0,%f2 +/* 0x02b4 (16 19) */ ld [%sp+92],%f3 +/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0 +/* 0x02bc (18 19) */ std %f0,[%o5+8] +/* 0x02c0 (18 19) */ add %o5,16,%o5 +/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86 +/* 0x02c8 (19 22) */ ld [%o0],%f3 + .L77000164: /* frequency 1.0 confidence 0.0 */ +/* 0x02cc ( 0 2) */ ret ! Result = +/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2 +/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global adjust_montf_result + adjust_montf_result: /* frequency 1.0 confidence 0.0 */ + +! 144 ! } +! 145 !} +! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 149 !{ +! 150 !long long acc; +! 151 !int i; +! 153 ! if(i32[len]>0) i=-1; + +/* 000000 153 ( 0 1) */ sll %o2,2,%g1 +/* 0x0004 ( 0 1) */ or %g0,-1,%g3 +/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1 +/* 0x000c ( 3 4) */ cmp %g1,0 +/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50 +/* 0x0014 ( 3 4) */ or %g0,%o1,%o3 +/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00 +/* 0x001c ( 4 5) */ cmp %g3,0 + .L77000175: /* frequency 0.8 confidence 0.0 */ + +! 154 ! else +! 155 ! { +! 156 ! for(i=len-1; i>=0; i++) + +/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3 +/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60 +/* 0x0028 ( 1 2) */ cmp %g3,0 +/* 0x002c ( 1 2) */ sll %g3,2,%g1 +/* 0x0030 ( 2 3) */ add %o0,%g1,%g2 +/* 0x0034 ( 2 3) */ add %o1,%g1,%g1 + +! 157 ! { +! 158 ! if(i32[i]!=nint[i]) break; + +/* 0x0038 158 ( 3 6) */ ld [%g1],%g5 + .L900000610: /* frequency 5.3 confidence 0.0 */ +/* 0x003c 158 ( 0 3) */ ld [%g2],%o5 +/* 0x0040 ( 0 1) */ add %g1,4,%g1 +/* 0x0044 ( 0 1) */ add %g2,4,%g2 +/* 0x0048 ( 2 3) */ cmp %o5,%g5 +/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16 +/* 0x0050 ( 2 3) */ nop +/* 0x0054 ( 3 4) */ addcc %g3,1,%g3 +/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84 +/* 0x005c ( 3 6) */ ld [%g1],%g5 + .L77000182: /* frequency 1.0 confidence 0.0 */ + +! 159 ! } +! 160 ! } +! 161 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0060 161 ( 0 1) */ cmp %g3,0 + .L900000611: /* frequency 1.0 confidence 0.0 */ +/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50 +/* 0x0068 ( 0 1) */ sll %g3,2,%g2 +/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1 +/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2 +/* 0x0074 ( 4 5) */ cmp %g2,%g1 +/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56 +/* 0x007c ( 4 5) */ nop + .L77000198: /* frequency 0.8 confidence 0.0 */ + +! 162 ! { +! 163 ! 
acc=0; +! 164 ! for(i=0;i>32; + +/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5 +/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1 +/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2 +/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5 +/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2 +/* 0x00d8 ( 9 10) */ st %o2,[%o0] +/* 0x00dc 168 (10 11) */ srax %g5,32,%g5 + .L900000605: /* frequency 64.0 confidence 0.0 */ +/* 0x00e0 166 (12 20) */ ld [%o3],%o2 +/* 0x00e4 168 (12 13) */ add %o5,1,%o5 +/* 0x00e8 (12 13) */ add %o3,4,%o3 +/* 0x00ec (13 13) */ cmp %o5,%g4 +/* 0x00f0 (13 14) */ add %o4,4,%o4 +/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1 +/* 0x00f8 (15 15) */ add %g1,%g5,%g5 +/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2 +/* 0x0100 166 (16 24) */ ld [%o4-4],%g1 +/* 0x0104 167 (17 18) */ st %o2,[%o4-8] +/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50 +/* 0x010c (17 18) */ srax %g5,32,%g5 + .L900000608: /* frequency 8.0 confidence 0.0 */ +/* 0x0110 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0118 ( 3 4) */ add %g1,%g5,%g1 +/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2 +/* 0x0120 ( 5 7) */ retl ! Result = +/* 0x0124 ( 6 7) */ st %g2,[%o4-4] + .L77000199: /* frequency 0.6 confidence 0.0 */ +/* 0x0128 166 ( 0 3) */ ld [%o4],%g1 + .L900000609: /* frequency 5.3 confidence 0.0 */ +/* 0x012c 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0130 ( 0 1) */ add %g5,%g1,%g1 +/* 0x0134 168 ( 0 1) */ add %o5,1,%o5 +/* 0x0138 ( 1 2) */ add %o3,4,%o3 +/* 0x013c ( 1 2) */ cmp %o5,%g4 +/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2 +/* 0x0148 ( 3 4) */ st %g2,[%o4] +/* 0x014c 168 ( 3 4) */ add %o4,4,%o4 +/* 0x0150 ( 4 5) */ srax %g1,32,%g5 +/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84 +/* 0x0158 ( 4 7) */ ld [%o4],%g1 + .L77000191: /* frequency 1.0 confidence 0.0 */ +/* 0x015c ( 0 2) */ retl ! Result = +/* 0x0160 ( 1 2) */ nop +/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2 +/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 32 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global mont_mulf_noconv + mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-144,%sp +/* 0x0004 ( 1 2) */ st %i0,[%fp+68] + +! 169 ! } +! 170 ! } +! 171 !} +! 175 !void cleanup(double *dt, int from, int tlen); +! 177 !/* +! 178 !** the lengths of the input arrays should be at least the following: +! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 180 !** all of them should be different from one another +! 181 !** +! 182 !*/ +! 183 !void mont_mulf_noconv(unsigned int *result, +! 184 ! double *dm1, double *dm2, double *dt, +! 185 ! double *dn, unsigned int *nint, +! 186 ! int nlen, double dn0) +! 187 !{ +! 188 ! int i, j, jj; +! 189 ! int tmp; +! 190 ! double digit, m2j, nextm2j, a, b; +! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 193 ! pdm1=&(dm1[0]); +! 194 ! pdm2=&(dm2[0]); +! 195 ! pdn=&(dn[0]); +! 196 ! pdm2[2*nlen]=Zero; + +/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2 +/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1 +/* 0x0010 ( 2 3) */ st %i5,[%fp+88] +/* 0x0014 ( 2 3) */ or %g0,%i3,%o2 +/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4 +/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2 +/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5 +/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0 +/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2 + +! 198 ! if (nlen!=16) +! 199 ! { +! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 202 ! 
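adjust_montf_result is the final conditional subtraction of Montgomery multiplication: if the (nlen+1)-word value in i32 is greater than or equal to the modulus nint, subtract nint once. In C it reads as below; note that the comparison loop scans downward, as the generated code shows (the annotation at source line 156 in the listing above misprints the decrement as i++, while the montmulfv8/montmulfv9 listings further down have i--).

    void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
    {
        long long acc;
        int i;

        if (i32[len] > 0) {          /* carry word set: value exceeds n */
            i = -1;
        } else {
            for (i = len - 1; i >= 0; i--)
                if (i32[i] != nint[i])
                    break;           /* first differing word decides    */
        }
        if (i < 0 || i32[i] > nint[i]) {
            acc = 0;                 /* subtract n once, word by word   */
            for (i = 0; i < len; i++) {
                acc = acc + (unsigned long long)i32[i]
                          - (unsigned long long)nint[i];
                i32[i] = (unsigned int)(acc & 0xffffffff);
                acc = acc >> 32;     /* arithmetic shift keeps the borrow */
            }
        }
    }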
a=dt[0]=pdm1[0]*pdm2[0]; +! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); +! 205 ! pdtj=&(dt[0]); +! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 207 ! { +! 208 ! m2j=pdm2[j]; +! 209 ! a=pdtj[0]+pdn[0]*digit; +! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 211 ! pdtj[1]=b; +! 213 !#pragma pipeloop(0) +! 214 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; +! 124 ! t+=(b>>16)+(t1>>32); +! 125 ! i32[i]=t1&0xffffffff; +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! b=d; + +/* 0x0070 128 */ add %o0,16,%g2 +/* 0x0074 123 */ and %g1,%o1,%o0 +/* 0x0078 */ sllx %o0,16,%g3 +/* 0x007c 120 */ and %g4,%o3,%o0 +/* 0x0080 117 */ add %o0,%g3,%o4 +/* 0x0084 119 */ fdtox %f0,%f0 +/* 0x0088 */ std %f0,[%sp+104] +/* 0x008c 125 */ and %o4,%o3,%g5 +/* 0x0090 122 */ ldd [%g2+8],%f2 +/* 0x0094 128 */ add %o5,4,%o5 +/* 0x0098 124 */ srax %o4,32,%o4 +/* 0x009c */ stx %o4,[%sp+112] +/* 0x00a0 122 */ fdtox %f2,%f0 +/* 0x00a4 */ std %f0,[%sp+96] +/* 0x00a8 124 */ srax %g1,16,%o0 +/* 0x00ac */ ldx [%sp+112],%o7 +/* 0x00b0 121 */ srax %g4,32,%o4 +/* 0x00b4 124 */ add %o0,%o7,%g4 +/* 0x00b8 128 */ or %g0,1,%o7 +/* 0x00bc 119 */ ldx [%sp+104],%g3 +/* 0x00c0 124 */ add %o4,%g4,%o4 +/* 0x00c4 122 */ ldx [%sp+96],%g1 +/* 0x00c8 125 */ st %g5,[%o5-4] +/* 0x00cc 127 */ or %g0,%g3,%g4 + .L900000112: +/* 0x00d0 119 */ ldd [%g2+16],%f0 +/* 0x00d4 128 */ add %o7,1,%o7 +/* 0x00d8 */ add %o5,4,%o5 +/* 0x00dc */ cmp %o7,%o2 +/* 0x00e0 */ add %g2,16,%g2 +/* 0x00e4 119 */ fdtox %f0,%f0 +/* 0x00e8 */ std %f0,[%sp+104] +/* 0x00ec 122 */ ldd [%g2+8],%f0 +/* 0x00f0 */ fdtox %f0,%f0 +/* 0x00f4 */ std %f0,[%sp+96] +/* 0x00f8 123 */ and %g1,%o1,%g3 +/* 0x00fc */ sllx %g3,16,%g5 +/* 0x0100 120 */ and %g4,%o3,%g3 +/* 0x0104 117 */ add %g3,%g5,%g3 +/* 0x0108 124 */ srax %g1,16,%g1 +/* 0x010c 117 */ add %g3,%o4,%g3 +/* 0x0110 124 */ srax %g3,32,%o4 +/* 0x0114 */ stx %o4,[%sp+112] +/* 0x0118 119 */ ldx [%sp+104],%g5 +/* 0x011c 121 */ srax %g4,32,%o4 +/* 0x0120 124 */ ldx [%sp+112],%g4 +/* 0x0124 */ add %g1,%g4,%g4 +/* 0x0128 122 */ ldx [%sp+96],%g1 +/* 0x012c 124 */ add %o4,%g4,%o4 +/* 0x0130 125 */ and %g3,%o3,%g3 +/* 0x0134 127 */ or %g0,%g5,%g4 +/* 0x0138 128 */ ble,pt %icc,.L900000112 +/* 0x013c */ st %g3,[%o5-4] + .L900000115: +/* 0x0140 128 */ ba .L900000117 +/* 0x0144 */ sethi %hi(0xfc00),%g2 + .L77000134: +/* 0x0148 119 */ ldd [%g2+16],%f0 + .L900000116: +/* 0x014c 120 */ and %g4,%o3,%o0 +/* 0x0150 123 */ and %g1,%o1,%g3 +/* 0x0154 119 */ fdtox %f0,%f0 +/* 0x0158 120 */ add %o4,%o0,%o0 +/* 0x015c 119 */ std %f0,[%sp+104] +/* 0x0160 128 */ add %o7,1,%o7 +/* 0x0164 123 */ sllx %g3,16,%o4 +/* 0x0168 122 */ ldd [%g2+24],%f2 +/* 0x016c 128 */ add %g2,16,%g2 +/* 0x0170 123 */ add %o0,%o4,%o0 +/* 0x0174 128 */ cmp %o7,%o2 +/* 0x0178 125 */ and %o0,%o3,%g3 +/* 0x017c 122 */ fdtox %f2,%f0 +/* 0x0180 */ std %f0,[%sp+96] +/* 0x0184 124 */ srax %o0,32,%o0 +/* 0x0188 */ stx %o0,[%sp+112] +/* 0x018c 121 */ srax %g4,32,%o4 +/* 0x0190 122 */ ldx [%sp+96],%o0 +/* 0x0194 124 */ srax %g1,16,%g5 +/* 0x0198 */ ldx [%sp+112],%g4 +/* 0x019c 119 */ ldx [%sp+104],%g1 +/* 0x01a0 125 */ st %g3,[%o5] +/* 0x01a4 124 */ add %g5,%g4,%g4 +/* 0x01a8 128 */ add %o5,4,%o5 +/* 0x01ac 124 */ add %o4,%g4,%o4 +/* 0x01b0 127 */ or %g0,%g1,%g4 +/* 0x01b4 128 */ or %g0,%o0,%g1 +/* 0x01b8 */ ble,a,pt %icc,.L900000116 +/* 0x01bc */ ldd [%g2+16],%f0 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! 
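The source annotations quoted above give most of the general (nlen != 16) path of mont_mulf_noconv; a sketch assembled from them follows. The inner-loop statement, the jj == 30 cleanup threshold, and the final conversion and adjustment calls are reconstructions and should be checked against montmulf.c rather than taken as authoritative. Zero, TwoTo16 and TwoToMinus16 are the file-static constants defined at the top of this file; upper32, lower32 and mod are the helpers shown in the montmulf.il templates.

    /* Sketch of the general (nlen != 16) path of mont_mulf_noconv. */
    void mont_mulf_noconv_sketch(unsigned int *result,
                                 double *dm1, double *dm2, double *dt,
                                 double *dn, unsigned int *nint,
                                 int nlen, double dn0)
    {
        int i, j, jj;
        double digit, m2j, a, b;
        double *pdtj;

        dm2[2 * nlen] = Zero;                /* sentinel digit */
        for (i = 0; i < 4 * nlen + 2; i++)
            dt[i] = Zero;

        a = dt[0] = dm1[0] * dm2[0];
        digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);

        pdtj = &dt[0];
        for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
            m2j = dm2[j];
            a = pdtj[0] + dn[0] * digit;     /* clears the low 16 bits */
            b = pdtj[1] + dm1[0] * dm2[j + 1] + a * TwoToMinus16;
            pdtj[1] = b;
            for (i = 1; i < nlen; i++)       /* reconstructed body */
                pdtj[2 * i] += dm1[i] * m2j + dn[i] * digit;
            if (jj == 30) {                  /* periodic carry cleanup */
                cleanup(dt, j / 2 + 1, 2 * nlen + 1);
                jj = 0;
            }
            digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
        }

        /* Fold the double digits back to integers and do the final
         * conditional subtraction, as in the tail of the upstream source. */
        conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
        adjust_montf_result(result, nint, nlen);
    }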
i32[i]=t1&0xffffffff; + +/* 0x01c0 133 */ sethi %hi(0xfc00),%g2 + .L900000117: +/* 0x01c4 133 */ or %g0,-1,%g3 +/* 0x01c8 */ add %g2,1023,%g2 +/* 0x01cc */ srl %g3,0,%g3 +/* 0x01d0 */ and %g1,%g2,%g2 +/* 0x01d4 */ and %g4,%g3,%g4 +/* 0x01d8 */ sllx %g2,16,%g2 +/* 0x01dc */ add %o4,%g4,%g4 +/* 0x01e0 */ add %g4,%g2,%g2 +/* 0x01e4 */ sll %o7,2,%g4 +/* 0x01e8 */ and %g2,%g3,%g2 +/* 0x01ec */ st %g2,[%i0+%g4] +/* 0x01f0 */ ret ! Result = +/* 0x01f4 */ restore %g0,%g0,%g0 +/* 0x01f8 0 */ .type conv_d16_to_i32,2 +/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g2 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! for(i=0;i>16); + +/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2 + .L900000310: +/* 0x000c */ call .+8 +/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x0014 152 */ cmp %o0,0 +/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x001c 152 */ ble,pt %icc,.L77000150 +/* 0x0020 */ add %g3,%o7,%o2 +/* 0x0024 */ sub %i2,1,%o5 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1 +/* 0x002c 152 */ sethi %hi(0xfc00),%o0 +/* 0x0030 */ ld [%o2+%o1],%o3 +/* 0x0034 */ add %o5,1,%g2 +/* 0x0038 */ or %g0,0,%g1 +/* 0x003c */ cmp %g2,3 +/* 0x0040 */ or %g0,%i1,%o7 +/* 0x0044 */ add %o0,1023,%o4 +/* 0x0048 */ or %g0,%i0,%g3 +/* 0x004c */ bl,pn %icc,.L77000154 +/* 0x0050 */ add %o7,4,%o0 +/* 0x0054 155 */ ldd [%o3],%f0 +/* 0x0058 156 */ or %g0,1,%g1 +/* 0x005c 154 */ ld [%o0-4],%o1 +/* 0x0060 0 */ or %g0,%o0,%o7 +/* 0x0064 155 */ and %o1,%o4,%o0 + .L900000306: +/* 0x0068 155 */ st %o0,[%sp+96] +/* 0x006c 156 */ add %g1,1,%g1 +/* 0x0070 */ add %g3,16,%g3 +/* 0x0074 */ cmp %g1,%o5 +/* 0x0078 */ add %o7,4,%o7 +/* 0x007c 155 */ ld [%sp+96],%f3 +/* 0x0080 */ fmovs %f0,%f2 +/* 0x0084 */ fsubd %f2,%f0,%f2 +/* 0x0088 156 */ srl %o1,16,%o0 +/* 0x008c 155 */ std %f2,[%g3-16] +/* 0x0090 156 */ st %o0,[%sp+92] +/* 0x0094 */ ld [%sp+92],%f3 +/* 0x0098 154 */ ld [%o7-4],%o1 +/* 0x009c 156 */ fmovs %f0,%f2 +/* 0x00a0 */ fsubd %f2,%f0,%f2 +/* 0x00a4 155 */ and %o1,%o4,%o0 +/* 0x00a8 156 */ ble,pt %icc,.L900000306 +/* 0x00ac */ std %f2,[%g3-8] + .L900000309: +/* 0x00b0 155 */ st %o0,[%sp+96] +/* 0x00b4 */ fmovs %f0,%f2 +/* 0x00b8 156 */ add %g3,16,%g3 +/* 0x00bc */ srl %o1,16,%o0 +/* 0x00c0 155 */ ld [%sp+96],%f3 +/* 0x00c4 */ fsubd %f2,%f0,%f2 +/* 0x00c8 */ std %f2,[%g3-16] +/* 0x00cc 156 */ st %o0,[%sp+92] +/* 0x00d0 */ fmovs %f0,%f2 +/* 0x00d4 */ ld [%sp+92],%f3 +/* 0x00d8 */ fsubd %f2,%f0,%f0 +/* 0x00dc */ std %f0,[%g3-8] +/* 0x00e0 */ ret ! 
Result = +/* 0x00e4 */ restore %g0,%g0,%g0 + .L77000154: +/* 0x00e8 154 */ ld [%o7],%o0 + .L900000311: +/* 0x00ec 155 */ and %o0,%o4,%o1 +/* 0x00f0 */ st %o1,[%sp+96] +/* 0x00f4 156 */ add %g1,1,%g1 +/* 0x00f8 155 */ ldd [%o3],%f0 +/* 0x00fc 156 */ srl %o0,16,%o0 +/* 0x0100 */ add %o7,4,%o7 +/* 0x0104 */ cmp %g1,%o5 +/* 0x0108 155 */ fmovs %f0,%f2 +/* 0x010c */ ld [%sp+96],%f3 +/* 0x0110 */ fsubd %f2,%f0,%f2 +/* 0x0114 */ std %f2,[%g3] +/* 0x0118 156 */ st %o0,[%sp+92] +/* 0x011c */ fmovs %f0,%f2 +/* 0x0120 */ ld [%sp+92],%f3 +/* 0x0124 */ fsubd %f2,%f0,%f0 +/* 0x0128 */ std %f0,[%g3+8] +/* 0x012c */ add %g3,16,%g3 +/* 0x0130 */ ble,a,pt %icc,.L900000311 +/* 0x0134 */ ld [%o7],%o0 + .L77000150: +/* 0x0138 */ ret ! Result = +/* 0x013c */ restore %g0,%g0,%g0 +/* 0x0140 0 */ .type conv_i32_to_d16,2 +/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-120,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1 +/* 0x0160 175 */ sethi %hi(0xfc00),%o0 +/* 0x0164 */ ld [%o4+%o1],%o2 +/* 0x0168 */ sll %g5,2,%o3 +/* 0x016c */ sub %i3,%g5,%g3 +/* 0x0170 */ sll %g5,3,%g2 +/* 0x0174 */ add %o0,1023,%o4 +/* 0x0178 178 */ ldd [%o2],%f0 +/* 0x017c */ add %i2,%o3,%o0 +/* 0x0180 175 */ cmp %g3,3 +/* 0x0184 */ add %i4,%g2,%o3 +/* 0x0188 */ sub %i3,1,%o1 +/* 0x018c */ sll %g5,4,%g4 +/* 0x0190 */ bl,pn %icc,.L77000161 +/* 0x0194 */ add %i1,%g4,%o5 +/* 0x0198 178 */ ld [%o0],%f3 +/* 0x019c 180 */ add %o3,8,%o3 +/* 0x01a0 177 */ ld [%o0],%o7 +/* 0x01a4 180 */ add %o5,16,%o5 +/* 0x01a8 */ add %g5,1,%g5 +/* 0x01ac 178 */ fmovs %f0,%f2 +/* 0x01b0 180 */ add %o0,4,%o0 +/* 0x01b4 179 */ and %o7,%o4,%g1 +/* 0x01b8 178 */ fsubd %f2,%f0,%f2 +/* 0x01bc */ std %f2,[%o3-8] +/* 0x01c0 180 */ srl %o7,16,%o7 +/* 0x01c4 179 */ st %g1,[%sp+96] +/* 0x01c8 */ fmovs %f0,%f2 +/* 0x01cc */ ld [%sp+96],%f3 +/* 0x01d0 */ fsubd %f2,%f0,%f2 +/* 0x01d4 */ std %f2,[%o5-16] +/* 0x01d8 180 */ st %o7,[%sp+92] +/* 0x01dc */ fmovs %f0,%f2 +/* 0x01e0 */ ld [%sp+92],%f3 +/* 0x01e4 */ fsubd %f2,%f0,%f2 +/* 0x01e8 */ std %f2,[%o5-8] + .L900000411: +/* 0x01ec 178 */ ld [%o0],%f3 +/* 0x01f0 180 */ add %g5,2,%g5 +/* 0x01f4 */ add %o5,32,%o5 +/* 0x01f8 177 */ ld [%o0],%o7 +/* 0x01fc 180 */ cmp %g5,%o1 +/* 0x0200 */ add %o3,16,%o3 +/* 0x0204 178 */ fmovs %f0,%f2 +/* 0x0208 */ fsubd %f2,%f0,%f2 +/* 0x020c */ std %f2,[%o3-16] +/* 0x0210 179 */ and %o7,%o4,%g1 +/* 0x0214 */ st %g1,[%sp+96] +/* 0x0218 */ ld [%sp+96],%f3 +/* 0x021c */ fmovs %f0,%f2 +/* 0x0220 */ fsubd %f2,%f0,%f2 +/* 0x0224 180 */ srl %o7,16,%o7 +/* 0x0228 179 */ std %f2,[%o5-32] +/* 0x022c 180 */ st %o7,[%sp+92] +/* 0x0230 */ ld [%sp+92],%f3 +/* 0x0234 */ fmovs %f0,%f2 +/* 0x0238 */ fsubd %f2,%f0,%f2 +/* 0x023c */ std %f2,[%o5-24] +/* 0x0240 */ add %o0,4,%o0 +/* 0x0244 178 */ ld [%o0],%f3 +/* 0x0248 177 */ 
ld [%o0],%o7 +/* 0x024c 178 */ fmovs %f0,%f2 +/* 0x0250 */ fsubd %f2,%f0,%f2 +/* 0x0254 */ std %f2,[%o3-8] +/* 0x0258 179 */ and %o7,%o4,%g1 +/* 0x025c */ st %g1,[%sp+96] +/* 0x0260 */ ld [%sp+96],%f3 +/* 0x0264 */ fmovs %f0,%f2 +/* 0x0268 */ fsubd %f2,%f0,%f2 +/* 0x026c 180 */ srl %o7,16,%o7 +/* 0x0270 179 */ std %f2,[%o5-16] +/* 0x0274 180 */ st %o7,[%sp+92] +/* 0x0278 */ ld [%sp+92],%f3 +/* 0x027c */ fmovs %f0,%f2 +/* 0x0280 */ fsubd %f2,%f0,%f2 +/* 0x0284 */ std %f2,[%o5-8] +/* 0x0288 */ bl,pt %icc,.L900000411 +/* 0x028c */ add %o0,4,%o0 + .L900000414: +/* 0x0290 180 */ cmp %g5,%i3 +/* 0x0294 */ bge,pn %icc,.L77000164 +/* 0x0298 */ nop + .L77000161: +/* 0x029c 178 */ ld [%o0],%f3 + .L900000416: +/* 0x02a0 178 */ ldd [%o2],%f0 +/* 0x02a4 180 */ add %g5,1,%g5 +/* 0x02a8 177 */ ld [%o0],%o1 +/* 0x02ac 180 */ add %o0,4,%o0 +/* 0x02b0 */ cmp %g5,%i3 +/* 0x02b4 178 */ fmovs %f0,%f2 +/* 0x02b8 179 */ and %o1,%o4,%o7 +/* 0x02bc 178 */ fsubd %f2,%f0,%f2 +/* 0x02c0 */ std %f2,[%o3] +/* 0x02c4 180 */ srl %o1,16,%o1 +/* 0x02c8 179 */ st %o7,[%sp+96] +/* 0x02cc 180 */ add %o3,8,%o3 +/* 0x02d0 179 */ fmovs %f0,%f2 +/* 0x02d4 */ ld [%sp+96],%f3 +/* 0x02d8 */ fsubd %f2,%f0,%f2 +/* 0x02dc */ std %f2,[%o5] +/* 0x02e0 180 */ st %o1,[%sp+92] +/* 0x02e4 */ fmovs %f0,%f2 +/* 0x02e8 */ ld [%sp+92],%f3 +/* 0x02ec */ fsubd %f2,%f0,%f0 +/* 0x02f0 */ std %f0,[%o5+8] +/* 0x02f4 */ add %o5,16,%o5 +/* 0x02f8 */ bl,a,pt %icc,.L900000416 +/* 0x02fc */ ld [%o0],%f3 + .L77000164: +/* 0x0300 */ ret ! Result = +/* 0x0304 */ restore %g0,%g0,%g0 +/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ or %g0,%o2,%g5 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 188 !int i; +! 190 ! if(i32[len]>0) i=-1; + +/* 0x0004 190 */ or %g0,-1,%g4 +/* 0x0008 */ sll %o2,2,%g1 +/* 0x000c */ ld [%o0+%g1],%g1 +/* 0x0010 */ cmp %g1,0 +/* 0x0014 */ bleu,pn %icc,.L77000175 +/* 0x0018 */ or %g0,%o1,%o3 +/* 0x001c */ ba .L900000511 +/* 0x0020 */ cmp %g4,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0024 193 */ sub %o2,1,%g4 +/* 0x0028 */ sll %g4,2,%g1 +/* 0x002c */ cmp %g4,0 +/* 0x0030 */ bl,pt %icc,.L900000511 +/* 0x0034 */ cmp %g4,0 +/* 0x0038 */ add %o1,%g1,%g2 + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x003c 195 */ ld [%g2],%o5 +/* 0x0040 193 */ add %o0,%g1,%g3 + .L900000510: +/* 0x0044 195 */ ld [%g3],%o2 +/* 0x0048 */ sub %g4,1,%g1 +/* 0x004c */ sub %g2,4,%g2 +/* 0x0050 */ sub %g3,4,%g3 +/* 0x0054 */ cmp %o2,%o5 +/* 0x0058 */ bne,pn %icc,.L77000182 +/* 0x005c */ nop +/* 0x0060 0 */ or %g0,%g1,%g4 +/* 0x0064 195 */ cmp %g1,0 +/* 0x0068 */ bge,a,pt %icc,.L900000510 +/* 0x006c */ ld [%g2],%o5 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0070 198 */ cmp %g4,0 + .L900000511: +/* 0x0074 198 */ bl,pn %icc,.L77000198 +/* 0x0078 */ sll %g4,2,%g2 +/* 0x007c */ ld [%o1+%g2],%g1 +/* 0x0080 */ ld [%o0+%g2],%g2 +/* 0x0084 */ cmp %g2,%g1 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! 
for(i=0;i>32; + +/* 0x00dc 205 */ or %g0,2,%o5 +/* 0x00e0 201 */ sub %o2,%o1,%o2 +/* 0x00e4 */ or %g0,%o2,%g5 +/* 0x00e8 204 */ and %o2,%g3,%o2 +/* 0x00ec */ st %o2,[%o0] +/* 0x00f0 205 */ srax %g5,32,%g5 + .L900000505: +/* 0x00f4 203 */ ld [%o3],%o2 +/* 0x00f8 205 */ add %o5,1,%o5 +/* 0x00fc */ add %o3,4,%o3 +/* 0x0100 */ cmp %o5,%g4 +/* 0x0104 */ add %o4,4,%o4 +/* 0x0108 201 */ sub %g1,%o2,%g1 +/* 0x010c */ add %g1,%g5,%g5 +/* 0x0110 204 */ and %g5,%g3,%o2 +/* 0x0114 203 */ ld [%o4-4],%g1 +/* 0x0118 204 */ st %o2,[%o4-8] +/* 0x011c 205 */ ble,pt %icc,.L900000505 +/* 0x0120 */ srax %g5,32,%g5 + .L900000508: +/* 0x0124 203 */ ld [%o3],%g2 +/* 0x0128 201 */ sub %g1,%g2,%g1 +/* 0x012c */ add %g1,%g5,%g1 +/* 0x0130 204 */ and %g1,%g3,%g2 +/* 0x0134 */ retl ! Result = +/* 0x0138 */ st %g2,[%o4-4] + .L77000199: +/* 0x013c 203 */ ld [%o4],%g1 + .L900000509: +/* 0x0140 203 */ ld [%o3],%g2 +/* 0x0144 */ add %g5,%g1,%g1 +/* 0x0148 205 */ add %o5,1,%o5 +/* 0x014c */ add %o3,4,%o3 +/* 0x0150 */ cmp %o5,%g4 +/* 0x0154 203 */ sub %g1,%g2,%g1 +/* 0x0158 204 */ and %g1,%g3,%g2 +/* 0x015c */ st %g2,[%o4] +/* 0x0160 205 */ add %o4,4,%o4 +/* 0x0164 */ srax %g1,32,%g5 +/* 0x0168 */ ble,a,pt %icc,.L900000509 +/* 0x016c */ ld [%o4],%g1 + .L77000191: +/* 0x0170 */ retl ! Result = +/* 0x0174 */ nop +/* 0x0178 0 */ .type adjust_montf_result,2 +/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +/* 000000 */ .skip 16 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-144,%sp + .L900000646: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 + +! 206 ! } +! 207 ! } +! 208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x000c 232 */ ld [%fp+92],%o1 +/* 0x0010 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ ldd [%fp+96],%f2 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 223 */ st %i0,[%fp+68] +/* 0x0024 */ add %g5,%o7,%o3 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! 
digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x002c 232 */ ld [%o3+%g2],%l0 +/* 0x0030 239 */ sethi %hi(TwoTo16),%g4 +/* 0x0034 223 */ or %g0,%i2,%o2 +/* 0x0038 */ fmovd %f2,%f16 +/* 0x003c */ st %i5,[%fp+88] +/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x0044 223 */ or %g0,%i1,%i2 +/* 0x0048 232 */ ldd [%l0],%f0 +/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0050 223 */ or %g0,%i3,%o0 +/* 0x0054 232 */ sll %o1,4,%g4 +/* 0x0058 239 */ ld [%o3+%g2],%g5 +/* 0x005c 223 */ or %g0,%i3,%i1 +/* 0x0060 239 */ ld [%o3+%g3],%g1 +/* 0x0064 232 */ or %g0,%o1,%i0 +/* 0x0068 */ or %g0,%o2,%i3 +/* 0x006c 234 */ cmp %o1,16 +/* 0x0070 */ be,pn %icc,.L77000279 +/* 0x0074 */ std %f0,[%o2+%g4] +/* 0x0078 236 */ sll %o1,2,%g2 +/* 0x007c */ or %g0,%o0,%o3 +/* 0x0080 232 */ sll %o1,1,%o1 +/* 0x0084 236 */ add %g2,2,%o2 +/* 0x0088 */ cmp %o2,0 +/* 0x008c */ ble,a,pt %icc,.L900000660 +/* 0x0090 */ ldd [%i2],%f0 + +! 241 ! pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; + +/* 0x0068 123 */ and %i2,%o5,%i4 +/* 0x006c */ sllx %i4,16,%o1 +/* 0x0070 117 */ cmp %g2,6 +/* 0x0074 */ bl,pn %icc,.L77000134 +/* 0x0078 */ or %g0,3,%i0 +/* 0x007c 119 */ ldd [%o4+16],%f0 +/* 0x0080 120 */ and %i3,%g3,%o3 + +! 124 ! t+=(b>>16)+(t1>>32); + +/* 0x0084 124 */ srax %i2,16,%i5 +/* 0x0088 117 */ add %o3,%o1,%i4 +/* 0x008c 121 */ srax %i3,32,%i3 +/* 0x0090 119 */ fdtox %f0,%f0 +/* 0x0094 */ std %f0,[%sp+2231] + +! 125 ! i32[i]=t1&0xffffffff; + +/* 0x0098 125 */ and %i4,%g3,%l0 +/* 0x009c 117 */ or %g0,72,%o3 +/* 0x00a0 122 */ ldd [%g4+24],%f0 +/* 0x00a4 117 */ or %g0,64,%o4 +/* 0x00a8 */ or %g0,4,%o1 + +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! 
b=d; + +/* 0x00ac 128 */ or %g0,5,%i0 +/* 0x00b0 */ or %g0,4,%i1 +/* 0x00b4 119 */ ldx [%sp+2231],%g2 +/* 0x00b8 122 */ fdtox %f0,%f0 +/* 0x00bc 128 */ or %g0,4,%o0 +/* 0x00c0 122 */ std %f0,[%sp+2223] +/* 0x00c4 */ ldd [%g4+40],%f2 +/* 0x00c8 120 */ and %g2,%g3,%i2 +/* 0x00cc 119 */ ldd [%g4+32],%f0 +/* 0x00d0 121 */ srax %g2,32,%g2 +/* 0x00d4 122 */ ldd [%g4+56],%f4 +/* 0x00d8 */ fdtox %f2,%f2 +/* 0x00dc */ ldx [%sp+2223],%g5 +/* 0x00e0 119 */ fdtox %f0,%f0 +/* 0x00e4 125 */ st %l0,[%g1] +/* 0x00e8 124 */ srax %i4,32,%l0 +/* 0x00ec 122 */ fdtox %f4,%f4 +/* 0x00f0 */ std %f2,[%sp+2223] +/* 0x00f4 123 */ and %g5,%o5,%i4 +/* 0x00f8 124 */ add %i5,%l0,%i5 +/* 0x00fc 119 */ std %f0,[%sp+2231] +/* 0x0100 123 */ sllx %i4,16,%i4 +/* 0x0104 124 */ add %i3,%i5,%i3 +/* 0x0108 119 */ ldd [%g4+48],%f2 +/* 0x010c 124 */ srax %g5,16,%g5 +/* 0x0110 117 */ add %i2,%i4,%i2 +/* 0x0114 122 */ ldd [%g4+72],%f0 +/* 0x0118 117 */ add %i2,%i3,%i4 +/* 0x011c 124 */ srax %i4,32,%i5 +/* 0x0120 119 */ fdtox %f2,%f2 +/* 0x0124 125 */ and %i4,%g3,%i4 +/* 0x0128 122 */ ldx [%sp+2223],%i2 +/* 0x012c 124 */ add %g5,%i5,%g5 +/* 0x0130 119 */ ldx [%sp+2231],%i3 +/* 0x0134 124 */ add %g2,%g5,%g5 +/* 0x0138 119 */ std %f2,[%sp+2231] +/* 0x013c 122 */ std %f4,[%sp+2223] +/* 0x0140 119 */ ldd [%g4+64],%f2 +/* 0x0144 125 */ st %i4,[%g1+4] + .L900000108: +/* 0x0148 122 */ ldx [%sp+2223],%i4 +/* 0x014c 128 */ add %o0,2,%o0 +/* 0x0150 */ add %i0,4,%i0 +/* 0x0154 119 */ ldx [%sp+2231],%l0 +/* 0x0158 117 */ add %o3,16,%o3 +/* 0x015c 123 */ and %i2,%o5,%g2 +/* 0x0160 */ sllx %g2,16,%i5 +/* 0x0164 120 */ and %i3,%g3,%g2 +/* 0x0168 122 */ ldd [%g4+%o3],%f4 +/* 0x016c */ fdtox %f0,%f0 +/* 0x0170 */ std %f0,[%sp+2223] +/* 0x0174 124 */ srax %i2,16,%i2 +/* 0x0178 117 */ add %g2,%i5,%g2 +/* 0x017c 119 */ fdtox %f2,%f0 +/* 0x0180 117 */ add %o4,16,%o4 +/* 0x0184 119 */ std %f0,[%sp+2231] +/* 0x0188 117 */ add %g2,%g5,%g2 +/* 0x018c 119 */ ldd [%g4+%o4],%f2 +/* 0x0190 124 */ srax %g2,32,%i5 +/* 0x0194 128 */ cmp %o0,%o2 +/* 0x0198 121 */ srax %i3,32,%g5 +/* 0x019c 124 */ add %i2,%i5,%i2 +/* 0x01a0 */ add %g5,%i2,%i5 +/* 0x01a4 117 */ add %o1,4,%o1 +/* 0x01a8 125 */ and %g2,%g3,%g2 +/* 0x01ac 127 */ or %g0,%l0,%g5 +/* 0x01b0 125 */ st %g2,[%g1+%o1] +/* 0x01b4 128 */ add %i1,4,%i1 +/* 0x01b8 122 */ ldx [%sp+2223],%i2 +/* 0x01bc 119 */ ldx [%sp+2231],%i3 +/* 0x01c0 117 */ add %o3,16,%o3 +/* 0x01c4 123 */ and %i4,%o5,%g2 +/* 0x01c8 */ sllx %g2,16,%l0 +/* 0x01cc 120 */ and %g5,%g3,%g2 +/* 0x01d0 122 */ ldd [%g4+%o3],%f0 +/* 0x01d4 */ fdtox %f4,%f4 +/* 0x01d8 */ std %f4,[%sp+2223] +/* 0x01dc 124 */ srax %i4,16,%i4 +/* 0x01e0 117 */ add %g2,%l0,%g2 +/* 0x01e4 119 */ fdtox %f2,%f2 +/* 0x01e8 117 */ add %o4,16,%o4 +/* 0x01ec 119 */ std %f2,[%sp+2231] +/* 0x01f0 117 */ add %g2,%i5,%g2 +/* 0x01f4 119 */ ldd [%g4+%o4],%f2 +/* 0x01f8 124 */ srax %g2,32,%i5 +/* 0x01fc 121 */ srax %g5,32,%g5 +/* 0x0200 124 */ add %i4,%i5,%i4 +/* 0x0204 */ add %g5,%i4,%g5 +/* 0x0208 117 */ add %o1,4,%o1 +/* 0x020c 125 */ and %g2,%g3,%g2 +/* 0x0210 128 */ ble,pt %icc,.L900000108 +/* 0x0214 */ st %g2,[%g1+%o1] + .L900000111: +/* 0x0218 122 */ ldx [%sp+2223],%o2 +/* 0x021c 123 */ and %i2,%o5,%i4 +/* 0x0220 120 */ and %i3,%g3,%g2 +/* 0x0224 123 */ sllx %i4,16,%i4 +/* 0x0228 119 */ ldx [%sp+2231],%i5 +/* 0x022c 128 */ cmp %o0,%o7 +/* 0x0230 124 */ srax %i2,16,%i2 +/* 0x0234 117 */ add %g2,%i4,%g2 +/* 0x0238 122 */ fdtox %f0,%f4 +/* 0x023c */ std %f4,[%sp+2223] +/* 0x0240 117 */ add %g2,%g5,%g5 +/* 0x0244 123 */ and %o2,%o5,%l0 +/* 0x0248 124 */ srax %g5,32,%l1 +/* 0x024c 120 */ 
and %i5,%g3,%i4 +/* 0x0250 119 */ fdtox %f2,%f0 +/* 0x0254 121 */ srax %i3,32,%g2 +/* 0x0258 119 */ std %f0,[%sp+2231] +/* 0x025c 124 */ add %i2,%l1,%i2 +/* 0x0260 123 */ sllx %l0,16,%i3 +/* 0x0264 124 */ add %g2,%i2,%i2 +/* 0x0268 */ srax %o2,16,%o2 +/* 0x026c 117 */ add %o1,4,%g2 +/* 0x0270 */ add %i4,%i3,%o1 +/* 0x0274 125 */ and %g5,%g3,%g5 +/* 0x0278 */ st %g5,[%g1+%g2] +/* 0x027c 119 */ ldx [%sp+2231],%i3 +/* 0x0280 117 */ add %o1,%i2,%o1 +/* 0x0284 */ add %g2,4,%g2 +/* 0x0288 124 */ srax %o1,32,%i4 +/* 0x028c 122 */ ldx [%sp+2223],%i2 +/* 0x0290 125 */ and %o1,%g3,%g5 +/* 0x0294 121 */ srax %i5,32,%o1 +/* 0x0298 124 */ add %o2,%i4,%o2 +/* 0x029c 125 */ st %g5,[%g1+%g2] +/* 0x02a0 128 */ bg,pn %icc,.L77000127 +/* 0x02a4 */ add %o1,%o2,%g5 +/* 0x02a8 */ add %i0,6,%i0 +/* 0x02ac */ add %i1,6,%i1 + .L77000134: +/* 0x02b0 119 */ sra %i1,0,%o2 + .L900000112: +/* 0x02b4 119 */ sllx %o2,3,%o3 +/* 0x02b8 120 */ and %i3,%g3,%o1 +/* 0x02bc 119 */ ldd [%g4+%o3],%f0 +/* 0x02c0 122 */ sra %i0,0,%o3 +/* 0x02c4 123 */ and %i2,%o5,%o2 +/* 0x02c8 122 */ sllx %o3,3,%o3 +/* 0x02cc 120 */ add %g5,%o1,%o1 +/* 0x02d0 119 */ fdtox %f0,%f0 +/* 0x02d4 */ std %f0,[%sp+2231] +/* 0x02d8 123 */ sllx %o2,16,%o2 +/* 0x02dc */ add %o1,%o2,%o2 +/* 0x02e0 128 */ add %i1,2,%i1 +/* 0x02e4 122 */ ldd [%g4+%o3],%f0 +/* 0x02e8 124 */ srax %o2,32,%g2 +/* 0x02ec 125 */ and %o2,%g3,%o3 +/* 0x02f0 124 */ srax %i2,16,%o1 +/* 0x02f4 128 */ add %i0,2,%i0 +/* 0x02f8 122 */ fdtox %f0,%f0 +/* 0x02fc */ std %f0,[%sp+2223] +/* 0x0300 125 */ sra %o0,0,%o2 +/* 0x0304 */ sllx %o2,2,%o2 +/* 0x0308 124 */ add %o1,%g2,%g5 +/* 0x030c 121 */ srax %i3,32,%g2 +/* 0x0310 128 */ add %o0,1,%o0 +/* 0x0314 124 */ add %g2,%g5,%g5 +/* 0x0318 128 */ cmp %o0,%o7 +/* 0x031c 119 */ ldx [%sp+2231],%o4 +/* 0x0320 122 */ ldx [%sp+2223],%i2 +/* 0x0324 125 */ st %o3,[%g1+%o2] +/* 0x0328 127 */ or %g0,%o4,%i3 +/* 0x032c 128 */ ble,pt %icc,.L900000112 +/* 0x0330 */ sra %i1,0,%o2 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! i32[i]=t1&0xffffffff; + +/* 0x0334 133 */ sethi %hi(0xfc00),%g2 + .L900000113: +/* 0x0338 133 */ or %g0,-1,%g3 +/* 0x033c */ add %g2,1023,%g2 +/* 0x0340 */ srl %g3,0,%g3 +/* 0x0344 */ and %i2,%g2,%g2 +/* 0x0348 */ and %i3,%g3,%g4 +/* 0x034c */ sllx %g2,16,%g2 +/* 0x0350 */ add %g5,%g4,%g4 +/* 0x0354 */ sra %o0,0,%g5 +/* 0x0358 */ add %g4,%g2,%g4 +/* 0x035c */ sllx %g5,2,%g2 +/* 0x0360 */ and %g4,%g3,%g3 +/* 0x0364 */ st %g3,[%g1+%g2] +/* 0x0368 */ ret ! Result = +/* 0x036c */ restore %g0,%g0,%g0 +/* 0x0370 0 */ .type conv_d16_to_i32,2 +/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g3 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! 
for(i=0;i>16); + +/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2 +/* 0x0020 147 */ or %g0,%i2,%o1 +/* 0x0024 152 */ sethi %hi(0xfc00),%g3 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2 +/* 0x002c 152 */ or %g0,%o1,%g4 +/* 0x0030 156 */ ldx [%o0+%g2],%o5 +/* 0x0034 152 */ add %g3,1023,%g1 +/* 0x0038 147 */ or %g0,%i1,%o7 +/* 0x003c 152 */ or %g0,0,%i2 +/* 0x0040 */ sub %o1,1,%g5 +/* 0x0044 */ or %g0,0,%g3 +/* 0x0048 */ or %g0,1,%g2 +/* 0x004c 154 */ or %g0,0,%o2 +/* 0x0050 */ cmp %g4,6 +/* 0x0054 152 */ bl,pn %icc,.L77000154 +/* 0x0058 */ ldd [%o5],%f0 +/* 0x005c */ sub %o1,2,%o3 +/* 0x0060 */ or %g0,16,%o2 +/* 0x0064 154 */ ld [%i1],%o4 +/* 0x0068 156 */ or %g0,3,%g2 +/* 0x006c */ or %g0,2,%g3 +/* 0x0070 155 */ fmovs %f0,%f2 +/* 0x0074 156 */ or %g0,4,%i2 +/* 0x0078 155 */ and %o4,%g1,%o0 +/* 0x007c */ st %o0,[%sp+2227] +/* 0x0080 */ fmovs %f0,%f4 +/* 0x0084 156 */ srl %o4,16,%i4 +/* 0x0088 152 */ or %g0,12,%o4 +/* 0x008c */ or %g0,24,%o0 +/* 0x0090 155 */ ld [%sp+2227],%f3 +/* 0x0094 */ fsubd %f2,%f0,%f2 +/* 0x0098 */ std %f2,[%i0] +/* 0x009c 156 */ st %i4,[%sp+2223] +/* 0x00a0 154 */ ld [%o7+4],%o1 +/* 0x00a4 156 */ fmovs %f0,%f2 +/* 0x00a8 155 */ and %o1,%g1,%i1 +/* 0x00ac 156 */ ld [%sp+2223],%f3 +/* 0x00b0 */ srl %o1,16,%o1 +/* 0x00b4 */ fsubd %f2,%f0,%f2 +/* 0x00b8 */ std %f2,[%i0+8] +/* 0x00bc */ st %o1,[%sp+2223] +/* 0x00c0 155 */ st %i1,[%sp+2227] +/* 0x00c4 154 */ ld [%o7+8],%o1 +/* 0x00c8 156 */ fmovs %f0,%f2 +/* 0x00cc 155 */ and %o1,%g1,%g4 +/* 0x00d0 */ ld [%sp+2227],%f5 +/* 0x00d4 156 */ srl %o1,16,%o1 +/* 0x00d8 */ ld [%sp+2223],%f3 +/* 0x00dc */ st %o1,[%sp+2223] +/* 0x00e0 155 */ fsubd %f4,%f0,%f4 +/* 0x00e4 */ st %g4,[%sp+2227] +/* 0x00e8 156 */ fsubd %f2,%f0,%f2 +/* 0x00ec 154 */ ld [%o7+12],%o1 +/* 0x00f0 155 */ std %f4,[%i0+16] +/* 0x00f4 156 */ std %f2,[%i0+24] + .L900000306: +/* 0x00f8 155 */ ld [%sp+2227],%f5 +/* 0x00fc 156 */ add %i2,2,%i2 +/* 0x0100 */ add %g2,4,%g2 +/* 0x0104 */ ld [%sp+2223],%f3 +/* 0x0108 */ cmp %i2,%o3 +/* 0x010c */ add %g3,4,%g3 +/* 0x0110 155 */ and %o1,%g1,%g4 +/* 0x0114 156 */ srl %o1,16,%o1 +/* 0x0118 155 */ st %g4,[%sp+2227] +/* 0x011c 156 */ st %o1,[%sp+2223] +/* 0x0120 152 */ add %o4,4,%o1 +/* 0x0124 154 */ ld [%o7+%o1],%o4 +/* 0x0128 156 */ fmovs %f0,%f2 +/* 0x012c 155 */ fmovs %f0,%f4 +/* 0x0130 */ fsubd %f4,%f0,%f4 +/* 0x0134 152 */ add %o2,16,%o2 +/* 0x0138 156 */ fsubd %f2,%f0,%f2 +/* 0x013c 155 */ std %f4,[%i0+%o2] +/* 0x0140 152 */ add %o0,16,%o0 +/* 0x0144 156 */ std %f2,[%i0+%o0] +/* 0x0148 155 */ ld [%sp+2227],%f5 +/* 0x014c 156 */ ld [%sp+2223],%f3 +/* 0x0150 155 */ and %o4,%g1,%g4 +/* 0x0154 156 */ srl %o4,16,%o4 +/* 0x0158 155 */ st %g4,[%sp+2227] +/* 0x015c 156 */ st %o4,[%sp+2223] +/* 0x0160 152 */ add %o1,4,%o4 +/* 0x0164 154 */ ld [%o7+%o4],%o1 +/* 0x0168 156 */ fmovs %f0,%f2 +/* 0x016c 155 */ fmovs %f0,%f4 +/* 0x0170 */ fsubd %f4,%f0,%f4 +/* 0x0174 152 */ add %o2,16,%o2 +/* 0x0178 156 */ fsubd %f2,%f0,%f2 +/* 0x017c 155 */ std %f4,[%i0+%o2] +/* 0x0180 152 */ add %o0,16,%o0 +/* 0x0184 156 */ ble,pt %icc,.L900000306 +/* 0x0188 */ std %f2,[%i0+%o0] + .L900000309: +/* 0x018c 155 */ ld [%sp+2227],%f5 +/* 0x0190 156 */ fmovs %f0,%f2 +/* 0x0194 */ srl %o1,16,%o3 +/* 0x0198 */ ld [%sp+2223],%f3 +/* 0x019c 155 */ and %o1,%g1,%i1 +/* 0x01a0 152 */ add %o2,16,%g4 +/* 0x01a4 155 */ fmovs %f0,%f4 +/* 0x01a8 */ st %i1,[%sp+2227] +/* 0x01ac 152 */ add %o0,16,%o2 +/* 0x01b0 156 */ st %o3,[%sp+2223] +/* 0x01b4 154 */ sra %i2,0,%o3 +/* 0x01b8 152 */ add %g4,16,%o1 +/* 0x01bc 155 */ fsubd %f4,%f0,%f4 +/* 0x01c0 */ 
std %f4,[%i0+%g4] +/* 0x01c4 152 */ add %o0,32,%o0 +/* 0x01c8 156 */ fsubd %f2,%f0,%f2 +/* 0x01cc */ std %f2,[%i0+%o2] +/* 0x01d0 */ sllx %o3,2,%o2 +/* 0x01d4 155 */ ld [%sp+2227],%f5 +/* 0x01d8 156 */ cmp %i2,%g5 +/* 0x01dc */ add %g2,6,%g2 +/* 0x01e0 */ ld [%sp+2223],%f3 +/* 0x01e4 */ add %g3,6,%g3 +/* 0x01e8 155 */ fmovs %f0,%f4 +/* 0x01ec 156 */ fmovs %f0,%f2 +/* 0x01f0 155 */ fsubd %f4,%f0,%f4 +/* 0x01f4 */ std %f4,[%i0+%o1] +/* 0x01f8 156 */ fsubd %f2,%f0,%f0 +/* 0x01fc */ bg,pn %icc,.L77000150 +/* 0x0200 */ std %f0,[%i0+%o0] + .L77000154: +/* 0x0204 155 */ ldd [%o5],%f0 + .L900000311: +/* 0x0208 154 */ ld [%o7+%o2],%o0 +/* 0x020c 155 */ sra %g3,0,%o1 +/* 0x0210 */ fmovs %f0,%f2 +/* 0x0214 */ sllx %o1,3,%o2 +/* 0x0218 156 */ add %i2,1,%i2 +/* 0x021c 155 */ and %o0,%g1,%o1 +/* 0x0220 */ st %o1,[%sp+2227] +/* 0x0224 156 */ add %g3,2,%g3 +/* 0x0228 */ srl %o0,16,%o1 +/* 0x022c */ cmp %i2,%g5 +/* 0x0230 */ sra %g2,0,%o0 +/* 0x0234 */ add %g2,2,%g2 +/* 0x0238 */ sllx %o0,3,%o0 +/* 0x023c 155 */ ld [%sp+2227],%f3 +/* 0x0240 154 */ sra %i2,0,%o3 +/* 0x0244 155 */ fsubd %f2,%f0,%f2 +/* 0x0248 */ std %f2,[%i0+%o2] +/* 0x024c */ sllx %o3,2,%o2 +/* 0x0250 156 */ st %o1,[%sp+2223] +/* 0x0254 */ fmovs %f0,%f2 +/* 0x0258 */ ld [%sp+2223],%f3 +/* 0x025c */ fsubd %f2,%f0,%f0 +/* 0x0260 */ std %f0,[%i0+%o0] +/* 0x0264 */ ble,a,pt %icc,.L900000311 +/* 0x0268 */ ldd [%o5],%f0 + .L77000150: +/* 0x026c */ ret ! Result = +/* 0x0270 */ restore %g0,%g0,%g0 +/* 0x0274 0 */ .type conv_i32_to_d16,2 +/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-192,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! 
for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2 +/* 0x0160 175 */ sethi %hi(0xfc00),%g3 +/* 0x0164 180 */ ldx [%o3+%g2],%g1 +/* 0x0168 175 */ sll %i2,1,%i4 +/* 0x016c */ sub %g5,%i2,%g4 +/* 0x0170 177 */ sra %i2,0,%o3 +/* 0x0174 175 */ add %g3,1023,%g3 +/* 0x0178 178 */ ldd [%g1],%f2 +/* 0x017c */ sllx %o3,2,%o2 +/* 0x0180 175 */ add %i4,1,%g2 +/* 0x0184 177 */ or %g0,%o3,%o1 +/* 0x0188 */ cmp %g4,6 +/* 0x018c 175 */ bl,pn %icc,.L77000161 +/* 0x0190 */ sra %i2,0,%o3 +/* 0x0194 177 */ or %g0,%o2,%o0 +/* 0x0198 178 */ ld [%i0+%o2],%f5 +/* 0x019c 179 */ fmovs %f2,%f8 +/* 0x01a0 175 */ add %o0,4,%o3 +/* 0x01a4 177 */ ld [%i0+%o0],%o7 +/* 0x01a8 180 */ fmovs %f2,%f6 +/* 0x01ac 178 */ fmovs %f2,%f4 +/* 0x01b0 */ sllx %o1,3,%o2 +/* 0x01b4 175 */ add %o3,4,%o5 +/* 0x01b8 179 */ sra %i4,0,%o0 +/* 0x01bc 175 */ add %o3,8,%o4 +/* 0x01c0 178 */ fsubd %f4,%f2,%f4 +/* 0x01c4 */ std %f4,[%i3+%o2] +/* 0x01c8 179 */ sllx %o0,3,%i5 +/* 0x01cc */ and %o7,%g3,%o0 +/* 0x01d0 */ st %o0,[%sp+2227] +/* 0x01d4 175 */ add %i5,16,%o1 +/* 0x01d8 180 */ srl %o7,16,%g4 +/* 0x01dc */ add %i2,1,%i2 +/* 0x01e0 */ sra %g2,0,%o0 +/* 0x01e4 175 */ add %o2,8,%o2 +/* 0x01e8 179 */ fmovs %f2,%f4 +/* 0x01ec 180 */ sllx %o0,3,%l0 +/* 0x01f0 */ add %i4,3,%g2 +/* 0x01f4 179 */ ld [%sp+2227],%f5 +/* 0x01f8 175 */ add %l0,16,%o0 +/* 0x01fc 180 */ add %i4,2,%i4 +/* 0x0200 175 */ sub %g5,1,%o7 +/* 0x0204 180 */ add %i2,3,%i2 +/* 0x0208 179 */ fsubd %f4,%f2,%f4 +/* 0x020c */ std %f4,[%i1+%i5] +/* 0x0210 180 */ st %g4,[%sp+2223] +/* 0x0214 177 */ ld [%i0+%o3],%i5 +/* 0x0218 180 */ fmovs %f2,%f4 +/* 0x021c */ srl %i5,16,%g4 +/* 0x0220 179 */ and %i5,%g3,%i5 +/* 0x0224 180 */ ld [%sp+2223],%f5 +/* 0x0228 */ fsubd %f4,%f2,%f4 +/* 0x022c */ std %f4,[%i1+%l0] +/* 0x0230 */ st %g4,[%sp+2223] +/* 0x0234 177 */ ld [%i0+%o5],%g4 +/* 0x0238 179 */ st %i5,[%sp+2227] +/* 0x023c 178 */ fmovs %f2,%f4 +/* 0x0240 180 */ srl %g4,16,%i5 +/* 0x0244 179 */ and %g4,%g3,%g4 +/* 0x0248 180 */ ld [%sp+2223],%f7 +/* 0x024c */ st %i5,[%sp+2223] +/* 0x0250 178 */ ld [%i0+%o3],%f5 +/* 0x0254 180 */ fsubd %f6,%f2,%f6 +/* 0x0258 177 */ ld [%i0+%o4],%o3 +/* 0x025c 178 */ fsubd %f4,%f2,%f4 +/* 0x0260 179 */ ld [%sp+2227],%f9 +/* 0x0264 180 */ ld [%sp+2223],%f1 +/* 0x0268 179 */ st %g4,[%sp+2227] +/* 0x026c */ fsubd %f8,%f2,%f8 +/* 0x0270 */ std %f8,[%i1+%o1] +/* 0x0274 180 */ std %f6,[%i1+%o0] +/* 0x0278 178 */ std %f4,[%i3+%o2] + .L900000411: +/* 0x027c 179 */ ld [%sp+2227],%f13 +/* 0x0280 180 */ srl %o3,16,%g4 +/* 0x0284 */ add %i2,2,%i2 +/* 0x0288 */ st %g4,[%sp+2223] +/* 0x028c */ cmp %i2,%o7 +/* 0x0290 */ add %g2,4,%g2 +/* 0x0294 178 */ ld [%i0+%o5],%f11 +/* 0x0298 180 */ add %i4,4,%i4 +/* 0x029c 175 */ add %o4,4,%o5 +/* 0x02a0 177 */ ld [%i0+%o5],%g4 +/* 0x02a4 179 */ and %o3,%g3,%o3 +/* 0x02a8 */ st %o3,[%sp+2227] +/* 0x02ac 180 */ fmovs %f2,%f0 +/* 0x02b0 179 */ fmovs %f2,%f12 +/* 0x02b4 180 */ fsubd %f0,%f2,%f8 +/* 0x02b8 179 */ fsubd %f12,%f2,%f4 +/* 0x02bc 175 */ add %o1,16,%o1 +/* 0x02c0 180 */ ld [%sp+2223],%f7 +/* 0x02c4 178 */ fmovs %f2,%f10 +/* 0x02c8 179 */ std %f4,[%i1+%o1] +/* 0x02cc 175 */ add %o0,16,%o0 +/* 0x02d0 178 */ fsubd %f10,%f2,%f4 +/* 0x02d4 175 */ add %o2,8,%o2 +/* 0x02d8 180 */ std %f8,[%i1+%o0] +/* 0x02dc 178 */ std %f4,[%i3+%o2] +/* 0x02e0 179 */ ld [%sp+2227],%f9 +/* 0x02e4 180 */ srl %g4,16,%o3 +/* 0x02e8 */ st %o3,[%sp+2223] +/* 0x02ec 178 */ ld [%i0+%o4],%f5 +/* 0x02f0 175 */ add %o4,8,%o4 +/* 0x02f4 177 */ ld [%i0+%o4],%o3 +/* 0x02f8 179 */ and %g4,%g3,%g4 
+/* 0x02fc */ st %g4,[%sp+2227] +/* 0x0300 180 */ fmovs %f2,%f6 +/* 0x0304 179 */ fmovs %f2,%f8 +/* 0x0308 180 */ fsubd %f6,%f2,%f6 +/* 0x030c 179 */ fsubd %f8,%f2,%f8 +/* 0x0310 175 */ add %o1,16,%o1 +/* 0x0314 180 */ ld [%sp+2223],%f1 +/* 0x0318 178 */ fmovs %f2,%f4 +/* 0x031c 179 */ std %f8,[%i1+%o1] +/* 0x0320 175 */ add %o0,16,%o0 +/* 0x0324 178 */ fsubd %f4,%f2,%f4 +/* 0x0328 175 */ add %o2,8,%o2 +/* 0x032c 180 */ std %f6,[%i1+%o0] +/* 0x0330 */ bl,pt %icc,.L900000411 +/* 0x0334 */ std %f4,[%i3+%o2] + .L900000414: +/* 0x0338 180 */ srl %o3,16,%o7 +/* 0x033c */ st %o7,[%sp+2223] +/* 0x0340 179 */ fmovs %f2,%f12 +/* 0x0344 178 */ ld [%i0+%o5],%f11 +/* 0x0348 180 */ fmovs %f2,%f0 +/* 0x034c 179 */ and %o3,%g3,%g4 +/* 0x0350 180 */ fmovs %f2,%f6 +/* 0x0354 175 */ add %o1,16,%o3 +/* 0x0358 */ add %o0,16,%o7 +/* 0x035c 178 */ fmovs %f2,%f10 +/* 0x0360 175 */ add %o2,8,%o2 +/* 0x0364 */ add %o1,32,%o5 +/* 0x0368 179 */ ld [%sp+2227],%f13 +/* 0x036c 178 */ fmovs %f2,%f4 +/* 0x0370 175 */ add %o0,32,%o1 +/* 0x0374 180 */ ld [%sp+2223],%f7 +/* 0x0378 175 */ add %o2,8,%o0 +/* 0x037c 180 */ cmp %i2,%g5 +/* 0x0380 179 */ st %g4,[%sp+2227] +/* 0x0384 */ fsubd %f12,%f2,%f8 +/* 0x0388 180 */ add %g2,6,%g2 +/* 0x038c 179 */ std %f8,[%i1+%o3] +/* 0x0390 180 */ fsubd %f0,%f2,%f0 +/* 0x0394 177 */ sra %i2,0,%o3 +/* 0x0398 180 */ std %f0,[%i1+%o7] +/* 0x039c 178 */ fsubd %f10,%f2,%f0 +/* 0x03a0 180 */ add %i4,6,%i4 +/* 0x03a4 178 */ std %f0,[%i3+%o2] +/* 0x03a8 */ sllx %o3,2,%o2 +/* 0x03ac 179 */ ld [%sp+2227],%f9 +/* 0x03b0 178 */ ld [%i0+%o4],%f5 +/* 0x03b4 179 */ fmovs %f2,%f8 +/* 0x03b8 */ fsubd %f8,%f2,%f0 +/* 0x03bc */ std %f0,[%i1+%o5] +/* 0x03c0 180 */ fsubd %f6,%f2,%f0 +/* 0x03c4 */ std %f0,[%i1+%o1] +/* 0x03c8 178 */ fsubd %f4,%f2,%f0 +/* 0x03cc 180 */ bge,pn %icc,.L77000164 +/* 0x03d0 */ std %f0,[%i3+%o0] + .L77000161: +/* 0x03d4 178 */ ldd [%g1],%f2 + .L900000416: +/* 0x03d8 178 */ ld [%i0+%o2],%f5 +/* 0x03dc 179 */ sra %i4,0,%o0 +/* 0x03e0 180 */ add %i2,1,%i2 +/* 0x03e4 177 */ ld [%i0+%o2],%o1 +/* 0x03e8 178 */ sllx %o3,3,%o3 +/* 0x03ec 180 */ add %i4,2,%i4 +/* 0x03f0 178 */ fmovs %f2,%f4 +/* 0x03f4 179 */ sllx %o0,3,%o4 +/* 0x03f8 180 */ cmp %i2,%g5 +/* 0x03fc 179 */ and %o1,%g3,%o0 +/* 0x0400 178 */ fsubd %f4,%f2,%f0 +/* 0x0404 */ std %f0,[%i3+%o3] +/* 0x0408 180 */ srl %o1,16,%o1 +/* 0x040c 179 */ st %o0,[%sp+2227] +/* 0x0410 180 */ sra %g2,0,%o0 +/* 0x0414 */ add %g2,2,%g2 +/* 0x0418 177 */ sra %i2,0,%o3 +/* 0x041c 180 */ sllx %o0,3,%o0 +/* 0x0420 179 */ fmovs %f2,%f4 +/* 0x0424 */ sllx %o3,2,%o2 +/* 0x0428 */ ld [%sp+2227],%f5 +/* 0x042c */ fsubd %f4,%f2,%f0 +/* 0x0430 */ std %f0,[%i1+%o4] +/* 0x0434 180 */ st %o1,[%sp+2223] +/* 0x0438 */ fmovs %f2,%f4 +/* 0x043c */ ld [%sp+2223],%f5 +/* 0x0440 */ fsubd %f4,%f2,%f0 +/* 0x0444 */ std %f0,[%i1+%o0] +/* 0x0448 */ bl,a,pt %icc,.L900000416 +/* 0x044c */ ldd [%g1],%f2 + .L77000164: +/* 0x0450 */ ret ! Result = +/* 0x0454 */ restore %g0,%g0,%g0 +/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ save %sp,-176,%sp +/* 0x0004 */ or %g0,%i2,%o1 +/* 0x0008 */ or %g0,%i0,%i2 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 188 !int i; +! 190 ! 
if(i32[len]>0) i=-1; + +/* 0x000c 190 */ sra %o1,0,%g2 +/* 0x0010 */ or %g0,-1,%o2 +/* 0x0014 */ sllx %g2,2,%g2 +/* 0x0018 */ ld [%i2+%g2],%g2 +/* 0x001c */ cmp %g2,0 +/* 0x0020 */ bleu,pn %icc,.L77000175 +/* 0x0024 */ or %g0,%i1,%i0 +/* 0x0028 */ ba .L900000511 +/* 0x002c */ cmp %o2,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0030 193 */ sub %o1,1,%o2 +/* 0x0034 */ cmp %o2,0 +/* 0x0038 */ bl,pn %icc,.L77000182 +/* 0x003c */ sra %o2,0,%g2 + .L900000510: + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x0040 195 */ sllx %g2,2,%g2 +/* 0x0044 */ sub %o2,1,%o0 +/* 0x0048 */ ld [%i1+%g2],%g3 +/* 0x004c */ ld [%i2+%g2],%g2 +/* 0x0050 */ cmp %g2,%g3 +/* 0x0054 */ bne,pn %icc,.L77000182 +/* 0x0058 */ nop +/* 0x005c 0 */ or %g0,%o0,%o2 +/* 0x0060 195 */ cmp %o0,0 +/* 0x0064 */ bge,pt %icc,.L900000510 +/* 0x0068 */ sra %o2,0,%g2 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x006c 198 */ cmp %o2,0 + .L900000511: +/* 0x0070 198 */ bl,pn %icc,.L77000198 +/* 0x0074 */ sra %o2,0,%g2 +/* 0x0078 */ sllx %g2,2,%g2 +/* 0x007c */ ld [%i1+%g2],%g3 +/* 0x0080 */ ld [%i2+%g2],%g2 +/* 0x0084 */ cmp %g2,%g3 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! for(i=0;i>32; + +/* 0x00c8 205 */ or %g0,5,%i1 +/* 0x00cc 203 */ ld [%i0],%o2 +/* 0x00d0 201 */ or %g0,8,%o5 +/* 0x00d4 */ or %g0,12,%o4 +/* 0x00d8 203 */ ld [%i0+4],%o3 +/* 0x00dc 201 */ or %g0,16,%g1 +/* 0x00e0 203 */ ld [%i2+4],%o0 +/* 0x00e4 201 */ sub %o1,%o2,%o1 +/* 0x00e8 203 */ ld [%i0+8],%i3 +/* 0x00ec 204 */ and %o1,%g2,%g5 +/* 0x00f0 */ st %g5,[%i2] +/* 0x00f4 205 */ srax %o1,32,%g5 +/* 0x00f8 201 */ sub %o0,%o3,%o0 +/* 0x00fc 203 */ ld [%i0+12],%o2 +/* 0x0100 201 */ add %o0,%g5,%o0 +/* 0x0104 204 */ and %o0,%g2,%g5 +/* 0x0108 */ st %g5,[%i2+4] +/* 0x010c 205 */ srax %o0,32,%o0 +/* 0x0110 203 */ ld [%i2+8],%o1 +/* 0x0114 */ ld [%i2+12],%o3 +/* 0x0118 201 */ sub %o1,%i3,%o1 + .L900000505: +/* 0x011c */ add %g1,4,%g3 +/* 0x0120 203 */ ld [%g1+%i2],%g5 +/* 0x0124 201 */ add %o1,%o0,%o0 +/* 0x0128 203 */ ld [%i0+%g1],%i3 +/* 0x012c 201 */ sub %o3,%o2,%o1 +/* 0x0130 204 */ and %o0,%g2,%o2 +/* 0x0134 */ st %o2,[%o5+%i2] +/* 0x0138 205 */ srax %o0,32,%o2 +/* 0x013c */ add %i1,4,%i1 +/* 0x0140 201 */ add %g1,8,%o5 +/* 0x0144 203 */ ld [%g3+%i2],%o0 +/* 0x0148 201 */ add %o1,%o2,%o1 +/* 0x014c 203 */ ld [%i0+%g3],%o3 +/* 0x0150 201 */ sub %g5,%i3,%o2 +/* 0x0154 204 */ and %o1,%g2,%g5 +/* 0x0158 */ st %g5,[%o4+%i2] +/* 0x015c 205 */ srax %o1,32,%g5 +/* 0x0160 */ cmp %i1,%o7 +/* 0x0164 201 */ add %g1,12,%o4 +/* 0x0168 203 */ ld [%o5+%i2],%o1 +/* 0x016c 201 */ add %o2,%g5,%o2 +/* 0x0170 203 */ ld [%i0+%o5],%i3 +/* 0x0174 201 */ sub %o0,%o3,%o0 +/* 0x0178 204 */ and %o2,%g2,%o3 +/* 0x017c */ st %o3,[%g1+%i2] +/* 0x0180 205 */ srax %o2,32,%g5 +/* 0x0184 203 */ ld [%o4+%i2],%o3 +/* 0x0188 201 */ add %g1,16,%g1 +/* 0x018c */ add %o0,%g5,%o0 +/* 0x0190 203 */ ld [%i0+%o4],%o2 +/* 0x0194 201 */ sub %o1,%i3,%o1 +/* 0x0198 204 */ and %o0,%g2,%g5 +/* 0x019c */ st %g5,[%g3+%i2] +/* 0x01a0 205 */ ble,pt %icc,.L900000505 +/* 0x01a4 */ srax %o0,32,%o0 + .L900000508: +/* 0x01a8 */ add %o1,%o0,%g3 +/* 0x01ac */ sub %o3,%o2,%o1 +/* 0x01b0 203 */ ld [%g1+%i2],%o0 +/* 0x01b4 */ ld [%i0+%g1],%o2 +/* 0x01b8 205 */ srax %g3,32,%o7 +/* 0x01bc 204 */ and %g3,%g2,%o3 +/* 0x01c0 201 */ add %o1,%o7,%o1 +/* 0x01c4 204 */ st %o3,[%o5+%i2] +/* 0x01c8 205 */ cmp %i1,%g4 +/* 0x01cc 201 */ sub %o0,%o2,%o0 +/* 0x01d0 204 */ and %o1,%g2,%o2 +/* 0x01d4 */ st 
%o2,[%o4+%i2] +/* 0x01d8 205 */ srax %o1,32,%o1 +/* 0x01dc 203 */ sra %i1,0,%o2 +/* 0x01e0 201 */ add %o0,%o1,%o0 +/* 0x01e4 205 */ srax %o0,32,%g5 +/* 0x01e8 204 */ and %o0,%g2,%o1 +/* 0x01ec */ st %o1,[%g1+%i2] +/* 0x01f0 205 */ bg,pn %icc,.L77000191 +/* 0x01f4 */ sllx %o2,2,%o1 + .L77000199: +/* 0x01f8 0 */ or %g0,%o1,%g1 + .L900000509: +/* 0x01fc 203 */ ld [%o1+%i2],%o0 +/* 0x0200 205 */ add %i1,1,%i1 +/* 0x0204 203 */ ld [%i0+%o1],%o1 +/* 0x0208 */ sra %i1,0,%o2 +/* 0x020c 205 */ cmp %i1,%g4 +/* 0x0210 203 */ add %g5,%o0,%o0 +/* 0x0214 */ sub %o0,%o1,%o0 +/* 0x0218 205 */ srax %o0,32,%g5 +/* 0x021c 204 */ and %o0,%g2,%o1 +/* 0x0220 */ st %o1,[%g1+%i2] +/* 0x0224 */ sllx %o2,2,%o1 +/* 0x0228 205 */ ble,pt %icc,.L900000509 +/* 0x022c */ or %g0,%o1,%g1 + .L77000191: +/* 0x0230 */ ret ! Result = +/* 0x0234 */ restore %g0,%g0,%g0 +/* 0x0238 0 */ .type adjust_montf_result,2 +/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +/* 000000 */ .skip 24 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-224,%sp + .L900000643: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x000c */ ldx [%fp+2223],%l0 + +! 206 ! } +! 207 ! } +! 208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x0010 232 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ fmovd %f14,%f30 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 */ sll %l0,1,%o3 +/* 0x0024 223 */ add %g5,%o7,%o4 +/* 0x0028 232 */ sra %o3,0,%g5 +/* 0x002c */ ldx [%o4+%g2],%o7 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x0034 */ sethi %hi(TwoTo16),%g4 +/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x003c 232 */ ldd [%o7],%f0 +/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0044 223 */ or %g0,%i4,%o0 +/* 0x0048 232 */ sllx %g5,3,%g4 +/* 0x004c 239 */ ldx [%o4+%g2],%o5 +/* 0x0050 223 */ or %g0,%i5,%l3 +/* 0x0054 */ or %g0,%i0,%l2 +/* 0x0058 239 */ ldx [%o4+%g3],%o4 +/* 0x005c 234 */ cmp %l0,16 +/* 0x0060 232 */ std %f0,[%i2+%g4] +/* 0x0064 234 */ be,pn %icc,.L77000279 +/* 0x0068 */ or %g0,%i3,%l4 +/* 0x006c 236 */ sll %l0,2,%g2 +/* 0x0070 223 */ or %g0,%o0,%i5 +/* 0x0074 236 */ add %g2,2,%o0 +/* 0x0078 223 */ or %g0,%i1,%i4 +/* 0x007c 236 */ cmp %o0,0 +/* 0x0080 223 */ or %g0,%i2,%l1 +/* 0x0084 236 */ ble,a,pt %icc,.L900000657 +/* 0x0088 */ ldd [%i1],%f6 + +! 241 ! pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! 
pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;iused && (a)->dp[(a)->used - 1] == 0) \ + --((a)->used); \ + (a)->sign = (a)->used ? (a)->sign : ZPOS; \ + } + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +/* sqr macros only */ +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i) \ + : "%rax", "%rdx", "cc"); + +#define SQRADD2(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDSC(i, j) \ + __asm__( \ + "movq %3,%%rax \n\t" \ + "mulq %4 \n\t" \ + "movq %%rax,%0 \n\t" \ + "movq %%rdx,%1 \n\t" \ + "xorq %2,%2 \n\t" \ + : "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDAC(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDDB \ + __asm__( \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + : "=&r"(c0), "=&r"(c1), "=&r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \ + : "cc"); + +void +s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[8]; + + memcpy(at, A->dp, 4 * sizeof(mp_digit)); + memcpy(at + 4, B->dp, 4 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); + MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); + MULADD(at[1], at[5]); + MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); + MULADD(at[1], at[6]); + MULADD(at[2], at[5]); + MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); + MULADD(at[2], at[6]); + MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); + MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_8(const mp_int *A, 
const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[16]; + + memcpy(at, A->dp, 8 * sizeof(mp_digit)); + memcpy(at + 8, B->dp, 8 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[9]); + MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); + MULADD(at[1], at[9]); + MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); + MULADD(at[1], at[10]); + MULADD(at[2], at[9]); + MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); + MULADD(at[1], at[11]); + MULADD(at[2], at[10]); + MULADD(at[3], at[9]); + MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); + MULADD(at[1], at[12]); + MULADD(at[2], at[11]); + MULADD(at[3], at[10]); + MULADD(at[4], at[9]); + MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); + MULADD(at[1], at[13]); + MULADD(at[2], at[12]); + MULADD(at[3], at[11]); + MULADD(at[4], at[10]); + MULADD(at[5], at[9]); + MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); + MULADD(at[1], at[14]); + MULADD(at[2], at[13]); + MULADD(at[3], at[12]); + MULADD(at[4], at[11]); + MULADD(at[5], at[10]); + MULADD(at[6], at[9]); + MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); + MULADD(at[2], at[14]); + MULADD(at[3], at[13]); + MULADD(at[4], at[12]); + MULADD(at[5], at[11]); + MULADD(at[6], at[10]); + MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); + MULADD(at[3], at[14]); + MULADD(at[4], at[13]); + MULADD(at[5], at[12]); + MULADD(at[6], at[11]); + MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); + MULADD(at[4], at[14]); + MULADD(at[5], at[13]); + MULADD(at[6], at[12]); + MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); + MULADD(at[5], at[14]); + MULADD(at[6], at[13]); + MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[5], at[15]); + MULADD(at[6], at[14]); + MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); + MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[32]; + + memcpy(at, A->dp, 16 * sizeof(mp_digit)); + memcpy(at + 16, B->dp, 16 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); + MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); + MULADD(at[1], at[17]); + MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); + MULADD(at[1], at[18]); + MULADD(at[2], at[17]); + MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); + MULADD(at[1], at[19]); + MULADD(at[2], at[18]); + MULADD(at[3], at[17]); + MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); + 
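/* column 5 of the 16x16 multiply: operand indices in each product
+       sum to 21 (5 + 16, since B's digits start at at[16]) */
+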
MULADD(at[1], at[20]); + MULADD(at[2], at[19]); + MULADD(at[3], at[18]); + MULADD(at[4], at[17]); + MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); + MULADD(at[1], at[21]); + MULADD(at[2], at[20]); + MULADD(at[3], at[19]); + MULADD(at[4], at[18]); + MULADD(at[5], at[17]); + MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); + MULADD(at[1], at[22]); + MULADD(at[2], at[21]); + MULADD(at[3], at[20]); + MULADD(at[4], at[19]); + MULADD(at[5], at[18]); + MULADD(at[6], at[17]); + MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); + MULADD(at[1], at[23]); + MULADD(at[2], at[22]); + MULADD(at[3], at[21]); + MULADD(at[4], at[20]); + MULADD(at[5], at[19]); + MULADD(at[6], at[18]); + MULADD(at[7], at[17]); + MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); + MULADD(at[1], at[24]); + MULADD(at[2], at[23]); + MULADD(at[3], at[22]); + MULADD(at[4], at[21]); + MULADD(at[5], at[20]); + MULADD(at[6], at[19]); + MULADD(at[7], at[18]); + MULADD(at[8], at[17]); + MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); + MULADD(at[1], at[25]); + MULADD(at[2], at[24]); + MULADD(at[3], at[23]); + MULADD(at[4], at[22]); + MULADD(at[5], at[21]); + MULADD(at[6], at[20]); + MULADD(at[7], at[19]); + MULADD(at[8], at[18]); + MULADD(at[9], at[17]); + MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); + MULADD(at[1], at[26]); + MULADD(at[2], at[25]); + MULADD(at[3], at[24]); + MULADD(at[4], at[23]); + MULADD(at[5], at[22]); + MULADD(at[6], at[21]); + MULADD(at[7], at[20]); + MULADD(at[8], at[19]); + MULADD(at[9], at[18]); + MULADD(at[10], at[17]); + MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); + MULADD(at[1], at[27]); + MULADD(at[2], at[26]); + MULADD(at[3], at[25]); + MULADD(at[4], at[24]); + MULADD(at[5], at[23]); + MULADD(at[6], at[22]); + MULADD(at[7], at[21]); + MULADD(at[8], at[20]); + MULADD(at[9], at[19]); + MULADD(at[10], at[18]); + MULADD(at[11], at[17]); + MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); + MULADD(at[1], at[28]); + MULADD(at[2], at[27]); + MULADD(at[3], at[26]); + MULADD(at[4], at[25]); + MULADD(at[5], at[24]); + MULADD(at[6], at[23]); + MULADD(at[7], at[22]); + MULADD(at[8], at[21]); + MULADD(at[9], at[20]); + MULADD(at[10], at[19]); + MULADD(at[11], at[18]); + MULADD(at[12], at[17]); + MULADD(at[13], at[16]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); + MULADD(at[1], at[29]); + MULADD(at[2], at[28]); + MULADD(at[3], at[27]); + MULADD(at[4], at[26]); + MULADD(at[5], at[25]); + MULADD(at[6], at[24]); + MULADD(at[7], at[23]); + MULADD(at[8], at[22]); + MULADD(at[9], at[21]); + MULADD(at[10], at[20]); + MULADD(at[11], at[19]); + MULADD(at[12], at[18]); + MULADD(at[13], at[17]); + MULADD(at[14], at[16]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); + MULADD(at[1], at[30]); + MULADD(at[2], at[29]); + MULADD(at[3], at[28]); + MULADD(at[4], at[27]); + MULADD(at[5], at[26]); + MULADD(at[6], at[25]); + MULADD(at[7], at[24]); + MULADD(at[8], at[23]); + MULADD(at[9], at[22]); + MULADD(at[10], at[21]); + MULADD(at[11], at[20]); + MULADD(at[12], at[19]); + MULADD(at[13], at[18]); + MULADD(at[14], at[17]); + MULADD(at[15], at[16]); + 
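/* column 15 is the widest column: all sixteen products
+       at[i]*at[31-i] accumulate before the store */
+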
COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); + MULADD(at[2], at[30]); + MULADD(at[3], at[29]); + MULADD(at[4], at[28]); + MULADD(at[5], at[27]); + MULADD(at[6], at[26]); + MULADD(at[7], at[25]); + MULADD(at[8], at[24]); + MULADD(at[9], at[23]); + MULADD(at[10], at[22]); + MULADD(at[11], at[21]); + MULADD(at[12], at[20]); + MULADD(at[13], at[19]); + MULADD(at[14], at[18]); + MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); + MULADD(at[3], at[30]); + MULADD(at[4], at[29]); + MULADD(at[5], at[28]); + MULADD(at[6], at[27]); + MULADD(at[7], at[26]); + MULADD(at[8], at[25]); + MULADD(at[9], at[24]); + MULADD(at[10], at[23]); + MULADD(at[11], at[22]); + MULADD(at[12], at[21]); + MULADD(at[13], at[20]); + MULADD(at[14], at[19]); + MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); + MULADD(at[4], at[30]); + MULADD(at[5], at[29]); + MULADD(at[6], at[28]); + MULADD(at[7], at[27]); + MULADD(at[8], at[26]); + MULADD(at[9], at[25]); + MULADD(at[10], at[24]); + MULADD(at[11], at[23]); + MULADD(at[12], at[22]); + MULADD(at[13], at[21]); + MULADD(at[14], at[20]); + MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); + MULADD(at[5], at[30]); + MULADD(at[6], at[29]); + MULADD(at[7], at[28]); + MULADD(at[8], at[27]); + MULADD(at[9], at[26]); + MULADD(at[10], at[25]); + MULADD(at[11], at[24]); + MULADD(at[12], at[23]); + MULADD(at[13], at[22]); + MULADD(at[14], at[21]); + MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); + MULADD(at[6], at[30]); + MULADD(at[7], at[29]); + MULADD(at[8], at[28]); + MULADD(at[9], at[27]); + MULADD(at[10], at[26]); + MULADD(at[11], at[25]); + MULADD(at[12], at[24]); + MULADD(at[13], at[23]); + MULADD(at[14], at[22]); + MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); + MULADD(at[7], at[30]); + MULADD(at[8], at[29]); + MULADD(at[9], at[28]); + MULADD(at[10], at[27]); + MULADD(at[11], at[26]); + MULADD(at[12], at[25]); + MULADD(at[13], at[24]); + MULADD(at[14], at[23]); + MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); + MULADD(at[8], at[30]); + MULADD(at[9], at[29]); + MULADD(at[10], at[28]); + MULADD(at[11], at[27]); + MULADD(at[12], at[26]); + MULADD(at[13], at[25]); + MULADD(at[14], at[24]); + MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); + MULADD(at[9], at[30]); + MULADD(at[10], at[29]); + MULADD(at[11], at[28]); + MULADD(at[12], at[27]); + MULADD(at[13], at[26]); + MULADD(at[14], at[25]); + MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[9], at[31]); + MULADD(at[10], at[30]); + MULADD(at[11], at[29]); + MULADD(at[12], at[28]); + MULADD(at[13], at[27]); + MULADD(at[14], at[26]); + MULADD(at[15], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[10], at[31]); + MULADD(at[11], at[30]); + MULADD(at[12], at[29]); + MULADD(at[13], at[28]); + MULADD(at[14], at[27]); + MULADD(at[15], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[11], at[31]); + MULADD(at[12], at[30]); + MULADD(at[13], at[29]); + MULADD(at[14], at[28]); + MULADD(at[15], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[12], at[31]); + MULADD(at[13], at[30]); + MULADD(at[14], at[29]); + 
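/* columns shrink again past the midpoint; the product below
+       closes column 27 */
+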
MULADD(at[15], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[13], at[31]); + MULADD(at[14], at[30]); + MULADD(at[15], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[14], at[31]); + MULADD(at[15], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[15], at[31]); + COMBA_STORE(C->dp[30]); + COMBA_STORE2(C->dp[31]); + C->used = 32; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[64]; + + memcpy(at, A->dp, 32 * sizeof(mp_digit)); + memcpy(at + 32, B->dp, 32 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); + MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); + MULADD(at[1], at[33]); + MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); + MULADD(at[1], at[34]); + MULADD(at[2], at[33]); + MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); + MULADD(at[1], at[35]); + MULADD(at[2], at[34]); + MULADD(at[3], at[33]); + MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); + MULADD(at[1], at[36]); + MULADD(at[2], at[35]); + MULADD(at[3], at[34]); + MULADD(at[4], at[33]); + MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[38]); + MULADD(at[1], at[37]); + MULADD(at[2], at[36]); + MULADD(at[3], at[35]); + MULADD(at[4], at[34]); + MULADD(at[5], at[33]); + MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); + MULADD(at[1], at[38]); + MULADD(at[2], at[37]); + MULADD(at[3], at[36]); + MULADD(at[4], at[35]); + MULADD(at[5], at[34]); + MULADD(at[6], at[33]); + MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); + MULADD(at[1], at[39]); + MULADD(at[2], at[38]); + MULADD(at[3], at[37]); + MULADD(at[4], at[36]); + MULADD(at[5], at[35]); + MULADD(at[6], at[34]); + MULADD(at[7], at[33]); + MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); + MULADD(at[1], at[40]); + MULADD(at[2], at[39]); + MULADD(at[3], at[38]); + MULADD(at[4], at[37]); + MULADD(at[5], at[36]); + MULADD(at[6], at[35]); + MULADD(at[7], at[34]); + MULADD(at[8], at[33]); + MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); + MULADD(at[1], at[41]); + MULADD(at[2], at[40]); + MULADD(at[3], at[39]); + MULADD(at[4], at[38]); + MULADD(at[5], at[37]); + MULADD(at[6], at[36]); + MULADD(at[7], at[35]); + MULADD(at[8], at[34]); + MULADD(at[9], at[33]); + MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); + MULADD(at[1], at[42]); + MULADD(at[2], at[41]); + MULADD(at[3], at[40]); + MULADD(at[4], at[39]); + MULADD(at[5], at[38]); + MULADD(at[6], at[37]); + MULADD(at[7], at[36]); + MULADD(at[8], at[35]); + MULADD(at[9], at[34]); + MULADD(at[10], at[33]); + MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); + MULADD(at[1], at[43]); + MULADD(at[2], at[42]); + MULADD(at[3], at[41]); + MULADD(at[4], at[40]); + MULADD(at[5], at[39]); + MULADD(at[6], at[38]); + MULADD(at[7], at[37]); + MULADD(at[8], at[36]); + MULADD(at[9], at[35]); + 
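/* same scheme at 32 digits: indices in column 12 sum to 44 (12 + 32) */
+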
MULADD(at[10], at[34]); + MULADD(at[11], at[33]); + MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); + MULADD(at[1], at[44]); + MULADD(at[2], at[43]); + MULADD(at[3], at[42]); + MULADD(at[4], at[41]); + MULADD(at[5], at[40]); + MULADD(at[6], at[39]); + MULADD(at[7], at[38]); + MULADD(at[8], at[37]); + MULADD(at[9], at[36]); + MULADD(at[10], at[35]); + MULADD(at[11], at[34]); + MULADD(at[12], at[33]); + MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); + MULADD(at[1], at[45]); + MULADD(at[2], at[44]); + MULADD(at[3], at[43]); + MULADD(at[4], at[42]); + MULADD(at[5], at[41]); + MULADD(at[6], at[40]); + MULADD(at[7], at[39]); + MULADD(at[8], at[38]); + MULADD(at[9], at[37]); + MULADD(at[10], at[36]); + MULADD(at[11], at[35]); + MULADD(at[12], at[34]); + MULADD(at[13], at[33]); + MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); + MULADD(at[1], at[46]); + MULADD(at[2], at[45]); + MULADD(at[3], at[44]); + MULADD(at[4], at[43]); + MULADD(at[5], at[42]); + MULADD(at[6], at[41]); + MULADD(at[7], at[40]); + MULADD(at[8], at[39]); + MULADD(at[9], at[38]); + MULADD(at[10], at[37]); + MULADD(at[11], at[36]); + MULADD(at[12], at[35]); + MULADD(at[13], at[34]); + MULADD(at[14], at[33]); + MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); + MULADD(at[1], at[47]); + MULADD(at[2], at[46]); + MULADD(at[3], at[45]); + MULADD(at[4], at[44]); + MULADD(at[5], at[43]); + MULADD(at[6], at[42]); + MULADD(at[7], at[41]); + MULADD(at[8], at[40]); + MULADD(at[9], at[39]); + MULADD(at[10], at[38]); + MULADD(at[11], at[37]); + MULADD(at[12], at[36]); + MULADD(at[13], at[35]); + MULADD(at[14], at[34]); + MULADD(at[15], at[33]); + MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); + MULADD(at[1], at[48]); + MULADD(at[2], at[47]); + MULADD(at[3], at[46]); + MULADD(at[4], at[45]); + MULADD(at[5], at[44]); + MULADD(at[6], at[43]); + MULADD(at[7], at[42]); + MULADD(at[8], at[41]); + MULADD(at[9], at[40]); + MULADD(at[10], at[39]); + MULADD(at[11], at[38]); + MULADD(at[12], at[37]); + MULADD(at[13], at[36]); + MULADD(at[14], at[35]); + MULADD(at[15], at[34]); + MULADD(at[16], at[33]); + MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); + MULADD(at[1], at[49]); + MULADD(at[2], at[48]); + MULADD(at[3], at[47]); + MULADD(at[4], at[46]); + MULADD(at[5], at[45]); + MULADD(at[6], at[44]); + MULADD(at[7], at[43]); + MULADD(at[8], at[42]); + MULADD(at[9], at[41]); + MULADD(at[10], at[40]); + MULADD(at[11], at[39]); + MULADD(at[12], at[38]); + MULADD(at[13], at[37]); + MULADD(at[14], at[36]); + MULADD(at[15], at[35]); + MULADD(at[16], at[34]); + MULADD(at[17], at[33]); + MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); + MULADD(at[1], at[50]); + MULADD(at[2], at[49]); + MULADD(at[3], at[48]); + MULADD(at[4], at[47]); + MULADD(at[5], at[46]); + MULADD(at[6], at[45]); + MULADD(at[7], at[44]); + MULADD(at[8], at[43]); + MULADD(at[9], at[42]); + MULADD(at[10], at[41]); + MULADD(at[11], at[40]); + MULADD(at[12], at[39]); + MULADD(at[13], at[38]); + MULADD(at[14], at[37]); + MULADD(at[15], at[36]); + MULADD(at[16], at[35]); + MULADD(at[17], at[34]); + MULADD(at[18], at[33]); + MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], 
at[52]); + MULADD(at[1], at[51]); + MULADD(at[2], at[50]); + MULADD(at[3], at[49]); + MULADD(at[4], at[48]); + MULADD(at[5], at[47]); + MULADD(at[6], at[46]); + MULADD(at[7], at[45]); + MULADD(at[8], at[44]); + MULADD(at[9], at[43]); + MULADD(at[10], at[42]); + MULADD(at[11], at[41]); + MULADD(at[12], at[40]); + MULADD(at[13], at[39]); + MULADD(at[14], at[38]); + MULADD(at[15], at[37]); + MULADD(at[16], at[36]); + MULADD(at[17], at[35]); + MULADD(at[18], at[34]); + MULADD(at[19], at[33]); + MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); + MULADD(at[1], at[52]); + MULADD(at[2], at[51]); + MULADD(at[3], at[50]); + MULADD(at[4], at[49]); + MULADD(at[5], at[48]); + MULADD(at[6], at[47]); + MULADD(at[7], at[46]); + MULADD(at[8], at[45]); + MULADD(at[9], at[44]); + MULADD(at[10], at[43]); + MULADD(at[11], at[42]); + MULADD(at[12], at[41]); + MULADD(at[13], at[40]); + MULADD(at[14], at[39]); + MULADD(at[15], at[38]); + MULADD(at[16], at[37]); + MULADD(at[17], at[36]); + MULADD(at[18], at[35]); + MULADD(at[19], at[34]); + MULADD(at[20], at[33]); + MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); + MULADD(at[1], at[53]); + MULADD(at[2], at[52]); + MULADD(at[3], at[51]); + MULADD(at[4], at[50]); + MULADD(at[5], at[49]); + MULADD(at[6], at[48]); + MULADD(at[7], at[47]); + MULADD(at[8], at[46]); + MULADD(at[9], at[45]); + MULADD(at[10], at[44]); + MULADD(at[11], at[43]); + MULADD(at[12], at[42]); + MULADD(at[13], at[41]); + MULADD(at[14], at[40]); + MULADD(at[15], at[39]); + MULADD(at[16], at[38]); + MULADD(at[17], at[37]); + MULADD(at[18], at[36]); + MULADD(at[19], at[35]); + MULADD(at[20], at[34]); + MULADD(at[21], at[33]); + MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); + MULADD(at[1], at[54]); + MULADD(at[2], at[53]); + MULADD(at[3], at[52]); + MULADD(at[4], at[51]); + MULADD(at[5], at[50]); + MULADD(at[6], at[49]); + MULADD(at[7], at[48]); + MULADD(at[8], at[47]); + MULADD(at[9], at[46]); + MULADD(at[10], at[45]); + MULADD(at[11], at[44]); + MULADD(at[12], at[43]); + MULADD(at[13], at[42]); + MULADD(at[14], at[41]); + MULADD(at[15], at[40]); + MULADD(at[16], at[39]); + MULADD(at[17], at[38]); + MULADD(at[18], at[37]); + MULADD(at[19], at[36]); + MULADD(at[20], at[35]); + MULADD(at[21], at[34]); + MULADD(at[22], at[33]); + MULADD(at[23], at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); + MULADD(at[1], at[55]); + MULADD(at[2], at[54]); + MULADD(at[3], at[53]); + MULADD(at[4], at[52]); + MULADD(at[5], at[51]); + MULADD(at[6], at[50]); + MULADD(at[7], at[49]); + MULADD(at[8], at[48]); + MULADD(at[9], at[47]); + MULADD(at[10], at[46]); + MULADD(at[11], at[45]); + MULADD(at[12], at[44]); + MULADD(at[13], at[43]); + MULADD(at[14], at[42]); + MULADD(at[15], at[41]); + MULADD(at[16], at[40]); + MULADD(at[17], at[39]); + MULADD(at[18], at[38]); + MULADD(at[19], at[37]); + MULADD(at[20], at[36]); + MULADD(at[21], at[35]); + MULADD(at[22], at[34]); + MULADD(at[23], at[33]); + MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); + MULADD(at[1], at[56]); + MULADD(at[2], at[55]); + MULADD(at[3], at[54]); + MULADD(at[4], at[53]); + MULADD(at[5], at[52]); + MULADD(at[6], at[51]); + MULADD(at[7], at[50]); + MULADD(at[8], at[49]); + MULADD(at[9], at[48]); + MULADD(at[10], at[47]); + MULADD(at[11], at[46]); + MULADD(at[12], at[45]); + MULADD(at[13], at[44]); + 
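/* within a column the three-word accumulator c0..c2 stays in
+       registers; memory is written only at COMBA_STORE */
+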
MULADD(at[14], at[43]); + MULADD(at[15], at[42]); + MULADD(at[16], at[41]); + MULADD(at[17], at[40]); + MULADD(at[18], at[39]); + MULADD(at[19], at[38]); + MULADD(at[20], at[37]); + MULADD(at[21], at[36]); + MULADD(at[22], at[35]); + MULADD(at[23], at[34]); + MULADD(at[24], at[33]); + MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); + MULADD(at[1], at[57]); + MULADD(at[2], at[56]); + MULADD(at[3], at[55]); + MULADD(at[4], at[54]); + MULADD(at[5], at[53]); + MULADD(at[6], at[52]); + MULADD(at[7], at[51]); + MULADD(at[8], at[50]); + MULADD(at[9], at[49]); + MULADD(at[10], at[48]); + MULADD(at[11], at[47]); + MULADD(at[12], at[46]); + MULADD(at[13], at[45]); + MULADD(at[14], at[44]); + MULADD(at[15], at[43]); + MULADD(at[16], at[42]); + MULADD(at[17], at[41]); + MULADD(at[18], at[40]); + MULADD(at[19], at[39]); + MULADD(at[20], at[38]); + MULADD(at[21], at[37]); + MULADD(at[22], at[36]); + MULADD(at[23], at[35]); + MULADD(at[24], at[34]); + MULADD(at[25], at[33]); + MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); + MULADD(at[1], at[58]); + MULADD(at[2], at[57]); + MULADD(at[3], at[56]); + MULADD(at[4], at[55]); + MULADD(at[5], at[54]); + MULADD(at[6], at[53]); + MULADD(at[7], at[52]); + MULADD(at[8], at[51]); + MULADD(at[9], at[50]); + MULADD(at[10], at[49]); + MULADD(at[11], at[48]); + MULADD(at[12], at[47]); + MULADD(at[13], at[46]); + MULADD(at[14], at[45]); + MULADD(at[15], at[44]); + MULADD(at[16], at[43]); + MULADD(at[17], at[42]); + MULADD(at[18], at[41]); + MULADD(at[19], at[40]); + MULADD(at[20], at[39]); + MULADD(at[21], at[38]); + MULADD(at[22], at[37]); + MULADD(at[23], at[36]); + MULADD(at[24], at[35]); + MULADD(at[25], at[34]); + MULADD(at[26], at[33]); + MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); + MULADD(at[1], at[59]); + MULADD(at[2], at[58]); + MULADD(at[3], at[57]); + MULADD(at[4], at[56]); + MULADD(at[5], at[55]); + MULADD(at[6], at[54]); + MULADD(at[7], at[53]); + MULADD(at[8], at[52]); + MULADD(at[9], at[51]); + MULADD(at[10], at[50]); + MULADD(at[11], at[49]); + MULADD(at[12], at[48]); + MULADD(at[13], at[47]); + MULADD(at[14], at[46]); + MULADD(at[15], at[45]); + MULADD(at[16], at[44]); + MULADD(at[17], at[43]); + MULADD(at[18], at[42]); + MULADD(at[19], at[41]); + MULADD(at[20], at[40]); + MULADD(at[21], at[39]); + MULADD(at[22], at[38]); + MULADD(at[23], at[37]); + MULADD(at[24], at[36]); + MULADD(at[25], at[35]); + MULADD(at[26], at[34]); + MULADD(at[27], at[33]); + MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); + MULADD(at[1], at[60]); + MULADD(at[2], at[59]); + MULADD(at[3], at[58]); + MULADD(at[4], at[57]); + MULADD(at[5], at[56]); + MULADD(at[6], at[55]); + MULADD(at[7], at[54]); + MULADD(at[8], at[53]); + MULADD(at[9], at[52]); + MULADD(at[10], at[51]); + MULADD(at[11], at[50]); + MULADD(at[12], at[49]); + MULADD(at[13], at[48]); + MULADD(at[14], at[47]); + MULADD(at[15], at[46]); + MULADD(at[16], at[45]); + MULADD(at[17], at[44]); + MULADD(at[18], at[43]); + MULADD(at[19], at[42]); + MULADD(at[20], at[41]); + MULADD(at[21], at[40]); + MULADD(at[22], at[39]); + MULADD(at[23], at[38]); + MULADD(at[24], at[37]); + MULADD(at[25], at[36]); + MULADD(at[26], at[35]); + MULADD(at[27], at[34]); + MULADD(at[28], at[33]); + MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); + MULADD(at[1], at[61]); 
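+    /* column 30 collects 31 products, at[0]*at[62] down to at[30]*at[32] */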
+ MULADD(at[2], at[60]); + MULADD(at[3], at[59]); + MULADD(at[4], at[58]); + MULADD(at[5], at[57]); + MULADD(at[6], at[56]); + MULADD(at[7], at[55]); + MULADD(at[8], at[54]); + MULADD(at[9], at[53]); + MULADD(at[10], at[52]); + MULADD(at[11], at[51]); + MULADD(at[12], at[50]); + MULADD(at[13], at[49]); + MULADD(at[14], at[48]); + MULADD(at[15], at[47]); + MULADD(at[16], at[46]); + MULADD(at[17], at[45]); + MULADD(at[18], at[44]); + MULADD(at[19], at[43]); + MULADD(at[20], at[42]); + MULADD(at[21], at[41]); + MULADD(at[22], at[40]); + MULADD(at[23], at[39]); + MULADD(at[24], at[38]); + MULADD(at[25], at[37]); + MULADD(at[26], at[36]); + MULADD(at[27], at[35]); + MULADD(at[28], at[34]); + MULADD(at[29], at[33]); + MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); + MULADD(at[1], at[62]); + MULADD(at[2], at[61]); + MULADD(at[3], at[60]); + MULADD(at[4], at[59]); + MULADD(at[5], at[58]); + MULADD(at[6], at[57]); + MULADD(at[7], at[56]); + MULADD(at[8], at[55]); + MULADD(at[9], at[54]); + MULADD(at[10], at[53]); + MULADD(at[11], at[52]); + MULADD(at[12], at[51]); + MULADD(at[13], at[50]); + MULADD(at[14], at[49]); + MULADD(at[15], at[48]); + MULADD(at[16], at[47]); + MULADD(at[17], at[46]); + MULADD(at[18], at[45]); + MULADD(at[19], at[44]); + MULADD(at[20], at[43]); + MULADD(at[21], at[42]); + MULADD(at[22], at[41]); + MULADD(at[23], at[40]); + MULADD(at[24], at[39]); + MULADD(at[25], at[38]); + MULADD(at[26], at[37]); + MULADD(at[27], at[36]); + MULADD(at[28], at[35]); + MULADD(at[29], at[34]); + MULADD(at[30], at[33]); + MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); + MULADD(at[2], at[62]); + MULADD(at[3], at[61]); + MULADD(at[4], at[60]); + MULADD(at[5], at[59]); + MULADD(at[6], at[58]); + MULADD(at[7], at[57]); + MULADD(at[8], at[56]); + MULADD(at[9], at[55]); + MULADD(at[10], at[54]); + MULADD(at[11], at[53]); + MULADD(at[12], at[52]); + MULADD(at[13], at[51]); + MULADD(at[14], at[50]); + MULADD(at[15], at[49]); + MULADD(at[16], at[48]); + MULADD(at[17], at[47]); + MULADD(at[18], at[46]); + MULADD(at[19], at[45]); + MULADD(at[20], at[44]); + MULADD(at[21], at[43]); + MULADD(at[22], at[42]); + MULADD(at[23], at[41]); + MULADD(at[24], at[40]); + MULADD(at[25], at[39]); + MULADD(at[26], at[38]); + MULADD(at[27], at[37]); + MULADD(at[28], at[36]); + MULADD(at[29], at[35]); + MULADD(at[30], at[34]); + MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); + MULADD(at[3], at[62]); + MULADD(at[4], at[61]); + MULADD(at[5], at[60]); + MULADD(at[6], at[59]); + MULADD(at[7], at[58]); + MULADD(at[8], at[57]); + MULADD(at[9], at[56]); + MULADD(at[10], at[55]); + MULADD(at[11], at[54]); + MULADD(at[12], at[53]); + MULADD(at[13], at[52]); + MULADD(at[14], at[51]); + MULADD(at[15], at[50]); + MULADD(at[16], at[49]); + MULADD(at[17], at[48]); + MULADD(at[18], at[47]); + MULADD(at[19], at[46]); + MULADD(at[20], at[45]); + MULADD(at[21], at[44]); + MULADD(at[22], at[43]); + MULADD(at[23], at[42]); + MULADD(at[24], at[41]); + MULADD(at[25], at[40]); + MULADD(at[26], at[39]); + MULADD(at[27], at[38]); + MULADD(at[28], at[37]); + MULADD(at[29], at[36]); + MULADD(at[30], at[35]); + MULADD(at[31], at[34]); + COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); + MULADD(at[4], at[62]); + MULADD(at[5], at[61]); + MULADD(at[6], at[60]); + MULADD(at[7], at[59]); + MULADD(at[8], at[58]); + MULADD(at[9], at[57]); + MULADD(at[10], at[56]); 
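+    /* column 34: indices sum to 66; the run opened at at[3]*at[63]
+       because operand indices are capped at 63 */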
+ MULADD(at[11], at[55]); + MULADD(at[12], at[54]); + MULADD(at[13], at[53]); + MULADD(at[14], at[52]); + MULADD(at[15], at[51]); + MULADD(at[16], at[50]); + MULADD(at[17], at[49]); + MULADD(at[18], at[48]); + MULADD(at[19], at[47]); + MULADD(at[20], at[46]); + MULADD(at[21], at[45]); + MULADD(at[22], at[44]); + MULADD(at[23], at[43]); + MULADD(at[24], at[42]); + MULADD(at[25], at[41]); + MULADD(at[26], at[40]); + MULADD(at[27], at[39]); + MULADD(at[28], at[38]); + MULADD(at[29], at[37]); + MULADD(at[30], at[36]); + MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); + MULADD(at[5], at[62]); + MULADD(at[6], at[61]); + MULADD(at[7], at[60]); + MULADD(at[8], at[59]); + MULADD(at[9], at[58]); + MULADD(at[10], at[57]); + MULADD(at[11], at[56]); + MULADD(at[12], at[55]); + MULADD(at[13], at[54]); + MULADD(at[14], at[53]); + MULADD(at[15], at[52]); + MULADD(at[16], at[51]); + MULADD(at[17], at[50]); + MULADD(at[18], at[49]); + MULADD(at[19], at[48]); + MULADD(at[20], at[47]); + MULADD(at[21], at[46]); + MULADD(at[22], at[45]); + MULADD(at[23], at[44]); + MULADD(at[24], at[43]); + MULADD(at[25], at[42]); + MULADD(at[26], at[41]); + MULADD(at[27], at[40]); + MULADD(at[28], at[39]); + MULADD(at[29], at[38]); + MULADD(at[30], at[37]); + MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); + MULADD(at[6], at[62]); + MULADD(at[7], at[61]); + MULADD(at[8], at[60]); + MULADD(at[9], at[59]); + MULADD(at[10], at[58]); + MULADD(at[11], at[57]); + MULADD(at[12], at[56]); + MULADD(at[13], at[55]); + MULADD(at[14], at[54]); + MULADD(at[15], at[53]); + MULADD(at[16], at[52]); + MULADD(at[17], at[51]); + MULADD(at[18], at[50]); + MULADD(at[19], at[49]); + MULADD(at[20], at[48]); + MULADD(at[21], at[47]); + MULADD(at[22], at[46]); + MULADD(at[23], at[45]); + MULADD(at[24], at[44]); + MULADD(at[25], at[43]); + MULADD(at[26], at[42]); + MULADD(at[27], at[41]); + MULADD(at[28], at[40]); + MULADD(at[29], at[39]); + MULADD(at[30], at[38]); + MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); + MULADD(at[7], at[62]); + MULADD(at[8], at[61]); + MULADD(at[9], at[60]); + MULADD(at[10], at[59]); + MULADD(at[11], at[58]); + MULADD(at[12], at[57]); + MULADD(at[13], at[56]); + MULADD(at[14], at[55]); + MULADD(at[15], at[54]); + MULADD(at[16], at[53]); + MULADD(at[17], at[52]); + MULADD(at[18], at[51]); + MULADD(at[19], at[50]); + MULADD(at[20], at[49]); + MULADD(at[21], at[48]); + MULADD(at[22], at[47]); + MULADD(at[23], at[46]); + MULADD(at[24], at[45]); + MULADD(at[25], at[44]); + MULADD(at[26], at[43]); + MULADD(at[27], at[42]); + MULADD(at[28], at[41]); + MULADD(at[29], at[40]); + MULADD(at[30], at[39]); + MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); + MULADD(at[8], at[62]); + MULADD(at[9], at[61]); + MULADD(at[10], at[60]); + MULADD(at[11], at[59]); + MULADD(at[12], at[58]); + MULADD(at[13], at[57]); + MULADD(at[14], at[56]); + MULADD(at[15], at[55]); + MULADD(at[16], at[54]); + MULADD(at[17], at[53]); + MULADD(at[18], at[52]); + MULADD(at[19], at[51]); + MULADD(at[20], at[50]); + MULADD(at[21], at[49]); + MULADD(at[22], at[48]); + MULADD(at[23], at[47]); + MULADD(at[24], at[46]); + MULADD(at[25], at[45]); + MULADD(at[26], at[44]); + MULADD(at[27], at[43]); + MULADD(at[28], at[42]); + MULADD(at[29], at[41]); + MULADD(at[30], at[40]); + MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; 
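+    /* for columns past 31 the first operand index is column - 31, so
+       column 39 opens with at[8]*at[63] */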
+ MULADD(at[8], at[63]); + MULADD(at[9], at[62]); + MULADD(at[10], at[61]); + MULADD(at[11], at[60]); + MULADD(at[12], at[59]); + MULADD(at[13], at[58]); + MULADD(at[14], at[57]); + MULADD(at[15], at[56]); + MULADD(at[16], at[55]); + MULADD(at[17], at[54]); + MULADD(at[18], at[53]); + MULADD(at[19], at[52]); + MULADD(at[20], at[51]); + MULADD(at[21], at[50]); + MULADD(at[22], at[49]); + MULADD(at[23], at[48]); + MULADD(at[24], at[47]); + MULADD(at[25], at[46]); + MULADD(at[26], at[45]); + MULADD(at[27], at[44]); + MULADD(at[28], at[43]); + MULADD(at[29], at[42]); + MULADD(at[30], at[41]); + MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); + MULADD(at[10], at[62]); + MULADD(at[11], at[61]); + MULADD(at[12], at[60]); + MULADD(at[13], at[59]); + MULADD(at[14], at[58]); + MULADD(at[15], at[57]); + MULADD(at[16], at[56]); + MULADD(at[17], at[55]); + MULADD(at[18], at[54]); + MULADD(at[19], at[53]); + MULADD(at[20], at[52]); + MULADD(at[21], at[51]); + MULADD(at[22], at[50]); + MULADD(at[23], at[49]); + MULADD(at[24], at[48]); + MULADD(at[25], at[47]); + MULADD(at[26], at[46]); + MULADD(at[27], at[45]); + MULADD(at[28], at[44]); + MULADD(at[29], at[43]); + MULADD(at[30], at[42]); + MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); + MULADD(at[11], at[62]); + MULADD(at[12], at[61]); + MULADD(at[13], at[60]); + MULADD(at[14], at[59]); + MULADD(at[15], at[58]); + MULADD(at[16], at[57]); + MULADD(at[17], at[56]); + MULADD(at[18], at[55]); + MULADD(at[19], at[54]); + MULADD(at[20], at[53]); + MULADD(at[21], at[52]); + MULADD(at[22], at[51]); + MULADD(at[23], at[50]); + MULADD(at[24], at[49]); + MULADD(at[25], at[48]); + MULADD(at[26], at[47]); + MULADD(at[27], at[46]); + MULADD(at[28], at[45]); + MULADD(at[29], at[44]); + MULADD(at[30], at[43]); + MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); + MULADD(at[12], at[62]); + MULADD(at[13], at[61]); + MULADD(at[14], at[60]); + MULADD(at[15], at[59]); + MULADD(at[16], at[58]); + MULADD(at[17], at[57]); + MULADD(at[18], at[56]); + MULADD(at[19], at[55]); + MULADD(at[20], at[54]); + MULADD(at[21], at[53]); + MULADD(at[22], at[52]); + MULADD(at[23], at[51]); + MULADD(at[24], at[50]); + MULADD(at[25], at[49]); + MULADD(at[26], at[48]); + MULADD(at[27], at[47]); + MULADD(at[28], at[46]); + MULADD(at[29], at[45]); + MULADD(at[30], at[44]); + MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); + MULADD(at[13], at[62]); + MULADD(at[14], at[61]); + MULADD(at[15], at[60]); + MULADD(at[16], at[59]); + MULADD(at[17], at[58]); + MULADD(at[18], at[57]); + MULADD(at[19], at[56]); + MULADD(at[20], at[55]); + MULADD(at[21], at[54]); + MULADD(at[22], at[53]); + MULADD(at[23], at[52]); + MULADD(at[24], at[51]); + MULADD(at[25], at[50]); + MULADD(at[26], at[49]); + MULADD(at[27], at[48]); + MULADD(at[28], at[47]); + MULADD(at[29], at[46]); + MULADD(at[30], at[45]); + MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); + MULADD(at[14], at[62]); + MULADD(at[15], at[61]); + MULADD(at[16], at[60]); + MULADD(at[17], at[59]); + MULADD(at[18], at[58]); + MULADD(at[19], at[57]); + MULADD(at[20], at[56]); + MULADD(at[21], at[55]); + MULADD(at[22], at[54]); + MULADD(at[23], at[53]); + MULADD(at[24], at[52]); + MULADD(at[25], at[51]); + MULADD(at[26], at[50]); + MULADD(at[27], at[49]); + MULADD(at[28], at[48]); + 
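/* tail of column 44: indices sum to 76 (44 + 32) */
+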
MULADD(at[29], at[47]); + MULADD(at[30], at[46]); + MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); + MULADD(at[15], at[62]); + MULADD(at[16], at[61]); + MULADD(at[17], at[60]); + MULADD(at[18], at[59]); + MULADD(at[19], at[58]); + MULADD(at[20], at[57]); + MULADD(at[21], at[56]); + MULADD(at[22], at[55]); + MULADD(at[23], at[54]); + MULADD(at[24], at[53]); + MULADD(at[25], at[52]); + MULADD(at[26], at[51]); + MULADD(at[27], at[50]); + MULADD(at[28], at[49]); + MULADD(at[29], at[48]); + MULADD(at[30], at[47]); + MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 */ + COMBA_FORWARD; + MULADD(at[15], at[63]); + MULADD(at[16], at[62]); + MULADD(at[17], at[61]); + MULADD(at[18], at[60]); + MULADD(at[19], at[59]); + MULADD(at[20], at[58]); + MULADD(at[21], at[57]); + MULADD(at[22], at[56]); + MULADD(at[23], at[55]); + MULADD(at[24], at[54]); + MULADD(at[25], at[53]); + MULADD(at[26], at[52]); + MULADD(at[27], at[51]); + MULADD(at[28], at[50]); + MULADD(at[29], at[49]); + MULADD(at[30], at[48]); + MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); + MULADD(at[17], at[62]); + MULADD(at[18], at[61]); + MULADD(at[19], at[60]); + MULADD(at[20], at[59]); + MULADD(at[21], at[58]); + MULADD(at[22], at[57]); + MULADD(at[23], at[56]); + MULADD(at[24], at[55]); + MULADD(at[25], at[54]); + MULADD(at[26], at[53]); + MULADD(at[27], at[52]); + MULADD(at[28], at[51]); + MULADD(at[29], at[50]); + MULADD(at[30], at[49]); + MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); + MULADD(at[18], at[62]); + MULADD(at[19], at[61]); + MULADD(at[20], at[60]); + MULADD(at[21], at[59]); + MULADD(at[22], at[58]); + MULADD(at[23], at[57]); + MULADD(at[24], at[56]); + MULADD(at[25], at[55]); + MULADD(at[26], at[54]); + MULADD(at[27], at[53]); + MULADD(at[28], at[52]); + MULADD(at[29], at[51]); + MULADD(at[30], at[50]); + MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); + MULADD(at[19], at[62]); + MULADD(at[20], at[61]); + MULADD(at[21], at[60]); + MULADD(at[22], at[59]); + MULADD(at[23], at[58]); + MULADD(at[24], at[57]); + MULADD(at[25], at[56]); + MULADD(at[26], at[55]); + MULADD(at[27], at[54]); + MULADD(at[28], at[53]); + MULADD(at[29], at[52]); + MULADD(at[30], at[51]); + MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); + MULADD(at[20], at[62]); + MULADD(at[21], at[61]); + MULADD(at[22], at[60]); + MULADD(at[23], at[59]); + MULADD(at[24], at[58]); + MULADD(at[25], at[57]); + MULADD(at[26], at[56]); + MULADD(at[27], at[55]); + MULADD(at[28], at[54]); + MULADD(at[29], at[53]); + MULADD(at[30], at[52]); + MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); + MULADD(at[21], at[62]); + MULADD(at[22], at[61]); + MULADD(at[23], at[60]); + MULADD(at[24], at[59]); + MULADD(at[25], at[58]); + MULADD(at[26], at[57]); + MULADD(at[27], at[56]); + MULADD(at[28], at[55]); + MULADD(at[29], at[54]); + MULADD(at[30], at[53]); + MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); + MULADD(at[22], at[62]); + MULADD(at[23], at[61]); + MULADD(at[24], at[60]); + MULADD(at[25], at[59]); + MULADD(at[26], at[58]); + MULADD(at[27], at[57]); + MULADD(at[28], at[56]); + MULADD(at[29], at[55]); + MULADD(at[30], at[54]); + MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 
53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); + MULADD(at[23], at[62]); + MULADD(at[24], at[61]); + MULADD(at[25], at[60]); + MULADD(at[26], at[59]); + MULADD(at[27], at[58]); + MULADD(at[28], at[57]); + MULADD(at[29], at[56]); + MULADD(at[30], at[55]); + MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); + MULADD(at[24], at[62]); + MULADD(at[25], at[61]); + MULADD(at[26], at[60]); + MULADD(at[27], at[59]); + MULADD(at[28], at[58]); + MULADD(at[29], at[57]); + MULADD(at[30], at[56]); + MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); + MULADD(at[25], at[62]); + MULADD(at[26], at[61]); + MULADD(at[27], at[60]); + MULADD(at[28], at[59]); + MULADD(at[29], at[58]); + MULADD(at[30], at[57]); + MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); + MULADD(at[26], at[62]); + MULADD(at[27], at[61]); + MULADD(at[28], at[60]); + MULADD(at[29], at[59]); + MULADD(at[30], at[58]); + MULADD(at[31], at[57]); + COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); + MULADD(at[27], at[62]); + MULADD(at[28], at[61]); + MULADD(at[29], at[60]); + MULADD(at[30], at[59]); + MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); + MULADD(at[28], at[62]); + MULADD(at[29], at[61]); + MULADD(at[30], at[60]); + MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); + MULADD(at[29], at[62]); + MULADD(at[30], at[61]); + MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); + MULADD(at[30], at[62]); + MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); + MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_sqr_comba_4(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[8], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + + B->used = 8; + B->sign = ZPOS; + memcpy(B->dp, b, 8 * sizeof(mp_digit)); + mp_clamp(B); +} + +void +s_mp_sqr_comba_8(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + 
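/* squaring enumerates only the upper triangle: SQRADD2 adds each
+       cross product a[i]*a[j] (i < j) twice */
+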
SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); + SQRADD2(a[4], a[6]); + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); + SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = ZPOS; + memcpy(B->dp, b, 16 * sizeof(mp_digit)); + mp_clamp(B); +} + +void +s_mp_sqr_comba_16(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); + SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); 
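+    /* SQRADDSC starts a fresh sc0..sc2 chain, SQRADDAC extends it, and
+       SQRADDDB below folds the chain in twice to double the cross terms */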
+ SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[11], a[15]); + SQRADD2(a[12], a[14]); + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); + SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = ZPOS; + memcpy(B->dp, b, 32 * 
sizeof(mp_digit)); + mp_clamp(B); +} + +void +s_mp_sqr_comba_32(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); + SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); + SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); + SQRADDAC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); + SQRADDAC(a[1], a[16]); + SQRADDAC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + 
COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); + SQRADDAC(a[1], a[17]); + SQRADDAC(a[2], a[16]); + SQRADDAC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); + SQRADDAC(a[1], a[18]); + SQRADDAC(a[2], a[17]); + SQRADDAC(a[3], a[16]); + SQRADDAC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); + SQRADDAC(a[1], a[19]); + SQRADDAC(a[2], a[18]); + SQRADDAC(a[3], a[17]); + SQRADDAC(a[4], a[16]); + SQRADDAC(a[5], a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); + SQRADDAC(a[1], a[20]); + SQRADDAC(a[2], a[19]); + SQRADDAC(a[3], a[18]); + SQRADDAC(a[4], a[17]); + SQRADDAC(a[5], a[16]); + SQRADDAC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); + SQRADDAC(a[1], a[21]); + SQRADDAC(a[2], a[20]); + SQRADDAC(a[3], a[19]); + SQRADDAC(a[4], a[18]); + SQRADDAC(a[5], a[17]); + SQRADDAC(a[6], a[16]); + SQRADDAC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); + SQRADDAC(a[1], a[22]); + SQRADDAC(a[2], a[21]); + SQRADDAC(a[3], a[20]); + SQRADDAC(a[4], a[19]); + SQRADDAC(a[5], a[18]); + SQRADDAC(a[6], a[17]); + SQRADDAC(a[7], a[16]); + SQRADDAC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); + SQRADDAC(a[1], a[23]); + SQRADDAC(a[2], a[22]); + SQRADDAC(a[3], a[21]); + SQRADDAC(a[4], a[20]); + SQRADDAC(a[5], a[19]); + SQRADDAC(a[6], a[18]); + SQRADDAC(a[7], a[17]); + SQRADDAC(a[8], a[16]); + SQRADDAC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); + SQRADDAC(a[1], a[24]); + SQRADDAC(a[2], a[23]); + SQRADDAC(a[3], a[22]); + SQRADDAC(a[4], a[21]); + SQRADDAC(a[5], a[20]); + SQRADDAC(a[6], a[19]); + SQRADDAC(a[7], a[18]); + SQRADDAC(a[8], a[17]); + SQRADDAC(a[9], a[16]); + SQRADDAC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); + SQRADDAC(a[1], a[25]); + SQRADDAC(a[2], a[24]); + SQRADDAC(a[3], a[23]); + SQRADDAC(a[4], a[22]); + SQRADDAC(a[5], a[21]); + SQRADDAC(a[6], a[20]); + SQRADDAC(a[7], a[19]); + SQRADDAC(a[8], a[18]); + SQRADDAC(a[9], a[17]); + SQRADDAC(a[10], a[16]); + SQRADDAC(a[11], a[15]); + SQRADDAC(a[12], a[14]); + SQRADDDB; + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); + SQRADDAC(a[1], a[26]); + SQRADDAC(a[2], a[25]); + SQRADDAC(a[3], a[24]); + SQRADDAC(a[4], a[23]); + SQRADDAC(a[5], a[22]); + SQRADDAC(a[6], a[21]); + SQRADDAC(a[7], a[20]); + 
SQRADDAC(a[8], a[19]); + SQRADDAC(a[9], a[18]); + SQRADDAC(a[10], a[17]); + SQRADDAC(a[11], a[16]); + SQRADDAC(a[12], a[15]); + SQRADDAC(a[13], a[14]); + SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); + SQRADDAC(a[1], a[27]); + SQRADDAC(a[2], a[26]); + SQRADDAC(a[3], a[25]); + SQRADDAC(a[4], a[24]); + SQRADDAC(a[5], a[23]); + SQRADDAC(a[6], a[22]); + SQRADDAC(a[7], a[21]); + SQRADDAC(a[8], a[20]); + SQRADDAC(a[9], a[19]); + SQRADDAC(a[10], a[18]); + SQRADDAC(a[11], a[17]); + SQRADDAC(a[12], a[16]); + SQRADDAC(a[13], a[15]); + SQRADDDB; + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); + SQRADDAC(a[1], a[28]); + SQRADDAC(a[2], a[27]); + SQRADDAC(a[3], a[26]); + SQRADDAC(a[4], a[25]); + SQRADDAC(a[5], a[24]); + SQRADDAC(a[6], a[23]); + SQRADDAC(a[7], a[22]); + SQRADDAC(a[8], a[21]); + SQRADDAC(a[9], a[20]); + SQRADDAC(a[10], a[19]); + SQRADDAC(a[11], a[18]); + SQRADDAC(a[12], a[17]); + SQRADDAC(a[13], a[16]); + SQRADDAC(a[14], a[15]); + SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); + SQRADDAC(a[1], a[29]); + SQRADDAC(a[2], a[28]); + SQRADDAC(a[3], a[27]); + SQRADDAC(a[4], a[26]); + SQRADDAC(a[5], a[25]); + SQRADDAC(a[6], a[24]); + SQRADDAC(a[7], a[23]); + SQRADDAC(a[8], a[22]); + SQRADDAC(a[9], a[21]); + SQRADDAC(a[10], a[20]); + SQRADDAC(a[11], a[19]); + SQRADDAC(a[12], a[18]); + SQRADDAC(a[13], a[17]); + SQRADDAC(a[14], a[16]); + SQRADDDB; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); + SQRADDAC(a[1], a[30]); + SQRADDAC(a[2], a[29]); + SQRADDAC(a[3], a[28]); + SQRADDAC(a[4], a[27]); + SQRADDAC(a[5], a[26]); + SQRADDAC(a[6], a[25]); + SQRADDAC(a[7], a[24]); + SQRADDAC(a[8], a[23]); + SQRADDAC(a[9], a[22]); + SQRADDAC(a[10], a[21]); + SQRADDAC(a[11], a[20]); + SQRADDAC(a[12], a[19]); + SQRADDAC(a[13], a[18]); + SQRADDAC(a[14], a[17]); + SQRADDAC(a[15], a[16]); + SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[31]); + SQRADDAC(a[2], a[30]); + SQRADDAC(a[3], a[29]); + SQRADDAC(a[4], a[28]); + SQRADDAC(a[5], a[27]); + SQRADDAC(a[6], a[26]); + SQRADDAC(a[7], a[25]); + SQRADDAC(a[8], a[24]); + SQRADDAC(a[9], a[23]); + SQRADDAC(a[10], a[22]); + SQRADDAC(a[11], a[21]); + SQRADDAC(a[12], a[20]); + SQRADDAC(a[13], a[19]); + SQRADDAC(a[14], a[18]); + SQRADDAC(a[15], a[17]); + SQRADDDB; + SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[31]); + SQRADDAC(a[3], a[30]); + SQRADDAC(a[4], a[29]); + SQRADDAC(a[5], a[28]); + SQRADDAC(a[6], a[27]); + SQRADDAC(a[7], a[26]); + SQRADDAC(a[8], a[25]); + SQRADDAC(a[9], a[24]); + SQRADDAC(a[10], a[23]); + SQRADDAC(a[11], a[22]); + SQRADDAC(a[12], a[21]); + SQRADDAC(a[13], a[20]); + SQRADDAC(a[14], a[19]); + SQRADDAC(a[15], a[18]); + SQRADDAC(a[16], a[17]); + SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[31]); + SQRADDAC(a[4], a[30]); + SQRADDAC(a[5], a[29]); + SQRADDAC(a[6], a[28]); + SQRADDAC(a[7], a[27]); + SQRADDAC(a[8], a[26]); + SQRADDAC(a[9], a[25]); + SQRADDAC(a[10], a[24]); + SQRADDAC(a[11], a[23]); + SQRADDAC(a[12], a[22]); + SQRADDAC(a[13], a[21]); + SQRADDAC(a[14], a[20]); + SQRADDAC(a[15], a[19]); + SQRADDAC(a[16], a[18]); + SQRADDDB; + SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[31]); + SQRADDAC(a[5], a[30]); + SQRADDAC(a[6], a[29]); + SQRADDAC(a[7], 
a[28]); + SQRADDAC(a[8], a[27]); + SQRADDAC(a[9], a[26]); + SQRADDAC(a[10], a[25]); + SQRADDAC(a[11], a[24]); + SQRADDAC(a[12], a[23]); + SQRADDAC(a[13], a[22]); + SQRADDAC(a[14], a[21]); + SQRADDAC(a[15], a[20]); + SQRADDAC(a[16], a[19]); + SQRADDAC(a[17], a[18]); + SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[31]); + SQRADDAC(a[6], a[30]); + SQRADDAC(a[7], a[29]); + SQRADDAC(a[8], a[28]); + SQRADDAC(a[9], a[27]); + SQRADDAC(a[10], a[26]); + SQRADDAC(a[11], a[25]); + SQRADDAC(a[12], a[24]); + SQRADDAC(a[13], a[23]); + SQRADDAC(a[14], a[22]); + SQRADDAC(a[15], a[21]); + SQRADDAC(a[16], a[20]); + SQRADDAC(a[17], a[19]); + SQRADDDB; + SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[31]); + SQRADDAC(a[7], a[30]); + SQRADDAC(a[8], a[29]); + SQRADDAC(a[9], a[28]); + SQRADDAC(a[10], a[27]); + SQRADDAC(a[11], a[26]); + SQRADDAC(a[12], a[25]); + SQRADDAC(a[13], a[24]); + SQRADDAC(a[14], a[23]); + SQRADDAC(a[15], a[22]); + SQRADDAC(a[16], a[21]); + SQRADDAC(a[17], a[20]); + SQRADDAC(a[18], a[19]); + SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[31]); + SQRADDAC(a[8], a[30]); + SQRADDAC(a[9], a[29]); + SQRADDAC(a[10], a[28]); + SQRADDAC(a[11], a[27]); + SQRADDAC(a[12], a[26]); + SQRADDAC(a[13], a[25]); + SQRADDAC(a[14], a[24]); + SQRADDAC(a[15], a[23]); + SQRADDAC(a[16], a[22]); + SQRADDAC(a[17], a[21]); + SQRADDAC(a[18], a[20]); + SQRADDDB; + SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[31]); + SQRADDAC(a[9], a[30]); + SQRADDAC(a[10], a[29]); + SQRADDAC(a[11], a[28]); + SQRADDAC(a[12], a[27]); + SQRADDAC(a[13], a[26]); + SQRADDAC(a[14], a[25]); + SQRADDAC(a[15], a[24]); + SQRADDAC(a[16], a[23]); + SQRADDAC(a[17], a[22]); + SQRADDAC(a[18], a[21]); + SQRADDAC(a[19], a[20]); + SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[31]); + SQRADDAC(a[10], a[30]); + SQRADDAC(a[11], a[29]); + SQRADDAC(a[12], a[28]); + SQRADDAC(a[13], a[27]); + SQRADDAC(a[14], a[26]); + SQRADDAC(a[15], a[25]); + SQRADDAC(a[16], a[24]); + SQRADDAC(a[17], a[23]); + SQRADDAC(a[18], a[22]); + SQRADDAC(a[19], a[21]); + SQRADDDB; + SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[31]); + SQRADDAC(a[11], a[30]); + SQRADDAC(a[12], a[29]); + SQRADDAC(a[13], a[28]); + SQRADDAC(a[14], a[27]); + SQRADDAC(a[15], a[26]); + SQRADDAC(a[16], a[25]); + SQRADDAC(a[17], a[24]); + SQRADDAC(a[18], a[23]); + SQRADDAC(a[19], a[22]); + SQRADDAC(a[20], a[21]); + SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[31]); + SQRADDAC(a[12], a[30]); + SQRADDAC(a[13], a[29]); + SQRADDAC(a[14], a[28]); + SQRADDAC(a[15], a[27]); + SQRADDAC(a[16], a[26]); + SQRADDAC(a[17], a[25]); + SQRADDAC(a[18], a[24]); + SQRADDAC(a[19], a[23]); + SQRADDAC(a[20], a[22]); + SQRADDDB; + SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[31]); + SQRADDAC(a[13], a[30]); + SQRADDAC(a[14], a[29]); + SQRADDAC(a[15], a[28]); + SQRADDAC(a[16], a[27]); + SQRADDAC(a[17], a[26]); + SQRADDAC(a[18], a[25]); + SQRADDAC(a[19], a[24]); + SQRADDAC(a[20], a[23]); + SQRADDAC(a[21], a[22]); + SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[31]); + SQRADDAC(a[14], a[30]); + SQRADDAC(a[15], a[29]); + SQRADDAC(a[16], a[28]); + SQRADDAC(a[17], a[27]); + SQRADDAC(a[18], a[26]); + SQRADDAC(a[19], 
a[25]); + SQRADDAC(a[20], a[24]); + SQRADDAC(a[21], a[23]); + SQRADDDB; + SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + SQRADDSC(a[14], a[31]); + SQRADDAC(a[15], a[30]); + SQRADDAC(a[16], a[29]); + SQRADDAC(a[17], a[28]); + SQRADDAC(a[18], a[27]); + SQRADDAC(a[19], a[26]); + SQRADDAC(a[20], a[25]); + SQRADDAC(a[21], a[24]); + SQRADDAC(a[22], a[23]); + SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[31]); + SQRADDAC(a[16], a[30]); + SQRADDAC(a[17], a[29]); + SQRADDAC(a[18], a[28]); + SQRADDAC(a[19], a[27]); + SQRADDAC(a[20], a[26]); + SQRADDAC(a[21], a[25]); + SQRADDAC(a[22], a[24]); + SQRADDDB; + SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[31]); + SQRADDAC(a[17], a[30]); + SQRADDAC(a[18], a[29]); + SQRADDAC(a[19], a[28]); + SQRADDAC(a[20], a[27]); + SQRADDAC(a[21], a[26]); + SQRADDAC(a[22], a[25]); + SQRADDAC(a[23], a[24]); + SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[31]); + SQRADDAC(a[18], a[30]); + SQRADDAC(a[19], a[29]); + SQRADDAC(a[20], a[28]); + SQRADDAC(a[21], a[27]); + SQRADDAC(a[22], a[26]); + SQRADDAC(a[23], a[25]); + SQRADDDB; + SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[31]); + SQRADDAC(a[19], a[30]); + SQRADDAC(a[20], a[29]); + SQRADDAC(a[21], a[28]); + SQRADDAC(a[22], a[27]); + SQRADDAC(a[23], a[26]); + SQRADDAC(a[24], a[25]); + SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[31]); + SQRADDAC(a[20], a[30]); + SQRADDAC(a[21], a[29]); + SQRADDAC(a[22], a[28]); + SQRADDAC(a[23], a[27]); + SQRADDAC(a[24], a[26]); + SQRADDDB; + SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[31]); + SQRADDAC(a[21], a[30]); + SQRADDAC(a[22], a[29]); + SQRADDAC(a[23], a[28]); + SQRADDAC(a[24], a[27]); + SQRADDAC(a[25], a[26]); + SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[31]); + SQRADDAC(a[22], a[30]); + SQRADDAC(a[23], a[29]); + SQRADDAC(a[24], a[28]); + SQRADDAC(a[25], a[27]); + SQRADDDB; + SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[31]); + SQRADDAC(a[23], a[30]); + SQRADDAC(a[24], a[29]); + SQRADDAC(a[25], a[28]); + SQRADDAC(a[26], a[27]); + SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[31]); + SQRADDAC(a[24], a[30]); + SQRADDAC(a[25], a[29]); + SQRADDAC(a[26], a[28]); + SQRADDDB; + SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[31]); + SQRADDAC(a[25], a[30]); + SQRADDAC(a[26], a[29]); + SQRADDAC(a[27], a[28]); + SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[31]); + SQRADDAC(a[26], a[30]); + SQRADDAC(a[27], a[29]); + SQRADDDB; + SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[31]); + SQRADDAC(a[27], a[30]); + SQRADDAC(a[28], a[29]); + SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); + SQRADD2(a[28], a[30]); + SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); + SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); + SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + 
COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + B->used = 64; + B->sign = ZPOS; + memcpy(B->dp, b, 64 * sizeof(mp_digit)); + mp_clamp(B); +} diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm new file mode 100644 index 0000000000..e50efa8de3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm @@ -0,0 +1,13066 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +;/* TomsFastMath, a fast ISO C bignum library. +; * +; * This project is meant to fill in where LibTomMath +; * falls short. That is speed ;-) +; * +; * This project is public domain and free for all purposes. +; * +; * Tom St Denis, tomstdenis@iahu.ca +; */ + +;/* +; * The source file from which this assembly was derived +; * comes from TFM v0.03, which has the above license. +; * This source came from mp_comba_amd64_sun.s and was converted +; * to the MASM code set. +; */ + +.CODE + +externdef memcpy:PROC + +public s_mp_mul_comba_4 +public s_mp_mul_comba_8 +public s_mp_mul_comba_16 +public s_mp_mul_comba_32 +public s_mp_sqr_comba_8 +public s_mp_sqr_comba_16 +public s_mp_sqr_comba_32 + + +; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) + + ALIGN 16 +s_mp_mul_comba_4 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + sub rsp, 64 + mov r9, qword ptr [16+rdi] + mov rbx, rdx + mov rdx, qword ptr [16+rsi] + mov rax, qword ptr [r9] + mov qword ptr [-64+64+rsp], rax + mov r8, qword ptr [8+r9] + mov qword ptr [-56+64+rsp], r8 + mov rbp, qword ptr [16+r9] + mov qword ptr [-48+64+rsp], rbp + mov r12, qword ptr [24+r9] + mov qword ptr [-40+64+rsp], r12 + mov rcx, qword ptr [rdx] + mov qword ptr [-32+64+rsp], rcx + mov r10, qword ptr [8+rdx] + mov qword ptr [-24+64+rsp], r10 + mov r11, qword ptr [16+rdx] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [-16+64+rsp], r11 + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [24+rdx] + mov qword ptr [-8+64+rsp], rax + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-24+64+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-32+64+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-24+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-32+64+rsp] + add
rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [48+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword ptr [56+r11], rcx + mov dword ptr [8+rbx], 8 + jne L9 + ALIGN 16 +L18: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L9 + lea r10d, dword ptr [-2+rdx] + cmp qword ptr [r11+r10*8], 0 + je L18 +L9: + mov edx, dword ptr [8+rbx] + xor r11d, r11d + test edx, edx + cmovne r11d, esi + mov dword ptr [rbx], r11d + add rsp, 64 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_4 ENDP + + +; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C) + + ALIGN 16 +s_mp_mul_comba_8 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + mov rbx, rdx + sub rsp, 8+128 + mov rdx, qword ptr [16+rdi] + mov r8, qword ptr [rdx] + mov qword ptr [-120+128+rsp], r8 + mov rbp, qword ptr [8+rdx] + mov qword ptr [-112+128+rsp], rbp + mov r9, qword ptr [16+rdx] + mov qword ptr [-104+128+rsp], r9 + mov r12, qword ptr [24+rdx] + mov qword ptr [-96+128+rsp], r12 + mov rcx, qword ptr [32+rdx] + mov qword ptr [-88+128+rsp], rcx + mov r10, qword ptr [40+rdx] + mov qword ptr [-80+128+rsp], r10 + mov r11, qword ptr [48+rdx] + mov qword ptr [-72+128+rsp], r11 + mov rax, qword ptr [56+rdx] + mov rdx, qword ptr [16+rsi] + mov qword ptr [-64+128+rsp], rax + mov r8, qword ptr [rdx] + mov qword ptr [-56+128+rsp], r8 + mov rbp, qword ptr [8+rdx] + mov qword ptr [-48+128+rsp], rbp + mov r9, qword ptr [16+rdx] + mov qword ptr [-40+128+rsp], r9 + mov r12, qword ptr [24+rdx] + mov qword ptr [-32+128+rsp], r12 + mov rcx, qword ptr [32+rdx] + mov qword ptr [-24+128+rsp], rcx + mov r10, qword ptr [40+rdx] + mov qword ptr [-16+128+rsp], r10 + mov r11, qword ptr [48+rdx] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [-8+128+rsp], r11 + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [56+rdx] + mov qword ptr [128+rsp], rax + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-48+128+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-56+128+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr 
[-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [48+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov 
rax, qword ptr [-80+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [56+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [64+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [72+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [80+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [88+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [96+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 
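+
+; (A sketch, not part of the original file, of the pattern the unrolled
+; blocks above and below repeat: the three-word column accumulator
+; (c0, c1, c2) rotates through r8, r9 and rcx, with rbp and r12 as
+; staging registers and r10 pinned at zero for reseeding.  Each partial
+; product a[i]*b[j] of the current column is folded in as
+;
+;     mov rax, qword ptr [a_i]   ; load a[i] from the stack copy
+;     mul qword ptr [b_j]        ; rdx:rax = a[i] * b[j]
+;     add r8, rax                ; c0 += low word
+;     adc r9, rdx                ; c1 += high word plus carry
+;     adc rcx, 0                 ; c2 absorbs the final carry
+;
+; and once the last product is in, c0 is stored to the next output digit
+; and the registers rotate for the following column.)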
+ mov rax, qword ptr [-72+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [104+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [112+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword ptr [120+r11], rcx + mov dword ptr [8+rbx], 16 + jne L35 + ALIGN 16 +L43: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L35 + lea eax, dword ptr [-2+rdx] + cmp qword ptr [r11+rax*8], 0 + je L43 +L35: + mov r11d, dword ptr [8+rbx] + xor edx, edx + test r11d, r11d + cmovne edx, esi + mov dword ptr [rbx], edx + add rsp, 8+128 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_8 ENDP + + +; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); + + ALIGN 16 +s_mp_mul_comba_16 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + mov rbx, rdx + sub rsp, 136+128 + mov rax, qword ptr [16+rdi] + mov r8, qword ptr [rax] + mov qword ptr [-120+128+rsp], r8 + mov rbp, qword ptr [8+rax] + mov qword ptr [-112+128+rsp], rbp + mov r9, qword ptr [16+rax] + mov qword ptr [-104+128+rsp], r9 + mov r12, qword ptr [24+rax] + mov qword ptr [-96+128+rsp], r12 + mov rcx, qword ptr [32+rax] + mov qword ptr [-88+128+rsp], rcx + mov r10, qword ptr [40+rax] + mov qword ptr [-80+128+rsp], r10 + mov rdx, qword ptr [48+rax] + mov qword ptr [-72+128+rsp], rdx + mov r11, qword ptr [56+rax] + mov qword ptr [-64+128+rsp], r11 + mov r8, qword ptr [64+rax] + mov qword ptr [-56+128+rsp], r8 + mov rbp, qword ptr [72+rax] + mov qword ptr [-48+128+rsp], rbp + mov r9, qword ptr [80+rax] + mov qword ptr [-40+128+rsp], r9 + mov r12, qword ptr [88+rax] + mov qword ptr [-32+128+rsp], r12 + mov rcx, qword ptr [96+rax] + mov qword ptr [-24+128+rsp], rcx + mov r10, qword ptr [104+rax] + mov qword ptr [-16+128+rsp], r10 + mov rdx, qword ptr [112+rax] + mov qword ptr [-8+128+rsp], rdx + mov r11, qword ptr [120+rax] + mov qword ptr [128+rsp], r11 + mov r11, qword ptr [16+rsi] + mov r8, qword ptr [r11] + mov qword ptr [8+128+rsp], r8 + mov rbp, qword ptr [8+r11] + mov qword ptr [16+128+rsp], rbp + mov r9, qword ptr [16+r11] + mov qword ptr [24+128+rsp], r9 + mov r12, qword ptr [24+r11] + mov qword ptr [32+128+rsp], r12 + mov rcx, qword ptr [32+r11] + mov qword ptr [40+128+rsp], rcx + mov r10, qword ptr [40+r11] + mov qword ptr [48+128+rsp], r10 + mov rdx, qword ptr [48+r11] + mov qword ptr [56+128+rsp], rdx + mov rax, qword ptr [56+r11] + mov qword ptr [64+128+rsp], rax + mov r8, qword ptr [64+r11] + mov qword ptr [72+128+rsp], r8 + mov rbp, qword ptr [72+r11] + mov qword ptr [80+128+rsp], rbp + mov r9, qword ptr [80+r11] + mov qword ptr [88+128+rsp], r9 + mov r12, qword ptr [88+r11] + mov qword ptr [96+128+rsp], r12 + mov rcx, qword ptr [96+r11] + mov qword ptr [104+128+rsp], rcx + mov r10, qword ptr [104+r11] + mov qword ptr [112+128+rsp], r10 + mov rdx, qword ptr [112+r11] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [120+128+rsp], rdx + mov rax, qword ptr [120+r11] + mov qword ptr [128+128+rsp], rax + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr 
[r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [16+128+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [8+128+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [48+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [64+128+rsp] + add 
rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [56+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [64+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [72+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr 
[56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [80+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [88+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov 
qword ptr [96+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [104+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [112+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, 
qword ptr [-104+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [120+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [128+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [136+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [144+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [152+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [160+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [168+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [176+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [184+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [192+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [200+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [208+r11], r8
+ mov r9, r12
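+ ; final columns (k = 27..31) of the 32-word product follow; as in the
+ ; blocks above, each column sums the partial products a[i]*b[j] with
+ ; i+j == k in a three-word accumulator and stores word k at [8*k+r11]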
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [216+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [224+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [232+r11], rcx
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [240+r11], r8
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [248+r11], rcx
+ mov dword ptr [8+rbx], 32
+ jne L76
+ ALIGN 16
+L84:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L76
+ lea eax, dword ptr [-2+rdx]
+ cmp qword ptr [r11+rax*8], 0
+ je L84
+L76:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi
+ mov dword ptr [rbx], r11d
+ add rsp, 136+128
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_16 ENDP
+
+; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+
+
+ ALIGN 16
+s_mp_mul_comba_32 PROC ; a "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push rbp
+ mov rbp, rsp
+ push r13
+ mov r13, rdx
+; mov edx, 256
+ mov r8d, 256
+ push r12
+ mov r12, rsi
+ push rbx
+ mov rbx, rdi
+ sub rsp, 520+32 ; +32 for "home" storage
+; mov rsi, qword ptr [16+rdi]
+; lea rdi, qword ptr [-544+rbp]
+ mov rdx, qword ptr [16+rdi]
+ lea rcx, qword ptr [-544+rbp]
+ call memcpy
+; mov rsi, qword ptr [16+r12]
+; lea rdi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rdx, qword ptr [16+r12]
+ lea rcx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
+ mov r9, qword ptr [16+r13]
+ xor r8d, r8d
+ mov rsi, r8
+ mov rdi, r8
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [r9], rsi
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-280+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov r11, r10
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-288+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r11, 0
+ mov qword ptr [8+r9], rdi
+ mov rdi, r11
+ mov r11, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rcx, r11
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [16+r9], rsi
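+ ; the first factor's 32 limbs were copied to [-544+rbp]..[-296+rbp] and
+ ; the second factor's to [-288+rbp]..[-40+rbp] (256 bytes each, by the
+ ; two memcpy calls above); the column sums below accumulate exactly as
+ ; in s_mp_mul_comba_16, one block per output word of C starting at [r9]
+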
mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-520+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [24+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-512+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [32+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-504+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [40+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-496+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [48+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-488+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov 
qword ptr [56+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-480+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [64+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-472+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [72+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-464+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [80+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword 
ptr [-536+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-456+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [88+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-448+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [96+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 
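+ ; each mov/mul/add/adc/adc group adds one 64x64->128-bit partial
+ ; product into the running column sum; the trailing "adc reg, 0"
+ ; collects the carry into the column's third (overflow) word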
+ mov rax, qword ptr [-472+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-440+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [104+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-432+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [112+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-424+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [120+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-416+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [128+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-248+rbp] + 
add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-408+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [136+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-400+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [144+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul 
qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-392+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [152+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-384+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [160+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword 
ptr [-544+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-376+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [168+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 
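+ ; the remaining columns repeat this same pattern, finishing one output
+ ; word of C per block as the store offset advances by 8 each time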
+ mov rax, qword ptr [-448+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-368+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [176+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, 
rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-360+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [184+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-352+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [192+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-136+rbp] + add 
rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-344+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [200+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr 
[-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-336+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [208+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + 
mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-328+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [216+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr 
[-328+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-320+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [224+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-312+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [232+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
rax, qword ptr [-536+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-304+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [240+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 
0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [248+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [256+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [264+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-136+rbp] + add rsi, 
rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [272+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-176+rbp] + 
add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [280+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr 
[-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [288+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [296+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul 
qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [304+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + 
mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [312+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov 
r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [320+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [328+r9], rcx + mov rdi, r11 + mov r11, r10 + mov r10, r8 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-40+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-48+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-56+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-64+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-72+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-80+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-88+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-96+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-104+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-112+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword 
ptr [-376+rbp] + mul qword ptr [-120+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-128+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-136+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-144+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-152+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-160+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-168+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-176+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-184+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-192+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-200+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov qword ptr [336+r9], r11 + mov rsi, r10 + mov r10, r8 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-40+rbp] + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov rcx, r10 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-48+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-56+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-64+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-72+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-80+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-88+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-96+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-104+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-112+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-120+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-128+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-136+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-144+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-152+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-160+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-168+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-176+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-184+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov r11, rsi + mov r10, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-192+rbp] + add rdi, rax + adc r11, rdx + adc r10, 0 + mov qword ptr [344+r9], rdi + mov rcx, r11 + mov rdi, r10 + mov r11, r8 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc r11, 0 + mov rsi, r11 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-424+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [352+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + 
mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [360+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [368+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, 
rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [376+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [384+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [392+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov 
rax, qword ptr [-376+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [400+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [408+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov 
r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [416+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [424+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [432+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [440+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov 
rax, qword ptr [-320+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [448+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [456+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [464+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [472+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [480+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r11, rcx + mov r10, rdi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [488+r9], rsi + mov rcx, r10 + mov rsi, r11 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rsi, rdx + adc r8, 0 + mov qword ptr [496+r9], rcx + mov ecx, dword ptr [r12] + xor ecx, dword ptr [rbx] + test rsi, rsi + mov qword ptr [504+r9], rsi + mov dword ptr [8+r13], 64 + jne L149 + ALIGN 16 +L157: + mov edx, dword ptr [8+r13] + lea ebx, dword ptr [-1+rdx] + test ebx, ebx + mov dword ptr [8+r13], ebx + je L149 + lea r12d, dword ptr [-2+rdx] + cmp qword ptr [r9+r12*8], 0 + je L157 +L149: + mov r9d, dword ptr [8+r13] + xor edx, edx + 
test r9d, r9d + cmovne edx, ecx + mov dword ptr [r13], edx + add rsp, 520+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_mul_comba_32 ENDP + + +; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); + + ALIGN 16 +s_mp_sqr_comba_4 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + push rbx + sub rsp, 80 + mov r11, rsi + xor esi, esi + mov r10, rsi + mov rbp, rsi + mov r8, rsi + mov rbx, rsi + mov rcx, qword ptr [16+rdi] + mov rdi, rsi + mov rax, qword ptr [rcx] + mul rax + add r10, rax + adc rbx, rdx + adc rdi, 0 + mov qword ptr [-72+80+rsp], r10 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rdi, rdx + adc rbp, 0 + add rbx, rax + adc rdi, rdx + adc rbp, 0 + mov qword ptr [-64+80+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rdi, rax + adc rbp, rdx + adc r8, 0 + add rdi, rax + adc rbp, rdx + adc r8, 0 + mov rbx, rbp + mov rbp, r8 + mov rax, qword ptr [8+rcx] + mul rax + add rdi, rax + adc rbx, rdx + adc rbp, 0 + mov qword ptr [-56+80+rsp], rdi + mov r9, rbp + mov r8, rbx + mov rdi, rsi + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r8, rax + adc r9, rdx + adc rdi, 0 + add r8, rax + adc r9, rdx + adc rdi, 0 + mov rbx, r9 + mov rbp, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r8, rax + adc rbx, rdx + adc rbp, 0 + add r8, rax + adc rbx, rdx + adc rbp, 0 + mov qword ptr [-48+80+rsp], r8 + mov r9, rbp + mov rdi, rbx + mov r8, rsi + mov dword ptr [8+r11], 8 + mov dword ptr [r11], 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add rdi, rax + adc r9, rdx + adc r8, 0 + add rdi, rax + adc r9, rdx + adc r8, 0 + mov rbx, r9 + mov rbp, r8 + mov rax, qword ptr [16+rcx] + mul rax + add rdi, rax + adc rbx, rdx + adc rbp, 0 + mov rax, rbp + mov qword ptr [-40+80+rsp], rdi + mov rbp, rbx + mov rdi, rax + mov rbx, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add rbp, rax + adc rdi, rdx + adc rbx, 0 + add rbp, rax + adc rdi, rdx + adc rbx, 0 + mov qword ptr [-32+80+rsp], rbp + mov r9, rbx + mov rax, qword ptr [24+rcx] + mul rax + add rdi, rax + adc r9, rdx + adc rsi, 0 + mov rdx, qword ptr [16+r11] + mov qword ptr [-24+80+rsp], rdi + mov qword ptr [-16+80+rsp], r9 + mov qword ptr [rdx], r10 + mov r8, qword ptr [-64+80+rsp] + mov qword ptr [8+rdx], r8 + mov rbp, qword ptr [-56+80+rsp] + mov qword ptr [16+rdx], rbp + mov rdi, qword ptr [-48+80+rsp] + mov qword ptr [24+rdx], rdi + mov rsi, qword ptr [-40+80+rsp] + mov qword ptr [32+rdx], rsi + mov rbx, qword ptr [-32+80+rsp] + mov qword ptr [40+rdx], rbx + mov rcx, qword ptr [-24+80+rsp] + mov qword ptr [48+rdx], rcx + mov rax, qword ptr [-16+80+rsp] + mov qword ptr [56+rdx], rax + mov edx, dword ptr [8+r11] + test edx, edx + je L168 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r11] + mov r10d, ecx + cmp qword ptr [rsi+r10*8], 0 + jne L166 + mov edx, ecx + ALIGN 16 +L167: + test edx, edx + mov ecx, edx + je L171 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L167 + mov dword ptr [8+r11], ecx + mov edx, ecx +L166: + test edx, edx + je L168 + mov eax, dword ptr [r11] + jmp L169 + +L171: + mov dword ptr [8+r11], edx +L168: + xor eax, eax +L169: + add rsp, 80 + pop rbx + pop rbp + mov dword ptr [r11], eax + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_4 ENDP + + +; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B); + + ALIGN 16 +s_mp_sqr_comba_8 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + + push r14 + xor r9d, r9d 
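+; Annotation: r9 is zeroed here and copied into the working registers
+; below so each Comba column starts from a clean three-word accumulator
+; (low word : carry word : overflow word).  Column k then sums the
+; 128-bit products a[i]*a[j] with i+j == k, adding every cross term
+; (i != j) twice and the square term once, matching the portable
+; s_mp_sqr_comba_8 in mp_comba.c.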
+ mov r14, r9 + mov r10, r9 + push r13 + mov r13, r9 + push r12 + mov r12, r9 + push rbp + mov rbp, rsi + mov rsi, r9 + push rbx + mov rbx, r9 + sub rsp, 8+128 + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r14, rax + adc rbx, rdx + adc r12, 0 + mov qword ptr [-120+128+rsp], r14 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc r12, rdx + adc r10, 0 + add rbx, rax + adc r12, rdx + adc r10, 0 + mov qword ptr [-112+128+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add r12, rax + adc r10, rdx + adc r13, 0 + add r12, rax + adc r10, rdx + adc r13, 0 + mov rbx, r10 + mov r10, r13 + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul rax + add r12, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-104+128+rsp], r12 + mov rdi, r10 + mov r11, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r11, rax + adc rdi, rdx + adc rsi, 0 + add r11, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, rdi + mov r10, rsi + mov rdi, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r11, rax + adc rbx, rdx + adc r10, 0 + add r11, rax + adc rbx, rdx + adc r10, 0 + mov rsi, r9 + mov qword ptr [-96+128+rsp], r11 + mov r8, r10 + mov r12, rbx + mov r11, r9 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r12, rax + adc r8, rdx + adc r13, 0 + add r12, rax + adc r8, rdx + adc r13, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r12, rax + adc r8, rdx + adc r13, 0 + add r12, rax + adc r8, rdx + adc r13, 0 + mov rbx, r8 + mov r10, r13 + mov r8, r9 + mov rax, qword ptr [16+rcx] + mul rax + add r12, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-88+128+rsp], r12 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r11, rsi + add rbx, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-80+128+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-72+128+rsp], r10 + mov r10, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + add rbx, r8 + adc r10, rdi + adc rax, rsi + add rbx, r8 + adc r10, rdi + adc rax, rsi + mov qword ptr [-64+128+rsp], rbx + mov r11, rax + mov rbx, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, 
rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rsi, rbx + mov rdi, r13 + mov rbx, r11 + mov r13, r12 + mov r11, rsi + mov rax, qword ptr [32+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-56+128+rsp], r10 + mov r10, r9 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor r13, r13 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc r13, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc r13, 0 + mov r12, rdi + mov rax, r13 + add rbx, r8 + adc r11, r12 + adc r10, rax + add rbx, r8 + adc r11, r12 + adc r10, rax + mov qword ptr [-48+128+rsp], rbx + mov r12, r11 + mov rsi, r10 + mov rbx, r9 + mov r11, r9 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r12, rax + adc rsi, rdx + adc rbx, 0 + add r12, rax + adc rsi, rdx + adc rbx, 0 + mov r13, rbx + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r12, rax + adc rsi, rdx + adc r13, 0 + add r12, rax + adc rsi, rdx + adc r13, 0 + mov r10, rsi + mov rbx, r13 + mov r13, r9 + mov rax, qword ptr [40+rcx] + mul rax + add r12, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-40+128+rsp], r12 + mov r8, rbx + mov rdi, r10 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add rdi, rax + adc r8, rdx + adc r11, 0 + add rdi, rax + adc r8, rdx + adc r11, 0 + mov r10, r8 + mov rbx, r11 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add rdi, rax + adc r10, rdx + adc rbx, 0 + add rdi, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-32+128+rsp], rdi + mov rsi, rbx + mov r12, r10 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r12, rax + adc rsi, rdx + adc r13, 0 + add r12, rax + adc rsi, rdx + adc r13, 0 + mov r10, rsi + mov rbx, r13 + mov rax, qword ptr [48+rcx] + mul rax + add r12, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-24+128+rsp], r12 + mov rdi, r10 + mov rsi, rbx + mov r10, r9 + mov dword ptr [8+rbp], 16 + mov dword ptr [rbp], 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add rdi, rax + adc rsi, rdx + adc r10, 0 + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov qword ptr [-16+128+rsp], rdi + mov r8, r10 + mov rax, qword ptr [56+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r9, 0 + mov rax, qword ptr [16+rbp] + mov qword ptr [-8+128+rsp], rsi + mov qword ptr [128+rsp], r8 + mov qword ptr [rax], r14 + mov rbx, qword ptr [-112+128+rsp] + mov qword ptr [8+rax], rbx + mov rcx, qword ptr [-104+128+rsp] + mov qword ptr [16+rax], rcx + mov rdx, qword ptr [-96+128+rsp] + mov qword ptr [24+rax], rdx + mov r14, qword ptr [-88+128+rsp] + mov qword ptr [32+rax], r14 + mov r13, qword ptr [-80+128+rsp] + mov qword ptr [40+rax], r13 + mov r12, qword ptr [-72+128+rsp] + mov qword ptr [48+rax], r12 + mov r11, qword ptr [-64+128+rsp] + mov qword ptr [56+rax], r11 + mov r10, qword ptr [-56+128+rsp] + mov qword ptr [64+rax], r10 + mov r9, qword ptr [-48+128+rsp] + mov qword ptr [72+rax], r9 + mov r8, qword ptr [-40+128+rsp] + mov qword ptr [80+rax], r8 + mov rdi, qword ptr [-32+128+rsp] + mov qword ptr [88+rax], rdi + mov rsi, qword ptr [-24+128+rsp] + mov qword ptr [96+rax], rsi + mov rbx, qword ptr [-16+128+rsp] + mov qword ptr [104+rax], rbx + mov rcx, qword ptr [-8+128+rsp] + mov qword ptr [112+rax], rcx + mov rdx, qword ptr [128+rsp] + mov qword ptr [120+rax], rdx + mov edx, dword ptr [8+rbp] + test edx, edx + je L192 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+rbp] + mov r14d, 
ecx + cmp qword ptr [rsi+r14*8], 0 + jne L190 + mov edx, ecx + ALIGN 16 +L191: + test edx, edx + mov ecx, edx + je L195 + dec edx + mov r9d, edx + cmp qword ptr [rsi+r9*8], 0 + je L191 + mov dword ptr [8+rbp], ecx + mov edx, ecx +L190: + test edx, edx + je L192 + mov eax, dword ptr [rbp] + jmp L193 + +L195: + mov dword ptr [8+rbp], edx +L192: + xor eax, eax +L193: + mov dword ptr [rbp], eax + add rsp, 8+128 + pop rbx + pop rbp + pop r12 + pop r13 + pop r14 + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_8 ENDP + + +; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B) + + ALIGN 16 +s_mp_sqr_comba_16 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r9d, r9d + mov r8, r9 + mov r11, r9 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r9 + push r13 + mov r13, r9 + push r12 + mov r12, r9 + push rbx + mov rbx, r9 + sub rsp, 256+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-288+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-280+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r10, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-272+rbp], rsi + mov rdi, r10 + mov rsi, r9 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r10, rax + adc rdi, rdx + adc r11, 0 + add r10, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov rbx, r11 + mov rdi, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r10, rax + adc r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov r11, r9 + mov qword ptr [-264+rbp], r10 + mov r8, rbx + mov r13, r12 + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r10, r12 + mov r8, r9 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-256+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r11, rsi + add rbx, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-248+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-240+rbp], r10 + mov r10, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + 
mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-232+rbp], rbx + mov rbx, r9 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-224+rbp], r10 + mov rsi, r12 + mov r10, rbx + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r11, r8 + adc r10, rdi + adc r12, rsi + add r11, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-216+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc rbx, r13 + adc rax, r12 + add r10, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov qword ptr [-208+rbp], r10 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add r11, r8 + adc r10, rdi + adc rdx, rsi + add r11, r8 + adc r10, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-200+rbp], r11 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rdx, rdi + mov r11, rsi + add r10, r8 + adc r12, rdx + adc rax, r11 + add r10, r8 + adc r12, rdx + adc rax, r11 + mov rbx, rdx + mov r13, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-192+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-184+rbp], r12 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [56+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-168+rbp], r12 + mov r12, r13 + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add r8, rax + 
adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r10 + mov r11, r9 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r10, r13 + mov rbx, r9 + mov rax, qword ptr [24+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r11, rsi + add r12, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [72+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r10 + mov r10, r11 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword 
ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [80+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r10 + mov r10, r11 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r9 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [88+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r10 + mov r10, r11 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [96+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r10 + mov r10, r9 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r12, rdi + mov rax, rsi + mov rsi, r9 + add rbx, r8 + adc r11, r12 + adc r10, rax + add rbx, r8 + adc r11, r12 + adc r10, 
rax + mov r12, r9 + mov qword ptr [-88+rbp], rbx + mov r13, r11 + mov r11, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add r13, rax + adc r11, rdx + adc r12, 0 + add r13, rax + adc r11, rdx + adc r12, 0 + mov rdi, r12 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add r13, rax + adc r11, rdx + adc rdi, 0 + add r13, rax + adc r11, rdx + adc rdi, 0 + mov rbx, r11 + mov r10, rdi + mov r11, r9 + mov rax, qword ptr [104+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-80+rbp], r13 + mov r8, r10 + mov r10, rbx + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add r10, rax + adc r8, rdx + adc rsi, 0 + add r10, rax + adc r8, rdx + adc rsi, 0 + mov r12, r8 + mov rbx, rsi + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add r10, rax + adc r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov qword ptr [-72+rbp], r10 + mov r13, rbx + mov rbx, r12 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add rbx, rax + adc r13, rdx + adc r11, 0 + add rbx, rax + adc r13, rdx + adc r11, 0 + mov r12, r11 + mov r10, r13 + mov rax, qword ptr [112+rcx] + mul rax + add rbx, rax + adc r10, rdx + adc r12, 0 + mov qword ptr [-64+rbp], rbx + mov rdi, r10 + mov rbx, r9 + mov rsi, r12 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rdi, rax + adc rsi, rdx + adc rbx, 0 + add rdi, rax + adc rsi, rdx + adc rbx, 0 + mov qword ptr [-56+rbp], rdi + mov r8, rbx + mov rax, qword ptr [120+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r9, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 32 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-288+rbp] +; mov edx, 256 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-288+rbp] + mov r8d, 256 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L232 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r9d, ecx + cmp qword ptr [rsi+r9*8], 0 + jne L230 + mov edx, ecx + ALIGN 16 +L231: + test edx, edx + mov ecx, edx + je L235 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L231 + mov dword ptr [8+r14], ecx + mov edx, ecx +L230: + test edx, edx + je L232 + mov eax, dword ptr [r14] + jmp L233 + +L235: + mov dword ptr [8+r14], edx +L232: + xor eax, eax +L233: + mov dword ptr [r14], eax + add rsp, 256+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_16 ENDP + + +; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); + + ALIGN 16 +s_mp_sqr_comba_32 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r10d, r10d + mov r8, r10 + mov r11, r10 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r10 + push r13 + mov r13, r10 + push r12 + mov r12, r10 + push rbx + mov rbx, r10 + sub rsp, 512+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-544+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-536+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r9, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-528+rbp], rsi + mov rdi, r9 + mov rsi, r10 + mov r9, rbx + mov rax, qword ptr 
[rcx] + mul qword ptr [24+rcx] + add r9, rax + adc rdi, rdx + adc r11, 0 + add r9, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov r13, r11 + mov rdi, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r9, rax + adc r12, rdx + adc r13, 0 + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-520+rbp], r9 + mov r8, r13 + mov r13, r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r9, r12 + mov r8, r10 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-512+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r11, rsi + add rbx, r8 + adc r9, rdi + adc r11, rsi + mov qword ptr [-504+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-496+rbp], r9 + mov r9, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-488+rbp], rbx + mov rbx, r10 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-480+rbp], r9 + mov rsi, r12 + mov r9, rbx + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + 
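+; Annotation: r8:rdi:rsi now holds the one-sided sum of the cross
+; products for this column (a[i]*a[j] with i < j).  It is added into
+; the running column accumulator twice in a row below, which doubles
+; the cross terms without an explicit shift while the add/adc/adc
+; chain propagates both carry words.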
add r11, r8 + adc r9, rdi + adc r12, rsi + add r11, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-472+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rax, r12 + add r9, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov qword ptr [-464+rbp], r9 + mov r9, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add r11, r8 + adc r9, rdi + adc rdx, rsi + add r11, r8 + adc r9, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-456+rbp], r11 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, rdi + mov r11, rsi + add r9, r8 + adc r12, rax + adc r13, r11 + add r9, r8 + adc r12, rax + adc r13, r11 + mov rbx, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-448+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-440+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + 
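+; Annotation: rdx:rax = a[1]*a[13], the next cross product for this
+; column; the add/adc/adc that follows folds it into the r8:rdi:rsi
+; subtotal before the doubling step.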
add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r13 + mov rbx, rdi + mov r13, rsi + add r9, r8 + adc rdx, rbx + adc r12, r13 + add r9, r8 + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov rdi, rbx + mov rsi, r11 + mov rax, qword ptr [56+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-432+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-424+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [128+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-416+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [136+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr 
[96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-408+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [144+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [72+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-400+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [152+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-392+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [160+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + add 
rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [80+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-384+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [168+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-376+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [176+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [88+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-368+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [184+rcx] + mov r8, rax + mov rdi, rdx + xor 
rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov rdi, rdx + mov qword ptr [-360+rbp], r12 + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [192+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rbx, r8 + mov rax, rdi + add r9, rsi + adc r12, rbx + adc r13, rax + add r9, rsi + adc r12, rbx + adc r13, rax + mov r11, rax + mov r8, rbx + mov rax, qword ptr [96+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, r11 + mov qword ptr [-352+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [200+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [144+rcx] 
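+; Annotation: rdx:rax = a[7]*a[18]; the add/adc/adc below folds it
+; into the rsi:r8:rdi subtotal for this column before the subtotal is
+; doubled into the running accumulator.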
+ add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-344+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [208+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [104+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-336+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [216+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc 
r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-328+rbp], r12 + mov rax, qword ptr [rcx] + mul qword ptr [224+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [112+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-320+rbp], r9 + mov rbx, r13 + mov r9, r10 + mov rax, qword ptr [rcx] + mul qword ptr [232+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r9, rdi + add r12, rsi + adc rbx, r8 + adc r9, rdi + mov 
qword ptr [-312+rbp], r12 + mov r13, r9 + mov rax, qword ptr [rcx] + mul qword ptr [240+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov r11, r8 + mov rdx, rdi + add rbx, rsi + adc r13, r11 + adc rax, rdx + add rbx, rsi + adc r13, r11 + adc rax, rdx + mov r9, rdx + mov rdx, rax + mov r12, r13 + mov r8, r11 + mov r13, rdx + mov rdi, r9 + mov rax, qword ptr [120+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-304+rbp], rbx + mov rbx, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r13, rdi + add r12, rsi + adc rbx, r8 + adc r13, rdi + mov qword ptr 
[-296+rbp], r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [16+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov r11, r8 + mov rax, rdi + add rbx, rsi + adc r12, r11 + adc r13, rax + add rbx, rsi + adc r12, r11 + adc r13, rax + mov r9, rax + mov r8, r11 + mov rax, qword ptr [128+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov rdi, r9 + mov qword ptr [-288+rbp], rbx + mov r9, r13 + mov rax, qword ptr [16+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-280+rbp], r12 + mov r12, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword 
ptr [32+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [136+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-272+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [32+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [40+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-264+rbp], r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [48+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul 
qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [144+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-256+rbp], r9 + mov r9, r13 + mov rax, qword ptr [48+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [56+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r11, rdi + add r12, rsi + adc r9, r8 + adc r11, rdi + mov qword ptr [-248+rbp], r12 + mov r13, r11 + mov rax, qword ptr [56+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [64+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr 
[128+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov rdx, rsi + mov rbx, r8 + mov r12, rdi + add r9, rdx + adc r13, rbx + adc rax, r12 + add r9, rdx + adc r13, rbx + adc rax, r12 + mov r11, r12 + mov r8, rdx + mov rdx, rax + mov r12, r13 + mov rdi, rbx + mov r13, rdx + mov rsi, r11 + mov rax, qword ptr [152+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-240+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [64+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-232+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [72+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [80+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [160+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-224+rbp], r9 + mov r9, r13 + mov rax, qword ptr [80+rcx] + mul qword ptr [248+rcx] 
+ mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-216+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [96+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [168+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-208+rbp], r9 + mov r9, r13 + mov rax, qword ptr [96+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr 
[176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-200+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [112+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [176+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-192+rbp], r9 + mov r9, r13 + mov rax, qword ptr [112+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-184+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [128+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, 
rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [184+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r9 + mov r9, r13 + mov rax, qword ptr [128+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-168+rbp], r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [144+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, rdi + mov rax, rsi + add r9, r8 + adc r12, rbx + adc r13, rax + add r9, r8 + adc r12, rbx + adc r13, rax + mov r11, rax + mov rdi, rbx + mov rbx, r10 + mov rax, qword ptr [192+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r9 + mov r9, r13 + mov rax, qword ptr [144+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [152+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc rbx, rsi + add r12, r8 + adc r9, rdi + adc rbx, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [152+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [160+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rdx, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rdx, r12 + add r9, r8 + adc rbx, r13 + adc rdx, r12 + mov rax, rdx + mov rdi, r13 + mov rsi, r12 + mov r11, rax + mov r12, r10 + mov rax, qword ptr [200+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r9 + mov r9, r11 + mov rax, qword ptr [160+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [168+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [168+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [176+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [208+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r9 + mov r9, r11 + mov rax, qword ptr [176+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [184+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r10 + mov rax, qword ptr [184+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [192+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r10 + mov rax, qword ptr [216+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r9 + mov r9, r11 + mov rax, qword ptr [192+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [200+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr 
[232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [200+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [208+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov r12, r10 + mov rax, qword ptr [224+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r9 + mov r9, r10 + mov rax, qword ptr [208+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [216+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [224+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov rax, rsi + add rbx, r8 + adc r11, r13 + adc r9, rax + add rbx, r8 + adc r11, r13 + adc r9, rax + mov qword ptr [-88+rbp], rbx + mov rsi, r11 + mov r8, r9 + mov rax, qword ptr [216+rcx] + mul qword ptr [248+rcx] + add rsi, rax + adc r8, rdx + adc r12, 0 + add rsi, rax + adc r8, rdx + adc r12, 0 + mov r11, r12 + mov rax, qword ptr [224+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc r11, 0 + add rsi, rax + adc r8, rdx + adc r11, 0 + mov r13, r8 + mov rbx, r11 + mov rax, qword ptr [232+rcx] + mul rax + add rsi, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-80+rbp], rsi + mov r12, rbx + mov rdi, r13 + mov r13, r10 + mov rax, qword ptr [224+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc r12, rdx + adc r13, 0 + add rdi, rax + adc r12, rdx + adc r13, 0 + mov r9, r12 + mov r12, r13 + mov rax, qword ptr [232+rcx] + mul qword ptr [240+rcx] + add rdi, rax + adc r9, rdx + adc r12, 0 + add rdi, rax + adc r9, rdx + adc r12, 0 + mov qword ptr [-72+rbp], rdi + mov r11, r9 + mov rbx, r12 + mov r9, r10 + mov rax, qword ptr [232+rcx] + mul qword ptr [248+rcx] + add r11, rax + adc rbx, rdx + adc r9, 0 + add r11, rax + adc rbx, rdx + adc r9, 0 + mov r13, rbx + mov rbx, r9 + mov r9, r10 + mov rax, qword ptr [240+rcx] + mul rax + add r11, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-64+rbp], r11 + mov rdi, r13 + mov rsi, rbx + mov rax, qword ptr [240+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc rsi, rdx + adc r9, 0 + add rdi, rax + adc rsi, rdx + adc r9, 0 + mov qword ptr [-56+rbp], rdi + mov r8, r9 + mov rax, qword ptr [248+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r10, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 64 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-544+rbp] +; mov edx, 512 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-544+rbp] + mov r8d, 512 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L304 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r10d, ecx + cmp qword ptr [rsi+r10*8], 0 + jne L302 + mov edx, ecx + ALIGN 16 +L303: + test edx, edx + mov ecx, edx + je L307 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L303 + mov dword ptr [8+r14], ecx + mov edx, ecx +L302: + test edx, edx + je L304 + 
mov eax, dword ptr [r14] + jmp L305 + +L307: + mov dword ptr [8+r14], edx +L304: + xor eax, eax +L305: + mov dword ptr [r14], eax + add rsp, 512+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_32 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s new file mode 100644 index 0000000000..a5181df332 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s @@ -0,0 +1,16097 @@ +//* TomsFastMath, a fast ISO C bignum library. +/ * +/ * This project is meant to fill in where LibTomMath +/ * falls short. That is speed ;-) +/ * +/ * This project is public domain and free for all purposes. +/ * +/ * Tom St Denis, tomstdenis@iahu.ca +/ */ + +//* +/ * The source file from which this assembly was derived +/ * comes from TFM v0.03, which has the above license. +/ * This source was compiled with an unnamed compiler at +/ * the highest optimization level. Afterwards, the +/ * trailing .section was removed because it causes errors +/ * in the Studio 10 compiler on AMD 64. +/ */ + + .file "mp_comba.c" + .text + .align 16 +.globl s_mp_mul_comba_4 + .type s_mp_mul_comba_4, @function +s_mp_mul_comba_4: +.LFB2: + pushq %r12 +.LCFI0: + pushq %rbp +.LCFI1: + pushq %rbx +.LCFI2: + movq 16(%rdi), %r9 + movq %rdx, %rbx + movq 16(%rsi), %rdx + movq (%r9), %rax + movq %rax, -64(%rsp) + movq 8(%r9), %r8 + movq %r8, -56(%rsp) + movq 16(%r9), %rbp + movq %rbp, -48(%rsp) + movq 24(%r9), %r12 + movq %r12, -40(%rsp) + movq (%rdx), %rcx + movq %rcx, -32(%rsp) + movq 8(%rdx), %r10 + movq %r10, -24(%rsp) + movq 16(%rdx), %r11 + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %r11, -16(%rsp) + movq 16(%rbx), %r11 + movq 24(%rdx), %rax + movq %rax, -8(%rsp) +/APP + movq -64(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -64(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -56(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -64(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -56(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -48(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -64(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -40(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -56(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -40(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + 
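(Note on the technique these files implement: each comba routine computes one output limb, or "column", at a time. For column k it accumulates every cross product a[i]*b[j] with i + j == k into a three-limb running sum, held in registers such as rsi/r8/rdi above and propagated with add/adc/adc-$0 chains, then stores the low limb and shifts the sum down one limb before the next column. The sketch below is a minimal C illustration of that column walk for the 4-limb case; it is not the NSS source, and it assumes a GCC/Clang-style unsigned __int128 so the three-limb accumulator can be written compactly. The helper name comba_mul_4 is illustrative only.)

#include <stdint.h>

typedef unsigned __int128 u128;   /* GCC/Clang extension (assumption) */

/* Multiply two 4-limb numbers into an 8-limb result, comba style. */
static void comba_mul_4(const uint64_t a[4], const uint64_t b[4],
                        uint64_t out[8])
{
    u128 acc = 0;     /* low two limbs of the running column sum      */
    uint64_t c2 = 0;  /* third accumulator limb (the adc $0 target)   */

    for (int k = 0; k < 7; k++) {          /* output columns 0..6     */
        for (int i = 0; i < 4; i++) {
            int j = k - i;
            if (j < 0 || j > 3)
                continue;                  /* keep pairs with i+j == k */
            u128 p = (u128)a[i] * b[j];    /* 64x64 -> 128-bit product */
            acc += p;
            if (acc < p)                   /* carry out of 128 bits    */
                c2++;
        }
        out[k] = (uint64_t)acc;            /* emit the finished column */
        acc = (acc >> 64) | ((u128)c2 << 64); /* shift sum down a limb */
        c2 = 0;
    }
    out[7] = (uint64_t)acc;                /* top limb                 */
}

(The squaring routines, such as s_mp_sqr_comba_32 above, follow the same column walk but add each off-diagonal product twice, which is why every accumulation block in that listing appears duplicated, and then fold in the single diagonal term a[i]*a[i] via `mul rax`. The epilogue then fixes up the mp_int header: it copies the limbs out with memcpy, sets the used count, and trims leading zero limbs, clearing the sign when the result is zero.)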
movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -48(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq -40(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq -40(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 48(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 56(%r11) + movl $8, 8(%rbx) + jne .L9 + .align 16 +.L18: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L9 + leal -2(%rdx), %r10d + cmpq $0, (%r11,%r10,8) + je .L18 +.L9: + movl 8(%rbx), %edx + xorl %r11d, %r11d + testl %edx, %edx + cmovne %esi, %r11d + movl %r11d, (%rbx) + popq %rbx + popq %rbp + popq %r12 + ret +.LFE2: + .size s_mp_mul_comba_4, .-s_mp_mul_comba_4 + .align 16 +.globl s_mp_mul_comba_8 + .type s_mp_mul_comba_8, @function +s_mp_mul_comba_8: +.LFB3: + pushq %r12 +.LCFI3: + pushq %rbp +.LCFI4: + pushq %rbx +.LCFI5: + movq %rdx, %rbx + subq $8, %rsp +.LCFI6: + movq 16(%rdi), %rdx + movq (%rdx), %r8 + movq %r8, -120(%rsp) + movq 8(%rdx), %rbp + movq %rbp, -112(%rsp) + movq 16(%rdx), %r9 + movq %r9, -104(%rsp) + movq 24(%rdx), %r12 + movq %r12, -96(%rsp) + movq 32(%rdx), %rcx + movq %rcx, -88(%rsp) + movq 40(%rdx), %r10 + movq %r10, -80(%rsp) + movq 48(%rdx), %r11 + movq %r11, -72(%rsp) + movq 56(%rdx), %rax + movq 16(%rsi), %rdx + movq %rax, -64(%rsp) + movq (%rdx), %r8 + movq %r8, -56(%rsp) + movq 8(%rdx), %rbp + movq %rbp, -48(%rsp) + movq 16(%rdx), %r9 + movq %r9, -40(%rsp) + movq 24(%rdx), %r12 + movq %r12, -32(%rsp) + movq 32(%rdx), %rcx + movq %rcx, -24(%rsp) + movq 40(%rdx), %r10 + movq %r10, -16(%rsp) + movq 48(%rdx), %r11 + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %r11, -8(%rsp) + movq 16(%rbx), %r11 + movq 56(%rdx), %rax + movq %rax, (%rsp) +/APP + movq -120(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -120(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -112(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -120(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -112(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -104(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -96(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq 
$0,%rcx + + movq -104(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -88(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -80(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -72(%rsp),%rax + mulq -56(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 48(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 56(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -112(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -48(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 64(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -104(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 
-8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -40(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 72(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -96(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 80(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -88(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq -16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq -24(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 88(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -80(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq -8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -64(%rsp),%rax + mulq -16(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 96(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -72(%rsp),%rax + mulq (%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq -64(%rsp),%rax + mulq -8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 104(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq -64(%rsp),%rax + mulq (%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 112(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 120(%r11) + movl $16, 8(%rbx) + jne .L35 + .align 16 +.L43: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L35 + leal -2(%rdx), %eax + cmpq $0, (%r11,%rax,8) + je .L43 +.L35: + movl 8(%rbx), %r11d + xorl %edx, %edx + testl %r11d, %r11d + cmovne %esi, %edx + movl %edx, (%rbx) + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + ret +.LFE3: + .size s_mp_mul_comba_8, .-s_mp_mul_comba_8 + .align 16 +.globl s_mp_mul_comba_16 + .type s_mp_mul_comba_16, @function +s_mp_mul_comba_16: +.LFB4: + pushq %r12 +.LCFI7: + pushq %rbp +.LCFI8: + pushq %rbx +.LCFI9: + movq %rdx, %rbx + subq $136, %rsp +.LCFI10: + movq 16(%rdi), %rax + movq (%rax), %r8 + movq %r8, -120(%rsp) + movq 8(%rax), %rbp + movq %rbp, -112(%rsp) + movq 16(%rax), %r9 + movq %r9, -104(%rsp) + movq 24(%rax), %r12 + movq %r12, -96(%rsp) + movq 32(%rax), %rcx + movq %rcx, -88(%rsp) + movq 40(%rax), %r10 + movq %r10, -80(%rsp) + movq 48(%rax), %rdx + movq %rdx, -72(%rsp) + movq 56(%rax), %r11 + movq %r11, -64(%rsp) + movq 64(%rax), %r8 + movq %r8, 
-56(%rsp) + movq 72(%rax), %rbp + movq %rbp, -48(%rsp) + movq 80(%rax), %r9 + movq %r9, -40(%rsp) + movq 88(%rax), %r12 + movq %r12, -32(%rsp) + movq 96(%rax), %rcx + movq %rcx, -24(%rsp) + movq 104(%rax), %r10 + movq %r10, -16(%rsp) + movq 112(%rax), %rdx + movq %rdx, -8(%rsp) + movq 120(%rax), %r11 + movq %r11, (%rsp) + movq 16(%rsi), %r11 + movq (%r11), %r8 + movq %r8, 8(%rsp) + movq 8(%r11), %rbp + movq %rbp, 16(%rsp) + movq 16(%r11), %r9 + movq %r9, 24(%rsp) + movq 24(%r11), %r12 + movq %r12, 32(%rsp) + movq 32(%r11), %rcx + movq %rcx, 40(%rsp) + movq 40(%r11), %r10 + movq %r10, 48(%rsp) + movq 48(%r11), %rdx + movq %rdx, 56(%rsp) + movq 56(%r11), %rax + movq %rax, 64(%rsp) + movq 64(%r11), %r8 + movq %r8, 72(%rsp) + movq 72(%r11), %rbp + movq %rbp, 80(%rsp) + movq 80(%r11), %r9 + movq %r9, 88(%rsp) + movq 88(%r11), %r12 + movq %r12, 96(%rsp) + movq 96(%r11), %rcx + movq %rcx, 104(%rsp) + movq 104(%r11), %r10 + movq %r10, 112(%rsp) + movq 112(%r11), %rdx + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %rdx, 120(%rsp) + movq 120(%r11), %rax + movq %rax, 128(%rsp) + movq 16(%rbx), %r11 +/APP + movq -120(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -120(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -112(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -120(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -112(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -104(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -96(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -88(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, 
%r12 +/APP + movq -80(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -72(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 48(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 56(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -56(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 64(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq 
-48(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 72(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -40(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 80(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -32(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 88(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + 
movq -24(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 96(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -16(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 104(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -8(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 112(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 80(%rsp) + 
addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 120(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -112(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 128(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -104(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + 
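+/ Every /APP../NO_APP pair below is a compiler-emitted inline-asm fence;
+/ the leading slash makes these lines plain comments to this assembler.
+/ Inside each fence the same Comba step repeats: mulq leaves the 128-bit
+/ product a[i]*b[j] in %rdx:%rax, addq folds the low half into the
+/ column's low accumulator, adcq folds the high half plus the carry into
+/ the middle accumulator, and adcq $0 sweeps any remaining carry into
+/ the top accumulator.  A minimal C sketch of one such step, assuming
+/ 64-bit digits and hypothetical accumulator names c0/c1/c2:
+/
+/   unsigned __int128 p = (unsigned __int128)a[i] * b[j];
+/   mp_digit lo = (mp_digit)p, hi = (mp_digit)(p >> 64);
+/   c0 += lo; hi += (c0 < lo);   / high half <= 2^64-2, so no overflow
+/   c1 += hi; c2 += (c1 < hi);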
+/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 136(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -96(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 144(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -88(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 152(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -80(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + 
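+/ End of one output column: the final partial product is folded into the
+/ copies held in %rbp/%r12, the finished low word is stored to the next
+/ digit of the result at k(%r11), and the accumulators rotate down one
+/ place (middle -> low, top -> middle), with %r10, which is kept at
+/ zero, reloading the top accumulator for the next column.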
+/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 160(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -72(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 168(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -64(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 176(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -56(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 184(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -48(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq 
%rcx, %r12 +/APP + movq (%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 192(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -40(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 200(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -32(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 208(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -24(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 216(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -16(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 224(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -8(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq (%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 232(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq (%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 240(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 248(%r11) + movl $32, 8(%rbx) + jne .L76 + .align 16 +.L84: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L76 + leal -2(%rdx), %eax + cmpq $0, (%r11,%rax,8) + je .L84 +.L76: + movl 8(%rbx), %edx + xorl %r11d, %r11d + testl %edx, %edx + cmovne %esi, %r11d + movl %r11d, (%rbx) + addq $136, %rsp + popq %rbx + popq %rbp + popq %r12 + ret +.LFE4: + .size s_mp_mul_comba_16, .-s_mp_mul_comba_16 + .align 16 +.globl s_mp_mul_comba_32 + .type s_mp_mul_comba_32, @function +s_mp_mul_comba_32: +.LFB5: + pushq %rbp +.LCFI11: + movq %rsp, %rbp +.LCFI12: + pushq %r13 +.LCFI13: + movq %rdx, %r13 + movl $256, %edx + pushq %r12 +.LCFI14: + movq %rsi, %r12 + pushq %rbx +.LCFI15: + movq %rdi, %rbx + subq $520, %rsp 
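+/ s_mp_mul_comba_32 applies the same column scheme to 32-digit
+/ (2048-bit) operands.  The two memcpy calls that follow copy each
+/ operand's 256 bytes of digits into the local frame at -544(%rbp) and
+/ -288(%rbp), %r9 then receives the destination digit array (what
+/ appears to be the dp field at offset 16 of the mp_int), and %r8 is
+/ zeroed to seed each fresh top accumulator.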
+.LCFI16: + movq 16(%rdi), %rsi + leaq -544(%rbp), %rdi + call memcpy@PLT + movq 16(%r12), %rsi + leaq -288(%rbp), %rdi + movl $256, %edx + call memcpy@PLT + movq 16(%r13), %r9 + xorl %r8d, %r8d + movq %r8, %rsi + movq %r8, %rdi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %rsi, (%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %r11 +/APP + movq -536(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r11 + +/NO_APP + movq %rdi, 8(%r9) + movq %r11, %rdi + movq %r8, %r11 +/APP + movq -544(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rcx +/APP + movq -536(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -528(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 16(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -520(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 24(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -512(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 32(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -504(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 40(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + 
movq -496(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 48(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -488(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 56(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -480(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 64(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -472(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 72(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-504(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -464(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 80(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -456(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 88(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -448(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 96(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-520(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -440(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 104(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -432(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 112(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -424(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 120(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -416(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 128(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-448(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -408(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 136(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -400(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 144(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -392(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 152(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -384(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 160(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-504(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -376(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 168(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-384(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -368(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 176(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -360(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 184(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + 
adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -352(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 192(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -344(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 200(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -336(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 208(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq 
-96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -328(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 216(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -160(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -320(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 224(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx 
+ adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -312(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 232(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi 
+ adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -304(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 240(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 248(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -536(%rbp),%rax + mulq -40(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 256(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -528(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 264(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -520(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq 
$0,%rcx + + movq -408(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 272(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -512(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-336(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 280(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -504(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 288(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -496(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 296(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -488(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq 
$0,%rcx + + movq -376(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 304(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -480(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 312(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -472(%rbp),%rax + mulq -40(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 320(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -464(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 328(%r9) + movq %r11, %rdi + movq %r10, %r11 + movq %r8, %r10 +/APP + movq -456(%rbp),%rax + mulq -40(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -448(%rbp),%rax + mulq -48(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -440(%rbp),%rax + mulq -56(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -432(%rbp),%rax + mulq -64(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -424(%rbp),%rax + mulq -72(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -416(%rbp),%rax + mulq -80(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -408(%rbp),%rax + mulq -88(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -400(%rbp),%rax + mulq -96(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -392(%rbp),%rax + mulq -104(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -384(%rbp),%rax + mulq -112(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -376(%rbp),%rax + mulq -120(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -368(%rbp),%rax + mulq -128(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -360(%rbp),%rax + mulq -136(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -352(%rbp),%rax + mulq -144(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -344(%rbp),%rax + mulq -152(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -336(%rbp),%rax + mulq -160(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -328(%rbp),%rax + mulq -168(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -320(%rbp),%rax + mulq -176(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -312(%rbp),%rax + mulq -184(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -304(%rbp),%rax + mulq -192(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -296(%rbp),%rax + mulq -200(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %r11, 336(%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -448(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %rcx +/APP + movq -440(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + 
adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + +/NO_APP + movq %rsi, %r11 + movq %rcx, %r10 +/APP + movq -296(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rdi + adcq %rdx,%r11 + adcq $0,%r10 + +/NO_APP + movq %rdi, 344(%r9) + movq %r11, %rcx + movq %r10, %rdi + movq %r8, %r11 +/APP + movq -440(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rsi +/APP + movq -432(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 352(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -432(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-400(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 360(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -424(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 368(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -416(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + 
adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 376(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -408(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 384(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -400(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -112(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 392(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -392(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 400(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -384(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 408(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -376(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 416(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -368(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 424(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -360(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 432(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -352(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 440(%r9) + movq %r11, 
%rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -344(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 448(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -336(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 456(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -328(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 464(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -320(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 472(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -312(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 480(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -304(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rcx, %r11 + movq %rdi, %r10 +/APP + movq -296(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 488(%r9) + movq %r10, %rcx + movq %r11, %rsi +/APP + movq -296(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rsi + adcq $0,%r8 + +/NO_APP + movq %rcx, 496(%r9) + movl (%r12), %ecx + xorl (%rbx), %ecx + testq %rsi, %rsi + movq %rsi, 504(%r9) + movl $64, 8(%r13) 
+ jne .L149 + .align 16 +.L157: + movl 8(%r13), %edx + leal -1(%rdx), %ebx + testl %ebx, %ebx + movl %ebx, 8(%r13) + je .L149 + leal -2(%rdx), %r12d + cmpq $0, (%r9,%r12,8) + je .L157 +.L149: + movl 8(%r13), %r9d + xorl %edx, %edx + testl %r9d, %r9d + cmovne %ecx, %edx + movl %edx, (%r13) + addq $520, %rsp + popq %rbx + popq %r12 + popq %r13 + leave + ret +.LFE5: + .size s_mp_mul_comba_32, .-s_mp_mul_comba_32 + .align 16 +.globl s_mp_sqr_comba_4 + .type s_mp_sqr_comba_4, @function +s_mp_sqr_comba_4: +.LFB6: + pushq %rbp +.LCFI17: + movq %rsi, %r11 + xorl %esi, %esi + movq %rsi, %r10 + movq %rsi, %rbp + movq %rsi, %r8 + pushq %rbx +.LCFI18: + movq %rsi, %rbx + movq 16(%rdi), %rcx + movq %rsi, %rdi +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%rdi + +/NO_APP + movq %r10, -72(%rsp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rdi + adcq $0,%rbp + addq %rax,%rbx + adcq %rdx,%rdi + adcq $0,%rbp + +/NO_APP + movq %rbx, -64(%rsp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rdi + adcq %rdx,%rbp + adcq $0,%r8 + addq %rax,%rdi + adcq %rdx,%rbp + adcq $0,%r8 + +/NO_APP + movq %rbp, %rbx + movq %r8, %rbp +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %rdi, -56(%rsp) + movq %rbp, %r9 + movq %rbx, %r8 + movq %rsi, %rdi +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rdi + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rdi + +/NO_APP + movq %r9, %rbx + movq %rdi, %rbp +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rbp + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %r8, -48(%rsp) + movq %rbp, %r9 + movq %rbx, %rdi + movq %rsi, %r8 + movl $8, 8(%r11) + movl $0, (%r11) +/APP + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r8 + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbx + movq %r8, %rbp +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %rbp, %rax + movq %rdi, -40(%rsp) + movq %rbx, %rbp + movq %rax, %rdi + movq %rsi, %rbx +/APP + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%rbp + adcq %rdx,%rdi + adcq $0,%rbx + addq %rax,%rbp + adcq %rdx,%rdi + adcq $0,%rbx + +/NO_APP + movq %rbp, -32(%rsp) + movq %rbx, %r9 +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%rsi + +/NO_APP + movq 16(%r11), %rdx + movq %rdi, -24(%rsp) + movq %r9, -16(%rsp) + movq %r10, (%rdx) + movq -64(%rsp), %r8 + movq %r8, 8(%rdx) + movq -56(%rsp), %rbp + movq %rbp, 16(%rdx) + movq -48(%rsp), %rdi + movq %rdi, 24(%rdx) + movq -40(%rsp), %rsi + movq %rsi, 32(%rdx) + movq -32(%rsp), %rbx + movq %rbx, 40(%rdx) + movq -24(%rsp), %rcx + movq %rcx, 48(%rdx) + movq -16(%rsp), %rax + movq %rax, 56(%rdx) + movl 8(%r11), %edx + testl %edx, %edx + je .L168 + leal -1(%rdx), %ecx + movq 16(%r11), %rsi + mov %ecx, %r10d + cmpq $0, (%rsi,%r10,8) + jne .L166 + movl %ecx, %edx + .align 16 +.L167: + testl %edx, %edx + movl %edx, %ecx + je .L171 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L167 + movl %ecx, 8(%r11) + movl %ecx, %edx +.L166: + testl %edx, %edx + je .L168 + popq %rbx + popq %rbp + movl (%r11), %eax + movl %eax, (%r11) + ret +.L171: + movl %edx, 8(%r11) + .align 16 +.L168: + popq %rbx + popq %rbp + xorl %eax, %eax + movl %eax, (%r11) + ret +.LFE6: + .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4 + .align 16 +.globl s_mp_sqr_comba_8 + .type s_mp_sqr_comba_8, @function 
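+/ s_mp_sqr_comba_8: Comba squaring of an 8-limb operand.  Each output
+/ column accumulates the partial products a[i]*a[j] with i+j == k in a
+/ three-register (low/high/carry) chain; cross products (i != j) are
+/ added twice and each diagonal square a[i]*a[i] once, then the 16
+/ result limbs are flushed from the stack into the destination mp_int.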
+s_mp_sqr_comba_8: +.LFB7: + pushq %r14 +.LCFI19: + xorl %r9d, %r9d + movq %r9, %r14 + movq %r9, %r10 + pushq %r13 +.LCFI20: + movq %r9, %r13 + pushq %r12 +.LCFI21: + movq %r9, %r12 + pushq %rbp +.LCFI22: + movq %rsi, %rbp + movq %r9, %rsi + pushq %rbx +.LCFI23: + movq %r9, %rbx + subq $8, %rsp +.LCFI24: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r14 + adcq %rdx,%rbx + adcq $0,%r12 + +/NO_APP + movq %r14, -120(%rsp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r10 + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r10 + +/NO_APP + movq %rbx, -112(%rsp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%r13 + +/NO_APP + movq %r10, %rbx + movq %r13, %r10 + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r12, -104(%rsp) + movq %r10, %rdi + movq %rbx, %r11 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%rsi + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rbx + movq %rsi, %r10 + movq %r9, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r10 + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r9, %rsi + movq %r11, -96(%rsp) + movq %r10, %r8 + movq %rbx, %r12 + movq %r9, %r11 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + +/NO_APP + movq %r8, %rbx + movq %r13, %r10 + movq %r9, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r12, -88(%rsp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -80(%rsp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -72(%rsp) + movq %r11, %r10 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rax + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rax + +/NO_APP + movq %rbx, -64(%rsp) + movq %rax, %r11 + movq %r9, %rbx +/APP + movq 8(%rcx),%rax + mulq 56(%rcx) 
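+/ Wide columns (three or more cross products) are summed once into the
+/ scratch triple %r8/%rdi/%rsi, and that triple is then added into the
+/ column accumulators twice, doubling every a[i]*a[j] in one pass.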
+ movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rsi + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %r13 + movq %rsi, %r11 +/APP + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -56(%rsp) + movq %r9, %r10 +/APP + movq 16(%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %r13,%r13 + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%r13 + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%r13 + +/NO_APP + movq %rdi, %r12 + movq %r13, %rax +/APP + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + +/NO_APP + movq %rbx, -48(%rsp) + movq %r11, %r12 + movq %r10, %rsi + movq %r9, %rbx + movq %r9, %r11 +/APP + movq 24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%rbx + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%rbx + +/NO_APP + movq %rbx, %r13 +/APP + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + +/NO_APP + movq %rsi, %r10 + movq %r13, %rbx + movq %r9, %r13 +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %r12, -40(%rsp) + movq %rbx, %r8 + movq %r10, %rdi +/APP + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%rdi + adcq %rdx,%r8 + adcq $0,%r11 + addq %rax,%rdi + adcq %rdx,%r8 + adcq $0,%r11 + +/NO_APP + movq %r8, %r10 + movq %r11, %rbx +/APP + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%rdi + adcq %rdx,%r10 + adcq $0,%rbx + addq %rax,%rdi + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %rdi, -32(%rsp) + movq %rbx, %rsi + movq %r10, %r12 +/APP + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + +/NO_APP + movq %rsi, %r10 + movq %r13, %rbx +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %r12, -24(%rsp) + movq %r10, %rdi + movq %rbx, %rsi + movq %r9, %r10 + movl $16, 8(%rbp) + movl $0, (%rbp) +/APP + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %rdi, -16(%rsp) + movq %r10, %r8 +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r9 + +/NO_APP + movq 16(%rbp), %rax + movq %rsi, -8(%rsp) + movq %r8, (%rsp) + movq %r14, (%rax) + movq -112(%rsp), %rbx + movq %rbx, 8(%rax) + movq -104(%rsp), %rcx + movq %rcx, 16(%rax) + movq -96(%rsp), %rdx + movq %rdx, 24(%rax) + movq -88(%rsp), %r14 + movq %r14, 32(%rax) + movq -80(%rsp), %r13 + movq %r13, 40(%rax) + movq -72(%rsp), %r12 + movq %r12, 48(%rax) + movq -64(%rsp), %r11 + movq %r11, 56(%rax) + movq -56(%rsp), %r10 + movq %r10, 64(%rax) + movq -48(%rsp), %r9 + movq %r9, 72(%rax) + movq -40(%rsp), %r8 + movq %r8, 80(%rax) + movq -32(%rsp), %rdi + movq %rdi, 88(%rax) + movq -24(%rsp), %rsi + movq %rsi, 96(%rax) + movq -16(%rsp), %rbx + movq %rbx, 104(%rax) + movq -8(%rsp), %rcx + movq %rcx, 112(%rax) + movq (%rsp), %rdx + movq %rdx, 120(%rax) + movl 8(%rbp), %edx + testl %edx, %edx + 
je .L192 + leal -1(%rdx), %ecx + movq 16(%rbp), %rsi + mov %ecx, %r14d + cmpq $0, (%rsi,%r14,8) + jne .L190 + movl %ecx, %edx + .align 16 +.L191: + testl %edx, %edx + movl %edx, %ecx + je .L195 + decl %edx + mov %edx, %r9d + cmpq $0, (%rsi,%r9,8) + je .L191 + movl %ecx, 8(%rbp) + movl %ecx, %edx +.L190: + testl %edx, %edx + je .L192 + movl (%rbp), %eax + movl %eax, (%rbp) + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + ret +.L195: + movl %edx, 8(%rbp) + .align 16 +.L192: + xorl %eax, %eax + movl %eax, (%rbp) + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + ret +.LFE7: + .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8 + .align 16 +.globl s_mp_sqr_comba_16 + .type s_mp_sqr_comba_16, @function +s_mp_sqr_comba_16: +.LFB8: + pushq %rbp +.LCFI25: + xorl %r9d, %r9d + movq %r9, %r8 + movq %r9, %r11 + movq %rsp, %rbp +.LCFI26: + pushq %r14 +.LCFI27: + movq %rsi, %r14 + movq %r9, %rsi + pushq %r13 +.LCFI28: + movq %r9, %r13 + pushq %r12 +.LCFI29: + movq %r9, %r12 + pushq %rbx +.LCFI30: + movq %r9, %rbx + subq $256, %rsp +.LCFI31: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rsi + +/NO_APP + movq %r8, -288(%rbp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + +/NO_APP + movq %rbx, -280(%rbp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %rbx + movq %r13, %r10 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %rsi, -272(%rbp) + movq %r10, %rdi + movq %r9, %rsi + movq %rbx, %r10 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r10 + adcq %rdx,%rdi + adcq $0,%r11 + addq %rax,%r10 + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %rdi, %r12 + movq %r11, %rbx + movq %r9, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r9, %r11 + movq %r10, -264(%rbp) + movq %rbx, %r8 + movq %r12, %r13 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r10 + movq %r9, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -256(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -248(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + 
movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -240(%rbp) + movq %r11, %r10 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -232(%rbp) + movq %r9, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r10, -224(%rbp) + movq %r12, %rsi + movq %rbx, %r10 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -216(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r10, -208(%rbp) + movq %rbx, %r10 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP 
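+/ The compiler-generated code between /APP blocks only spills finished
+/ result limbs to the stack and renames the three column accumulators
+/ into the registers the next inline-asm column expects; all of the
+/ multiply/accumulate arithmetic happens inside the /APP regions.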
+ movq %rdx, %r13 + movq %r11, -200(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rdx + movq %rsi, %r11 +/APP + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + +/NO_APP + movq %rdx, %rbx + movq %rax, %r13 + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r10, -192(%rbp) + movq %r13, %r10 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -176(%rbp) + movq %r13, %r10 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq 
%rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 +/APP + movq 8(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 64(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -160(%rbp) + movq %r9, %r11 +/APP + movq 16(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r13, %r10 + movq %r9, %rbx +/APP + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 24(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -144(%rbp) + movq %r11, %r10 +/APP + movq 32(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, 
-136(%rbp) + movq %r12, %r11 +/APP + movq 40(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -128(%rbp) + movq %r11, %r10 +/APP + movq 48(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r9, %rbx +/APP + movq 56(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -112(%rbp) + movq %r11, %r10 +/APP + movq 64(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 72(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -96(%rbp) + movq %r9, %r10 +/APP + movq 80(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 
+ movq %rdx,%rdi + xorq %rsi,%rsi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r12 + movq %rsi, %rax + movq %r9, %rsi +/APP + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + +/NO_APP + movq %r9, %r12 + movq %rbx, -88(%rbp) + movq %r11, %r13 + movq %r10, %r11 +/APP + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + +/NO_APP + movq %r12, %rdi +/APP + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + +/NO_APP + movq %r11, %rbx + movq %rdi, %r10 + movq %r9, %r11 +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -80(%rbp) + movq %r10, %r8 + movq %rbx, %r10 +/APP + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + +/NO_APP + movq %r8, %r12 + movq %rsi, %rbx +/APP + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r10, -72(%rbp) + movq %rbx, %r13 + movq %r12, %rbx +/APP + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + +/NO_APP + movq %r11, %r12 + movq %r13, %r10 +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r10 + adcq $0,%r12 + +/NO_APP + movq %rbx, -64(%rbp) + movq %r10, %rdi + movq %r9, %rbx + movq %r12, %rsi +/APP + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + +/NO_APP + movq %rdi, -56(%rbp) + movq %rbx, %r8 +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r9 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq -288(%rbp), %rsi + movl $256, %edx + movq %r8, -40(%rbp) + movl $32, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L232 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r9d + cmpq $0, (%rsi,%r9,8) + jne .L230 + movl %ecx, %edx + .align 16 +.L231: + testl %edx, %edx + movl %edx, %ecx + je .L235 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L231 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L230: + testl %edx, %edx + je .L232 + movl (%r14), %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L235: + movl %edx, 8(%r14) + .align 16 +.L232: + xorl %eax, %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE8: + .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16 + .align 16 +.globl s_mp_sqr_comba_32 + .type s_mp_sqr_comba_32, @function +s_mp_sqr_comba_32: +.LFB9: + pushq %rbp +.LCFI32: + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r11 + movq %rsp, %rbp +.LCFI33: + pushq %r14 +.LCFI34: + movq %rsi, %r14 + movq %r10, %rsi + pushq %r13 +.LCFI35: + movq %r10, %r13 + pushq %r12 +.LCFI36: + movq %r10, %r12 + pushq %rbx +.LCFI37: + movq %r10, %rbx + subq $512, %rsp +.LCFI38: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rsi + +/NO_APP + movq %r8, -544(%rbp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq 
%rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + +/NO_APP + movq %rbx, -536(%rbp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %rbx + movq %r13, %r9 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rsi, -528(%rbp) + movq %r9, %rdi + movq %r10, %rsi + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + addq %rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %rdi, %r12 + movq %r11, %r13 + movq %r10, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -520(%rbp) + movq %r13, %r8 + movq %r12, %r13 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r9 + movq %r10, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %r13, -512(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -504(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -496(%rbp) + movq %r11, %r9 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -488(%rbp) + movq %r10, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq 
%rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r9, -480(%rbp) + movq %r12, %rsi + movq %rbx, %r9 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -472(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r9, -464(%rbp) + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r13 + movq %r11, -456(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rax + movq %rsi, %r11 +/APP + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + +/NO_APP + movq %rax, %rbx + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r9, -448(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq 
%rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -440(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r13, %rdx + movq %rdi, %rbx + movq %rsi, %r13 +/APP + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %rdi + movq %r11, %rsi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -432(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -424(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 128(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 64(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq 
%r11, %rsi + movq %r9, -416(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 136(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -408(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 144(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -400(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 152(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -392(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, 
%r13 +/APP + movq (%rcx),%rax + mulq 160(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -384(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 168(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -376(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 176(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + 
movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -368(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 184(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %rdi + movq %r12, -360(%rbp) + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 192(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax +/APP + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %r8 +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rdi + movq %r9, -352(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 200(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq 
$0,%rdi + + movq 32(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -344(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 208(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -336(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 216(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 
128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -328(%rbp) +/APP + movq (%rcx),%rax + mulq 224(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -320(%rbp) + movq %r13, %rbx + movq %r10, %r9 +/APP + movq (%rcx),%rax + mulq 232(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + +/NO_APP + movq %r12, -312(%rbp) + movq %r9, %r13 +/APP + 
movq (%rcx),%rax + mulq 240(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %r8, %r11 + movq %rdi, %rdx +/APP + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r9 + movq %rax, %rdx + movq %r13, %r12 + movq %r11, %r8 + movq %rdx, %r13 + movq %r9, %rdi +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, -304(%rbp) + movq %r13, %rbx + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + +/NO_APP + movq %r12, -296(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 16(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 
24(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %r11 + movq %rdi, %rax +/APP + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r9 + movq %r11, %r8 +/APP + movq 128(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, %rdi + movq %rbx, -288(%rbp) + movq %r13, %r9 +/APP + movq 16(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 24(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -280(%rbp) + movq %r10, %r12 +/APP + movq 24(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 32(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 
72(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 136(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -272(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq 32(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 40(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -264(%rbp) +/APP + movq 40(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 48(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 160(%rcx) + addq 
%rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 144(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -256(%rbp) + movq %r13, %r9 +/APP + movq 48(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 56(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + +/NO_APP + movq %r12, -248(%rbp) + movq %r11, %r13 +/APP + movq 56(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 64(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %rsi, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + +/NO_APP + movq %r12, %r11 + movq %rdx, %r8 + movq %rax, %rdx + movq %r13, %r12 + movq %rbx, %rdi + movq %rdx, %r13 + movq %r11, %rsi +/APP + movq 152(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -240(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq 64(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 
72(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -232(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 72(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 80(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 160(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -224(%rbp) + movq %r13, %r9 +/APP + movq 80(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq 
$0,%rsi + + movq 160(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -216(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 96(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 168(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -208(%rbp) + movq %r13, %r9 +/APP + movq 96(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -200(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 112(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + 
mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 176(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -192(%rbp) + movq %r13, %r9 +/APP + movq 112(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 128(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 176(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 184(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -176(%rbp) + movq %r13, %r9 +/APP + movq 128(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 
+ adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 144(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rbx + movq %rsi, %rax +/APP + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %rdi + movq %r10, %rbx +/APP + movq 192(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -160(%rbp) + movq %r13, %r9 +/APP + movq 144(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 152(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 152(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 160(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rax + movq %r13, %rdi + movq %r12, %rsi + movq %rax, %r11 + movq %r10, %r12 +/APP + movq 200(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -144(%rbp) + movq %r11, %r9 +/APP + movq 160(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 168(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -136(%rbp) + movq 
%r12, %r11 +/APP + movq 168(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 176(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 208(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -128(%rbp) + movq %r11, %r9 +/APP + movq 176(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 184(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r10, %rbx +/APP + movq 184(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 192(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r10, %r12 +/APP + movq 216(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -112(%rbp) + movq %r11, %r9 +/APP + movq 192(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 200(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 200(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 208(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi + movq %r10, %r12 +/APP + movq 224(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -96(%rbp) + movq %r10, %r9 +/APP + movq 208(%rcx),%rax + mulq 
248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 216(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 224(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %rax +/APP + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + +/NO_APP + movq %rbx, -88(%rbp) + movq %r11, %rsi + movq %r9, %r8 +/APP + movq 216(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r12, %r11 +/APP + movq 224(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + +/NO_APP + movq %r8, %r13 + movq %r11, %rbx +/APP + movq 232(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %rsi, -80(%rbp) + movq %rbx, %r12 + movq %r13, %rdi + movq %r10, %r13 +/APP + movq 224(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %r9 + movq %r13, %r12 +/APP + movq 232(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %rdi, -72(%rbp) + movq %r9, %r11 + movq %r12, %rbx + movq %r10, %r9 +/APP + movq 232(%rcx),%rax + mulq 248(%rcx) + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rbx, %r13 + movq %r9, %rbx + movq %r10, %r9 +/APP + movq 240(%rcx),%rax + mulq %rax + addq %rax,%r11 + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %r11, -64(%rbp) + movq %r13, %rdi + movq %rbx, %rsi +/APP + movq 240(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + +/NO_APP + movq %rdi, -56(%rbp) + movq %r9, %r8 +/APP + movq 248(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r10 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq -544(%rbp), %rsi + movl $512, %edx + movq %r8, -40(%rbp) + movl $64, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L304 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r10d + cmpq $0, (%rsi,%r10,8) + jne .L302 + movl %ecx, %edx + .align 16 +.L303: + testl %edx, %edx + movl %edx, %ecx + je .L307 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L303 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L302: + testl %edx, %edx + je .L304 + movl (%r14), %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L307: + movl %edx, 8(%r14) + .align 16 +.L304: + xorl %eax, %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE9: + .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32 diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h new file mode 100644 index 0000000000..5be4da4bf2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h @@ -0,0 +1,73 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
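The unrolled s_mp_sqr_comba_32 assembly above follows the classic Comba squaring pattern: for each output column k it sums the cross products a[i]*a[j] with i+j = k and i < j into a three-word accumulator (the rsi/r8/rdi triple), doubles that sum (the repeated addq/adcq runs), and folds in the diagonal square a[k/2]*a[k/2] on even columns. A minimal C sketch of the same column structure, not code from this patch; it uses 32-bit digits and the illustrative names mp_digit32/sqr_comba:

#include <stdint.h>

typedef uint32_t mp_digit32; /* 32-bit digits keep the sketch short */

static void
sqr_comba(const mp_digit32 *a, mp_digit32 *r, int n)
{
    uint64_t c = 0;  /* low two words of the running accumulator        */
    uint32_t c2 = 0; /* third word (the %rdi/%r13 role in the assembly) */
    int i, k;

    for (k = 0; k <= 2 * n - 2; k++) {
        uint64_t sum = 0;
        uint32_t top = 0;
        for (i = (k < n) ? 0 : k - n + 1; 2 * i < k; i++) {
            uint64_t p = (uint64_t)a[i] * a[k - i]; /* cross product */
            sum += p;
            top += (sum < p);        /* carry into the third word    */
        }
        top = 2 * top + (uint32_t)(sum >> 63); /* double the cross sum, */
        sum <<= 1;                             /* as the paired adds do */
        if ((k & 1) == 0) {                    /* even column: diagonal */
            uint64_t p = (uint64_t)a[k / 2] * a[k / 2];
            sum += p;
            top += (sum < p);
        }
        c += sum;
        c2 += top + (c < sum);
        r[k] = (mp_digit32)c;                 /* emit column k          */
        c = (c >> 32) | ((uint64_t)c2 << 32); /* shift the accumulator  */
        c2 = 0;
    }
    r[2 * n - 1] = (mp_digit32)c;
}

The assembly simply unrolls both loops completely for n = 32 and keeps the accumulator in registers throughout.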
*/ + +#ifndef _MP_GF2M_PRIV_H_ +#define _MP_GF2M_PRIV_H_ + +#include "mpi-priv.h" + +extern const mp_digit mp_gf2m_sqr_tb[16]; + +#if defined(MP_USE_UINT_DIGIT) +#define MP_DIGIT_BITS 32 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 5 +#define MP_DIGIT_BITS_MASK 0x1f +#else +#define MP_DIGIT_BITS 64 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 6 +#define MP_DIGIT_BITS_MASK 0x3f +#endif + +/* Platform-specific macros for fast binary polynomial squaring. */ +#if MP_DIGIT_BITS == 32 +#define gf2m_SQR1(w) \ + mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] +#define gf2m_SQR0(w) \ + mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF] +#else +#define gf2m_SQR1(w) \ + mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \ + mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \ + mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF] +#define gf2m_SQR0(w) \ + mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \ + mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \ + mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF] +#endif + +/* Multiply two binary polynomials mp_digits a, b. + * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1. + * Output in two mp_digits rh, rl. + */ +void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b); + +/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0) + * result is a binary polynomial in 4 mp_digits r[4]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1, + const mp_digit b0); + +/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0) + * result is a binary polynomial in 6 mp_digits r[6]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0); + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0); + +#endif /* _MP_GF2M_PRIV_H_ */ diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c new file mode 100644 index 0000000000..878b7cae8c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.c @@ -0,0 +1,677 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
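The gf2m_SQR0/gf2m_SQR1 macros above work because squaring over GF(2) has no cross terms: (sum a_i t^i)^2 = sum a_i t^(2i), so squaring a word just interleaves zero bits between the original bits, and mp_gf2m_sqr_tb does that one nibble at a time. A small self-check of that claim, with spread4 an illustrative helper of mine, not part of the patch:

#include <assert.h>

/* Spread the four bits of a nibble apart, which is exactly what each
 * mp_gf2m_sqr_tb[] entry encodes: squaring a GF(2) polynomial doubles
 * every exponent, i.e. interleaves zeros. */
static unsigned
spread4(unsigned nib)
{
    unsigned r = 0;
    int i;
    for (i = 0; i < 4; i++)
        r |= ((nib >> i) & 1u) << (2 * i);
    return r;
}

int
main(void)
{
    static const unsigned tb[16] = { 0, 1, 4, 5, 16, 17, 20, 21,
                                     64, 65, 68, 69, 80, 81, 84, 85 };
    unsigned n;
    for (n = 0; n < 16; n++)
        assert(spread4(n) == tb[n]); /* table == bit interleaving */
    return 0;
}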
*/ + +#include "mp_gf2m.h" +#include "mp_gf2m-priv.h" +#include "mplogic.h" +#include "mpi-priv.h" + +const mp_digit mp_gf2m_sqr_tb[16] = { + 0, 1, 4, 5, 16, 17, 20, 21, + 64, 65, 68, 69, 80, 81, 84, 85 +}; + +/* Multiply two binary polynomials mp_digits a, b. + * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1. + * Output in two mp_digits rh, rl. + */ +#if MP_DIGIT_BITS == 32 +void +s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b) +{ + register mp_digit h, l, s; + mp_digit tab[8], top2b = a >> 30; + register mp_digit a1, a2, a4; + + a1 = a & (0x3FFFFFFF); + a2 = a1 << 1; + a4 = a2 << 1; + + tab[0] = 0; + tab[1] = a1; + tab[2] = a2; + tab[3] = a1 ^ a2; + tab[4] = a4; + tab[5] = a1 ^ a4; + tab[6] = a2 ^ a4; + tab[7] = a1 ^ a2 ^ a4; + + s = tab[b & 0x7]; + l = s; + s = tab[b >> 3 & 0x7]; + l ^= s << 3; + h = s >> 29; + s = tab[b >> 6 & 0x7]; + l ^= s << 6; + h ^= s >> 26; + s = tab[b >> 9 & 0x7]; + l ^= s << 9; + h ^= s >> 23; + s = tab[b >> 12 & 0x7]; + l ^= s << 12; + h ^= s >> 20; + s = tab[b >> 15 & 0x7]; + l ^= s << 15; + h ^= s >> 17; + s = tab[b >> 18 & 0x7]; + l ^= s << 18; + h ^= s >> 14; + s = tab[b >> 21 & 0x7]; + l ^= s << 21; + h ^= s >> 11; + s = tab[b >> 24 & 0x7]; + l ^= s << 24; + h ^= s >> 8; + s = tab[b >> 27 & 0x7]; + l ^= s << 27; + h ^= s >> 5; + s = tab[b >> 30]; + l ^= s << 30; + h ^= s >> 2; + + /* compensate for the top two bits of a */ + + if (top2b & 01) { + l ^= b << 30; + h ^= b >> 2; + } + if (top2b & 02) { + l ^= b << 31; + h ^= b >> 1; + } + + *rh = h; + *rl = l; +} +#else +void +s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b) +{ + register mp_digit h, l, s; + mp_digit tab[16], top3b = a >> 61; + register mp_digit a1, a2, a4, a8; + + a1 = a & (0x1FFFFFFFFFFFFFFFULL); + a2 = a1 << 1; + a4 = a2 << 1; + a8 = a4 << 1; + tab[0] = 0; + tab[1] = a1; + tab[2] = a2; + tab[3] = a1 ^ a2; + tab[4] = a4; + tab[5] = a1 ^ a4; + tab[6] = a2 ^ a4; + tab[7] = a1 ^ a2 ^ a4; + tab[8] = a8; + tab[9] = a1 ^ a8; + tab[10] = a2 ^ a8; + tab[11] = a1 ^ a2 ^ a8; + tab[12] = a4 ^ a8; + tab[13] = a1 ^ a4 ^ a8; + tab[14] = a2 ^ a4 ^ a8; + tab[15] = a1 ^ a2 ^ a4 ^ a8; + + s = tab[b & 0xF]; + l = s; + s = tab[b >> 4 & 0xF]; + l ^= s << 4; + h = s >> 60; + s = tab[b >> 8 & 0xF]; + l ^= s << 8; + h ^= s >> 56; + s = tab[b >> 12 & 0xF]; + l ^= s << 12; + h ^= s >> 52; + s = tab[b >> 16 & 0xF]; + l ^= s << 16; + h ^= s >> 48; + s = tab[b >> 20 & 0xF]; + l ^= s << 20; + h ^= s >> 44; + s = tab[b >> 24 & 0xF]; + l ^= s << 24; + h ^= s >> 40; + s = tab[b >> 28 & 0xF]; + l ^= s << 28; + h ^= s >> 36; + s = tab[b >> 32 & 0xF]; + l ^= s << 32; + h ^= s >> 32; + s = tab[b >> 36 & 0xF]; + l ^= s << 36; + h ^= s >> 28; + s = tab[b >> 40 & 0xF]; + l ^= s << 40; + h ^= s >> 24; + s = tab[b >> 44 & 0xF]; + l ^= s << 44; + h ^= s >> 20; + s = tab[b >> 48 & 0xF]; + l ^= s << 48; + h ^= s >> 16; + s = tab[b >> 52 & 0xF]; + l ^= s << 52; + h ^= s >> 12; + s = tab[b >> 56 & 0xF]; + l ^= s << 56; + h ^= s >> 8; + s = tab[b >> 60]; + l ^= s << 60; + h ^= s >> 4; + + /* compensate for the top three bits of a */ + + if (top3b & 01) { + l ^= b << 61; + h ^= b >> 3; + } + if (top3b & 02) { + l ^= b << 62; + h ^= b >> 2; + } + if (top3b & 04) { + l ^= b << 63; + h ^= b >> 1; + } + + *rh = h; + *rl = l; +} +#endif + +/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0) + * result is a binary polynomial in 4 mp_digits r[4]. + * The caller MUST ensure that r has the right amount of space allocated. 
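The s_bmul_1x1 implementations above are windowed carry-less multiplies: precompute a table of a times every 3-bit (32-bit build) or 4-bit (64-bit build) polynomial, after masking the top bits of a so every table entry keeps shift headroom, then compensate for those masked bits at the end (the top2b/top3b fixups). The same idea shrunk to bytes with 2-bit windows, as a hedged sketch; clmul8_windowed is my name, not the library's, and a 16-bit result here holds both halves that the real code splits into rh/rl:

#include <stdint.h>
#include <assert.h>

static uint16_t
clmul8_windowed(uint8_t a, uint8_t b)
{
    uint8_t top1b = a >> 7;  /* the masked-off top bit of a */
    uint16_t a1 = a & 0x7f;
    uint16_t tab[4] = { 0, a1, (uint16_t)(a1 << 1),
                        (uint16_t)(a1 ^ (a1 << 1)) };
    uint16_t r = 0;
    int i;

    for (i = 0; i < 8; i += 2)                /* 2-bit windows of b    */
        r ^= (uint16_t)(tab[(b >> i) & 3] << i);
    if (top1b)                                /* compensate, as the    */
        r ^= (uint16_t)(b << 7);              /* top2b/top3b code does */
    return r;
}

int
main(void)
{
    /* (t^7 + ... + 1)^2 over GF(2) has no cross terms: 0x5555 */
    assert(clmul8_windowed(0xFF, 0xFF) == 0x5555);
    return 0;
}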
+ */ +void +s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1, + const mp_digit b0) +{ + mp_digit m1, m0; + /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */ + s_bmul_1x1(r + 3, r + 2, a1, b1); + s_bmul_1x1(r + 1, r, a0, b0); + s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); + /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */ + r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ + r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ +} + +/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0) + * result is a binary polynomial in 6 mp_digits r[6]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void +s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_1x1(r + 5, r + 4, a2, b2); /* fill top 2 words */ + s_bmul_2x2(zm, a1, a2 ^ a0, b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3]; + zm[2] ^= r[2]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void +s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_2x2(r + 4, a3, a2, b3, b2); /* fill top 4 words */ + s_bmul_2x2(zm, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3] ^ r[7]; + zm[2] ^= r[2] ^ r[6]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute addition of two binary polynomials a and b, + * store result in c; c could be a or b, a and b could be equal; + * c is the bitwise XOR of a and b. + */ +mp_err +mp_badd(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; + mp_size ix; + mp_size used_pa, used_pb; + mp_err res = MP_OKAY; + + /* Add all digits up to the precision of b. 
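s_bmul_2x2 above is one level of Karatsuba in characteristic 2, where addition and subtraction are both XOR: with h = a1*b1, l = a0*b0 and m = (a0^a1)*(b0^b1), the middle coefficient is simply m ^ h ^ l, and no borrows can occur. A byte-sized check of that identity; clmul8 is an illustrative stand-in for s_bmul_1x1, not part of the patch:

#include <stdint.h>
#include <assert.h>

/* Plain shift-and-xor carry-less multiply of two bytes. */
static uint16_t
clmul8(uint8_t a, uint8_t b)
{
    uint16_t r = 0;
    int i;
    for (i = 0; i < 8; i++)
        if (b & (1u << i))
            r ^= (uint16_t)(a << i);
    return r;
}

int
main(void)
{
    uint8_t a1 = 0xB7, a0 = 0x3C, b1 = 0x5E, b0 = 0xD1;
    uint16_t h = clmul8(a1, b1);
    uint16_t l = clmul8(a0, b0);
    uint16_t m = clmul8(a1 ^ a0, b1 ^ b0);
    /* (a1 x + a0)(b1 x + b0) = h x^2 + (m ^ h ^ l) x + l over GF(2) */
    uint16_t mid = m ^ h ^ l;
    /* cross-check against the schoolbook cross terms */
    assert(mid == (clmul8(a1, b0) ^ clmul8(a0, b1)));
    return 0;
}

s_bmul_3x3 and s_bmul_4x4 apply the same trick one level up, splitting the operands in half and correcting the middle words with XORs.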
If b had more + * precision than a initially, swap a, b first + */ + if (MP_USED(a) >= MP_USED(b)) { + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + used_pa = MP_USED(a); + used_pb = MP_USED(b); + } else { + pa = MP_DIGITS(b); + pb = MP_DIGITS(a); + used_pa = MP_USED(b); + used_pb = MP_USED(a); + } + + /* Make sure c has enough precision for the output value */ + MP_CHECKOK(s_mp_pad(c, used_pa)); + + /* Do word-by-word xor */ + pc = MP_DIGITS(c); + for (ix = 0; ix < used_pb; ix++) { + (*pc++) = (*pa++) ^ (*pb++); + } + + /* Finish the rest of digits until we're actually done */ + for (; ix < used_pa; ++ix) { + *pc++ = *pa++; + } + + MP_USED(c) = used_pa; + MP_SIGN(c) = ZPOS; + s_mp_clamp(c); + +CLEANUP: + return res; +} + +#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1)); + +/* Compute binary polynomial multiply d = a * b */ +static void +s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ = a0b0 ^ carry; + carry = a1b1; + } + *d = carry; +} + +/* Compute binary polynomial xor multiply accumulate d ^= a * b */ +static void +s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ ^= a0b0 ^ carry; + carry = a1b1; + } + *d ^= carry; +} + +/* Compute binary polynomial xor multiply c = a * b. + * All parameters may be identical. + */ +mp_err +mp_bmul(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pb, b_i; + mp_int tmp; + mp_size ib, a_used, b_used; + mp_err res = MP_OKAY; + + MP_DIGITS(&tmp) = 0; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == c) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + if (a == b) + b = &tmp; + a = &tmp; + } else if (b == c) { + MP_CHECKOK(mp_init_copy(&tmp, b)); + b = &tmp; + } + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b if b longer */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + MP_CHECKOK(s_mp_pad(c, USED(a) + USED(b))); + + pb = MP_DIGITS(b); + s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); + + /* Outer loop: Digits of b */ + a_used = MP_USED(a); + b_used = MP_USED(b); + MP_USED(c) = a_used + b_used; + for (ib = 1; ib < b_used; ib++) { + b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib); + else + MP_DIGIT(c, ib + a_used) = b_i; + } + + s_mp_clamp(c); + + SIGN(c) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute modular reduction of a and store result in r. + * r could be a. + * For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
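In s_bmul_d and s_bmul_d_add above, the only state carried between digits is the high half of the previous 1x1 product: since GF(2) addition is XOR, nothing can ripple further, which is why mp_bmul needs no carry propagation at all. The same loop at byte size, as a sketch; clmul8 and bmul_d8 are names of mine, not the library's:

#include <stdint.h>
#include <assert.h>

/* Carry-less 8x8 -> 16-bit multiply; stands in for s_bmul_1x1. */
static uint16_t
clmul8(uint8_t a, uint8_t b)
{
    uint16_t r = 0;
    int i;
    for (i = 0; i < 8; i++)
        if (b & (1u << i))
            r ^= (uint16_t)(a << i);
    return r;
}

/* The s_bmul_d pattern with bytes in place of mp_digits: the "carry"
 * is just the previous product's high half, and XOR never ripples. */
static void
bmul_d8(const uint8_t *a, int a_len, uint8_t b, uint8_t *d)
{
    uint8_t carry = 0;
    while (a_len--) {
        uint16_t p = clmul8(*a++, b);
        *d++ = (uint8_t)p ^ carry; /* low half ^ previous high half */
        carry = (uint8_t)(p >> 8);
    }
    *d = carry;                    /* top digit of the product      */
}

int
main(void)
{
    /* (t^8 + t + 1) * t = t^9 + t^2 + t, little-endian byte digits */
    uint8_t a[2] = { 0x03, 0x01 }, d[3];
    bmul_d8(a, 2, 0x02, d);
    assert(d[0] == 0x06 && d[1] == 0x02 && d[2] == 0x00);
    return 0;
}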
+ */ +mp_err +mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + int j, k; + int n, dN, d0, d1; + mp_digit zz, *z, tmp; + mp_size used; + mp_err res = MP_OKAY; + + /* The algorithm does the reduction in place in r, + * if a != r, copy a into r first so reduction can be done in r + */ + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + z = MP_DIGITS(r); + + /* start reduction */ + /*dN = p[0] / MP_DIGIT_BITS; */ + dN = p[0] >> MP_DIGIT_BITS_LOG_2; + used = MP_USED(r); + + for (j = used - 1; j > dN;) { + + zz = z[j]; + if (zz == 0) { + j--; + continue; + } + z[j] = 0; + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k] */ + n = p[0] - p[k]; + /*d0 = n % MP_DIGIT_BITS; */ + d0 = n & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + /*n /= MP_DIGIT_BITS; */ + n >>= MP_DIGIT_BITS_LOG_2; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* reducing component t^0 */ + n = dN; + /*d0 = p[0] % MP_DIGIT_BITS;*/ + d0 = p[0] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* final round of reduction */ + while (j == dN) { + + /* d0 = p[0] % MP_DIGIT_BITS; */ + d0 = p[0] & MP_DIGIT_BITS_MASK; + zz = z[dN] >> d0; + if (zz == 0) + break; + d1 = MP_DIGIT_BITS - d0; + + /* clear up the top d1 bits */ + if (d0) { + z[dN] = (z[dN] << d1) >> d1; + } else { + z[dN] = 0; + } + *z ^= zz; /* reduction t^0 component */ + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k]*/ + /* n = p[k] / MP_DIGIT_BITS; */ + n = p[k] >> MP_DIGIT_BITS_LOG_2; + /* d0 = p[k] % MP_DIGIT_BITS; */ + d0 = p[k] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[n] ^= (zz << d0); + tmp = zz >> d1; + if (d0 && tmp) + z[n + 1] ^= tmp; + } + } + + s_mp_clamp(r); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p, + * Store the result in r. r could be a or b; a could be b. + */ +mp_err +mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r) +{ + mp_err res; + + if (a == b) + return mp_bsqrmod(a, p, r); + if ((res = mp_bmul(a, b, r)) != MP_OKAY) + return res; + return mp_bmod(r, p, r); +} + +/* Compute binary polynomial squaring c = a*a mod p . + * Parameter r and a can be identical. + */ + +mp_err +mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + mp_digit *pa, *pr, a_i; + mp_int tmp; + mp_size ia, a_used; + mp_err res; + + ARGCHK(a != NULL && r != NULL, MP_BADARG); + MP_DIGITS(&tmp) = 0; + + if (a == r) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + a = &tmp; + } + + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + MP_CHECKOK(s_mp_pad(r, 2 * USED(a))); + + pa = MP_DIGITS(a); + pr = MP_DIGITS(r); + a_used = MP_USED(a); + MP_USED(r) = 2 * a_used; + + for (ia = 0; ia < a_used; ia++) { + a_i = *pa++; + *pr++ = gf2m_SQR0(a_i); + *pr++ = gf2m_SQR1(a_i); + } + + MP_CHECKOK(mp_bmod(r, p, r)); + s_mp_clamp(r); + SIGN(r) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p. + * Store the result in r. r could be x or y, and x could equal y. + * Uses algorithm Modular_Division_GF(2^m) from + * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to + * the Great Divide". 
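mp_bmod above clears one high word zz at a time, XORing a shifted copy of zz back in once per nonzero term of f(t). The same fold at toy scale, reducing a 16-bit polynomial modulo the AES field polynomial t^8 + t^4 + t^3 + t + 1, whose exponent array in the convention above would be {8, 4, 3, 1, 0}; reduce_aes is an illustrative name of mine:

#include <stdint.h>
#include <assert.h>

/* Every bit at position i >= 8 is replaced by t^(i-8) * (t^4 + t^3 +
 * t + 1), i.e. bits i-4, i-5, i-7 and i-8; the descending loop picks
 * up any bits the fold reintroduces at or above position 8. */
static uint8_t
reduce_aes(uint16_t v)
{
    int i;
    for (i = 15; i >= 8; i--)
        if (v & (1u << i)) {
            v ^= 1u << i;           /* clear t^i and add ...         */
            v ^= (1u << (i - 4))    /* ... t^(i-8) * (f(t) - t^8)    */
               ^ (1u << (i - 5))
               ^ (1u << (i - 7))
               ^ (1u << (i - 8));
        }
    return (uint8_t)v;
}

int
main(void)
{
    /* the familiar AES xtime reduction constant: t^8 mod f(t) = 0x1B */
    assert(reduce_aes(0x100) == 0x1B);
    return 0;
}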
+ */ +int +mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r) +{ + mp_int aa, bb, uu; + mp_int *a, *b, *u, *v; + mp_err res = MP_OKAY; + + MP_DIGITS(&aa) = 0; + MP_DIGITS(&bb) = 0; + MP_DIGITS(&uu) = 0; + + MP_CHECKOK(mp_init_copy(&aa, x)); + MP_CHECKOK(mp_init_copy(&uu, y)); + MP_CHECKOK(mp_init_copy(&bb, pp)); + MP_CHECKOK(s_mp_pad(r, USED(pp))); + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + + a = &aa; + b = &bb; + u = &uu; + v = r; + /* reduce x and y mod p */ + MP_CHECKOK(mp_bmod(a, p, a)); + MP_CHECKOK(mp_bmod(u, p, u)); + + while (!mp_isodd(a)) { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } + + do { + if (mp_cmp_mag(b, a) > 0) { + MP_CHECKOK(mp_badd(b, a, b)); + MP_CHECKOK(mp_badd(v, u, v)); + do { + s_mp_div2(b); + if (mp_isodd(v)) { + MP_CHECKOK(mp_badd(v, pp, v)); + } + s_mp_div2(v); + } while (!mp_isodd(b)); + } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1)) + break; + else { + MP_CHECKOK(mp_badd(a, b, a)); + MP_CHECKOK(mp_badd(u, v, u)); + do { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } while (!mp_isodd(a)); + } + } while (1); + + MP_CHECKOK(mp_copy(u, r)); + +CLEANUP: + mp_clear(&aa); + mp_clear(&bb); + mp_clear(&uu); + return res; +} + +/* Convert the bit-string representation of a polynomial a into an array + * of integers corresponding to the bits with non-zero coefficient. + * Up to max elements of the array will be filled. Return value is total + * number of coefficients that would be extracted if array was large enough. + */ +int +mp_bpoly2arr(const mp_int *a, unsigned int p[], int max) +{ + int i, j, k; + mp_digit top_bit, mask; + + top_bit = 1; + top_bit <<= MP_DIGIT_BIT - 1; + + for (k = 0; k < max; k++) + p[k] = 0; + k = 0; + + for (i = MP_USED(a) - 1; i >= 0; i--) { + mask = top_bit; + for (j = MP_DIGIT_BIT - 1; j >= 0; j--) { + if (MP_DIGITS(a)[i] & mask) { + if (k < max) + p[k] = MP_DIGIT_BIT * i + j; + k++; + } + mask >>= 1; + } + } + + return k; +} + +/* Convert the coefficient array representation of a polynomial to a + * bit-string. The array must be terminated by 0. + */ +mp_err +mp_barr2poly(const unsigned int p[], mp_int *a) +{ + + mp_err res = MP_OKAY; + int i; + + mp_zero(a); + for (i = 0; p[i] > 0; i++) { + MP_CHECKOK(mpl_set_bit(a, p[i], 1)); + } + MP_CHECKOK(mpl_set_bit(a, 0, 1)); + +CLEANUP: + return res; +} diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h new file mode 100644 index 0000000000..ed2c85493c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.h @@ -0,0 +1,28 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MP_GF2M_H_ +#define _MP_GF2M_H_ + +#include "mpi.h" + +mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c); + +/* For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
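Putting the API together: the reduction polynomial travels as the descending, zero-terminated exponent array the comments describe. A hedged usage sketch for GF(2^163) under the standard sect163k1 pentanomial t^163 + t^7 + t^6 + t^3 + 1; the wrapper name gf2m_163_mul is mine, only the mp_bmulmod call comes from this patch:

#include "mpi.h"
#include "mp_gf2m.h"

mp_err
gf2m_163_mul(const mp_int *a, const mp_int *b, mp_int *r)
{
    /* f(t) = t^163 + t^7 + t^6 + t^3 + 1, descending, 0-terminated */
    static const unsigned int p[] = { 163, 7, 6, 3, 0 };
    return mp_bmulmod(a, b, p, r);
}

Note that mp_bdivmod additionally wants the modulus as an mp_int (its pp argument); mp_barr2poly can build that from the same exponent array.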
+ */ +mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], + mp_int *r); +mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r); + +int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max); +mp_err mp_barr2poly(const unsigned int p[], mp_int *a); + +#endif /* _MP_GF2M_H_ */ diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c new file mode 100644 index 0000000000..ddc21ec1cb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache.c @@ -0,0 +1,788 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "prtypes.h" + +/* + * This file implements a single function: s_mpi_getProcessorLineSize(); + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * Currently the file returns good data for most modern x86 processors, and + * reasonable data on 64-bit ppc processors. All other processors are assumed + * to have a cache line size of 32 bytes. + * + */ + +#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +/* X86 processors have special instructions that tell us about the cache */ +#include "string.h" + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define AMD_64 1 +#endif + +/* Generic CPUID function */ +#if defined(AMD_64) + +#if defined(__GNUC__) + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + __asm__("xor %%ecx, %%ecx\n\t" + "cpuid\n\t" + : "=a"(*eax), + "=b"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +#elif defined(_MSC_VER) + +#include + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + int intrinsic_out[4]; + + __cpuid(intrinsic_out, op); + *eax = intrinsic_out[0]; + *ebx = intrinsic_out[1]; + *ecx = intrinsic_out[2]; + *edx = intrinsic_out[3]; +} + +#endif + +#else /* !defined(AMD_64) */ + +/* x86 */ + +#if defined(__GNUC__) +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + /* Some older processors don't fill the ecx register with cpuid, so clobber it + * before calling cpuid, so that there's no risk of picking random bits that + * erroneously indicate that absent CPU features are present. + * Also, GCC isn't smart enough to save the ebx PIC register on its own + * in this case, so do it by hand. Use edi to store ebx and pass the + * value returned in ebx from cpuid through edi. 
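A hypothetical caller of the freebl_cpuid() wrapper defined above: leaf 0 returns the highest supported basic leaf in eax and the 12-byte vendor string split across ebx, edx, ecx, in that order (e.g. "GenuineIntel" or "AuthenticAMD"). print_cpu_vendor is an illustrative name, not part of the patch:

#include <stdio.h>
#include <string.h>

extern void freebl_cpuid(unsigned long op, unsigned long *eax,
                         unsigned long *ebx, unsigned long *ecx,
                         unsigned long *edx);

void
print_cpu_vendor(void)
{
    unsigned long eax, ebx, ecx, edx;
    char vendor[13];

    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
    /* x86 is little-endian, so the low four bytes of each register
     * value are the string bytes even when unsigned long is 64-bit */
    memcpy(vendor + 0, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);
    vendor[12] = '\0';
    printf("max basic leaf %lu, vendor \"%s\"\n", eax, vendor);
}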
*/ + __asm__("xor %%ecx, %%ecx\n\t" + "mov %%ebx,%%edi\n\t" + "cpuid\n\t" + "xchgl %%ebx,%%edi\n\t" + : "=a"(*eax), + "=D"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +/* + * try flipping a processor flag to determine CPU type + */ +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm__("pushfl\n\t" /* get the flags */ + "popl %0\n\t" + "movl %0,%1\n\t" /* save the original flags */ + "xorl %2,%0\n\t" /* flip the bit */ + "pushl %0\n\t" /* set the flags */ + "popfl\n\t" + "pushfl\n\t" /* get the flags again (for return) */ + "popl %0\n\t" + "pushl %1\n\t" /* restore the original flags */ + "popfl\n\t" + : "=r"(changedFlags), + "=r"(originalFlags), + "=r"(flag) + : "2"(flag)); + return changedFlags ^ originalFlags; +} + +#elif defined(_MSC_VER) + +/* + * windows versions of the above assembler + */ +#define wcpuid __asm __emit 0fh __asm __emit 0a2h +void +freebl_cpuid(unsigned long op, unsigned long *Reax, + unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) +{ + unsigned long Leax, Lebx, Lecx, Ledx; + __asm { + pushad + xor ecx,ecx + mov eax,op + wcpuid + mov Leax,eax + mov Lebx,ebx + mov Lecx,ecx + mov Ledx,edx + popad + } + *Reax = Leax; + *Rebx = Lebx; + *Recx = Lecx; + *Redx = Ledx; +} + +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm { + push eax + push ebx + pushfd /* get the flags */ + pop eax + push eax /* save the flags on the stack */ + mov originalFlags,eax /* save the original flags */ + mov ebx,flag + xor eax,ebx /* flip the bit */ + push eax /* set the flags */ + popfd + pushfd /* get the flags again (for return) */ + pop eax + popfd /* restore the original flags */ + mov changedFlags,eax + pop ebx + pop eax + } + return changedFlags ^ originalFlags; +} +#endif + +#endif + +#if !defined(AMD_64) +#define AC_FLAG 0x40000 +#define ID_FLAG 0x200000 + +/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ +static int +is386() +{ + return changeFlag(AC_FLAG) == 0; +} + +/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ +static int +is486() +{ + return changeFlag(ID_FLAG) == 0; +} +#endif + +/* + * table for Intel Cache. 
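+ * Each one-byte cache descriptor reported by cpuid function '2' is used
+ * as an index into this table; see getIntelCacheLineSize() below.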
+ * See Intel Application Note AP-485 for more information + */ + +typedef unsigned char CacheTypeEntry; + +typedef enum { + Cache_NONE = 0, + Cache_UNKNOWN = 1, + Cache_TLB = 2, + Cache_TLBi = 3, + Cache_TLBd = 4, + Cache_Trace = 5, + Cache_L1 = 6, + Cache_L1i = 7, + Cache_L1d = 8, + Cache_L2 = 9, + Cache_L2i = 10, + Cache_L2d = 11, + Cache_L3 = 12, + Cache_L3i = 13, + Cache_L3d = 14 +} CacheType; + +struct _cache { + CacheTypeEntry type; + unsigned char lineSize; +}; +static const struct _cache CacheMap[256] = { + /* 00 */ { Cache_NONE, 0 }, + /* 01 */ { Cache_TLBi, 0 }, + /* 02 */ { Cache_TLBi, 0 }, + /* 03 */ { Cache_TLBd, 0 }, + /* 04 */ { + Cache_TLBd, + }, + /* 05 */ { Cache_UNKNOWN, 0 }, + /* 06 */ { Cache_L1i, 32 }, + /* 07 */ { Cache_UNKNOWN, 0 }, + /* 08 */ { Cache_L1i, 32 }, + /* 09 */ { Cache_UNKNOWN, 0 }, + /* 0a */ { Cache_L1d, 32 }, + /* 0b */ { Cache_UNKNOWN, 0 }, + /* 0c */ { Cache_L1d, 32 }, + /* 0d */ { Cache_UNKNOWN, 0 }, + /* 0e */ { Cache_UNKNOWN, 0 }, + /* 0f */ { Cache_UNKNOWN, 0 }, + /* 10 */ { Cache_UNKNOWN, 0 }, + /* 11 */ { Cache_UNKNOWN, 0 }, + /* 12 */ { Cache_UNKNOWN, 0 }, + /* 13 */ { Cache_UNKNOWN, 0 }, + /* 14 */ { Cache_UNKNOWN, 0 }, + /* 15 */ { Cache_UNKNOWN, 0 }, + /* 16 */ { Cache_UNKNOWN, 0 }, + /* 17 */ { Cache_UNKNOWN, 0 }, + /* 18 */ { Cache_UNKNOWN, 0 }, + /* 19 */ { Cache_UNKNOWN, 0 }, + /* 1a */ { Cache_UNKNOWN, 0 }, + /* 1b */ { Cache_UNKNOWN, 0 }, + /* 1c */ { Cache_UNKNOWN, 0 }, + /* 1d */ { Cache_UNKNOWN, 0 }, + /* 1e */ { Cache_UNKNOWN, 0 }, + /* 1f */ { Cache_UNKNOWN, 0 }, + /* 20 */ { Cache_UNKNOWN, 0 }, + /* 21 */ { Cache_UNKNOWN, 0 }, + /* 22 */ { Cache_L3, 64 }, + /* 23 */ { Cache_L3, 64 }, + /* 24 */ { Cache_UNKNOWN, 0 }, + /* 25 */ { Cache_L3, 64 }, + /* 26 */ { Cache_UNKNOWN, 0 }, + /* 27 */ { Cache_UNKNOWN, 0 }, + /* 28 */ { Cache_UNKNOWN, 0 }, + /* 29 */ { Cache_L3, 64 }, + /* 2a */ { Cache_UNKNOWN, 0 }, + /* 2b */ { Cache_UNKNOWN, 0 }, + /* 2c */ { Cache_L1d, 64 }, + /* 2d */ { Cache_UNKNOWN, 0 }, + /* 2e */ { Cache_UNKNOWN, 0 }, + /* 2f */ { Cache_UNKNOWN, 0 }, + /* 30 */ { Cache_L1i, 64 }, + /* 31 */ { Cache_UNKNOWN, 0 }, + /* 32 */ { Cache_UNKNOWN, 0 }, + /* 33 */ { Cache_UNKNOWN, 0 }, + /* 34 */ { Cache_UNKNOWN, 0 }, + /* 35 */ { Cache_UNKNOWN, 0 }, + /* 36 */ { Cache_UNKNOWN, 0 }, + /* 37 */ { Cache_UNKNOWN, 0 }, + /* 38 */ { Cache_UNKNOWN, 0 }, + /* 39 */ { Cache_L2, 64 }, + /* 3a */ { Cache_UNKNOWN, 0 }, + /* 3b */ { Cache_L2, 64 }, + /* 3c */ { Cache_L2, 64 }, + /* 3d */ { Cache_UNKNOWN, 0 }, + /* 3e */ { Cache_UNKNOWN, 0 }, + /* 3f */ { Cache_UNKNOWN, 0 }, + /* 40 */ { Cache_L2, 0 }, + /* 41 */ { Cache_L2, 32 }, + /* 42 */ { Cache_L2, 32 }, + /* 43 */ { Cache_L2, 32 }, + /* 44 */ { Cache_L2, 32 }, + /* 45 */ { Cache_L2, 32 }, + /* 46 */ { Cache_UNKNOWN, 0 }, + /* 47 */ { Cache_UNKNOWN, 0 }, + /* 48 */ { Cache_UNKNOWN, 0 }, + /* 49 */ { Cache_UNKNOWN, 0 }, + /* 4a */ { Cache_UNKNOWN, 0 }, + /* 4b */ { Cache_UNKNOWN, 0 }, + /* 4c */ { Cache_UNKNOWN, 0 }, + /* 4d */ { Cache_UNKNOWN, 0 }, + /* 4e */ { Cache_UNKNOWN, 0 }, + /* 4f */ { Cache_UNKNOWN, 0 }, + /* 50 */ { Cache_TLBi, 0 }, + /* 51 */ { Cache_TLBi, 0 }, + /* 52 */ { Cache_TLBi, 0 }, + /* 53 */ { Cache_UNKNOWN, 0 }, + /* 54 */ { Cache_UNKNOWN, 0 }, + /* 55 */ { Cache_UNKNOWN, 0 }, + /* 56 */ { Cache_UNKNOWN, 0 }, + /* 57 */ { Cache_UNKNOWN, 0 }, + /* 58 */ { Cache_UNKNOWN, 0 }, + /* 59 */ { Cache_UNKNOWN, 0 }, + /* 5a */ { Cache_UNKNOWN, 0 }, + /* 5b */ { Cache_TLBd, 0 }, + /* 5c */ { Cache_TLBd, 0 }, + /* 5d */ { Cache_TLBd, 0 }, + /* 5e */ { Cache_UNKNOWN, 0 }, + 
/* 5f */ { Cache_UNKNOWN, 0 }, + /* 60 */ { Cache_UNKNOWN, 0 }, + /* 61 */ { Cache_UNKNOWN, 0 }, + /* 62 */ { Cache_UNKNOWN, 0 }, + /* 63 */ { Cache_UNKNOWN, 0 }, + /* 64 */ { Cache_UNKNOWN, 0 }, + /* 65 */ { Cache_UNKNOWN, 0 }, + /* 66 */ { Cache_L1d, 64 }, + /* 67 */ { Cache_L1d, 64 }, + /* 68 */ { Cache_L1d, 64 }, + /* 69 */ { Cache_UNKNOWN, 0 }, + /* 6a */ { Cache_UNKNOWN, 0 }, + /* 6b */ { Cache_UNKNOWN, 0 }, + /* 6c */ { Cache_UNKNOWN, 0 }, + /* 6d */ { Cache_UNKNOWN, 0 }, + /* 6e */ { Cache_UNKNOWN, 0 }, + /* 6f */ { Cache_UNKNOWN, 0 }, + /* 70 */ { Cache_Trace, 1 }, + /* 71 */ { Cache_Trace, 1 }, + /* 72 */ { Cache_Trace, 1 }, + /* 73 */ { Cache_UNKNOWN, 0 }, + /* 74 */ { Cache_UNKNOWN, 0 }, + /* 75 */ { Cache_UNKNOWN, 0 }, + /* 76 */ { Cache_UNKNOWN, 0 }, + /* 77 */ { Cache_UNKNOWN, 0 }, + /* 78 */ { Cache_UNKNOWN, 0 }, + /* 79 */ { Cache_L2, 64 }, + /* 7a */ { Cache_L2, 64 }, + /* 7b */ { Cache_L2, 64 }, + /* 7c */ { Cache_L2, 64 }, + /* 7d */ { Cache_UNKNOWN, 0 }, + /* 7e */ { Cache_UNKNOWN, 0 }, + /* 7f */ { Cache_UNKNOWN, 0 }, + /* 80 */ { Cache_UNKNOWN, 0 }, + /* 81 */ { Cache_UNKNOWN, 0 }, + /* 82 */ { Cache_L2, 32 }, + /* 83 */ { Cache_L2, 32 }, + /* 84 */ { Cache_L2, 32 }, + /* 85 */ { Cache_L2, 32 }, + /* 86 */ { Cache_L2, 64 }, + /* 87 */ { Cache_L2, 64 }, + /* 88 */ { Cache_UNKNOWN, 0 }, + /* 89 */ { Cache_UNKNOWN, 0 }, + /* 8a */ { Cache_UNKNOWN, 0 }, + /* 8b */ { Cache_UNKNOWN, 0 }, + /* 8c */ { Cache_UNKNOWN, 0 }, + /* 8d */ { Cache_UNKNOWN, 0 }, + /* 8e */ { Cache_UNKNOWN, 0 }, + /* 8f */ { Cache_UNKNOWN, 0 }, + /* 90 */ { Cache_UNKNOWN, 0 }, + /* 91 */ { Cache_UNKNOWN, 0 }, + /* 92 */ { Cache_UNKNOWN, 0 }, + /* 93 */ { Cache_UNKNOWN, 0 }, + /* 94 */ { Cache_UNKNOWN, 0 }, + /* 95 */ { Cache_UNKNOWN, 0 }, + /* 96 */ { Cache_UNKNOWN, 0 }, + /* 97 */ { Cache_UNKNOWN, 0 }, + /* 98 */ { Cache_UNKNOWN, 0 }, + /* 99 */ { Cache_UNKNOWN, 0 }, + /* 9a */ { Cache_UNKNOWN, 0 }, + /* 9b */ { Cache_UNKNOWN, 0 }, + /* 9c */ { Cache_UNKNOWN, 0 }, + /* 9d */ { Cache_UNKNOWN, 0 }, + /* 9e */ { Cache_UNKNOWN, 0 }, + /* 9f */ { Cache_UNKNOWN, 0 }, + /* a0 */ { Cache_UNKNOWN, 0 }, + /* a1 */ { Cache_UNKNOWN, 0 }, + /* a2 */ { Cache_UNKNOWN, 0 }, + /* a3 */ { Cache_UNKNOWN, 0 }, + /* a4 */ { Cache_UNKNOWN, 0 }, + /* a5 */ { Cache_UNKNOWN, 0 }, + /* a6 */ { Cache_UNKNOWN, 0 }, + /* a7 */ { Cache_UNKNOWN, 0 }, + /* a8 */ { Cache_UNKNOWN, 0 }, + /* a9 */ { Cache_UNKNOWN, 0 }, + /* aa */ { Cache_UNKNOWN, 0 }, + /* ab */ { Cache_UNKNOWN, 0 }, + /* ac */ { Cache_UNKNOWN, 0 }, + /* ad */ { Cache_UNKNOWN, 0 }, + /* ae */ { Cache_UNKNOWN, 0 }, + /* af */ { Cache_UNKNOWN, 0 }, + /* b0 */ { Cache_TLBi, 0 }, + /* b1 */ { Cache_UNKNOWN, 0 }, + /* b2 */ { Cache_UNKNOWN, 0 }, + /* b3 */ { Cache_TLBd, 0 }, + /* b4 */ { Cache_UNKNOWN, 0 }, + /* b5 */ { Cache_UNKNOWN, 0 }, + /* b6 */ { Cache_UNKNOWN, 0 }, + /* b7 */ { Cache_UNKNOWN, 0 }, + /* b8 */ { Cache_UNKNOWN, 0 }, + /* b9 */ { Cache_UNKNOWN, 0 }, + /* ba */ { Cache_UNKNOWN, 0 }, + /* bb */ { Cache_UNKNOWN, 0 }, + /* bc */ { Cache_UNKNOWN, 0 }, + /* bd */ { Cache_UNKNOWN, 0 }, + /* be */ { Cache_UNKNOWN, 0 }, + /* bf */ { Cache_UNKNOWN, 0 }, + /* c0 */ { Cache_UNKNOWN, 0 }, + /* c1 */ { Cache_UNKNOWN, 0 }, + /* c2 */ { Cache_UNKNOWN, 0 }, + /* c3 */ { Cache_UNKNOWN, 0 }, + /* c4 */ { Cache_UNKNOWN, 0 }, + /* c5 */ { Cache_UNKNOWN, 0 }, + /* c6 */ { Cache_UNKNOWN, 0 }, + /* c7 */ { Cache_UNKNOWN, 0 }, + /* c8 */ { Cache_UNKNOWN, 0 }, + /* c9 */ { Cache_UNKNOWN, 0 }, + /* ca */ { Cache_UNKNOWN, 0 }, + /* cb */ { Cache_UNKNOWN, 0 }, + /* cc */ { 
Cache_UNKNOWN, 0 },
+    /* cd */ { Cache_UNKNOWN, 0 },
+    /* ce */ { Cache_UNKNOWN, 0 },
+    /* cf */ { Cache_UNKNOWN, 0 },
+    /* d0 */ { Cache_UNKNOWN, 0 },
+    /* d1 */ { Cache_UNKNOWN, 0 },
+    /* d2 */ { Cache_UNKNOWN, 0 },
+    /* d3 */ { Cache_UNKNOWN, 0 },
+    /* d4 */ { Cache_UNKNOWN, 0 },
+    /* d5 */ { Cache_UNKNOWN, 0 },
+    /* d6 */ { Cache_UNKNOWN, 0 },
+    /* d7 */ { Cache_UNKNOWN, 0 },
+    /* d8 */ { Cache_UNKNOWN, 0 },
+    /* d9 */ { Cache_UNKNOWN, 0 },
+    /* da */ { Cache_UNKNOWN, 0 },
+    /* db */ { Cache_UNKNOWN, 0 },
+    /* dc */ { Cache_UNKNOWN, 0 },
+    /* dd */ { Cache_UNKNOWN, 0 },
+    /* de */ { Cache_UNKNOWN, 0 },
+    /* df */ { Cache_UNKNOWN, 0 },
+    /* e0 */ { Cache_UNKNOWN, 0 },
+    /* e1 */ { Cache_UNKNOWN, 0 },
+    /* e2 */ { Cache_UNKNOWN, 0 },
+    /* e3 */ { Cache_UNKNOWN, 0 },
+    /* e4 */ { Cache_UNKNOWN, 0 },
+    /* e5 */ { Cache_UNKNOWN, 0 },
+    /* e6 */ { Cache_UNKNOWN, 0 },
+    /* e7 */ { Cache_UNKNOWN, 0 },
+    /* e8 */ { Cache_UNKNOWN, 0 },
+    /* e9 */ { Cache_UNKNOWN, 0 },
+    /* ea */ { Cache_UNKNOWN, 0 },
+    /* eb */ { Cache_UNKNOWN, 0 },
+    /* ec */ { Cache_UNKNOWN, 0 },
+    /* ed */ { Cache_UNKNOWN, 0 },
+    /* ee */ { Cache_UNKNOWN, 0 },
+    /* ef */ { Cache_UNKNOWN, 0 },
+    /* f0 */ { Cache_UNKNOWN, 0 },
+    /* f1 */ { Cache_UNKNOWN, 0 },
+    /* f2 */ { Cache_UNKNOWN, 0 },
+    /* f3 */ { Cache_UNKNOWN, 0 },
+    /* f4 */ { Cache_UNKNOWN, 0 },
+    /* f5 */ { Cache_UNKNOWN, 0 },
+    /* f6 */ { Cache_UNKNOWN, 0 },
+    /* f7 */ { Cache_UNKNOWN, 0 },
+    /* f8 */ { Cache_UNKNOWN, 0 },
+    /* f9 */ { Cache_UNKNOWN, 0 },
+    /* fa */ { Cache_UNKNOWN, 0 },
+    /* fb */ { Cache_UNKNOWN, 0 },
+    /* fc */ { Cache_UNKNOWN, 0 },
+    /* fd */ { Cache_UNKNOWN, 0 },
+    /* fe */ { Cache_UNKNOWN, 0 },
+    /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * use the above table to determine the CacheEntryLineSize.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+                           unsigned long *lineSize)
+{
+    CacheType type;
+
+    type = CacheMap[val].type;
+    /* only interested in data caches */
+    /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
+     * this data check has the side effect of rejecting that entry. If
+     * that wasn't the case, we would have to reject it explicitly */
+    if (CacheMap[val].lineSize == 0) {
+        return;
+    }
+    /* look at the caches, skip types we aren't interested in.
+     * if we already have a value for a lower level cache, skip the
+     * current entry */
+    if ((type == Cache_L1) || (type == Cache_L1d)) {
+        *level = 1;
+        *lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
+        *level = 2;
+        *lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
+        *level = 3;
+        *lineSize = CacheMap[val].lineSize;
+    }
+    return;
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+                              int *level, unsigned long *lineSize)
+{
+    getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is not supported by this processor
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    int level = 4;
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+    int repeat, count;
+
+    if (cpuidLevel < 2) {
+        return 0;
+    }
+
+    /* command '2' of the cpuid is intel's cache info call.
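+     * (For example, a descriptor byte of 0x2c selects CacheMap[0x2c],
+     * which the table above records as an L1 data cache with 64-byte
+     * lines.)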
Each byte of the + * 4 registers contain a potential descriptor for the cache. The CacheMap + * table maps the cache entry with the processor cache. Register 'al' + * contains a count value that cpuid '2' needs to be called in order to + * find all the cache descriptors. Only registers with the high bit set + * to 'zero' have valid descriptors. This code loops through all the + * required calls to cpuid '2' and passes any valid descriptors it finds + * to the getIntelRegisterCacheLineSize code, which breaks the registers + * down into their component descriptors. In the end the lineSize of the + * lowest level cache data cache is returned. */ + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + repeat = eax & 0xf; + for (count = 0; count < repeat; count++) { + if ((eax & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); + } + if ((ebx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ebx, &level, &lineSize); + } + if ((ecx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ecx, &level, &lineSize); + } + if ((edx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(edx, &level, &lineSize); + } + if (count + 1 != repeat) { + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + } + } + return lineSize; +} + +/* + * returns '0' if the cache info is not supported by this processor. + * This is based on the AMD extended cache commands for cpuid. + * (see "AMD Processor Recognition Application Note" Publication 20734). + * Some other processors use the identical scheme. + * (see "Processor Recognition, Transmeta Corporation"). + */ +static unsigned long +getOtherCacheLineSize(unsigned long cpuidLevel) +{ + unsigned long lineSize = 0; + unsigned long eax, ebx, ecx, edx; + + /* get the Extended CPUID level */ + freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + + if (cpuidLevel >= 0x80000005) { + freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ + } + return lineSize; +} + +static const char *const manMap[] = { +#define INTEL 0 + "GenuineIntel", +#define AMD 1 + "AuthenticAMD", +#define CYRIX 2 + "CyrixInstead", +#define CENTAUR 2 + "CentaurHauls", +#define NEXGEN 3 + "NexGenDriven", +#define TRANSMETA 4 + "GenuineTMx86", +#define RISE 5 + "RiseRiseRise", +#define UMC 6 + "UMC UMC UMC ", +#define SIS 7 + "Sis Sis Sis ", +#define NATIONAL 8 + "Geode by NSC", +}; + +static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]); + +#define MAN_UNKNOWN 9 + +#if !defined(AMD_64) +#define SSE2_FLAG (1 << 26) +unsigned long +s_mpi_is_sse2() +{ + unsigned long eax, ebx, ecx, edx; + + if (is386() || is486()) { + return 0; + } + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + + /* has no SSE2 extensions */ + if (eax == 0) { + return 0; + } + + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); + return (edx & SSE2_FLAG) == SSE2_FLAG; +} +#endif + +unsigned long +s_mpi_getProcessorLineSize() +{ + unsigned long eax, ebx, ecx, edx; + PRUint32 cpuid[3]; + unsigned long cpuidLevel; + unsigned long cacheLineSize = 0; + int manufacturer = MAN_UNKNOWN; + int i; + char string[13]; + +#if !defined(AMD_64) + if (is386()) { + return 0; /* 386 had no cache */ + } + if (is486()) { + return 32; /* really? need more info */ + } +#endif + + /* Pentium, cpuid command is available */ + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + /* string holds the CPU's manufacturer ID string - a twelve + * character ASCII string stored in ebx, edx, ecx, and + * the 32-bit extended feature flags are in edx, ecx. 
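+     * (On an Intel CPU, for instance, ebx, edx and ecx hold "Genu",
+     * "ineI" and "ntel" respectively, spelling out "GenuineIntel".)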
+ */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+    memcpy(string, cpuid, sizeof(cpuid));
+    string[12] = 0;
+
+    manufacturer = MAN_UNKNOWN;
+    for (i = 0; i < n_manufacturers; i++) {
+        if (strcmp(manMap[i], string) == 0) {
+            manufacturer = i;
+        }
+    }
+
+    if (manufacturer == INTEL) {
+        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which has cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id */
+    if (cacheLineSize == 0) {
+        cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ * Sigh, the PPC has some really nice features to help us determine cache
+ * size, since it has lots of direct control functions to do so. The POWER
+ * processor even has an instruction to do this, but it was dropped in
+ * PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ * The dcbz function would be a great way to determine cache line size except
+ * 1) it only works on write-back memory (it throws an exception otherwise),
+ * and 2) because so many mac programs 'knew' the processor cache size was
+ * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ * G5 processor has a 128-byte cache line, but dcbz only clears 32 bytes to
+ * keep these programs happy. dcbzl works if 64-bit instructions are
+ * supported. If you know 64-bit instructions are supported, and that the
+ * stack is write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' */
+static inline void
+dcbzl(char *array)
+{
+    __asm__("dcbzl %0, %1"
+            : /*no result*/
+            : "b%"(array), "r"(0)
+            : "memory");
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
+    char *test;
+    int i;
+
+    /* align the array on a maximum line size boundary, so we
+     * know we are starting to clear from the first address */
+    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+    /* set all the values to 1's */
+    memset(test, 0xff, PPC_MAX_LINE_SIZE);
+    /* clear one cache block starting at 'test' */
+    dcbzl(test);
+
+    /* find the size of the cleared area, that's our block size */
+    for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) {
+        if (test[i - 1] == 0) {
+            return i;
+        }
+    }
+    return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a great place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
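+ *
+ * (On Linux with glibc, for example, sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+ * is one such OS call; it is not used here.)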
+ * + */ + +/* If no way to get the processor cache line size has been defined, assume + * it's 32 bytes (most common value, does not significantly impact performance) + */ +#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED +unsigned long +s_mpi_getProcessorLineSize() +{ + return 32; +} +#endif diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s new file mode 100644 index 0000000000..d493b4762f --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s @@ -0,0 +1,861 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 80 +manMap: + .quad .LC0 + .quad .LC1 + .quad .LC2 + .quad .LC3 + .quad .LC4 + .quad .LC5 + .quad .LC6 + .quad .LC7 + .quad .LC8 + .quad .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 
64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 16 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: +.LFB2: + movq %rdx, %r10 + pushq %rbx +.LCFI0: + movq %rcx, %r11 + movq %rdi, %rax +/APP + cpuid + +/NO_APP + movq %rax, (%rsi) + movq %rbx, (%r10) + popq %rbx + movq %rcx, (%r11) + movq %rdx, (%r8) + ret +.LFE2: + .size freebl_cpuid, .-freebl_cpuid + .align 16 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: +.LFB3: + leaq CacheMap(%rip), %r9 + movq %rdx, %r10 
+ movzbl 1(%r9,%rdi,2), %ecx + movzbl (%r9,%rdi,2), %r8d + testb %cl, %cl + je .L2 + cmpl $6, %r8d + sete %dl + cmpl $8, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L4 + movl $1, (%rsi) +.L9: + movzbl %cl, %eax + movq %rax, (%r10) + ret + .align 16 +.L4: + movl (%rsi), %r11d + cmpl $1, %r11d + jg .L11 +.L6: + cmpl $2, %r11d + jle .L2 + cmpl $12, %r8d + sete %dl + cmpl $14, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L2 + movzbq 1(%r9,%rdi,2), %rax + movl $3, (%rsi) + movq %rax, (%r10) + .align 16 +.L2: + rep ; ret + .align 16 +.L11: + cmpl $9, %r8d + sete %dl + cmpl $11, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L6 + movl $2, (%rsi) + jmp .L9 +.LFE3: + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 16 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: +.LFB4: + pushq %rbp +.LCFI1: + movq %rsp, %rbp +.LCFI2: + movq %rbx, -24(%rbp) +.LCFI3: + movq %rdi, %rbx + shrq $24, %rdi + movq %r12, -16(%rbp) +.LCFI4: + movq %r13, -8(%rbp) +.LCFI5: + andl $255, %edi + subq $24, %rsp +.LCFI6: + movq %rsi, %r13 + movq %rdx, %r12 + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $16, %rdi + andl $255, %edi + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $8, %rdi + andl $255, %ebx + andl $255, %edi + call getIntelCacheEntryLineSize + movq %r12, %rdx + movq %r13, %rsi + movq %rbx, %rdi + movq 8(%rsp), %r12 + movq (%rsp), %rbx + movq 16(%rsp), %r13 + leave + jmp getIntelCacheEntryLineSize +.LFE4: + .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 16 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: +.LFB7: + pushq %rbp +.LCFI7: + xorl %edi, %edi + movq %rsp, %rbp +.LCFI8: + pushq %r15 +.LCFI9: + leaq -136(%rbp), %r8 + leaq -144(%rbp), %rcx + leaq -152(%rbp), %rdx + pushq %r14 +.LCFI10: + leaq -160(%rbp), %rsi + leaq -128(%rbp), %r14 + pushq %r13 +.LCFI11: + leaq manMap(%rip), %r13 + pushq %r12 +.LCFI12: + movl $9, %r12d + pushq %rbx +.LCFI13: + xorl %ebx, %ebx + subq $200, %rsp +.LCFI14: + call freebl_cpuid + movq -152(%rbp), %rax + movq -160(%rbp), %r15 + movb $0, -116(%rbp) + movl %eax, -128(%rbp) + movq -136(%rbp), %rax + movl %eax, -124(%rbp) + movq -144(%rbp), %rax + movl %eax, -120(%rbp) + .align 16 +.L18: + movslq %ebx,%rax + movq %r14, %rsi + movq (%r13,%rax,8), %rdi + call strcmp@PLT + testl %eax, %eax + cmove %ebx, %r12d + incl %ebx + cmpl $9, %ebx + jle .L18 + testl %r12d, %r12d + jne .L19 + xorl %eax, %eax + decl %r15d + movl $4, -204(%rbp) + movq $0, -200(%rbp) + jle .L21 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + xorl %ebx, %ebx + call freebl_cpuid + movq -192(%rbp), %rdi + movl %edi, %r12d + andl $15, %r12d + cmpl %r12d, %ebx + jl .L30 + jmp .L38 + .align 16 +.L25: + movq -184(%rbp), %rdi + testl $2147483648, %edi + je .L40 +.L26: + movq -176(%rbp), %rdi + testl $2147483648, %edi + je .L41 +.L27: + movq -168(%rbp), %rdi + testl $2147483648, %edi + je .L42 +.L28: + incl %ebx + cmpl %r12d, %ebx + je .L24 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + call freebl_cpuid +.L24: + cmpl %r12d, %ebx + jge .L38 + movq -192(%rbp), %rdi +.L30: + testl $2147483648, %edi + jne .L25 + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + andl $4294967040, %edi + call getIntelRegisterCacheLineSize + movq -184(%rbp), 
%rdi + testl $2147483648, %edi + jne .L26 +.L40: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -176(%rbp), %rdi + testl $2147483648, %edi + jne .L27 +.L41: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -168(%rbp), %rdi + testl $2147483648, %edi + jne .L28 +.L42: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + jmp .L28 +.L38: + movq -200(%rbp), %rax +.L21: + movq %rax, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L19: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483648, %edi + xorl %ebx, %ebx + call freebl_cpuid + movl $2147483652, %eax + cmpq %rax, -240(%rbp) + ja .L43 +.L32: + movq %rbx, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L43: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483653, %edi + call freebl_cpuid + movzbq -224(%rbp), %rbx + jmp .L32 +.LFE7: + .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s new file mode 100644 index 0000000000..af17ebcb42 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s @@ -0,0 +1,902 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
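+/ (32-bit x86 counterpart of mpcpucache_amd64.s; see mpcpucache.c for
+/ the corresponding C source.)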
+ + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 40 +manMap: + .long .LC0 + .long .LC1 + .long .LC2 + .long .LC3 + .long .LC4 + .long .LC5 + .long .LC6 + .long .LC7 + .long .LC8 + .long .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 4 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: + pushl %ebp + pushl %edi + pushl %esi + subl $8, %esp + movl %edx, %ebp +/APP + pushl %ebx + xorl %ecx, %ecx + cpuid + mov %ebx,%esi + popl %ebx + +/NO_APP + movl %eax, (%ebp) + movl 24(%esp), %eax + movl %esi, (%eax) + movl 28(%esp), %eax + movl %ecx, (%eax) + movl 32(%esp), %eax + movl %edx, (%eax) + addl $8, %esp + popl %esi + popl %edi + popl %ebp + ret + .size freebl_cpuid, .-freebl_cpuid + .align 4 + .type changeFlag, @function +changeFlag: +/APP + pushfl + popl %edx + movl %edx,%ecx + xorl %eax,%edx + pushl %edx + popfl + pushfl + popl %edx + pushl %ecx + popfl + +/NO_APP + xorl %ecx, %edx + movl %edx, %eax + ret + .size changeFlag, .-changeFlag + .align 4 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: + pushl %edi + pushl %esi + pushl %ebx + call .L17 +.L17: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx + movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx + movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al + testb %al, %al + movl 16(%esp), %edi + je .L3 + cmpl $6, %ecx + je .L6 + cmpl $8, %ecx + je .L6 + movl (%edx), %esi + cmpl $1, %esi + jg .L15 +.L8: + cmpl $2, %esi + jle .L3 + 
cmpl $12, %ecx + je .L12 + cmpl $14, %ecx + je .L12 + .align 4 +.L3: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L6: + movzbl %al, %eax + movl $1, (%edx) + movl %eax, (%edi) +.L16: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L15: + cmpl $9, %ecx + je .L9 + cmpl $11, %ecx + jne .L8 +.L9: + movzbl %al, %eax + movl $2, (%edx) + movl %eax, (%edi) + jmp .L16 +.L12: + movzbl %al, %eax + movl $3, (%edx) + movl %eax, (%edi) + jmp .L16 + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 4 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ecx + movl 8(%ebp), %edi + movl %eax, %esi + movl %edx, -12(%ebp) + shrl $24, %eax + pushl %edi + call getIntelCacheEntryLineSize + movl %esi, %eax + pushl %edi + shrl $16, %eax + movl -12(%ebp), %edx + andl $255, %eax + call getIntelCacheEntryLineSize + pushl %edi + movl %esi, %edx + movzbl %dh, %eax + movl -12(%ebp), %edx + call getIntelCacheEntryLineSize + andl $255, %esi + movl %edi, 8(%ebp) + movl -12(%ebp), %edx + addl $12, %esp + leal -8(%ebp), %esp + movl %esi, %eax + popl %esi + popl %edi + leave + jmp getIntelCacheEntryLineSize + .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 4 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $188, %esp + call .L52 +.L52: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx + movl $9, -168(%ebp) + movl $262144, %eax + call changeFlag + xorl %edx, %edx + testl %eax, %eax + jne .L50 +.L19: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %edx, %eax + popl %edi + leave + ret + .align 4 +.L50: + movl $2097152, %eax + call changeFlag + testl %eax, %eax + movl $32, %edx + je .L19 + leal -108(%ebp), %eax + pushl %eax + leal -112(%ebp), %eax + pushl %eax + leal -116(%ebp), %eax + pushl %eax + leal -120(%ebp), %edx + xorl %eax, %eax + call freebl_cpuid + movl -120(%ebp), %eax + movl %eax, -164(%ebp) + movl -116(%ebp), %eax + movl %eax, -104(%ebp) + movl -108(%ebp), %eax + movl %eax, -100(%ebp) + movl -112(%ebp), %eax + movl %eax, -96(%ebp) + movb $0, -92(%ebp) + xorl %esi, %esi + addl $12, %esp + leal -104(%ebp), %edi + .align 4 +.L28: + subl $8, %esp + pushl %edi + pushl manMap@GOTOFF(%ebx,%esi,4) + call strcmp@PLT + addl $16, %esp + testl %eax, %eax + jne .L26 + movl %esi, -168(%ebp) +.L26: + incl %esi + cmpl $9, %esi + jle .L28 + movl -168(%ebp), %eax + testl %eax, %eax + jne .L29 + xorl %eax, %eax + cmpl $1, -164(%ebp) + movl $4, -144(%ebp) + movl $0, -140(%ebp) + jle .L41 + leal -124(%ebp), %edx + movl %edx, -188(%ebp) + leal -128(%ebp), %eax + pushl %edx + movl %eax, -184(%ebp) + leal -132(%ebp), %edx + pushl %eax + movl %edx, -180(%ebp) + movl $2, %eax + pushl %edx + leal -136(%ebp), %edx + call freebl_cpuid + movl -136(%ebp), %eax + movl %eax, %edi + andl $15, %edi + xorl %esi, %esi + addl $12, %esp + leal -140(%ebp), %edx + cmpl %edi, %esi + movl %edx, -176(%ebp) + jl .L40 + jmp .L48 + .align 4 +.L49: + movl -136(%ebp), %eax +.L40: + testl %eax, %eax + js .L35 + xorb %al, %al + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L35: + movl -132(%ebp), %eax + testl %eax, %eax + js .L36 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L36: + movl -128(%ebp), %eax + testl %eax, %eax + js .L37 + pushl -176(%ebp) + leal 
-144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L37: + movl -124(%ebp), %eax + testl %eax, %eax + js .L38 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L38: + incl %esi + cmpl %edi, %esi + je .L34 + pushl -188(%ebp) + pushl -184(%ebp) + pushl -180(%ebp) + leal -136(%ebp), %edx + movl $2, %eax + call freebl_cpuid + addl $12, %esp +.L34: + cmpl %edi, %esi + jl .L49 +.L48: + movl -140(%ebp), %eax +.L41: + testl %eax, %eax + jne .L44 + movb $32, %al +.L44: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %eax, %edx + movl %edx, %eax + popl %edi + leave + ret +.L29: + leal -148(%ebp), %eax + movl %eax, -192(%ebp) + movl $0, -172(%ebp) + leal -152(%ebp), %edi + pushl %eax + pushl %edi + leal -156(%ebp), %esi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483648, %eax + call freebl_cpuid + addl $12, %esp + cmpl $-2147483644, -160(%ebp) + ja .L51 +.L42: + movl -172(%ebp), %eax + jmp .L41 +.L51: + pushl -192(%ebp) + pushl %edi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483643, %eax + call freebl_cpuid + movzbl -152(%ebp), %edx + addl $12, %esp + movl %edx, -172(%ebp) + jmp .L42 + .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h new file mode 100644 index 0000000000..0cc868a14b --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi-config.h @@ -0,0 +1,56 @@ +/* Default configuration for MPI library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_CONFIG_H_ +#define MPI_CONFIG_H_ + +/* + For boolean options, + 0 = no + 1 = yes + + Other options are documented individually. + + */ + +#ifndef MP_IOFUNC +#define MP_IOFUNC 0 /* include mp_print() ? */ +#endif + +#ifndef MP_MODARITH +#define MP_MODARITH 1 /* include modular arithmetic ? */ +#endif + +#ifndef MP_LOGTAB +#define MP_LOGTAB 1 /* use table of logs instead of log()? */ +#endif + +#ifndef MP_ARGCHK +/* + 0 = no parameter checks + 1 = runtime checks, continue execution and return an error to caller + 2 = assertions; dump core on parameter errors + */ +#ifdef DEBUG +#define MP_ARGCHK 2 /* how to check input arguments */ +#else +#define MP_ARGCHK 1 /* how to check input arguments */ +#endif +#endif + +#ifndef MP_DEBUG +#define MP_DEBUG 0 /* print diagnostic output? */ +#endif + +#ifndef MP_DEFPREC +#define MP_DEFPREC 64 /* default precision, in digits */ +#endif + +#ifndef MP_SQUARE +#define MP_SQUARE 1 /* use separate squaring code? */ +#endif + +#endif /* ifndef MPI_CONFIG_H_ */ diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h new file mode 100644 index 0000000000..b4333fb6b4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi-priv.h @@ -0,0 +1,246 @@ +/* + * mpi-priv.h - Private header file for MPI + * Arbitrary precision integer arithmetic library + * + * NOTE WELL: the content of this header file is NOT part of the "public" + * API for the MPI library, and may change at any time. + * Application programs that use libmpi should NOT include this header file. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+#define DIAG(T, V)           \
+    {                        \
+        fprintf(stderr, T);  \
+        mp_print(V, stderr); \
+        fputc('\n', stderr); \
+    }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+   the math library to get the log() function
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function. Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+  If MP_LOGTAB is not defined, use the math library to compute the
+  logarithms on the fly. Otherwise, use the table.
+  Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
+
+/* {{{ Digit arithmetic macros */
+
+/*
+  When adding and multiplying digits, the results can be larger than
+  can be contained in an mp_digit. Thus, an mp_word is used. These
+  macros mask off the upper and lower digits of the mp_word (the
+  mp_word may be more than 2 mp_digits wide, but we only concern
+  ourselves with the low-order 2 mp_digits)
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ?
(a) : (b)) +#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b)) +#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b)) + +/* }}} */ + +/* {{{ Comparison constants */ + +#define MP_LT -1 +#define MP_EQ 0 +#define MP_GT 1 + +/* }}} */ + +/* {{{ private function declarations */ + +void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */ +void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */ +void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */ +void s_mp_free(void *ptr); /* general free function */ + +mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */ +mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */ + +void s_mp_clamp(mp_int *mp); /* clip leading zeroes */ + +void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */ + +mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */ +void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */ +mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */ +void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */ +void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */ +void s_mp_div_2(mp_int *mp); /* divide by 2 in place */ +mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */ +mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd); +/* normalize for division */ +mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */ +mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */ +mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */ +mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r); +/* unsigned digit divide */ +mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu); +/* Barrett reduction */ +mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */ +mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c); +mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */ +mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c); +mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset); +/* a += b * RADIX^offset */ +mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */ +#if MP_SQUARE +mp_err s_mp_sqr(mp_int *a); /* magnitude square */ +#else +#define s_mp_sqr(a) s_mp_mul(a, a) +#endif +mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */ +mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */ +int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */ +int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */ +int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */ +int s_mp_ispow2d(mp_digit d); /* is d a power of 2? 
*/ + +int s_mp_tovalue(char ch, int r); /* convert ch to value */ +char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */ +int s_mp_outlen(int bits, int r); /* output length in bytes */ +mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */ +mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c); +mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c); +mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c); + +#ifdef NSS_USE_COMBA +PR_STATIC_ASSERT(sizeof(mp_digit) == 8); +#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1))) + +void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C); + +void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_8(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_16(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); + +#endif /* end NSS_USE_COMBA */ + +/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */ +#if defined(__OS2__) && defined(__IBMC__) +#define MPI_ASM_DECL __cdecl +#else +#define MPI_ASM_DECL +#endif + +#ifdef MPI_AMD64 + +mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit); +mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit); + +/* c = a * b */ +#define s_mpv_mul_d(a, a_len, b, c) \ + ((mp_digit *)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b) + +/* c += a * b */ +#define s_mpv_mul_d_add(a, a_len, b, c) \ + ((mp_digit *)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b) + +#else + +void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c); +void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c); + +#endif + +void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a, + mp_size a_len, mp_digit b, + mp_digit *c); +void MPI_ASM_DECL s_mpv_mul_d_add_propCT(const mp_digit *a, + mp_size a_len, mp_digit b, + mp_digit *c, mp_size c_len); +void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a, + mp_size a_len, + mp_digit *sqrs); + +mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, + mp_digit divisor, mp_digit *quot, mp_digit *rem); + +/* c += a * b * (MP_RADIX ** offset); */ +/* Callers of this macro should be aware that the return type might vary; + * it should be treated as a void function. */ +#define s_mp_mul_d_add_offset(a, b, c, off) \ + s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off) + +typedef struct { + mp_int N; /* modulus N */ + mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */ +} mp_mont_modulus; + +mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, + mp_mont_modulus *mmm); +mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm); + +/* + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * see mpcpucache.c for the implementation. 
+ */
+unsigned long s_mpi_getProcessorLineSize();
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 0000000000..7749dc710f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,5241 @@
+/*
+ * mpi.c
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+#include <assert.h>
+
+#if defined(__arm__) && \
+    ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* the inlined assembler version doesn't work for 16-bit thumb or ARM v3 */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+  A table of the logs of 2 for various bases (the 0 and 1 entries of
+  this table are meaningless and should not be referenced).
+
+  This table is used to compute output lengths for the mp_toradix()
+  function. Since a number n in radix r takes up about log_r(n)
+  digits, we estimate the output size by taking the least integer
+  greater than log_r(n), where:
+
+  log_r(n) = log_2(n) * log_r(2)
+
+  This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+  which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror() */
+static const char *mp_err_string[] = {
+    "unknown result code",     /* say what? */
+    "boolean true",            /* MP_OKAY, MP_YES */
+    "boolean false",           /* MP_NO */
+    "out of memory",           /* MP_MEM */
+    "argument out of range",   /* MP_RANGE */
+    "invalid input parameter", /* MP_BADARG */
+    "result is undefined"      /* MP_UNDEF */
+};
+
+/* Value to digit maps for radix conversion */
+
+/* s_dmap_1 - standard digits and letters */
+static const char *s_dmap_1 =
+    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Default precision for newly created mp_int's */
+static mp_size s_mp_defprec = MP_DEFPREC;
+
+mp_size
+mp_get_prec(void)
+{
+    return s_mp_defprec;
+
+} /* end mp_get_prec() */
+
+void
+mp_set_prec(mp_size prec)
+{
+    if (prec == 0)
+        s_mp_defprec = MP_DEFPREC;
+    else
+        s_mp_defprec = prec;
+
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF
+void
+mp_taint(mp_int *mp)
+{
+    size_t i;
+    for (i = 0; i < mp->used; ++i) {
+        VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+
+void
+mp_untaint(mp_int *mp)
+{
+    size_t i;
+    for (i = 0; i < mp->used; ++i) {
+        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit));
+    }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+  mp_init(mp)
+
+  Initialize a new zero-valued mp_int. Returns MP_OKAY if successful,
+  MP_MEM if memory could not be allocated for the structure.
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+    return mp_init_size(mp, s_mp_defprec);
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+  mp_init_size(mp, prec)
+
+  Initialize a new zero-valued mp_int with at least the given
+  precision; returns MP_OKAY if successful, or MP_MEM if memory could
+  not be allocated for the structure.
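+
+  A minimal usage sketch (illustrative only, using just the API shown
+  in this file):
+
+      mp_int a;
+      if (mp_init_size(&a, 8) == MP_OKAY) {
+          mp_set(&a, 5);
+          mp_clear(&a);
+      }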
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+    prec = MP_ROUNDUP(prec, s_mp_defprec);
+    if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
+        return MP_MEM;
+
+    SIGN(mp) = ZPOS;
+    USED(mp) = 1;
+    ALLOC(mp) = prec;
+
+    return MP_OKAY;
+
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+  mp_init_copy(mp, from)
+
+  Initialize mp as an exact copy of from. Returns MP_OKAY if
+  successful, MP_MEM if memory could not be allocated for the new
+  structure.
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+    if (mp == from)
+        return MP_OKAY;
+
+    if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
+        return MP_MEM;
+
+    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
+    USED(mp) = USED(from);
+    ALLOC(mp) = ALLOC(from);
+    SIGN(mp) = SIGN(from);
+
+    return MP_OKAY;
+
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+  mp_copy(from, to)
+
+  Copies the mp_int 'from' to the mp_int 'to'. It is presumed that
+  'to' has already been initialized (if not, use mp_init_copy()
+  instead). If 'from' and 'to' are identical, nothing happens.
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+    ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+    if (from == to)
+        return MP_OKAY;
+
+    { /* copy */
+        mp_digit *tmp;
+
+        /*
+          If the allocated buffer in 'to' already has enough space to hold
+          all the used digits of 'from', we'll re-use it, to avoid hitting
+          the memory allocator more often than necessary; otherwise, we'd
+          have to grow anyway, so we just allocate a hunk and make the copy
+          as usual.
+         */
+        if (ALLOC(to) >= USED(from)) {
+            s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+            s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+
+        } else {
+            if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL)
+                return MP_MEM;
+
+            s_mp_copy(DIGITS(from), tmp, USED(from));
+
+            if (DIGITS(to) != NULL) {
+                s_mp_setz(DIGITS(to), ALLOC(to));
+                s_mp_free(DIGITS(to));
+            }
+
+            DIGITS(to) = tmp;
+            ALLOC(to) = ALLOC(from);
+        }
+
+        /* Copy the precision and sign from the original */
+        USED(to) = USED(from);
+        SIGN(to) = SIGN(from);
+    } /* end copy */
+
+    return MP_OKAY;
+
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+  mp_exch(mp1, mp2)
+
+  Exchange mp1 and mp2 without allocating any intermediate memory
+  (well, unless you count the stack space needed for this call and the
+  locals it creates...). This cannot fail.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+    assert(mp1 != NULL && mp2 != NULL);
+#else
+    if (mp1 == NULL || mp2 == NULL)
+        return;
+#endif
+
+    s_mp_exch(mp1, mp2);
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+  mp_clear(mp)
+
+  Release the storage used by an mp_int, and void its fields so that
+  if someone calls mp_clear() again for the same int later, we won't
+  get tripped up.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+    if (mp == NULL)
+        return;
+
+    if (DIGITS(mp) != NULL) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        s_mp_free(DIGITS(mp));
+        DIGITS(mp) = NULL;
+    }
+
+    USED(mp) = 0;
+    ALLOC(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+  mp_zero(mp)
+
+  Set mp to zero.
Does not change the allocated size of the structure, + and therefore cannot fail (except on a bad argument, which we ignore) + */ +void +mp_zero(mp_int *mp) +{ + if (mp == NULL) + return; + + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = ZPOS; + +} /* end mp_zero() */ + +/* }}} */ + +/* {{{ mp_set(mp, d) */ + +void +mp_set(mp_int *mp, mp_digit d) +{ + if (mp == NULL) + return; + + mp_zero(mp); + DIGIT(mp, 0) = d; + +} /* end mp_set() */ + +/* }}} */ + +/* {{{ mp_set_int(mp, z) */ + +mp_err +mp_set_int(mp_int *mp, long z) +{ + unsigned long v = labs(z); + mp_err res; + + ARGCHK(mp != NULL, MP_BADARG); + + /* https://bugzilla.mozilla.org/show_bug.cgi?id=1509432 */ + if ((res = mp_set_ulong(mp, v)) != MP_OKAY) { /* avoids duplicated code */ + return res; + } + + if (z < 0) { + SIGN(mp) = NEG; + } + + return MP_OKAY; +} /* end mp_set_int() */ + +/* }}} */ + +/* {{{ mp_set_ulong(mp, z) */ + +mp_err +mp_set_ulong(mp_int *mp, unsigned long z) +{ + int ix; + mp_err res; + + ARGCHK(mp != NULL, MP_BADARG); + + mp_zero(mp); + if (z == 0) + return MP_OKAY; /* shortcut for zero */ + + if (sizeof z <= sizeof(mp_digit)) { + DIGIT(mp, 0) = z; + } else { + for (ix = sizeof(long) - 1; ix >= 0; ix--) { + if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY) + return res; + + res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX)); + if (res != MP_OKAY) + return res; + } + } + return MP_OKAY; +} /* end mp_set_ulong() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Digit arithmetic */ + +/* {{{ mp_add_d(a, d, b) */ + +/* + mp_add_d(a, d, b) + + Compute the sum b = a + d, for a single digit d. Respects the sign of + its primary addend (single digits are unsigned anyway). + */ + +mp_err +mp_add_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == ZPOS) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_add_d() */ + +/* }}} */ + +/* {{{ mp_sub_d(a, d, b) */ + +/* + mp_sub_d(a, d, b) + + Compute the difference b = a - d, for a single digit d. Respects the + sign of its subtrahend (single digits are unsigned anyway). + */ + +mp_err +mp_sub_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == NEG) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + SIGN(&tmp) = NEG; + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sub_d() */ + +/* }}} */ + +/* {{{ mp_mul_d(a, d, b) */ + +/* + mp_mul_d(a, d, b) + + Compute the product b = a * d, for a single digit d. 
Respects the sign + of its multiplicand (single digits are unsigned anyway) + */ + +mp_err +mp_mul_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if (d == 0) { + mp_zero(b); + return MP_OKAY; + } + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + res = s_mp_mul_d(b, d); + + return res; + +} /* end mp_mul_d() */ + +/* }}} */ + +/* {{{ mp_mul_2(a, c) */ + +mp_err +mp_mul_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + return s_mp_mul_2(c); + +} /* end mp_mul_2() */ + +/* }}} */ + +/* {{{ mp_div_d(a, d, q, r) */ + +/* + mp_div_d(a, d, q, r) + + Compute the quotient q = a / d and remainder r = a mod d, for a + single digit d. Respects the sign of its divisor (single digits are + unsigned anyway). + */ + +mp_err +mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r) +{ + mp_err res; + mp_int qp; + mp_digit rem = 0; + int pow; + + ARGCHK(a != NULL, MP_BADARG); + + if (d == 0) + return MP_RANGE; + + /* Shortcut for powers of two ... */ + if ((pow = s_mp_ispow2d(d)) >= 0) { + mp_digit mask; + + mask = ((mp_digit)1 << pow) - 1; + rem = DIGIT(a, 0) & mask; + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) { + return res; + } + s_mp_div_2d(q, pow); + } + + if (r) + *r = rem; + + return MP_OKAY; + } + + if ((res = mp_init_copy(&qp, a)) != MP_OKAY) + return res; + + res = s_mp_div_d(&qp, d, &rem); + + if (s_mp_cmp_d(&qp, 0) == 0) + SIGN(q) = ZPOS; + + if (r) { + *r = rem; + } + + if (q) + s_mp_exch(&qp, q); + + mp_clear(&qp); + return res; + +} /* end mp_div_d() */ + +/* }}} */ + +/* {{{ mp_div_2(a, c) */ + +/* + mp_div_2(a, c) + + Compute c = a / 2, disregarding the remainder. + */ + +mp_err +mp_div_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + s_mp_div_2(c); + + return MP_OKAY; + +} /* end mp_div_2() */ + +/* }}} */ + +/* {{{ mp_expt_d(a, d, b) */ + +mp_err +mp_expt_d(const mp_int *a, mp_digit d, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + DIGIT(&s, 0) = 1; + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Full arithmetic */ + +/* {{{ mp_abs(a, b) */ + +/* + mp_abs(a, b) + + Compute b = |a|. 'a' and 'b' may be identical. + */ + +mp_err +mp_abs(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + SIGN(b) = ZPOS; + + return MP_OKAY; + +} /* end mp_abs() */ + +/* }}} */ + +/* {{{ mp_neg(a, b) */ + +/* + mp_neg(a, b) + + Compute b = -a. 'a' and 'b' may be identical. + */ + +mp_err +mp_neg(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if (s_mp_cmp_d(b, 0) == MP_EQ) + SIGN(b) = ZPOS; + else + SIGN(b) = (SIGN(b) == NEG) ? 
ZPOS : NEG;
+
+    return MP_OKAY;
+
+} /* end mp_neg() */
+
+/* }}} */
+
+/* {{{ mp_add(a, b, c) */
+
+/*
+  mp_add(a, b, c)
+
+  Compute c = a + b. All parameters may be identical.
+ */
+
+mp_err
+mp_add(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(a) == SIGN(b)) {         /* same sign: add values, keep sign */
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b| */
+        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+    } else {                          /* different sign: |a| < |b| */
+        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+    }
+
+    if (s_mp_cmp_d(c, 0) == MP_EQ)
+        SIGN(c) = ZPOS;
+
+CLEANUP:
+    return res;
+
+} /* end mp_add() */
+
+/* }}} */
+
+/* {{{ mp_sub(a, b, c) */
+
+/*
+  mp_sub(a, b, c)
+
+  Compute c = a - b. All parameters may be identical.
+ */
+
+mp_err
+mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+    int magDiff;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == b) {
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    if (MP_SIGN(a) != MP_SIGN(b)) {
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    } else if (!(magDiff = s_mp_cmp(a, b))) {
+        mp_zero(c);
+        res = MP_OKAY;
+    } else if (magDiff > 0) {
+        MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+    } else {
+        MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+        MP_SIGN(c) = !MP_SIGN(a);
+    }
+
+    if (s_mp_cmp_d(c, 0) == MP_EQ)
+        MP_SIGN(c) = MP_ZPOS;
+
+CLEANUP:
+    return res;
+
+} /* end mp_sub() */
+
+/* }}} */
+
+/* {{{ s_mp_mulg(a, b, c) */
+
+/*
+  s_mp_mulg(a, b, c)
+
+  Compute c = a * b. All parameters may be identical. If constantTime is
+  set, the operations are done in constant time. The original code is
+  mostly constant time as long as s_mpv_mul_d_add() is constant time,
+  which is true of the x86 assembler as well as the current C code.
+ */
+mp_err
+s_mp_mulg(const mp_int *a, const mp_int *b, mp_int *c, int constantTime)
+{
+    mp_digit *pb;
+    mp_int tmp;
+    mp_err res;
+    mp_size ib;
+    mp_size useda, usedb;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == c) {
+        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+            return res;
+        if (a == b)
+            b = &tmp;
+        a = &tmp;
+    } else if (b == c) {
+        if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
+            return res;
+        b = &tmp;
+    } else {
+        MP_DIGITS(&tmp) = 0;
+    }
+
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+        b = a;
+        a = xch;
+    }
+
+    MP_USED(c) = 1;
+    MP_DIGIT(c, 0) = 0;
+    if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
+        goto CLEANUP;
+
+#ifdef NSS_USE_COMBA
+    /* Comba isn't constant time, because it clamps! If we cared (that is,
+     * if we needed a constant-time multiply that was also 'faster'), we
+     * could easily pass constantTime down to the comba code and get it to
+     * skip the clamp... but there are also assembler versions of comba for
+     * platforms that can't compile the normal comba's embedded assembler,
+     * and those would need to change as well, so for now we just skip
+     * comba when we are running constant time.
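+     *
+     * (For reference, IS_POWER_OF_2(a) expands to ((a) && !((a) & ((a)-1))):
+     * e.g. 8 & 7 == 0 but 12 & 11 != 0, so only operand sizes 4, 8, 16 and
+     * 32 reach the comba kernels below.)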
*/
+    if (!constantTime && (MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
+        if (MP_USED(a) == 4) {
+            s_mp_mul_comba_4(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 8) {
+            s_mp_mul_comba_8(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 16) {
+            s_mp_mul_comba_16(a, b, c);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 32) {
+            s_mp_mul_comba_32(a, b, c);
+            goto CLEANUP;
+        }
+    }
+#endif
+
+    pb = MP_DIGITS(b);
+    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
+
+    /* Outer loop: Digits of b */
+    useda = MP_USED(a);
+    usedb = MP_USED(b);
+    for (ib = 1; ib < usedb; ib++) {
+        mp_digit b_i = *pb++;
+
+        /* Inner product: Digits of a */
+        if (constantTime || b_i)
+            s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+        else
+            MP_DIGIT(c, ib + useda) = b_i;
+    }
+
+    if (!constantTime) {
+        s_mp_clamp(c);
+    }
+
+    if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
+        SIGN(c) = ZPOS;
+    else
+        SIGN(c) = NEG;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+} /* end s_mp_mulg() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+  mp_mul(a, b, c)
+
+  Compute c = a * b. All parameters may be identical.
+ */
+
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    return s_mp_mulg(a, b, c, 0);
+} /* end mp_mul() */
+
+/* }}} */
+
+/* {{{ mp_mulCT(a, b, c) */
+
+/*
+  mp_mulCT(a, b, c)
+
+  Compute c = a * b, in constant time. Parameters may not be identical.
+  NOTE: a and b may be modified.
+ */
+
+mp_err
+mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize)
+{
+    mp_err res;
+
+    /* Make the multiply values fixed length so the multiply doesn't leak
+     * the length. At this point all the values are blinded, but once we
+     * finish we want the output size to be hidden as well (so there is no
+     * clamping of the output). */
+    MP_CHECKOK(s_mp_pad(a, setSize));
+    MP_CHECKOK(s_mp_pad(b, setSize));
+    MP_CHECKOK(s_mp_pad(c, 2 * setSize));
+    MP_CHECKOK(s_mp_mulg(a, b, c, 1));
+CLEANUP:
+    return res;
+} /* end mp_mulCT() */
+
+/* }}} */
+
+/* {{{ mp_sqr(a, sqr) */
+
+#if MP_SQUARE
+/*
+  Computes the square of a. This can be done more
+  efficiently than a general multiplication, because many of the
+  computation steps are redundant when squaring. The inner product
+  step is a bit more complicated, but we save a fair number of
+  iterations of the multiplication loop.
+ */
+
+/* sqr = a^2; Caller provides both a and tmp; */
+mp_err
+mp_sqr(const mp_int *a, mp_int *sqr)
+{
+    mp_digit *pa;
+    mp_digit d;
+    mp_err res;
+    mp_size ix;
+    mp_int tmp;
+    int count;
+
+    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
+
+    if (a == sqr) {
+        if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+            return res;
+        a = &tmp;
+    } else {
+        DIGITS(&tmp) = 0;
+        res = MP_OKAY;
+    }
+
+    ix = 2 * MP_USED(a);
+    if (ix > MP_ALLOC(sqr)) {
+        MP_USED(sqr) = 1;
+        MP_CHECKOK(s_mp_grow(sqr, ix));
+    }
+    MP_USED(sqr) = ix;
+    MP_DIGIT(sqr, 0) = 0;
+
+#ifdef NSS_USE_COMBA
+    if (IS_POWER_OF_2(MP_USED(a))) {
+        if (MP_USED(a) == 4) {
+            s_mp_sqr_comba_4(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 8) {
+            s_mp_sqr_comba_8(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 16) {
+            s_mp_sqr_comba_16(a, sqr);
+            goto CLEANUP;
+        }
+        if (MP_USED(a) == 32) {
+            s_mp_sqr_comba_32(a, sqr);
+            goto CLEANUP;
+        }
+    }
+#endif
+
+    pa = MP_DIGITS(a);
+    count = MP_USED(a) - 1;
+    if (count > 0) {
+        d = *pa++;
+        s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
+        for (ix = 3; --count > 0; ix += 2) {
+            d = *pa++;
+            s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
+        } /* for(ix ...) */
+        MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this.
*/ + + /* now sqr *= 2 */ + s_mp_mul_2(sqr); + } else { + MP_DIGIT(sqr, 1) = 0; + } + + /* now add the squares of the digits of a to sqr. */ + s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr)); + + SIGN(sqr) = ZPOS; + s_mp_clamp(sqr); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sqr() */ +#endif + +/* }}} */ + +/* {{{ mp_div(a, b, q, r) */ + +/* + mp_div(a, b, q, r) + + Compute q = a / b and r = a mod b. Input parameters may be re-used + as output parameters. If q or r is NULL, that portion of the + computation will be discarded (although it will still be computed) + */ +mp_err +mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) +{ + mp_err res; + mp_int *pQ, *pR; + mp_int qtmp, rtmp, btmp; + int cmp; + mp_sign signA; + mp_sign signB; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + signA = MP_SIGN(a); + signB = MP_SIGN(b); + + if (mp_cmp_z(b) == MP_EQ) + return MP_RANGE; + + DIGITS(&qtmp) = 0; + DIGITS(&rtmp) = 0; + DIGITS(&btmp) = 0; + + /* Set up some temporaries... */ + if (!r || r == a || r == b) { + MP_CHECKOK(mp_init_copy(&rtmp, a)); + pR = &rtmp; + } else { + MP_CHECKOK(mp_copy(a, r)); + pR = r; + } + + if (!q || q == a || q == b) { + MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a))); + pQ = &qtmp; + } else { + MP_CHECKOK(s_mp_pad(q, MP_USED(a))); + pQ = q; + mp_zero(pQ); + } + + /* + If |a| <= |b|, we can compute the solution without division; + otherwise, we actually do the work required. + */ + if ((cmp = s_mp_cmp(a, b)) <= 0) { + if (cmp) { + /* r was set to a above. */ + mp_zero(pQ); + } else { + mp_set(pQ, 1); + mp_zero(pR); + } + } else { + MP_CHECKOK(mp_init_copy(&btmp, b)); + MP_CHECKOK(s_mp_div(pR, &btmp, pQ)); + } + + /* Compute the signs for the output */ + MP_SIGN(pR) = signA; /* Sr = Sa */ + /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */ + MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG; + + if (s_mp_cmp_d(pQ, 0) == MP_EQ) + SIGN(pQ) = ZPOS; + if (s_mp_cmp_d(pR, 0) == MP_EQ) + SIGN(pR) = ZPOS; + + /* Copy output, if it is needed */ + if (q && q != pQ) + s_mp_exch(pQ, q); + + if (r && r != pR) + s_mp_exch(pR, r); + +CLEANUP: + mp_clear(&btmp); + mp_clear(&rtmp); + mp_clear(&qtmp); + + return res; + +} /* end mp_div() */ + +/* }}} */ + +/* {{{ mp_div_2d(a, d, q, r) */ + +mp_err +mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r) +{ + mp_err res; + + ARGCHK(a != NULL, MP_BADARG); + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) + return res; + } + if (r) { + if ((res = mp_copy(a, r)) != MP_OKAY) + return res; + } + if (q) { + s_mp_div_2d(q, d); + } + if (r) { + s_mp_mod_2d(r, d); + } + + return MP_OKAY; + +} /* end mp_div_2d() */ + +/* }}} */ + +/* {{{ mp_expt(a, b, c) */ + +/* + mp_expt(a, b, c) + + Compute c = a ** b, that is, raise a to the b power. Uses a + standard iterative square-and-multiply technique. 
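+
+  As a worked sketch: b = 13 = 1101 in binary is consumed low bit first,
+  so s accumulates x at bits 0, 2 and 3 while x is squared each round,
+  giving a^1 * a^4 * a^8 = a^13.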
+ */ + +mp_err +mp_expt(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int s, x; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + + mp_set(&s, 1); + + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + /* Loop over low-order digits in ascending order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over bits of each non-maximal digit */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Consider now the last digit... */ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + if (mp_iseven(b)) + SIGN(&s) = SIGN(a); + + res = mp_copy(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt() */ + +/* }}} */ + +/* {{{ mp_2expt(a, k) */ + +/* Compute a = 2^k */ + +mp_err +mp_2expt(mp_int *a, mp_digit k) +{ + ARGCHK(a != NULL, MP_BADARG); + + return s_mp_2expt(a, k); + +} /* end mp_2expt() */ + +/* }}} */ + +/* {{{ mp_mod(a, m, c) */ + +/* + mp_mod(a, m, c) + + Compute c = a (mod m). Result will always be 0 <= c < m. + */ + +mp_err +mp_mod(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + int mag; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if (SIGN(m) == NEG) + return MP_RANGE; + + /* + If |a| > m, we need to divide to get the remainder and take the + absolute value. + + If |a| < m, we don't need to do any division, just copy and adjust + the sign (if a is negative). + + If |a| == m, we can simply set the result to zero. + + This order is intended to minimize the average path length of the + comparison chain on common workloads -- the most frequent cases are + that |a| != m, so we do those first. + */ + if ((mag = s_mp_cmp(a, m)) > 0) { + if ((res = mp_div(a, m, NULL, c)) != MP_OKAY) + return res; + + if (SIGN(c) == NEG) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else if (mag < 0) { + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if (mp_cmp_z(a) < 0) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else { + mp_zero(c); + } + + return MP_OKAY; + +} /* end mp_mod() */ + +/* }}} */ + +/* {{{ s_mp_subCT_d(a, b, borrow, c) */ + +/* + s_mp_subCT_d(a, b, borrow, c) + + Compute c = (a -b) - subtract in constant time. returns borrow + */ +mp_digit +s_mp_subCT_d(mp_digit a, mp_digit b, mp_digit borrow, mp_digit *ret) +{ + *ret = a - b - borrow; + return MP_CT_LTU(a, *ret) | (MP_CT_EQ(a, *ret) & borrow); +} /* s_mp_subCT_d() */ + +/* }}} */ + +/* {{{ mp_subCT(a, b, ret, borrow) */ + +/* return ret= a - b and borrow in borrow. done in constant time. + * b could be modified. + */ +mp_err +mp_subCT(const mp_int *a, mp_int *b, mp_int *ret, mp_digit *borrow) +{ + mp_size used_a = MP_USED(a); + mp_size i; + mp_err res; + + MP_CHECKOK(s_mp_pad(b, used_a)); + MP_CHECKOK(s_mp_pad(ret, used_a)); + *borrow = 0; + for (i = 0; i < used_a; i++) { + *borrow = s_mp_subCT_d(MP_DIGIT(a, i), MP_DIGIT(b, i), *borrow, + &MP_DIGIT(ret, i)); + } + + res = MP_OKAY; +CLEANUP: + return res; +} /* end mp_subCT() */ + +/* }}} */ + +/* {{{ mp_selectCT(cond, a, b, ret) */ + +/* + * return ret= cond ? 
a : b; cond should be either 0 or 1.
+ */
+mp_err
+mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret)
+{
+    mp_size used_a = MP_USED(a);
+    mp_err res;
+    mp_size i;
+
+    cond *= MP_DIGIT_MAX;
+
+    /* We currently require these to be equal on input; we could use pad
+     * to extend one of them, but that might leak data, as it wouldn't be
+     * constant time. */
+    if (used_a != MP_USED(b)) {
+        return MP_BADARG;
+    }
+
+    MP_CHECKOK(s_mp_pad(ret, used_a));
+    for (i = 0; i < used_a; i++) {
+        MP_DIGIT(ret, i) = MP_CT_SEL_DIGIT(cond, MP_DIGIT(a, i), MP_DIGIT(b, i));
+    }
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+} /* end mp_selectCT() */
+
+/* {{{ mp_reduceCT(a, m, c) */
+
+/*
+  mp_reduceCT(a, m, c)
+
+  Compute c = aR^-1 (mod m) in constant time.
+  The input should be in Montgomery form. If the input is the result of
+  a Montgomery multiply, then the output will also be in Montgomery form.
+  The result is reduced to MP_USED(m) digits, but is not clamped.
+ */
+
+mp_err
+mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *c)
+{
+    mp_size used_m = MP_USED(m);
+    mp_size used_c = used_m * 2 + 1;
+    mp_digit *m_digits, *c_digits;
+    mp_size i;
+    mp_digit borrow, carry;
+    mp_err res;
+    mp_int sub;
+
+    MP_DIGITS(&sub) = 0;
+    MP_CHECKOK(mp_init_size(&sub, used_m));
+
+    if (a != c) {
+        MP_CHECKOK(mp_copy(a, c));
+    }
+    MP_CHECKOK(s_mp_pad(c, used_c));
+    m_digits = MP_DIGITS(m);
+    c_digits = MP_DIGITS(c);
+    for (i = 0; i < used_m; i++) {
+        mp_digit m_i = MP_DIGIT(c, i) * n0i;
+        s_mpv_mul_d_add_propCT(m_digits, used_m, m_i, c_digits++, used_c--);
+    }
+    s_mp_rshd(c, used_m);
+    /* MP_USED(c) should be used_m+1, with the high word being any carry
+     * from the previous multiply; save that carry and drop the high
+     * word for the subtraction below */
+    carry = MP_DIGIT(c, used_m);
+    MP_DIGIT(c, used_m) = 0;
+    MP_USED(c) = used_m;
+    /* mp_subCT wants c and m to be the same size; we've already
+     * guaranteed that in the previous statement, so mp_subCT won't
+     * actually modify m, and it's safe to cast away const */
+    MP_CHECKOK(mp_subCT(c, (mp_int *)m, &sub, &borrow));
+
+    /* We return c-m when c >= m: either there was no borrow, or there
+     * was both a borrow and a carry */
+    MP_CHECKOK(mp_selectCT(borrow ^ carry, c, &sub, c));
+    res = MP_OKAY;
+CLEANUP:
+    mp_clear(&sub);
+    return res;
+} /* end mp_reduceCT() */
+
+/* }}} */
+
+/* {{{ mp_mod_d(a, d, c) */
+
+/*
+  mp_mod_d(a, d, c)
+
+  Compute c = a (mod d).
Result will always be 0 <= c < d + */ +mp_err +mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c) +{ + mp_err res; + mp_digit rem; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if (s_mp_cmp_d(a, d) > 0) { + if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY) + return res; + + } else { + if (SIGN(a) == NEG) + rem = d - DIGIT(a, 0); + else + rem = DIGIT(a, 0); + } + + if (c) + *c = rem; + + return MP_OKAY; + +} /* end mp_mod_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Modular arithmetic */ + +#if MP_MODARITH +/* {{{ mp_addmod(a, b, m, c) */ + +/* + mp_addmod(a, b, m, c) + + Compute c = (a + b) mod m + */ + +mp_err +mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_add(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_submod(a, b, m, c) */ + +/* + mp_submod(a, b, m, c) + + Compute c = (a - b) mod m + */ + +mp_err +mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_sub(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_mulmod(a, b, m, c) */ + +/* + mp_mulmod(a, b, m, c) + + Compute c = (a * b) mod m + */ + +mp_err +mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_mul(a, b, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_mulmontmodCT(a, b, m, c) */ + +/* + mp_mulmontmodCT(a, b, m, c) + + Compute c = (a * b) mod m in constant time wrt a and b. either a or b + should be in montgomery form and the output is native. If both a and b + are in montgomery form, then the output will also be in montgomery form + and can be recovered with an mp_reduceCT call. + NOTE: a and b may be modified. + */ + +mp_err +mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, + mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_mulCT(a, b, c, MP_USED(m))) != MP_OKAY) + return res; + + if ((res = mp_reduceCT(c, m, n0i, c)) != MP_OKAY) + return res; + + return MP_OKAY; +} + +/* }}} */ + +/* {{{ mp_sqrmod(a, m, c) */ + +#if MP_SQUARE +mp_err +mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_sqr(a, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} /* end mp_sqrmod() */ +#endif + +/* }}} */ + +/* {{{ s_mp_exptmod(a, b, m, c) */ + +/* + s_mp_exptmod(a, b, m, c) + + Compute c = (a ** b) mod m. Uses a standard square-and-multiply + method with modular reductions at each step. 
(This is basically the + same code as mp_expt(), except for the addition of the reductions) + + The modular reductions are done using Barrett's algorithm (see + s_mp_reduce() below for details) + */ + +mp_err +s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_int s, x, mu; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL && m != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto X; + if ((res = mp_init(&mu)) != MP_OKAY) + goto MU; + + mp_set(&s, 1); + + /* mu = b^2k / m */ + if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY) + goto CLEANUP; + if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY) + goto CLEANUP; + + /* Loop over digits of b in ascending order, except highest order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over the bits of the lower-order digits */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Now do the last digit... */ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&mu); +MU: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end s_mp_exptmod() */ + +/* }}} */ + +/* {{{ mp_exptmod_d(a, d, m, c) */ + +mp_err +mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL && m != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + mp_set(&s, 1); + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY || + (res = mp_mod(&s, m, &s)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_exptmod_d() */ + +/* }}} */ +#endif /* if MP_MODARITH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Comparison functions */ + +/* {{{ mp_cmp_z(a) */ + +/* + mp_cmp_z(a) + + Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0. + */ + +int +mp_cmp_z(const mp_int *a) +{ + ARGMPCHK(a != NULL); + + if (SIGN(a) == NEG) + return MP_LT; + else if (USED(a) == 1 && DIGIT(a, 0) == 0) + return MP_EQ; + else + return MP_GT; + +} /* end mp_cmp_z() */ + +/* }}} */ + +/* {{{ mp_cmp_d(a, d) */ + +/* + mp_cmp_d(a, d) + + Compare a <=> d. 
Returns <0 if a < d, 0 if a = d, and >0 if a > d.
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGCHK(a != NULL, MP_EQ);
+
+    if (SIGN(a) == NEG)
+        return MP_LT;
+
+    return s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    if (SIGN(a) == SIGN(b)) {
+        int mag;
+
+        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
+            return MP_EQ;
+
+        if (SIGN(a) == ZPOS)
+            return mag;
+        else
+            return -mag;
+
+    } else if (SIGN(a) == ZPOS) {
+        return MP_GT;
+    } else {
+        return MP_LT;
+    }
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+  mp_cmp_mag(a, b)
+
+  Compares |a| <=> |b|, and returns an appropriate comparison result
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    return s_mp_cmp(a, b);
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+  mp_isodd(a)
+
+  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+    ARGMPCHK(a != NULL);
+
+    return (int)(DIGIT(a, 0) & 1);
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+int
+mp_iseven(const mp_int *a)
+{
+    return !mp_isodd(a);
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+  Computes the GCD using the constant-time algorithm
+  by Bernstein and Yang (https://eprint.iacr.org/2019/266)
+  "Fast constant-time gcd computation and modular inversion"
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_err res;
+    mp_digit cond = 0, mask = 0;
+    mp_int g, temp, f;
+    int i, j, m, bit = 1, delta = 1, shifts = 0, last = -1;
+    mp_size top, flen, glen;
+    mp_int *clear[3];
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+    /*
+      Early exit if either of the inputs is zero.
+      Caller is responsible for the proper handling of inputs.
+    */
+    if (mp_cmp_z(a) == MP_EQ) {
+        res = mp_copy(b, c);
+        SIGN(c) = ZPOS;
+        return res;
+    } else if (mp_cmp_z(b) == MP_EQ) {
+        res = mp_copy(a, c);
+        SIGN(c) = ZPOS;
+        return res;
+    }
+
+    MP_CHECKOK(mp_init(&temp));
+    clear[++last] = &temp;
+    MP_CHECKOK(mp_init_copy(&g, a));
+    clear[++last] = &g;
+    MP_CHECKOK(mp_init_copy(&f, b));
+    clear[++last] = &f;
+
+    /*
+      For the even case, compute the number of
+      shared powers of 2 in f and g.
+    */
+    for (i = 0; i < USED(&f) && i < USED(&g); i++) {
+        mask = ~(DIGIT(&f, i) | DIGIT(&g, i));
+        for (j = 0; j < MP_DIGIT_BIT; j++) {
+            bit &= mask;
+            shifts += bit;
+            mask >>= 1;
+        }
+    }
+    /* Reduce to the odd case by removing the powers of 2. */
+    s_mp_div_2d(&f, shifts);
+    s_mp_div_2d(&g, shifts);
+
+    /* Allocate to the size of the largest mp_int. */
+    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+    MP_CHECKOK(s_mp_grow(&f, top));
+    MP_CHECKOK(s_mp_grow(&g, top));
+    MP_CHECKOK(s_mp_grow(&temp, top));
+
+    /* Make sure f contains the odd value. */
+    MP_CHECKOK(mp_cswap((~DIGIT(&f, 0) & 1), &f, &g, top));
+
+    /* Upper bound for the total iterations. */
+    flen = mpl_significant_bits(&f);
+    glen = mpl_significant_bits(&g);
+    m = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+
+    for (i = 0; i < m; i++) {
+        /* Step 1: conditional swap. */
+        /* Set cond if delta > 0 and g is odd.
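+           When delta > 0, -delta is negative, so the arithmetic right
+           shift by the sign-bit position yields all ones; ANDing with
+           the low bit of g then makes cond 1 exactly when both
+           conditions hold.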
*/
+        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+        /* If cond is set replace (delta,f) with (-delta,-f). */
+        delta = (-cond & -delta) | ((cond - 1) & delta);
+        SIGN(&f) ^= cond;
+        /* If cond is set swap f with g. */
+        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+
+        /* Step 2: elimination. */
+        /* Update delta. */
+        delta++;
+        /* If g is odd, right shift (g+f), else right shift g. */
+        MP_CHECKOK(mp_add(&g, &f, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+        s_mp_div_2(&g);
+    }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+    /* GCD is in f, take the absolute value. */
+    SIGN(&f) = ZPOS;
+
+    /* Add back the removed powers of 2. */
+    MP_CHECKOK(s_mp_mul_2d(&f, shifts));
+
+    MP_CHECKOK(mp_copy(&f, c));
+
+CLEANUP:
+    while (last >= 0)
+        mp_clear(clear[last--]);
+    return res;
+} /* end mp_gcd() */
+
+/* }}} */
+
+/* {{{ mp_lcm(a, b, c) */
+
+/* We compute the least common multiple using the rule:
+
+   ab = [a, b](a, b)
+
+   ... by computing the product, and dividing out the gcd.
+ */
+
+mp_err
+mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int gcd, prod;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* Set up temporaries */
+    if ((res = mp_init(&gcd)) != MP_OKAY)
+        return res;
+    if ((res = mp_init(&prod)) != MP_OKAY)
+        goto GCD;
+
+    if ((res = mp_mul(a, b, &prod)) != MP_OKAY)
+        goto CLEANUP;
+    if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
+        goto CLEANUP;
+
+    res = mp_div(&prod, &gcd, c, NULL);
+
+CLEANUP:
+    mp_clear(&prod);
+GCD:
+    mp_clear(&gcd);
+
+    return res;
+
+} /* end mp_lcm() */
+
+/* }}} */
+
+/* {{{ mp_xgcd(a, b, g, x, y) */
+
+/*
+  mp_xgcd(a, b, g, x, y)
+
+  Compute g = (a, b) and values x and y satisfying Bezout's identity
+  (that is, ax + by = g). This uses the binary extended GCD algorithm
+  based on the Stein algorithm used for mp_gcd().
+  See algorithm 14.61 in the Handbook of Applied Cryptography.
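+
+  A small worked instance: a = 12, b = 18 gives g = 6 with x = -1 and
+  y = 1, since 12*(-1) + 18*1 = 6.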
+ */ + +mp_err +mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y) +{ + mp_int gx, xc, yc, u, v, A, B, C, D; + mp_int *clean[9]; + mp_err res; + int last = -1; + + if (mp_cmp_z(b) == 0) + return MP_RANGE; + + /* Initialize all these variables we need */ + MP_CHECKOK(mp_init(&u)); + clean[++last] = &u; + MP_CHECKOK(mp_init(&v)); + clean[++last] = &v; + MP_CHECKOK(mp_init(&gx)); + clean[++last] = &gx; + MP_CHECKOK(mp_init(&A)); + clean[++last] = &A; + MP_CHECKOK(mp_init(&B)); + clean[++last] = &B; + MP_CHECKOK(mp_init(&C)); + clean[++last] = &C; + MP_CHECKOK(mp_init(&D)); + clean[++last] = &D; + MP_CHECKOK(mp_init_copy(&xc, a)); + clean[++last] = &xc; + mp_abs(&xc, &xc); + MP_CHECKOK(mp_init_copy(&yc, b)); + clean[++last] = &yc; + mp_abs(&yc, &yc); + + mp_set(&gx, 1); + + /* Divide by two until at least one of them is odd */ + while (mp_iseven(&xc) && mp_iseven(&yc)) { + mp_size nx = mp_trailing_zeros(&xc); + mp_size ny = mp_trailing_zeros(&yc); + mp_size n = MP_MIN(nx, ny); + s_mp_div_2d(&xc, n); + s_mp_div_2d(&yc, n); + MP_CHECKOK(s_mp_mul_2d(&gx, n)); + } + + MP_CHECKOK(mp_copy(&xc, &u)); + MP_CHECKOK(mp_copy(&yc, &v)); + mp_set(&A, 1); + mp_set(&D, 1); + + /* Loop through binary GCD algorithm */ + do { + while (mp_iseven(&u)) { + s_mp_div_2(&u); + + if (mp_iseven(&A) && mp_iseven(&B)) { + s_mp_div_2(&A); + s_mp_div_2(&B); + } else { + MP_CHECKOK(mp_add(&A, &yc, &A)); + s_mp_div_2(&A); + MP_CHECKOK(mp_sub(&B, &xc, &B)); + s_mp_div_2(&B); + } + } + + while (mp_iseven(&v)) { + s_mp_div_2(&v); + + if (mp_iseven(&C) && mp_iseven(&D)) { + s_mp_div_2(&C); + s_mp_div_2(&D); + } else { + MP_CHECKOK(mp_add(&C, &yc, &C)); + s_mp_div_2(&C); + MP_CHECKOK(mp_sub(&D, &xc, &D)); + s_mp_div_2(&D); + } + } + + if (mp_cmp(&u, &v) >= 0) { + MP_CHECKOK(mp_sub(&u, &v, &u)); + MP_CHECKOK(mp_sub(&A, &C, &A)); + MP_CHECKOK(mp_sub(&B, &D, &B)); + } else { + MP_CHECKOK(mp_sub(&v, &u, &v)); + MP_CHECKOK(mp_sub(&C, &A, &C)); + MP_CHECKOK(mp_sub(&D, &B, &D)); + } + } while (mp_cmp_z(&u) != 0); + + /* copy results to output */ + if (x) + MP_CHECKOK(mp_copy(&C, x)); + + if (y) + MP_CHECKOK(mp_copy(&D, y)); + + if (g) + MP_CHECKOK(mp_mul(&gx, &v, g)); + +CLEANUP: + while (last >= 0) + mp_clear(clean[last--]); + + return res; + +} /* end mp_xgcd() */ + +/* }}} */ + +mp_size +mp_trailing_zeros(const mp_int *mp) +{ + mp_digit d; + mp_size n = 0; + unsigned int ix; + + if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp)) + return n; + + for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix) + n += MP_DIGIT_BIT; + if (!d) + return 0; /* shouldn't happen, but ... */ +#if !defined(MP_USE_UINT_DIGIT) + if (!(d & 0xffffffffU)) { + d >>= 32; + n += 32; + } +#endif + if (!(d & 0xffffU)) { + d >>= 16; + n += 16; + } + if (!(d & 0xffU)) { + d >>= 8; + n += 8; + } + if (!(d & 0xfU)) { + d >>= 4; + n += 4; + } + if (!(d & 0x3U)) { + d >>= 2; + n += 2; + } + if (!(d & 0x1U)) { + d >>= 1; + n += 1; + } +#if MP_ARGCHK == 2 + assert(0 != (d & 1)); +#endif + return n; +} + +/* Given a and prime p, computes c and k such that a*c == 2**k (mod p). +** Returns k (positive) or error (negative). +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). 
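+**
+** For intuition, with a = 3 and p = 7 a valid output is c = 5 with
+** k = 0, since 3*5 = 15 == 1 = 2**0 (mod 7); s_mp_fixup_reciprocal()
+** below divides the 2**k factor back out to recover a**-1 mod p.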
+*/ +mp_err +s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c) +{ + mp_err res; + mp_err k = 0; + mp_int d, f, g; + + ARGCHK(a != NULL && p != NULL && c != NULL, MP_BADARG); + + MP_DIGITS(&d) = 0; + MP_DIGITS(&f) = 0; + MP_DIGITS(&g) = 0; + MP_CHECKOK(mp_init(&d)); + MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */ + MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */ + + mp_set(c, 1); + mp_zero(&d); + + if (mp_cmp_z(&f) == 0) { + res = MP_UNDEF; + } else + for (;;) { + int diff_sign; + while (mp_iseven(&f)) { + mp_size n = mp_trailing_zeros(&f); + if (!n) { + res = MP_UNDEF; + goto CLEANUP; + } + s_mp_div_2d(&f, n); + MP_CHECKOK(s_mp_mul_2d(&d, n)); + k += n; + } + if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */ + res = k; + break; + } + diff_sign = mp_cmp(&f, &g); + if (diff_sign < 0) { /* f < g */ + s_mp_exch(&f, &g); + s_mp_exch(c, &d); + } else if (diff_sign == 0) { /* f == g */ + res = MP_UNDEF; /* a and p are not relatively prime */ + break; + } + if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) { + MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */ + MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */ + } else { + MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */ + MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */ + } + } + if (res >= 0) { + if (mp_cmp_mag(c, p) >= 0) { + MP_CHECKOK(mp_div(c, p, NULL, c)); + } + if (MP_SIGN(c) != MP_ZPOS) { + MP_CHECKOK(mp_add(c, p, c)); + } + res = k; + } + +CLEANUP: + mp_clear(&d); + mp_clear(&f); + mp_clear(&g); + return res; +} + +/* Compute T = (P ** -1) mod MP_RADIX. Also works for 16-bit mp_digits. +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). +*/ +mp_digit +s_mp_invmod_radix(mp_digit P) +{ + mp_digit T = P; + T *= 2 - (P * T); + T *= 2 - (P * T); + T *= 2 - (P * T); + T *= 2 - (P * T); +#if !defined(MP_USE_UINT_DIGIT) + T *= 2 - (P * T); + T *= 2 - (P * T); +#endif + return T; +} + +/* Given c, k, and prime p, where a*c == 2**k (mod p), +** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction. +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). +*/ +mp_err +s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x) +{ + int k_orig = k; + mp_digit r; + mp_size ix; + mp_err res; + + if (mp_cmp_z(c) < 0) { /* c < 0 */ + MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */ + } else { + MP_CHECKOK(mp_copy(c, x)); /* x = c */ + } + + /* make sure x is large enough */ + ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1; + ix = MP_MAX(ix, MP_USED(x)); + MP_CHECKOK(s_mp_pad(x, ix)); + + r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0)); + + for (ix = 0; k > 0; ix++) { + int j = MP_MIN(k, MP_DIGIT_BIT); + mp_digit v = r * MP_DIGIT(x, ix); + if (j < MP_DIGIT_BIT) { + v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */ + } + s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */ + k -= j; + } + s_mp_clamp(x); + s_mp_div_2d(x, k_orig); + res = MP_OKAY; + +CLEANUP: + return res; +} + +/* + Computes the modular inverse using the constant-time algorithm + by Bernstein and Yang (https://eprint.iacr.org/2019/266) + "Fast constant-time gcd computation and modular inversion" + */ +mp_err +s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + mp_digit cond = 0; + mp_int g, f, v, r, temp; + int i, its, delta = 1, last = -1; + mp_size top, flen, glen; + mp_int *clear[6]; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + /* Check for invalid inputs. 
*/
+    if (mp_cmp_z(a) == MP_EQ || mp_cmp_d(m, 2) == MP_LT)
+        return MP_RANGE;
+
+    if (a == m || mp_iseven(m))
+        return MP_UNDEF;
+
+    MP_CHECKOK(mp_init(&temp));
+    clear[++last] = &temp;
+    MP_CHECKOK(mp_init(&v));
+    clear[++last] = &v;
+    MP_CHECKOK(mp_init(&r));
+    clear[++last] = &r;
+    MP_CHECKOK(mp_init_copy(&g, a));
+    clear[++last] = &g;
+    MP_CHECKOK(mp_init_copy(&f, m));
+    clear[++last] = &f;
+
+    mp_set(&v, 0);
+    mp_set(&r, 1);
+
+    /* Allocate to the size of the largest mp_int. */
+    top = (mp_size)1 + ((USED(&f) >= USED(&g)) ? USED(&f) : USED(&g));
+    MP_CHECKOK(s_mp_grow(&f, top));
+    MP_CHECKOK(s_mp_grow(&g, top));
+    MP_CHECKOK(s_mp_grow(&temp, top));
+    MP_CHECKOK(s_mp_grow(&v, top));
+    MP_CHECKOK(s_mp_grow(&r, top));
+
+    /* Upper bound for the total iterations. */
+    flen = mpl_significant_bits(&f);
+    glen = mpl_significant_bits(&g);
+    its = 4 + 3 * ((flen >= glen) ? flen : glen);
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+
+    for (i = 0; i < its; i++) {
+        /* Step 1: conditional swap. */
+        /* Set cond if delta > 0 and g is odd. */
+        cond = (-delta >> (8 * sizeof(delta) - 1)) & DIGIT(&g, 0) & 1;
+        /* If cond is set replace (delta,f,v) with (-delta,-f,-v). */
+        delta = (-cond & -delta) | ((cond - 1) & delta);
+        SIGN(&f) ^= cond;
+        SIGN(&v) ^= cond;
+        /* If cond is set swap (f,v) with (g,r). */
+        MP_CHECKOK(mp_cswap(cond, &f, &g, top));
+        MP_CHECKOK(mp_cswap(cond, &v, &r, top));
+
+        /* Step 2: elimination. */
+        /* Update delta */
+        delta++;
+        /* If g is odd, replace r with (r+v). */
+        MP_CHECKOK(mp_add(&r, &v, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &r, &temp, top));
+        /* If g is odd, right shift (g+f), else right shift g. */
+        MP_CHECKOK(mp_add(&g, &f, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&g, 0) & 1), &g, &temp, top));
+        s_mp_div_2(&g);
+        /*
+          If r is even, right shift it.
+          If r is odd, right shift (r+m), which is even because m is odd.
+          We want the result modulo m, so adding in multiples of m here
+          vanishes.
+        */
+        MP_CHECKOK(mp_add(&r, m, &temp));
+        MP_CHECKOK(mp_cswap((DIGIT(&r, 0) & 1), &r, &temp, top));
+        s_mp_div_2(&r);
+    }
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+    /* We have the inverse in v, propagate sign from f. */
+    SIGN(&v) ^= SIGN(&f);
+    /* GCD is in f, take the absolute value. */
+    SIGN(&f) = ZPOS;
+
+    /* If gcd != 1, not invertible. */
+    if (mp_cmp_d(&f, 1) != MP_EQ) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    /* Return inverse modulo m. */
+    MP_CHECKOK(mp_mod(&v, m, c));
+
+CLEANUP:
+    while (last >= 0)
+        mp_clear(clear[last--]);
+    return res;
+}
+
+/* Known good algorithm for computing modular inverse. But slow. */
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int g, x;
+    mp_err res;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_CHECKOK(mp_init(&x));
+    MP_CHECKOK(mp_init(&g));
+
+    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
+
+    if (mp_cmp_d(&g, 1) != MP_EQ) {
+        res = MP_UNDEF;
+        goto CLEANUP;
+    }
+
+    res = mp_mod(&x, m, c);
+    SIGN(c) = SIGN(a);
+
+CLEANUP:
+    mp_clear(&x);
+    mp_clear(&g);
+
+    return res;
+}
+
+/* Modular inverse where the modulus is 2**k.
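+   (The loop below is a Newton/Hensel iteration, t <- t*(2 - a*t) mod 2**k:
+   for odd a, the starting value a is already its own inverse mod 8, and
+   each pass roughly doubles the number of correct low-order bits.)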
*/
+/* c = a**-1 mod 2**k */
+mp_err
+s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
+{
+    mp_err res;
+    mp_size ix = k + 4;
+    mp_int t0, t1, val, tmp, two2k;
+
+    static const mp_digit d2 = 2;
+    static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 };
+
+    if (mp_iseven(a))
+        return MP_UNDEF;
+
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4146) // Thanks MSVC, we know we're negating an unsigned mp_digit
+#endif
+    if (k <= MP_DIGIT_BIT) {
+        mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
+        /* propagate the sign from mp_int */
+        i = (i ^ -(mp_digit)SIGN(a)) + (mp_digit)SIGN(a);
+        if (k < MP_DIGIT_BIT)
+            i &= ((mp_digit)1 << k) - (mp_digit)1;
+        mp_set(c, i);
+        return MP_OKAY;
+    }
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
+
+    MP_DIGITS(&t0) = 0;
+    MP_DIGITS(&t1) = 0;
+    MP_DIGITS(&val) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&two2k) = 0;
+    MP_CHECKOK(mp_init_copy(&val, a));
+    s_mp_mod_2d(&val, k);
+    MP_CHECKOK(mp_init_copy(&t0, &val));
+    MP_CHECKOK(mp_init_copy(&t1, &t0));
+    MP_CHECKOK(mp_init(&tmp));
+    MP_CHECKOK(mp_init(&two2k));
+    MP_CHECKOK(s_mp_2expt(&two2k, k));
+    do {
+        MP_CHECKOK(mp_mul(&val, &t1, &tmp));
+        MP_CHECKOK(mp_sub(&two, &tmp, &tmp));
+        MP_CHECKOK(mp_mul(&t1, &tmp, &t1));
+        s_mp_mod_2d(&t1, k);
+        while (MP_SIGN(&t1) != MP_ZPOS) {
+            MP_CHECKOK(mp_add(&t1, &two2k, &t1));
+        }
+        if (mp_cmp(&t1, &t0) == MP_EQ)
+            break;
+        MP_CHECKOK(mp_copy(&t1, &t0));
+    } while (--ix > 0);
+    if (!ix) {
+        res = MP_UNDEF;
+    } else {
+        mp_exch(c, &t1);
+    }
+
+CLEANUP:
+    mp_clear(&t0);
+    mp_clear(&t1);
+    mp_clear(&val);
+    mp_clear(&tmp);
+    mp_clear(&two2k);
+    return res;
+}
+
+mp_err
+s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    mp_size k;
+    mp_int oddFactor, evenFactor; /* factors of the modulus */
+    mp_int oddPart, evenPart;     /* parts to combine via CRT. */
+    mp_int C2, tmp1, tmp2;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    /*static const mp_digit d1 = 1; */
+    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
+
+    if ((res = s_mp_ispow2(m)) >= 0) {
+        k = res;
+        return s_mp_invmod_2d(a, k, c);
+    }
+    MP_DIGITS(&oddFactor) = 0;
+    MP_DIGITS(&evenFactor) = 0;
+    MP_DIGITS(&oddPart) = 0;
+    MP_DIGITS(&evenPart) = 0;
+    MP_DIGITS(&C2) = 0;
+    MP_DIGITS(&tmp1) = 0;
+    MP_DIGITS(&tmp2) = 0;
+
+    MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
+    MP_CHECKOK(mp_init(&evenFactor));
+    MP_CHECKOK(mp_init(&oddPart));
+    MP_CHECKOK(mp_init(&evenPart));
+    MP_CHECKOK(mp_init(&C2));
+    MP_CHECKOK(mp_init(&tmp1));
+    MP_CHECKOK(mp_init(&tmp2));
+
+    k = mp_trailing_zeros(m);
+    s_mp_div_2d(&oddFactor, k);
+    MP_CHECKOK(s_mp_2expt(&evenFactor, k));
+
+    /* compute a**-1 mod oddFactor. */
+    MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
+    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
+    MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
+
+    /* Use the Chinese Remainder Theorem to compute a**-1 mod m. */
+    /* let m1 = oddFactor, v1 = oddPart,
+     * let m2 = evenFactor, v2 = evenPart.
+     */
+
+    /* Compute C2 = m1**-1 mod m2.
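+     * As a worked check: m = 12 = 3 * 4 and a = 5 give v1 = 2, v2 = 1,
+     * C2 = 3**-1 mod 4 = 3, so u = (1 - 2)*3 mod 4 = 1 and the answer is
+     * v1 + u*m1 = 2 + 3 = 5; indeed 5*5 = 25 == 1 (mod 12).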
*/
+    MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
+
+    /* compute u = (v2 - v1)*C2 mod m2 */
+    MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
+    MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
+    s_mp_mod_2d(&tmp2, k);
+    while (MP_SIGN(&tmp2) != MP_ZPOS) {
+        MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
+    }
+
+    /* compute answer = v1 + u*m1 */
+    MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
+    MP_CHECKOK(mp_add(&oddPart, c, c));
+    /* not sure this is necessary, but it's low cost if not. */
+    MP_CHECKOK(mp_mod(c, m, c));
+
+CLEANUP:
+    mp_clear(&oddFactor);
+    mp_clear(&evenFactor);
+    mp_clear(&oddPart);
+    mp_clear(&evenPart);
+    mp_clear(&C2);
+    mp_clear(&tmp1);
+    mp_clear(&tmp2);
+    return res;
+}
+
+/* {{{ mp_invmod(a, m, c) */
+
+/*
+  mp_invmod(a, m, c)
+
+  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
+  This is equivalent to the question of whether (a, m) = 1. If not,
+  MP_UNDEF is returned, and there is no inverse.
+ */
+
+mp_err
+mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    if (mp_isodd(m)) {
+        return s_mp_invmod_odd_m(a, m, c);
+    }
+    if (mp_iseven(a))
+        return MP_UNDEF; /* not invertible */
+
+    return s_mp_invmod_even_m(a, m, c);
+
+} /* end mp_invmod() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_print(mp, ofp) */
+
+#if MP_IOFUNC
+/*
+  mp_print(mp, ofp)
+
+  Print a textual representation of the given mp_int on the output
+  stream 'ofp'. Output is generated using the internal radix.
+ */
+
+void
+mp_print(mp_int *mp, FILE *ofp)
+{
+    int ix;
+
+    if (mp == NULL || ofp == NULL)
+        return;
+
+    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
+
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
+    }
+
+} /* end mp_print() */
+
+#endif /* if MP_IOFUNC */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ More I/O Functions */
+
+/* {{{ mp_read_raw(mp, str, len) */
+
+/*
+  mp_read_raw(mp, str, len)
+
+  Read in a raw value (base 256) into the given mp_int
+ */
+
+mp_err
+mp_read_raw(mp_int *mp, char *str, int len)
+{
+    int ix;
+    mp_err res;
+    unsigned char *ustr = (unsigned char *)str;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Read the rest of the digits */
+    for (ix = 1; ix < len; ix++) {
+        if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
+            return res;
+        if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
+            return res;
+    }
+
+    /* Get sign from first byte */
+    if (ustr[0])
+        SIGN(mp) = NEG;
+    else
+        SIGN(mp) = ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_read_raw() */
+
+/* }}} */
+
+/* {{{ mp_raw_size(mp) */
+
+int
+mp_raw_size(mp_int *mp)
+{
+    ARGCHK(mp != NULL, 0);
+
+    return (USED(mp) * sizeof(mp_digit)) + 1;
+
+} /* end mp_raw_size() */
+
+/* }}} */
+
+/* {{{ mp_toraw(mp, str) */
+
+mp_err
+mp_toraw(mp_int *mp, char *str)
+{
+    int ix, jx, pos = 1;
+
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+
+    str[0] = (char)SIGN(mp);
+
+    /* Iterate over each digit...
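+       most significant digit first; each digit is then unpacked high
+       byte first, so the output is the sign byte followed by the
+       magnitude in big-endian byte order.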
*/
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        mp_digit d = DIGIT(mp, ix);
+
+        /* Unpack digit bytes, high order first */
+        for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+            str[pos++] = (char)(d >> (jx * CHAR_BIT));
+        }
+    }
+
+    return MP_OKAY;
+
+} /* end mp_toraw() */
+
+/* }}} */
+
+/* {{{ mp_read_radix(mp, str, radix) */
+
+/*
+  mp_read_radix(mp, str, radix)
+
+  Read an integer from the given string, and set mp to the resulting
+  value. The input is presumed to be in the given radix. Leading
+  non-digit characters are ignored, and the function reads until a
+  non-digit character or the end of the string.
+ */
+
+mp_err
+mp_read_radix(mp_int *mp, const char *str, int radix)
+{
+    int ix = 0, val = 0;
+    mp_err res;
+    mp_sign sig = ZPOS;
+
+    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
+           MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Skip leading non-digit characters until a digit or '-' or '+' */
+    while (str[ix] &&
+           (s_mp_tovalue(str[ix], radix) < 0) &&
+           str[ix] != '-' &&
+           str[ix] != '+') {
+        ++ix;
+    }
+
+    if (str[ix] == '-') {
+        sig = NEG;
+        ++ix;
+    } else if (str[ix] == '+') {
+        sig = ZPOS; /* this is the default anyway... */
+        ++ix;
+    }
+
+    while ((val = s_mp_tovalue(str[ix], radix)) >= 0) {
+        if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
+            return res;
+        if ((res = s_mp_add_d(mp, val)) != MP_OKAY)
+            return res;
+        ++ix;
+    }
+
+    if (s_mp_cmp_d(mp, 0) == MP_EQ)
+        SIGN(mp) = ZPOS;
+    else
+        SIGN(mp) = sig;
+
+    return MP_OKAY;
+
+} /* end mp_read_radix() */
+
+mp_err
+mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
+{
+    int radix = default_radix;
+    int cx;
+    mp_sign sig = ZPOS;
+    mp_err res;
+
+    /* Skip leading non-digit characters until a digit or '-' or '+' */
+    while ((cx = *str) != 0 &&
+           (s_mp_tovalue(cx, radix) < 0) &&
+           cx != '-' &&
+           cx != '+') {
+        ++str;
+    }
+
+    if (cx == '-') {
+        sig = NEG;
+        ++str;
+    } else if (cx == '+') {
+        sig = ZPOS; /* this is the default anyway... */
+        ++str;
+    }
+
+    if (str[0] == '0') {
+        if ((str[1] | 0x20) == 'x') {
+            radix = 16;
+            str += 2;
+        } else {
+            radix = 8;
+            str++;
+        }
+    }
+    res = mp_read_radix(a, str, radix);
+    if (res == MP_OKAY) {
+        MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ?
ZPOS : sig; + } + return res; +} + +/* }}} */ + +/* {{{ mp_radix_size(mp, radix) */ + +int +mp_radix_size(mp_int *mp, int radix) +{ + int bits; + + if (!mp || radix < 2 || radix > MAX_RADIX) + return 0; + + bits = USED(mp) * DIGIT_BIT - 1; + + return SIGN(mp) + s_mp_outlen(bits, radix); + +} /* end mp_radix_size() */ + +/* }}} */ + +/* {{{ mp_toradix(mp, str, radix) */ + +mp_err +mp_toradix(mp_int *mp, char *str, int radix) +{ + int ix, pos = 0; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE); + + if (mp_cmp_z(mp) == MP_EQ) { + str[0] = '0'; + str[1] = '\0'; + } else { + mp_err res; + mp_int tmp; + mp_sign sgn; + mp_digit rem, rdx = (mp_digit)radix; + char ch; + + if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY) + return res; + + /* Save sign for later, and take absolute value */ + sgn = SIGN(&tmp); + SIGN(&tmp) = ZPOS; + + /* Generate output digits in reverse order */ + while (mp_cmp_z(&tmp) != 0) { + if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + /* Generate digits, use capital letters */ + ch = s_mp_todigit(rem, radix, 0); + + str[pos++] = ch; + } + + /* Add - sign if original value was negative */ + if (sgn == NEG) + str[pos++] = '-'; + + /* Add trailing NUL to end the string */ + str[pos--] = '\0'; + + /* Reverse the digits and sign indicator */ + ix = 0; + while (ix < pos) { + char tmpc = str[ix]; + + str[ix] = str[pos]; + str[pos] = tmpc; + ++ix; + --pos; + } + + mp_clear(&tmp); + } + + return MP_OKAY; + +} /* end mp_toradix() */ + +/* }}} */ + +/* {{{ mp_tovalue(ch, r) */ + +int +mp_tovalue(char ch, int r) +{ + return s_mp_tovalue(ch, r); + +} /* end mp_tovalue() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_strerror(ec) */ + +/* + mp_strerror(ec) + + Return a string describing the meaning of error code 'ec'. The + string returned is allocated in static memory, so the caller should + not attempt to modify or free the memory associated with this + string. + */ +const char * +mp_strerror(mp_err ec) +{ + int aec = (ec < 0) ? 
-ec : ec; + + /* Code values are negative, so the senses of these comparisons + are accurate */ + if (ec < MP_LAST_CODE || ec > MP_OKAY) { + return mp_err_string[0]; /* unknown error code */ + } else { + return mp_err_string[aec + 1]; + } + +} /* end mp_strerror() */ + +/* }}} */ + +/*========================================================================*/ +/*------------------------------------------------------------------------*/ +/* Static function definitions (internal use only) */ + +/* {{{ Memory management */ + +/* {{{ s_mp_grow(mp, min) */ + +/* Make sure there are at least 'min' digits allocated to mp */ +mp_err +s_mp_grow(mp_int *mp, mp_size min) +{ + ARGCHK(mp != NULL, MP_BADARG); + + if (min > ALLOC(mp)) { + mp_digit *tmp; + + /* Set min to next nearest default precision block size */ + min = MP_ROUNDUP(min, s_mp_defprec); + + if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL) + return MP_MEM; + + s_mp_copy(DIGITS(mp), tmp, USED(mp)); + + s_mp_setz(DIGITS(mp), ALLOC(mp)); + s_mp_free(DIGITS(mp)); + DIGITS(mp) = tmp; + ALLOC(mp) = min; + } + + return MP_OKAY; + +} /* end s_mp_grow() */ + +/* }}} */ + +/* {{{ s_mp_pad(mp, min) */ + +/* Make sure the used size of mp is at least 'min', growing if needed */ +mp_err +s_mp_pad(mp_int *mp, mp_size min) +{ + ARGCHK(mp != NULL, MP_BADARG); + + if (min > USED(mp)) { + mp_err res; + + /* Make sure there is room to increase precision */ + if (min > ALLOC(mp)) { + if ((res = s_mp_grow(mp, min)) != MP_OKAY) + return res; + } else { + s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp)); + } + + /* Increase precision; should already be 0-filled */ + USED(mp) = min; + } + + return MP_OKAY; + +} /* end s_mp_pad() */ + +/* }}} */ + +/* {{{ s_mp_setz(dp, count) */ + +/* Set 'count' digits pointed to by dp to be zeroes */ +void +s_mp_setz(mp_digit *dp, mp_size count) +{ + memset(dp, 0, count * sizeof(mp_digit)); +} /* end s_mp_setz() */ + +/* }}} */ + +/* {{{ s_mp_copy(sp, dp, count) */ + +/* Copy 'count' digits from sp to dp */ +void +s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count) +{ + memcpy(dp, sp, count * sizeof(mp_digit)); +} /* end s_mp_copy() */ + +/* }}} */ + +/* {{{ s_mp_alloc(nb, ni) */ + +/* Allocate ni records of nb bytes each, and return a pointer to that */ +void * +s_mp_alloc(size_t nb, size_t ni) +{ + return calloc(nb, ni); + +} /* end s_mp_alloc() */ + +/* }}} */ + +/* {{{ s_mp_free(ptr) */ + +/* Free the memory pointed to by ptr */ +void +s_mp_free(void *ptr) +{ + if (ptr) { + free(ptr); + } +} /* end s_mp_free() */ + +/* }}} */ + +/* {{{ s_mp_clamp(mp) */ + +/* Remove leading zeroes from the given value */ +void +s_mp_clamp(mp_int *mp) +{ + mp_size used = MP_USED(mp); + while (used > 1 && DIGIT(mp, used - 1) == 0) + --used; + MP_USED(mp) = used; + if (used == 1 && DIGIT(mp, 0) == 0) + MP_SIGN(mp) = ZPOS; +} /* end s_mp_clamp() */ + +/* }}} */ + +/* {{{ s_mp_exch(a, b) */ + +/* Exchange the data for a and b; (b, a) = (a, b) */ +void +s_mp_exch(mp_int *a, mp_int *b) +{ + mp_int tmp; + if (!a || !b) { + return; + } + + tmp = *a; + *a = *b; + *b = tmp; + +} /* end s_mp_exch() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Arithmetic helpers */ + +/* {{{ s_mp_lshd(mp, p) */ + +/* + Shift mp leftward by p digits, growing if needed, and zero-filling + the in-shifted digits at the right end. 
This is a convenient + alternative to multiplication by powers of the radix + */ + +mp_err +s_mp_lshd(mp_int *mp, mp_size p) +{ + mp_err res; + unsigned int ix; + + ARGCHK(mp != NULL, MP_BADARG); + + if (p == 0) + return MP_OKAY; + + if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0) + return MP_OKAY; + + if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY) + return res; + + /* Shift all the significant figures over as needed */ + for (ix = USED(mp) - p; ix-- > 0;) { + DIGIT(mp, ix + p) = DIGIT(mp, ix); + } + + /* Fill the bottom digits with zeroes */ + for (ix = 0; (mp_size)ix < p; ix++) + DIGIT(mp, ix) = 0; + + return MP_OKAY; + +} /* end s_mp_lshd() */ + +/* }}} */ + +/* {{{ s_mp_mul_2d(mp, d) */ + +/* + Multiply the integer by 2^d, where d is a number of bits. This + amounts to a bitwise shift of the value. + */ +mp_err +s_mp_mul_2d(mp_int *mp, mp_digit d) +{ + mp_err res; + mp_digit dshift, rshift, mask, x, prev = 0; + mp_digit *pa = NULL; + int i; + + ARGCHK(mp != NULL, MP_BADARG); + + dshift = d / MP_DIGIT_BIT; + d %= MP_DIGIT_BIT; + /* mp_digit >> rshift is undefined behavior for rshift >= MP_DIGIT_BIT */ + /* mod and corresponding mask logic avoid that when d = 0 */ + rshift = MP_DIGIT_BIT - d; + rshift %= MP_DIGIT_BIT; + /* mask = (2**d - 1) * 2**(w-d) mod 2**w */ + mask = (DIGIT_MAX << rshift) + 1; + mask &= DIGIT_MAX - 1; + /* bits to be shifted out of the top word */ + x = MP_DIGIT(mp, MP_USED(mp) - 1) & mask; + + if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (x != 0)))) + return res; + + if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift))) + return res; + + pa = MP_DIGITS(mp) + dshift; + + for (i = MP_USED(mp) - dshift; i > 0; i--) { + x = *pa; + *pa++ = (x << d) | prev; + prev = (x & mask) >> rshift; + } + + s_mp_clamp(mp); + return MP_OKAY; +} /* end s_mp_mul_2d() */ + +/* {{{ s_mp_rshd(mp, p) */ + +/* + Shift mp rightward by p digits. Maintains the invariant that + digits above the precision are all zero. Digits shifted off the + end are lost. Cannot fail. + */ + +void +s_mp_rshd(mp_int *mp, mp_size p) +{ + mp_size ix; + mp_digit *src, *dst; + + if (p == 0) + return; + + /* Shortcut when all digits are to be shifted off */ + if (p >= USED(mp)) { + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = ZPOS; + return; + } + + /* Shift all the significant figures over as needed */ + dst = MP_DIGITS(mp); + src = dst + p; + for (ix = USED(mp) - p; ix > 0; ix--) + *dst++ = *src++; + + MP_USED(mp) -= p; + /* Fill the top digits with zeroes */ + while (p-- > 0) + *dst++ = 0; + +} /* end s_mp_rshd() */ + +/* }}} */ + +/* {{{ s_mp_div_2(mp) */ + +/* Divide by two -- take advantage of radix properties to do it fast */ +void +s_mp_div_2(mp_int *mp) +{ + s_mp_div_2d(mp, 1); + +} /* end s_mp_div_2() */ + +/* }}} */ + +/* {{{ s_mp_mul_2(mp) */ + +mp_err +s_mp_mul_2(mp_int *mp) +{ + mp_digit *pd; + unsigned int ix, used; + mp_digit kin = 0; + + ARGCHK(mp != NULL, MP_BADARG); + + /* Shift digits leftward by 1 bit */ + used = MP_USED(mp); + pd = MP_DIGITS(mp); + for (ix = 0; ix < used; ix++) { + mp_digit d = *pd; + *pd++ = (d << 1) | kin; + kin = (d >> (DIGIT_BIT - 1)); + } + + /* Deal with rollover from last digit */ + if (kin) { + if (ix >= ALLOC(mp)) { + mp_err res; + if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY) + return res; + } + + DIGIT(mp, ix) = kin; + USED(mp) += 1; + } + + return MP_OKAY; + +} /* end s_mp_mul_2() */ + +/* }}} */ + +/* {{{ s_mp_mod_2d(mp, d) */ + +/* + Remainder the integer by 2^d, where d is a number of bits. 
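+  (For example, with x = 0x1234 and d = 5, x mod 2^5 keeps only the
+  low five bits: 0x1234 & 0x1F == 0x14.)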
This + amounts to a bitwise AND of the value, and does not require the full + division code + */ +void +s_mp_mod_2d(mp_int *mp, mp_digit d) +{ + mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT); + mp_size ix; + mp_digit dmask; + + if (ndig >= USED(mp)) + return; + + /* Flush all the bits above 2^d in its digit */ + dmask = ((mp_digit)1 << nbit) - 1; + DIGIT(mp, ndig) &= dmask; + + /* Flush all digits above the one with 2^d in it */ + for (ix = ndig + 1; ix < USED(mp); ix++) + DIGIT(mp, ix) = 0; + + s_mp_clamp(mp); + +} /* end s_mp_mod_2d() */ + +/* }}} */ + +/* {{{ s_mp_div_2d(mp, d) */ + +/* + Divide the integer by 2^d, where d is a number of bits. This + amounts to a bitwise shift of the value, and does not require the + full division code (used in Barrett reduction, see below) + */ +void +s_mp_div_2d(mp_int *mp, mp_digit d) +{ + int ix; + mp_digit save, next, mask, lshift; + + s_mp_rshd(mp, d / DIGIT_BIT); + d %= DIGIT_BIT; + /* mp_digit << lshift is undefined behavior for lshift >= MP_DIGIT_BIT */ + /* mod and corresponding mask logic avoid that when d = 0 */ + lshift = DIGIT_BIT - d; + lshift %= DIGIT_BIT; + mask = ((mp_digit)1 << d) - 1; + save = 0; + for (ix = USED(mp) - 1; ix >= 0; ix--) { + next = DIGIT(mp, ix) & mask; + DIGIT(mp, ix) = (save << lshift) | (DIGIT(mp, ix) >> d); + save = next; + } + s_mp_clamp(mp); + +} /* end s_mp_div_2d() */ + +/* }}} */ + +/* {{{ s_mp_norm(a, b, *d) */ + +/* + s_mp_norm(a, b, *d) + + Normalize a and b for division, where b is the divisor. In order + that we might make good guesses for quotient digits, we want the + leading digit of b to be at least half the radix, which we + accomplish by multiplying a and b by a power of 2. The exponent + (shift count) is placed in *pd, so that the remainder can be shifted + back at the end of the division process. 
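+  (Worked example, assuming 16-bit digits: if the leading digit of b
+  is 0x00FF, then d = 8, since 0x00FF << 8 == 0xFF00 has its high bit
+  set.  Both a and b are multiplied by 2^8 before dividing, and the
+  remainder is divided by 2^8 afterward.)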
+ */ + +mp_err +s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd) +{ + mp_digit d; + mp_digit mask; + mp_digit b_msd; + mp_err res = MP_OKAY; + + ARGCHK(a != NULL && b != NULL && pd != NULL, MP_BADARG); + + d = 0; + mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */ + b_msd = DIGIT(b, USED(b) - 1); + while (!(b_msd & mask)) { + b_msd <<= 1; + ++d; + } + + if (d) { + MP_CHECKOK(s_mp_mul_2d(a, d)); + MP_CHECKOK(s_mp_mul_2d(b, d)); + } + + *pd = d; +CLEANUP: + return res; + +} /* end s_mp_norm() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive digit arithmetic */ + +/* {{{ s_mp_add_d(mp, d) */ + +/* Add d to |mp| in place */ +mp_err +s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w, k = 0; + mp_size ix = 1; + + w = (mp_word)DIGIT(mp, 0) + d; + DIGIT(mp, 0) = ACCUM(w); + k = CARRYOUT(w); + + while (ix < USED(mp) && k) { + w = (mp_word)DIGIT(mp, ix) + k; + DIGIT(mp, ix) = ACCUM(w); + k = CARRYOUT(w); + ++ix; + } + + if (k != 0) { + mp_err res; + + if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY) + return res; + + DIGIT(mp, ix) = (mp_digit)k; + } + + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit sum, mp_i, carry = 0; + mp_err res = MP_OKAY; + int used = (int)MP_USED(mp); + + mp_i = *pmp; + *pmp++ = sum = d + mp_i; + carry = (sum < d); + while (carry && --used > 0) { + mp_i = *pmp; + *pmp++ = sum = carry + mp_i; + carry = !sum; + } + if (carry && !used) { + /* mp is growing */ + used = MP_USED(mp); + MP_CHECKOK(s_mp_pad(mp, used + 1)); + MP_DIGIT(mp, used) = carry; + } +CLEANUP: + return res; +#endif +} /* end s_mp_add_d() */ + +/* }}} */ + +/* {{{ s_mp_sub_d(mp, d) */ + +/* Subtract d from |mp| in place, assumes |mp| > d */ +mp_err +s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_word w, b = 0; + mp_size ix = 1; + + /* Compute initial subtraction */ + w = (RADIX + (mp_word)DIGIT(mp, 0)) - d; + b = CARRYOUT(w) ? 0 : 1; + DIGIT(mp, 0) = ACCUM(w); + + /* Propagate borrows leftward */ + while (b && ix < USED(mp)) { + w = (RADIX + (mp_word)DIGIT(mp, ix)) - b; + b = CARRYOUT(w) ? 0 : 1; + DIGIT(mp, ix) = ACCUM(w); + ++ix; + } + + /* Remove leading zeroes */ + s_mp_clamp(mp); + + /* If we have a borrow out, it's a violation of the input invariant */ + if (b) + return MP_RANGE; + else + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit mp_i, diff, borrow; + mp_size used = MP_USED(mp); + + mp_i = *pmp; + *pmp++ = diff = mp_i - d; + borrow = (diff > mp_i); + while (borrow && --used) { + mp_i = *pmp; + *pmp++ = diff = mp_i - borrow; + borrow = (diff > mp_i); + } + s_mp_clamp(mp); + return (borrow && !used) ? MP_RANGE : MP_OKAY; +#endif +} /* end s_mp_sub_d() */ + +/* }}} */ + +/* {{{ s_mp_mul_d(a, d) */ + +/* Compute a = a * d, single digit multiplication */ +mp_err +s_mp_mul_d(mp_int *a, mp_digit d) +{ + mp_err res; + mp_size used; + int pow; + + if (!d) { + mp_zero(a); + return MP_OKAY; + } + if (d == 1) + return MP_OKAY; + if (0 <= (pow = s_mp_ispow2d(d))) { + return s_mp_mul_2d(a, (mp_digit)pow); + } + + used = MP_USED(a); + MP_CHECKOK(s_mp_pad(a, used + 1)); + + s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a)); + + s_mp_clamp(a); + +CLEANUP: + return res; + +} /* end s_mp_mul_d() */ + +/* }}} */ + +/* {{{ s_mp_div_d(mp, d, r) */ + +/* + s_mp_div_d(mp, d, r) + + Compute the quotient mp = mp / d and remainder r = mp mod d, for a + single digit d. 
  If r is null, the remainder will be discarded.
+ */
+
+mp_err
+s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word w = 0, q;
+#else
+    mp_digit w = 0, q;
+#endif
+    int ix;
+    mp_err res;
+    mp_int quot;
+    mp_int rem;
+
+    if (d == 0)
+        return MP_RANGE;
+    if (d == 1) {
+        if (r)
+            *r = 0;
+        return MP_OKAY;
+    }
+    /* could check for power of 2 here, but mp_div_d does that. */
+    if (MP_USED(mp) == 1) {
+        mp_digit n = MP_DIGIT(mp, 0);
+        mp_digit remdig;
+
+        q = n / d;
+        remdig = n % d;
+        MP_DIGIT(mp, 0) = q;
+        if (r) {
+            *r = remdig;
+        }
+        return MP_OKAY;
+    }
+
+    MP_DIGITS(&rem) = 0;
+    MP_DIGITS(&quot) = 0;
+    /* Make room for the quotient */
+    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
+
+        if (w >= d) {
+            q = w / d;
+            w = w % d;
+        } else {
+            q = 0;
+        }
+
+        s_mp_lshd(&quot, 1);
+        DIGIT(&quot, 0) = (mp_digit)q;
+    }
+#else
+    {
+        mp_digit p;
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        mp_digit norm;
+#endif
+
+        MP_CHECKOK(mp_init_copy(&rem, mp));
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        MP_DIGIT(&quot, 0) = d;
+        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
+        if (norm)
+            d <<= norm;
+        MP_DIGIT(&quot, 0) = 0;
+#endif
+
+        p = 0;
+        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
+            w = DIGIT(&rem, ix);
+
+            if (p) {
+                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
+            } else if (w >= d) {
+                q = w / d;
+                w = w % d;
+            } else {
+                q = 0;
+            }
+
+            MP_CHECKOK(s_mp_lshd(&quot, 1));
+            DIGIT(&quot, 0) = q;
+            p = w;
+        }
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        if (norm)
+            w >>= norm;
+#endif
+    }
+#endif
+
+    /* Deliver the remainder, if desired */
+    if (r) {
+        *r = (mp_digit)w;
+    }
+
+    s_mp_clamp(&quot);
+    mp_exch(&quot, mp);
+CLEANUP:
+    mp_clear(&quot);
+    mp_clear(&rem);
+
+    return res;
+} /* end s_mp_div_d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive full arithmetic */
+
+/* {{{ s_mp_add(a, b) */
+
+/* Compute a = |a| + |b| */
+mp_err
+s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+    mp_word w = 0;
+#else
+    mp_digit d, sum, carry = 0;
+#endif
+    mp_digit *pa, *pb;
+    mp_size ix;
+    mp_size used;
+    mp_err res;
+
+    /* Make sure a has enough precision for the output value */
+    if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
+        return res;
+
+    /*
+      Add up all digits up to the precision of b.  If b had initially
+      the same precision as a, or greater, we took care of it by the
+      padding step above, so there is no problem.  If b had initially
+      less precision, we'll have to make sure the carry out is duly
+      propagated upward among the higher-order digits of the sum.
+    */
+    pa = MP_DIGITS(a);
+    pb = MP_DIGITS(b);
+    used = MP_USED(b);
+    for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+        w = w + *pa + *pb++;
+        *pa++ = ACCUM(w);
+        w = CARRYOUT(w);
+#else
+        d = *pa;
+        sum = d + *pb++;
+        d = (sum < d); /* detect overflow */
+        *pa++ = sum += carry;
+        carry = d + (sum < carry); /* detect overflow */
+#endif
+    }
+
+    /* If we run out of 'b' digits before we're actually done, make
+       sure the carries get propagated upward...
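+       (For instance, if b = [1] and a = [MAX, MAX, 1] in base RADIX,
+       where MAX is DIGIT_MAX, the carry out of the lowest digit must
+       ripple through both MAX digits into the third.)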
+ */ + used = MP_USED(a); +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + while (w && ix < used) { + w = w + *pa; + *pa++ = ACCUM(w); + w = CARRYOUT(w); + ++ix; + } +#else + while (carry && ix < used) { + sum = carry + *pa; + *pa++ = sum; + carry = !sum; + ++ix; + } +#endif + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (w) { + if ((res = s_mp_pad(a, used + 1)) != MP_OKAY) + return res; + + DIGIT(a, ix) = (mp_digit)w; + } +#else + if (carry) { + if ((res = s_mp_pad(a, used + 1)) != MP_OKAY) + return res; + + DIGIT(a, used) = carry; + } +#endif + + return MP_OKAY; +} /* end s_mp_add() */ + +/* }}} */ + +/* Compute c = |a| + |b| */ /* magnitude addition */ +mp_err +s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w = 0; +#else + mp_digit sum, carry = 0, d; +#endif + mp_size ix; + mp_size used; + mp_err res; + + MP_SIGN(c) = MP_SIGN(a); + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = a; + a = b; + b = xch; + } + + /* Make sure a has enough precision for the output value */ + if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a)))) + return res; + + /* + Add up all digits up to the precision of b. If b had initially + the same precision as a, or greater, we took care of it by the + exchange step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + pc = MP_DIGITS(c); + used = MP_USED(b); + for (ix = 0; ix < used; ix++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa++ + *pb++; + *pc++ = ACCUM(w); + w = CARRYOUT(w); +#else + d = *pa++; + sum = d + *pb++; + d = (sum < d); /* detect overflow */ + *pc++ = sum += carry; + carry = d + (sum < carry); /* detect overflow */ +#endif + } + + /* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ + for (used = MP_USED(a); ix < used; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa++; + *pc++ = ACCUM(w); + w = CARRYOUT(w); +#else + *pc++ = sum = carry + *pa++; + carry = (sum < carry); +#endif + } + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? 
+ */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (w) { + if ((res = s_mp_pad(c, used + 1)) != MP_OKAY) + return res; + + DIGIT(c, used) = (mp_digit)w; + ++used; + } +#else + if (carry) { + if ((res = s_mp_pad(c, used + 1)) != MP_OKAY) + return res; + + DIGIT(c, used) = carry; + ++used; + } +#endif + MP_USED(c) = used; + return MP_OKAY; +} +/* {{{ s_mp_add_offset(a, b, offset) */ + +/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */ +mp_err +s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w, k = 0; +#else + mp_digit d, sum, carry = 0; +#endif + mp_size ib; + mp_size ia; + mp_size lim; + mp_err res; + + /* Make sure a has enough precision for the output value */ + lim = MP_USED(b) + offset; + if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY) + return res; + + /* + Add up all digits up to the precision of b. If b had initially + the same precision as a, or greater, we took care of it by the + padding step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + lim = USED(b); + for (ib = 0, ia = offset; ib < lim; ib++, ia++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k; + DIGIT(a, ia) = ACCUM(w); + k = CARRYOUT(w); +#else + d = MP_DIGIT(a, ia); + sum = d + MP_DIGIT(b, ib); + d = (sum < d); + MP_DIGIT(a, ia) = sum += carry; + carry = d + (sum < carry); +#endif + } + +/* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + for (lim = MP_USED(a); k && (ia < lim); ++ia) { + w = (mp_word)DIGIT(a, ia) + k; + DIGIT(a, ia) = ACCUM(w); + k = CARRYOUT(w); + } +#else + for (lim = MP_USED(a); carry && (ia < lim); ++ia) { + d = MP_DIGIT(a, ia); + MP_DIGIT(a, ia) = sum = d + carry; + carry = (sum < d); + } +#endif + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (k) { + if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY) + return res; + + DIGIT(a, ia) = (mp_digit)k; + } +#else + if (carry) { + if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY) + return res; + + DIGIT(a, lim) = carry; + } +#endif + s_mp_clamp(a); + + return MP_OKAY; + +} /* end s_mp_add_offset() */ + +/* }}} */ + +/* {{{ s_mp_sub(a, b) */ + +/* Compute a = |a| - |b|, assumes |a| >= |b| */ +mp_err +s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */ +{ + mp_digit *pa, *pb, *limit; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_sword w = 0; +#else + mp_digit d, diff, borrow = 0; +#endif + + /* + Subtract and propagate borrow. Up to the precision of b, this + accounts for the digits of b; after that, we just make sure the + carries get to the right place. This saves having to pad b out to + the precision of a just to make the loops work right... 
+ */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + limit = pb + MP_USED(b); + while (pb < limit) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa - *pb++; + *pa++ = ACCUM(w); + w >>= MP_DIGIT_BIT; +#else + d = *pa; + diff = d - *pb++; + d = (diff > d); /* detect borrow */ + if (borrow && --diff == MP_DIGIT_MAX) + ++d; + *pa++ = diff; + borrow = d; +#endif + } + limit = MP_DIGITS(a) + MP_USED(a); +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + while (w && pa < limit) { + w = w + *pa; + *pa++ = ACCUM(w); + w >>= MP_DIGIT_BIT; + } +#else + while (borrow && pa < limit) { + d = *pa; + *pa++ = diff = d - borrow; + borrow = (diff > d); + } +#endif + + /* Clobber any leading zeroes we created */ + s_mp_clamp(a); + +/* + If there was a borrow out, then |b| > |a| in violation + of our input invariant. We've already done the work, + but we'll at least complain about it... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + return w ? MP_RANGE : MP_OKAY; +#else + return borrow ? MP_RANGE : MP_OKAY; +#endif +} /* end s_mp_sub() */ + +/* }}} */ + +/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */ +mp_err +s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_sword w = 0; +#else + mp_digit d, diff, borrow = 0; +#endif + int ix, limit; + mp_err res; + + MP_SIGN(c) = MP_SIGN(a); + + /* Make sure a has enough precision for the output value */ + if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a)))) + return res; + + /* + Subtract and propagate borrow. Up to the precision of b, this + accounts for the digits of b; after that, we just make sure the + carries get to the right place. This saves having to pad b out to + the precision of a just to make the loops work right... + */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + pc = MP_DIGITS(c); + limit = MP_USED(b); + for (ix = 0; ix < limit; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa++ - *pb++; + *pc++ = ACCUM(w); + w >>= MP_DIGIT_BIT; +#else + d = *pa++; + diff = d - *pb++; + d = (diff > d); + if (borrow && --diff == MP_DIGIT_MAX) + ++d; + *pc++ = diff; + borrow = d; +#endif + } + for (limit = MP_USED(a); ix < limit; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa++; + *pc++ = ACCUM(w); + w >>= MP_DIGIT_BIT; +#else + d = *pa++; + *pc++ = diff = d - borrow; + borrow = (diff > d); +#endif + } + + /* Clobber any leading zeroes we created */ + MP_USED(c) = ix; + s_mp_clamp(c); + +/* + If there was a borrow out, then |b| > |a| in violation + of our input invariant. We've already done the work, + but we'll at least complain about it... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + return w ? MP_RANGE : MP_OKAY; +#else + return borrow ? MP_RANGE : MP_OKAY; +#endif +} +/* {{{ s_mp_mul(a, b) */ + +/* Compute a = |a| * |b| */ +mp_err +s_mp_mul(mp_int *a, const mp_int *b) +{ + return mp_mul(a, b, a); +} /* end s_mp_mul() */ + +/* }}} */ + +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY) +/* This trick works on Sparc V8 CPUs with the Workshop compilers. 
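+   The portable #else branch below builds the same double-width product
+   from half digits:
+
+       a * b == a1*b1 * 2^W + (a0*b1 + a1*b0) * 2^(W/2) + a0*b0
+
+   where W is MP_DIGIT_BIT, a1/b1 are the high halves and a0/b0 the low
+   halves of a and b; the MP_CT_LTU terms re-inject the carries lost by
+   the truncating mp_digit additions.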
*/ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + unsigned long long product = (unsigned long long)a * b; \ + Plo = (mp_digit)product; \ + Phi = (mp_digit)(product >> MP_DIGIT_BIT); \ + } +#else +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + mp_digit a0b1, a1b0; \ + Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \ + Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \ + a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \ + a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \ + a1b0 += a0b1; \ + Phi += a1b0 >> MP_HALF_DIGIT_BIT; \ + Phi += (MP_CT_LTU(a1b0, a0b1)) << MP_HALF_DIGIT_BIT; \ + a1b0 <<= MP_HALF_DIGIT_BIT; \ + Plo += a1b0; \ + Phi += MP_CT_LTU(Plo, a1b0); \ + } +#endif + +/* Constant time version of s_mpv_mul_d_add_prop. + * Presently, this is only used by the Constant time Montgomery arithmetic code. */ +/* c += a * b */ +void +s_mpv_mul_d_add_propCT(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c, mp_size c_len) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + c_len -= a_len; + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + + /* propagate the carry to the end, even if carry is zero */ + while (c_len--) { + mp_word w = (mp_word)*c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } +#else + mp_digit carry = 0; + c_len -= a_len; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + a1b1 += MP_CT_LTU(a0b0, carry); + a0b0 += a_i = *c; + a1b1 += MP_CT_LTU(a0b0, a_i); + + *c++ = a0b0; + carry = a1b1; + } + /* propagate the carry to the end, even if carry is zero */ + while (c_len--) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = MP_CT_LTU(carry, c_i); + } +#endif +} + +#if !defined(MP_ASSEMBLY_MULTIPLY) +/* c = a * b */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + a1b1 += MP_CT_LTU(a0b0, carry); + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* c += a * b */ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + a1b1 += MP_CT_LTU(a0b0, carry); + a0b0 += a_i = *c; + a1b1 += MP_CT_LTU(a0b0, a_i); + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* Presently, this is only used by the Montgomery arithmetic code. 
*/ +/* c += a * b */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + + while (d) { + mp_word w = (mp_word)*c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + + a0b0 += a_i = *c; + if (a0b0 < a_i) + ++a1b1; + + *c++ = a0b0; + carry = a1b1; + } + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; + } +#endif +} +#endif + +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY) +/* This trick works on Sparc V8 CPUs with the Workshop compilers. */ +#define MP_SQR_D(a, Phi, Plo) \ + { \ + unsigned long long square = (unsigned long long)a * a; \ + Plo = (mp_digit)square; \ + Phi = (mp_digit)(square >> MP_DIGIT_BIT); \ + } +#else +#define MP_SQR_D(a, Phi, Plo) \ + { \ + mp_digit Pmid; \ + Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \ + Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \ + Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \ + Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \ + Pmid <<= (MP_HALF_DIGIT_BIT + 1); \ + Plo += Pmid; \ + if (Plo < Pmid) \ + ++Phi; \ + } +#endif + +#if !defined(MP_ASSEMBLY_SQUARE) +/* Add the squares of the digits of a to the digits of b. */ +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_word w; + mp_digit d; + mp_size ix; + + w = 0; +#define ADD_SQUARE(n) \ + d = pa[n]; \ + w += (d * (mp_word)d) + ps[2 * n]; \ + ps[2 * n] = ACCUM(w); \ + w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \ + ps[2 * n + 1] = ACCUM(w); \ + w = (w >> DIGIT_BIT) + + for (ix = a_len; ix >= 4; ix -= 4) { + ADD_SQUARE(0); + ADD_SQUARE(1); + ADD_SQUARE(2); + ADD_SQUARE(3); + pa += 4; + ps += 8; + } + if (ix) { + ps += 2 * ix; + pa += ix; + switch (ix) { + case 3: + ADD_SQUARE(-3); /* FALLTHRU */ + case 2: + ADD_SQUARE(-2); /* FALLTHRU */ + case 1: + ADD_SQUARE(-1); /* FALLTHRU */ + case 0: + break; + } + } + while (w) { + w += *ps; + *ps++ = ACCUM(w); + w = (w >> DIGIT_BIT); + } +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *pa++; + mp_digit a0a0, a1a1; + + MP_SQR_D(a_i, a1a1, a0a0); + + /* here a1a1 and a0a0 constitute a_i ** 2 */ + a0a0 += carry; + if (a0a0 < carry) + ++a1a1; + + /* now add to ps */ + a0a0 += a_i = *ps; + if (a0a0 < a_i) + ++a1a1; + *ps++ = a0a0; + a1a1 += a_i = *ps; + carry = (a1a1 < a_i); + *ps++ = a1a1; + } + while (carry) { + mp_digit s_i = *ps; + carry += s_i; + *ps++ = carry; + carry = carry < s_i; + } +#endif +} +#endif + +#if !defined(MP_ASSEMBLY_DIV_2DX1D) +/* +** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized +** so its high bit is 1. This code is from NSPR. 
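+** Assuming Nhi < divisor (so the quotient fits in one digit), the
+** result satisfies Nhi * RADIX + Nlo == *qp * divisor + *rp, with
+** 0 <= *rp < divisor; the quotient is developed one half digit at
+** a time.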
+*/
+mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+                mp_digit *qp, mp_digit *rp)
+{
+    mp_digit d1, d0, q1, q0;
+    mp_digit r1, r0, m;
+
+    d1 = divisor >> MP_HALF_DIGIT_BIT;
+    d0 = divisor & MP_HALF_DIGIT_MAX;
+    r1 = Nhi % d1;
+    q1 = Nhi / d1;
+    m = q1 * d0;
+    r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
+    if (r1 < m) {
+        q1--, r1 += divisor;
+        if (r1 >= divisor && r1 < m) {
+            q1--, r1 += divisor;
+        }
+    }
+    r1 -= m;
+    r0 = r1 % d1;
+    q0 = r1 / d1;
+    m = q0 * d0;
+    r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
+    if (r0 < m) {
+        q0--, r0 += divisor;
+        if (r0 >= divisor && r0 < m) {
+            q0--, r0 += divisor;
+        }
+    }
+    if (qp)
+        *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
+    if (rp)
+        *rp = r0 - m;
+    return MP_OKAY;
+}
+#endif
+
+#if MP_SQUARE
+/* {{{ s_mp_sqr(a) */
+
+mp_err
+s_mp_sqr(mp_int *a)
+{
+    mp_err res;
+    mp_int tmp;
+
+    if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY)
+        return res;
+    res = mp_sqr(a, &tmp);
+    if (res == MP_OKAY) {
+        s_mp_exch(&tmp, a);
+    }
+    mp_clear(&tmp);
+    return res;
+}
+
+/* }}} */
+#endif
+
+/* {{{ s_mp_div(a, b) */
+
+/*
+  s_mp_div(a, b)
+
+  Compute a = a / b and b = a mod b.  Fails with MP_RANGE if b is zero.
+ */
+
+mp_err
+s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
+         mp_int *div,  /* i: divisor */
+         mp_int *quot) /* i: 0; o: quotient */
+{
+    mp_int part, t;
+    mp_digit q_msd;
+    mp_err res;
+    mp_digit d;
+    mp_digit div_msd;
+    int ix;
+
+    if (mp_cmp_z(div) == 0)
+        return MP_RANGE;
+
+    DIGITS(&t) = 0;
+    /* Shortcut if divisor is power of two */
+    if ((ix = s_mp_ispow2(div)) >= 0) {
+        MP_CHECKOK(mp_copy(rem, quot));
+        s_mp_div_2d(quot, (mp_digit)ix);
+        s_mp_mod_2d(rem, (mp_digit)ix);
+
+        return MP_OKAY;
+    }
+
+    MP_SIGN(rem) = ZPOS;
+    MP_SIGN(div) = ZPOS;
+    MP_SIGN(&part) = ZPOS;
+
+    /* A working temporary for division */
+    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
+
+    /* Normalize to optimize guessing */
+    MP_CHECKOK(s_mp_norm(rem, div, &d));
+
+    /* Perform the division itself...woo! */
+    MP_USED(quot) = MP_ALLOC(quot);
+
+    /* Find a partial substring of rem which is at least div */
+    /* If we didn't find one, we're finished dividing */
+    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
+        int i;
+        int unusedRem;
+        int partExtended = 0; /* set to true if we need to extend part */
+
+        unusedRem = MP_USED(rem) - MP_USED(div);
+        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
+        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
+        MP_USED(&part) = MP_USED(div);
+
+        /* We have now truncated the part of the remainder to the same length as
+         * the divisor. If part is smaller than div, extend part by one digit. */
+        if (s_mp_cmp(&part, div) < 0) {
+            --unusedRem;
+#if MP_ARGCHK == 2
+            assert(unusedRem >= 0);
+#endif
+            --MP_DIGITS(&part);
+            ++MP_USED(&part);
+            ++MP_ALLOC(&part);
+            partExtended = 1;
+        }
+
+        /* Compute a guess for the next quotient digit */
+        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
+        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
+        if (!partExtended) {
+            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
+             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
+             * we didn't extend part, q_msd >= div_msd. Therefore we know that
+             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
+             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1.
 */
+            q_msd = 1;
+        } else {
+            if (q_msd == div_msd) {
+                q_msd = MP_DIGIT_MAX;
+            } else {
+                mp_digit r;
+                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
+                                           div_msd, &q_msd, &r));
+            }
+        }
+#if MP_ARGCHK == 2
+        assert(q_msd > 0); /* This case should never occur any more. */
+#endif
+        if (q_msd <= 0)
+            break;
+
+        /* See what that multiplies out to */
+        mp_copy(div, &t);
+        MP_CHECKOK(s_mp_mul_d(&t, q_msd));
+
+        /*
+          If it's too big, back it off.  We should not have to do this
+          more than once, or, in rare cases, twice.  Knuth describes a
+          method by which this could be reduced to a maximum of once, but
+          I didn't implement that here.
+          When using s_mpv_div_2dx1d, we may have to do this 3 times.
+        */
+        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
+            --q_msd;
+            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
+        }
+        if (i < 0) {
+            res = MP_RANGE;
+            goto CLEANUP;
+        }
+
+        /* At this point, q_msd should be the right next digit */
+        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
+        s_mp_clamp(rem);
+
+        /*
+          Include the digit in the quotient.  We allocated enough memory
+          for any quotient we could ever possibly get, so we should not
+          have to check for failures here
+        */
+        MP_DIGIT(quot, unusedRem) = q_msd;
+    }
+
+    /* Denormalize remainder */
+    if (d) {
+        s_mp_div_2d(rem, d);
+    }
+
+    s_mp_clamp(quot);
+
+CLEANUP:
+    mp_clear(&t);
+
+    return res;
+
+} /* end s_mp_div() */
+
+/* }}} */
+
+/* {{{ s_mp_2expt(a, k) */
+
+mp_err
+s_mp_2expt(mp_int *a, mp_digit k)
+{
+    mp_err res;
+    mp_size dig, bit;
+
+    dig = k / DIGIT_BIT;
+    bit = k % DIGIT_BIT;
+
+    mp_zero(a);
+    if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
+        return res;
+
+    DIGIT(a, dig) |= ((mp_digit)1 << bit);
+
+    return MP_OKAY;
+
+} /* end s_mp_2expt() */
+
+/* }}} */
+
+/* {{{ s_mp_reduce(x, m, mu) */
+
+/*
+  Compute Barrett reduction, x (mod m), given a precomputed value for
+  mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
+  faster than straight division, when many reductions by the same
+  value of m are required (such as in modular exponentiation).  This
+  can nearly halve the time required to do modular exponentiation,
+  as compared to using the full integer divide to reduce.
+
+  This algorithm was derived from the _Handbook of Applied
+  Cryptography_ by Menezes, van Oorschot, and Vanstone, Ch. 14,
+  pp. 603-604.
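+
+  A caller would typically precompute mu once per modulus, e.g. (an
+  illustrative sketch using routines from this file, error handling
+  omitted):
+
+      mp_int mu;
+      mp_init(&mu);
+      s_mp_2expt(&mu, 2 * USED(m) * DIGIT_BIT); /* mu = b^2k     */
+      mp_div(&mu, m, &mu, NULL);                /* mu = b^2k / m */
+
+  and then call s_mp_reduce(x, m, &mu) for each reduction by m.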
+ */
+
+mp_err
+s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
+{
+    mp_int q;
+    mp_err res;
+
+    if ((res = mp_init_copy(&q, x)) != MP_OKAY)
+        return res;
+
+    s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */
+    s_mp_mul(&q, mu);           /* q2 = q1 * mu */
+    s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
+
+    /* x = x mod b^(k+1), quick (no division) */
+    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
+
+    /* q = q * m mod b^(k+1), quick (no division) */
+    s_mp_mul(&q, m);
+    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
+
+    /* x = x - q */
+    if ((res = mp_sub(x, &q, x)) != MP_OKAY)
+        goto CLEANUP;
+
+    /* If x < 0, add b^(k+1) to it */
+    if (mp_cmp_z(x) < 0) {
+        mp_set(&q, 1);
+        if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
+            goto CLEANUP;
+        if ((res = mp_add(x, &q, x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* Back off if it's too big */
+    while (mp_cmp(x, m) >= 0) {
+        if ((res = s_mp_sub(x, m)) != MP_OKAY)
+            break;
+    }
+
+CLEANUP:
+    mp_clear(&q);
+
+    return res;
+
+} /* end s_mp_reduce() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive comparisons */
+
+/* {{{ s_mp_cmp(a, b) */
+
+/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b */
+int
+s_mp_cmp(const mp_int *a, const mp_int *b)
+{
+    ARGMPCHK(a != NULL && b != NULL);
+
+    mp_size used_a = MP_USED(a);
+    {
+        mp_size used_b = MP_USED(b);
+
+        if (used_a > used_b)
+            goto IS_GT;
+        if (used_a < used_b)
+            goto IS_LT;
+    }
+    {
+        mp_digit *pa, *pb;
+        mp_digit da = 0, db = 0;
+
+#define CMP_AB(n)                     \
+    if ((da = pa[n]) != (db = pb[n])) \
+    goto done
+
+        pa = MP_DIGITS(a) + used_a;
+        pb = MP_DIGITS(b) + used_a;
+        while (used_a >= 4) {
+            pa -= 4;
+            pb -= 4;
+            used_a -= 4;
+            CMP_AB(3);
+            CMP_AB(2);
+            CMP_AB(1);
+            CMP_AB(0);
+        }
+        while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
+            /* do nothing */;
+    done:
+        if (da > db)
+            goto IS_GT;
+        if (da < db)
+            goto IS_LT;
+    }
+    return MP_EQ;
+IS_LT:
+    return MP_LT;
+IS_GT:
+    return MP_GT;
+} /* end s_mp_cmp() */
+
+/* }}} */
+
+/* {{{ s_mp_cmp_d(a, d) */
+
+/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d */
+int
+s_mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGMPCHK(a != NULL);
+
+    if (USED(a) > 1)
+        return MP_GT;
+
+    if (DIGIT(a, 0) < d)
+        return MP_LT;
+    else if (DIGIT(a, 0) > d)
+        return MP_GT;
+    else
+        return MP_EQ;
+
+} /* end s_mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2(v) */
+
+/*
+  Returns -1 if the value is not a power of two; otherwise, it returns
+  k such that v = 2^k, i.e. lg(v).
+ */
+int
+s_mp_ispow2(const mp_int *v)
+{
+    mp_digit d;
+    int extra = 0, ix;
+
+    ARGMPCHK(v != NULL);
+
+    ix = MP_USED(v) - 1;
+    d = MP_DIGIT(v, ix); /* most significant digit of v */
+
+    extra = s_mp_ispow2d(d);
+    if (extra < 0 || ix == 0)
+        return extra;
+
+    while (--ix >= 0) {
+        if (DIGIT(v, ix) != 0)
+            return -1; /* not a power of two */
+        extra += MP_DIGIT_BIT;
+    }
+
+    return extra;
+
+} /* end s_mp_ispow2() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2d(d) */
+
+int
+s_mp_ispow2d(mp_digit d)
+{
+    if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2 */
+        int pow = 0;
+#if defined(MP_USE_UINT_DIGIT)
+        if (d & 0xffff0000U)
+            pow += 16;
+        if (d & 0xff00ff00U)
+            pow += 8;
+        if (d & 0xf0f0f0f0U)
+            pow += 4;
+        if (d & 0xccccccccU)
+            pow += 2;
+        if (d & 0xaaaaaaaaU)
+            pow += 1;
+#elif defined(MP_USE_LONG_LONG_DIGIT)
+        if (d & 0xffffffff00000000ULL)
+            pow += 32;
+        if (d & 0xffff0000ffff0000ULL)
+            pow += 16;
+        if (d & 0xff00ff00ff00ff00ULL)
+            pow += 8;
+        if (d & 0xf0f0f0f0f0f0f0f0ULL)
+            pow += 4;
+        if (d & 0xccccccccccccccccULL)
+            pow += 2;
+        if (d & 0xaaaaaaaaaaaaaaaaULL)
+            pow += 1;
+#elif defined(MP_USE_LONG_DIGIT)
+        if (d & 0xffffffff00000000UL)
+            pow += 32;
+        if (d & 0xffff0000ffff0000UL)
+            pow += 16;
+        if (d & 0xff00ff00ff00ff00UL)
+            pow += 8;
+        if (d & 0xf0f0f0f0f0f0f0f0UL)
+            pow += 4;
+        if (d & 0xccccccccccccccccUL)
+            pow += 2;
+        if (d & 0xaaaaaaaaaaaaaaaaUL)
+            pow += 1;
+#else
+#error "unknown type for mp_digit"
+#endif
+        return pow;
+    }
+    return -1;
+
+} /* end s_mp_ispow2d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive I/O helpers */
+
+/* {{{ s_mp_tovalue(ch, r) */
+
+/*
+  Convert the given character to its digit value, in the given radix.
+  If the given character is not understood in the given radix, -1 is
+  returned.  Otherwise the digit's numeric value is returned.
+
+  The results will be odd if you use a radix < 2 or > 64; you are
+  expected to know what you're up to.
+ */
+int
+s_mp_tovalue(char ch, int r)
+{
+    int val, xch;
+
+    if (r > 36)
+        xch = ch;
+    else
+        xch = toupper(ch);
+
+    if (isdigit(xch))
+        val = xch - '0';
+    else if (isupper(xch))
+        val = xch - 'A' + 10;
+    else if (islower(xch))
+        val = xch - 'a' + 36;
+    else if (xch == '+')
+        val = 62;
+    else if (xch == '/')
+        val = 63;
+    else
+        return -1;
+
+    if (val < 0 || val >= r)
+        return -1;
+
+    return val;
+
+} /* end s_mp_tovalue() */
+
+/* }}} */
+
+/* {{{ s_mp_todigit(val, r, low) */
+
+/*
+  Convert val to a radix-r digit, if possible.  If val is out of range
+  for r, returns zero.  Otherwise, returns an ASCII character denoting
+  the value in the given radix.
+
+  The results may be odd if you use a radix < 2 or > 64; you are
+  expected to know what you're doing.
+ */
+
+char
+s_mp_todigit(mp_digit val, int r, int low)
+{
+    char ch;
+
+    if (val >= r)
+        return 0;
+
+    ch = s_dmap_1[val];
+
+    if (r <= 36 && low)
+        ch = tolower(ch);
+
+    return ch;
+
+} /* end s_mp_todigit() */
+
+/* }}} */
+
+/* {{{ s_mp_outlen(bits, radix) */
+
+/*
+  Return an estimate for how long a string is needed to hold a radix
+  r representation of a number with 'bits' significant bits, plus an
+  extra for a zero terminator (assuming C style strings here)
+ */
+int
+s_mp_outlen(int bits, int r)
+{
+    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
+
+} /* end s_mp_outlen() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* mp_read_unsigned_octets(mp, str, len)
+   Read in a raw value (base 256) into the given mp_int
+   No sign bit, number is positive.  Leading zeros ignored.
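+   For example, the three octets { 0x01, 0x02, 0x03 } produce the value
+   0x010203 (66051 decimal).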
+ */ + +mp_err +mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len) +{ + int count; + mp_err res; + mp_digit d; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + mp_zero(mp); + + count = len % sizeof(mp_digit); + if (count) { + for (d = 0; count-- > 0; --len) { + d = (d << 8) | *str++; + } + MP_DIGIT(mp, 0) = d; + } + + /* Read the rest of the digits */ + for (; len > 0; len -= sizeof(mp_digit)) { + for (d = 0, count = sizeof(mp_digit); count > 0; --count) { + d = (d << 8) | *str++; + } + if (MP_EQ == mp_cmp_z(mp)) { + if (!d) + continue; + } else { + if ((res = s_mp_lshd(mp, 1)) != MP_OKAY) + return res; + } + MP_DIGIT(mp, 0) = d; + } + return MP_OKAY; +} /* end mp_read_unsigned_octets() */ +/* }}} */ + +/* {{{ mp_unsigned_octet_size(mp) */ +unsigned int +mp_unsigned_octet_size(const mp_int *mp) +{ + unsigned int bytes; + int ix; + mp_digit d = 0; + + ARGCHK(mp != NULL, MP_BADARG); + ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG); + + bytes = (USED(mp) * sizeof(mp_digit)); + + /* subtract leading zeros. */ + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + d = DIGIT(mp, ix); + if (d) + break; + bytes -= sizeof(d); + } + if (!bytes) + return 1; + + /* Have MSD, check digit bytes, high order first */ + for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) { + unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT)); + if (x) + break; + --bytes; + } + return bytes; +} /* end mp_unsigned_octet_size() */ +/* }}} */ + +/* {{{ mp_to_unsigned_octets(mp, str) */ +/* output a buffer of big endian octets no longer than specified. */ +mp_err +mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen) +{ + int ix, pos = 0; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG); + + bytes = mp_unsigned_octet_size(mp); + ARGCHK(bytes <= maxlen, MP_BADARG); + + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + int jx; + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT)); + if (!pos && !x) /* suppress leading zeros */ + continue; + str[pos++] = x; + } + } + if (!pos) + str[pos++] = 0; + return pos; +} /* end mp_to_unsigned_octets() */ +/* }}} */ + +/* {{{ mp_to_signed_octets(mp, str) */ +/* output a buffer of big endian octets no longer than specified. */ +mp_err +mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen) +{ + int ix, pos = 0; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG); + + bytes = mp_unsigned_octet_size(mp); + ARGCHK(bytes <= maxlen, MP_BADARG); + + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + int jx; + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT)); + if (!pos) { + if (!x) /* suppress leading zeros */ + continue; + if (x & 0x80) { /* add one leading zero to make output positive. */ + ARGCHK(bytes + 1 <= maxlen, MP_BADARG); + if (bytes + 1 > maxlen) + return MP_BADARG; + str[pos++] = 0; + } + } + str[pos++] = x; + } + } + if (!pos) + str[pos++] = 0; + return pos; +} /* end mp_to_signed_octets() */ +/* }}} */ + +/* {{{ mp_to_fixlen_octets(mp, str) */ +/* output a buffer of big endian octets exactly as long as requested. + constant time on the value of mp. 
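+   For example, a value occupying 5 octets written with length == 8 is
+   emitted with three leading zero octets; a length too short to hold
+   the value's significant octets fails the ARGCHK (MP_BADARG).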
*/ +mp_err +mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length) +{ + int ix, jx; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp) && length > 0, MP_BADARG); + + /* Constant time on the value of mp. Don't use mp_unsigned_octet_size. */ + bytes = USED(mp) * MP_DIGIT_SIZE; + + /* If the output is shorter than the native size of mp, then check that any + * bytes not written have zero values. This check isn't constant time on + * the assumption that timing-sensitive callers can guarantee that mp fits + * in the allocated space. */ + ix = USED(mp) - 1; + if (bytes > length) { + unsigned int zeros = bytes - length; + + while (zeros >= MP_DIGIT_SIZE) { + ARGCHK(DIGIT(mp, ix) == 0, MP_BADARG); + zeros -= MP_DIGIT_SIZE; + ix--; + } + + if (zeros > 0) { + mp_digit d = DIGIT(mp, ix); + mp_digit m = ~0ULL << ((MP_DIGIT_SIZE - zeros) * CHAR_BIT); + ARGCHK((d & m) == 0, MP_BADARG); + for (jx = MP_DIGIT_SIZE - zeros - 1; jx >= 0; jx--) { + *str++ = d >> (jx * CHAR_BIT); + } + ix--; + } + } else if (bytes < length) { + /* Place any needed leading zeros. */ + unsigned int zeros = length - bytes; + memset(str, 0, zeros); + str += zeros; + } + + /* Iterate over each whole digit... */ + for (; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + + /* Unpack digit bytes, high order first */ + for (jx = MP_DIGIT_SIZE - 1; jx >= 0; jx--) { + *str++ = d >> (jx * CHAR_BIT); + } + } + return MP_OKAY; +} /* end mp_to_fixlen_octets() */ +/* }}} */ + +/* {{{ mp_cswap(condition, a, b, numdigits) */ +/* performs a conditional swap between mp_int. */ +mp_err +mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits) +{ + mp_digit x; + unsigned int i; + mp_err res = 0; + + /* if pointers are equal return */ + if (a == b) + return res; + + if (MP_ALLOC(a) < numdigits || MP_ALLOC(b) < numdigits) { + MP_CHECKOK(s_mp_grow(a, numdigits)); + MP_CHECKOK(s_mp_grow(b, numdigits)); + } + + condition = ((~condition & ((condition - 1))) >> (MP_DIGIT_BIT - 1)) - 1; + + x = (USED(a) ^ USED(b)) & condition; + USED(a) ^= x; + USED(b) ^= x; + + x = (SIGN(a) ^ SIGN(b)) & condition; + SIGN(a) ^= x; + SIGN(b) ^= x; + + for (i = 0; i < numdigits; i++) { + x = (DIGIT(a, i) ^ DIGIT(b, i)) & condition; + DIGIT(a, i) ^= x; + DIGIT(b, i) ^= x; + } + +CLEANUP: + return res; +} /* end mp_cswap() */ +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h new file mode 100644 index 0000000000..dd129db0d6 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi.h @@ -0,0 +1,363 @@ +/* + * mpi.h + * + * Arbitrary precision integer arithmetic library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
 */
+
+#ifndef _H_MPI_
+#define _H_MPI_
+
+#include "mpi-config.h"
+
+#include "seccomon.h"
+SEC_BEGIN_PROTOS
+
+#if MP_DEBUG
+#undef MP_IOFUNC
+#define MP_IOFUNC 1
+#endif
+
+#if MP_IOFUNC
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <limits.h>
+
+#if defined(BSDI)
+#undef ULLONG_MAX
+#endif
+
+#include <sys/types.h>
+
+#define MP_NEG 1
+#define MP_ZPOS 0
+
+#define MP_OKAY 0    /* no error, all is well */
+#define MP_YES 0     /* yes (boolean result) */
+#define MP_NO -1     /* no (boolean result) */
+#define MP_MEM -2    /* out of memory */
+#define MP_RANGE -3  /* argument out of range */
+#define MP_BADARG -4 /* invalid parameter */
+#define MP_UNDEF -5  /* answer is undefined */
+#define MP_LAST_CODE MP_UNDEF
+
+typedef unsigned int mp_sign;
+typedef unsigned int mp_size;
+typedef int mp_err;
+
+#define MP_32BIT_MAX 4294967295U
+
+#if !defined(ULONG_MAX)
+#error "ULONG_MAX not defined"
+#elif !defined(UINT_MAX)
+#error "UINT_MAX not defined"
+#elif !defined(USHRT_MAX)
+#error "USHRT_MAX not defined"
+#endif
+
+#if defined(ULLONG_MAX) /* C99, Solaris */
+#define MP_ULONG_LONG_MAX ULLONG_MAX
+/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
+#elif defined(ULONG_LONG_MAX) /* HPUX */
+#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
+#elif defined(ULONGLONG_MAX) /* AIX */
+#define MP_ULONG_LONG_MAX ULONGLONG_MAX
+#endif
+
+/* We only use unsigned long for mp_digit iff long is more than 32 bits. */
+#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
+typedef unsigned long mp_digit;
+#define MP_DIGIT_MAX ULONG_MAX
+#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_DIGIT
+#define MP_USE_LONG_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+
+#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX)
+typedef unsigned long long mp_digit;
+#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
+#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1
+#undef MP_USE_LONG_LONG_DIGIT
+#define MP_USE_LONG_LONG_DIGIT 1
+#undef MP_USE_LONG_DIGIT
+
+#else
+typedef unsigned int mp_digit;
+#define MP_DIGIT_MAX UINT_MAX
+#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX USHRT_MAX
+#undef MP_USE_UINT_DIGIT
+#define MP_USE_UINT_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+#undef MP_USE_LONG_DIGIT
+#endif
+
+#if !defined(MP_NO_MP_WORD)
+#if defined(MP_USE_UINT_DIGIT) && \
+    (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
+
+#if (ULONG_MAX > UINT_MAX)
+typedef unsigned long mp_word;
+typedef long mp_sword;
+#define MP_WORD_MAX ULONG_MAX
+
+#else
+typedef unsigned long long mp_word;
+typedef long long mp_sword;
+#define MP_WORD_MAX MP_ULONG_LONG_MAX
+#endif
+
+#else
+#define MP_NO_MP_WORD 1
+#endif
+#endif /* !defined(MP_NO_MP_WORD) */
+
+#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
+typedef unsigned int mp_word;
+typedef int mp_sword;
+#define MP_WORD_MAX UINT_MAX
+#endif
+
+#define MP_DIGIT_SIZE sizeof(mp_digit)
+#define MP_DIGIT_BIT (CHAR_BIT * MP_DIGIT_SIZE)
+#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word))
+#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX)
+
+#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
+#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
+/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
+** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
+** consistent with the other _HALF_ names.
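+** For example, with 32-bit digits, MP_HALF_DIGIT_BIT is 16,
+** MP_HALF_DIGIT_MAX is 0xFFFF, and MP_HALF_RADIX is 0x10000.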
+*/
+
+/* Macros for accessing the mp_int internals */
+#define MP_SIGN(MP) ((MP)->sign)
+#define MP_USED(MP) ((MP)->used)
+#define MP_ALLOC(MP) ((MP)->alloc)
+#define MP_DIGITS(MP) ((MP)->dp)
+#define MP_DIGIT(MP, N) (MP)->dp[(N)]
+
+/* This defines the maximum I/O base (minimum is 2) */
+#define MP_MAX_RADIX 64
+
+/* Constant Time Macros on mp_digits */
+#define MP_CT_HIGH_TO_LOW(x) ((mp_digit)((mp_digit)(x) >> (MP_DIGIT_BIT - 1)))
+#define MP_CT_TRUE ((mp_digit)1)
+#define MP_CT_FALSE ((mp_digit)0)
+
+/* basic zero and non zero tests */
+#define MP_CT_NOT_ZERO(x) (MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+#define MP_CT_ZERO(x) (MP_CT_TRUE ^ MP_CT_HIGH_TO_LOW(((x) | (((mp_digit)0) - (x)))))
+
+/* basic constant-time helper macro for equalities and inequalities.
+ * The inequalities will produce incorrect results if
+ * abs(a-b) >= MP_DIGIT_MAX/2. This can be avoided if unsigned values stay
+ * within the range 0-MP_DIGIT_MAX/2. */
+#define MP_CT_EQ(a, b) MP_CT_ZERO(((a) ^ (b)))
+#define MP_CT_NE(a, b) MP_CT_NOT_ZERO(((a) ^ (b)))
+#define MP_CT_GT(a, b) MP_CT_HIGH_TO_LOW((b) - (a))
+#define MP_CT_LT(a, b) MP_CT_HIGH_TO_LOW((a) - (b))
+#define MP_CT_GE(a, b) (MP_CT_TRUE ^ MP_CT_LT(a, b))
+#define MP_CT_LE(a, b) (MP_CT_TRUE ^ MP_CT_GT(a, b))
+
+/* use constant time result to select a boolean value
+ * or an mp digit depending on the args */
+#define MP_CT_SEL(m, l, r) ((r) ^ ((m) & ((r) ^ (l))))
+#define MP_CT_SELB(m, l, r) MP_CT_SEL(m, l, r)       /* mask, l and r are booleans */
+#define MP_CT_SEL_DIGIT(m, l, r) MP_CT_SEL(m, l, r)  /* mask, l, and r are mp_digit */
+
+/* full inequalities that work with full mp_digit values */
+#define MP_CT_OVERFLOW(a, b, c, d)             \
+    MP_CT_SELB(MP_CT_HIGH_TO_LOW((a) ^ (b)),   \
+               (MP_CT_HIGH_TO_LOW(d)), c)
+#define MP_CT_LTU(a, b) MP_CT_OVERFLOW(a, b, MP_CT_LT(a, b), b)
+
+typedef struct {
+    mp_sign sign;  /* sign of this quantity */
+    mp_size alloc; /* how many digits allocated */
+    mp_size used;  /* how many digits used */
+    mp_digit *dp;  /* the digits themselves */
+} mp_int;
+
+/* Default precision */
+mp_size mp_get_prec(void);
+void mp_set_prec(mp_size prec);
+
+/* Memory management */
+mp_err mp_init(mp_int *mp);
+mp_err mp_init_size(mp_int *mp, mp_size prec);
+mp_err mp_init_copy(mp_int *mp, const mp_int *from);
+mp_err mp_copy(const mp_int *from, mp_int *to);
+void mp_exch(mp_int *mp1, mp_int *mp2);
+void mp_clear(mp_int *mp);
+void mp_zero(mp_int *mp);
+void mp_set(mp_int *mp, mp_digit d);
+mp_err mp_set_int(mp_int *mp, long z);
+#define mp_set_long(mp, z) mp_set_int(mp, z)
+mp_err mp_set_ulong(mp_int *mp, unsigned long z);
+
+/* Single digit arithmetic */
+mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_2(const mp_int *a, mp_int *c);
+mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
+mp_err mp_div_2(const mp_int *a, mp_int *c);
+mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
+
+/* Sign manipulations */
+mp_err mp_abs(const mp_int *a, mp_int *b);
+mp_err mp_neg(const mp_int *a, mp_int *b);
+
+/* Full arithmetic */
+mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_subCT(const mp_int *a, mp_int *b, mp_int *c, mp_digit *borrow);
+mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mulCT(mp_int *a, mp_int *b, mp_int *c, mp_size setSize);
+#if MP_SQUARE
+mp_err mp_sqr(const mp_int *a,
mp_int *b); +#else +#define mp_sqr(a, b) mp_mul(a, a, b) +#endif +mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r); +mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r); +mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_2expt(mp_int *a, mp_digit k); + +/* Modular arithmetic */ +#if MP_MODARITH +mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c); +mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c); +mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +#if MP_SQUARE +mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c); +#else +#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c) +#endif +mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c); +#endif /* MP_MODARITH */ + +/* montgomery math */ +mp_err mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont); +mp_digit mp_calculate_mont_n0i(const mp_int *N); +mp_err mp_reduceCT(const mp_int *a, const mp_int *m, mp_digit n0i, mp_int *ct); +mp_err mp_mulmontmodCT(mp_int *a, mp_int *b, const mp_int *m, mp_digit n0i, mp_int *c); + +/* Comparisons */ +int mp_cmp_z(const mp_int *a); +int mp_cmp_d(const mp_int *a, mp_digit d); +int mp_cmp(const mp_int *a, const mp_int *b); +int mp_cmp_mag(const mp_int *a, const mp_int *b); +int mp_isodd(const mp_int *a); +int mp_iseven(const mp_int *a); +mp_err mp_selectCT(mp_digit cond, const mp_int *a, const mp_int *b, mp_int *ret); + +/* Number theoretic */ +mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y); +mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c); +mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c); + +/* Input and output */ +#if MP_IOFUNC +void mp_print(mp_int *mp, FILE *ofp); +#endif /* end MP_IOFUNC */ + +/* Base conversion */ +mp_err mp_read_raw(mp_int *mp, char *str, int len); +int mp_raw_size(mp_int *mp); +mp_err mp_toraw(mp_int *mp, char *str); +mp_err mp_read_radix(mp_int *mp, const char *str, int radix); +mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix); +int mp_radix_size(mp_int *mp, int radix); +mp_err mp_toradix(mp_int *mp, char *str, int radix); +int mp_tovalue(char ch, int r); + +#define mp_tobinary(M, S) mp_toradix((M), (S), 2) +#define mp_tooctal(M, S) mp_toradix((M), (S), 8) +#define mp_todecimal(M, S) mp_toradix((M), (S), 10) +#define mp_tohex(M, S) mp_toradix((M), (S), 16) + +/* Error strings */ +const char *mp_strerror(mp_err ec); + +/* Octet string conversion functions */ +mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len); +unsigned int mp_unsigned_octet_size(const mp_int *mp); +mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen); +mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen); +mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len); + +/* Miscellaneous */ +mp_size mp_trailing_zeros(const mp_int *mp); +void freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx); +mp_err mp_cswap(mp_digit condition, mp_int *a, mp_int *b, mp_size numdigits); + +#define 
MP_CHECKOK(x) \ + if (MP_OKAY > (res = (x))) \ + goto CLEANUP +#define MP_CHECKERR(x) \ + if (MP_OKAY > (res = (x))) \ + goto CLEANUP + +#define NEG MP_NEG +#define ZPOS MP_ZPOS +#define DIGIT_MAX MP_DIGIT_MAX +#define DIGIT_BIT MP_DIGIT_BIT +#define DIGIT_FMT MP_DIGIT_FMT +#define RADIX MP_RADIX +#define MAX_RADIX MP_MAX_RADIX +#define SIGN(MP) MP_SIGN(MP) +#define USED(MP) MP_USED(MP) +#define ALLOC(MP) MP_ALLOC(MP) +#define DIGITS(MP) MP_DIGITS(MP) +#define DIGIT(MP, N) MP_DIGIT(MP, N) + +/* Functions which return an mp_err value will NULL-check their arguments via + * ARGCHK(condition, return), where the caller is responsible for checking the + * mp_err return code. For functions that return an integer type, the caller + * has no way to tell if the value is an error code or a legitimate value. + * Therefore, ARGMPCHK(condition) will trigger an assertion failure on debug + * builds, but no-op in optimized builds. */ +#if MP_ARGCHK == 1 +#define ARGMPCHK(X) /* */ +#define ARGCHK(X, Y) \ + { \ + if (!(X)) { \ + return (Y); \ + } \ + } +#elif MP_ARGCHK == 2 +#include +#define ARGMPCHK(X) assert(X) +#define ARGCHK(X, Y) assert(X) +#else +#define ARGMPCHK(X) /* */ +#define ARGCHK(X, Y) /* */ +#endif + +#ifdef CT_VERIF +void mp_taint(mp_int *mp); +void mp_untaint(mp_int *mp); +#endif + +SEC_END_PROTOS + +#endif /* end _H_MPI_ */ diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c new file mode 100644 index 0000000000..9e538bb6a1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64.c @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_AMD64 +#error This file only works on AMD64 platforms. +#endif + +#include + +/* + * MPI glue + * + */ + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +void MPI_ASM_DECL +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + mp_digit w; + mp_digit d; + + d = s_mpv_mul_add_vec64(c, a, a_len, b); + c += a_len; + while (d) { + w = c[0] + d; + d = (w < c[0] || w < d); + *c++ = w; + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_common.S b/security/nss/lib/freebl/mpi/mpi_amd64_common.S new file mode 100644 index 0000000000..4000f2066a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_common.S @@ -0,0 +1,409 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +# ------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_set_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r = a * digit, r and a are vectors of length len +# returns the carry digit +# r and a are 64 bit aligned. 
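+#
+# A reference sketch in C of what the unrolled loop below computes (an
+# illustrative comment only, not assembled; it assumes a compiler with a
+# 128-bit integer type):
+#
+#     uint64_t cy = 0;                                /* running carry  */
+#     for (int i = 0; i < len; i++) {
+#         unsigned __int128 p =
+#             (unsigned __int128)a[i] * digit + cy;   /* 64x64 -> 128   */
+#         r[i] = (uint64_t)p;                         /* store lo(p)    */
+#         cy = (uint64_t)(p >> 64);                   /* carry = hi(p)  */
+#     }
+#     return cy;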
+# +# uint64_t +# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_set_vec64; + +#ifdef DARWIN +#define s_mpv_mul_set_vec64 _s_mpv_mul_set_vec64 +.private_extern s_mpv_mul_set_vec64 +s_mpv_mul_set_vec64: +#else +.type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: +#endif + + xorq %rax, %rax # if (len == 0) return (0) + testq %rdx, %rdx + jz .L17 + + movq %rdx, %r8 # Use r8 for len; %rdx is used by mul + xorq %r9, %r9 # cy = 0 + +.L15: + cmpq $8, %r8 # 8 - len + jb .L16 + movq 0(%rsi), %rax # rax = a[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq %rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +#ifndef DARWIN +.size 
s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 +#endif + +# ------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_add_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r += a * digit, r and a are vectors of length len +# returns the carry digit +# r and a are 64 bit aligned. +# +# uint64_t +# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_add_vec64; + +#ifdef DARWIN +#define s_mpv_mul_add_vec64 _s_mpv_mul_add_vec64 +.private_extern s_mpv_mul_add_vec64 +s_mpv_mul_add_vec64: +#else +.type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: +#endif + + xorq %rax, %rax # if (len == 0) return (0) + testq %rdx, %rdx + jz .L27 + + movq %rdx, %r8 # Use r8 for len; %rdx is used by mul + xorq %r9, %r9 # cy = 0 + +.L25: + cmpq $8, %r8 # 8 - len + jb .L26 + movq 0(%rsi), %rax # rax = a[0] + movq 0(%rdi), %r10 # r10 = r[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + movq 8(%rdi), %r10 # prefetch r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + movq 16(%rdi), %r10 # prefetch r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + movq 24(%rdi), %r10 # prefetch r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + movq 32(%rdi), %r10 # prefetch r[4] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + movq 40(%rdi), %r10 # prefetch r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] + movq 48(%rdi), %r10 # prefetch r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + movq 56(%rdi), %r10 # prefetch r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 
8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi), %rax + movq 40(%rdi), %r10 + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +#ifndef DARWIN +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 + +# Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm new file mode 100644 index 0000000000..2120c18f9d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm @@ -0,0 +1,388 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +; +; This code is converted from mpi_amd64_gas.asm to MASM for x64. +; + +; ------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_set_vec which exploits +; the 64X64->128 bit unsigned multiply instruction. +; +; ------------------------------------------------------------------------ + +; r = a * digit, r and a are vectors of length len +; returns the carry digit +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +.CODE + +s_mpv_mul_set_vec64 PROC + + ; compatibility shim for parameter registers + ; + ; GAS and MASM assign parameter registers differently.
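+ ;
+ ; (Summary added for clarity.) The Microsoft x64 convention passes the
+ ; first four integer arguments in rcx, rdx, r8 and r9 and treats rdi/rsi
+ ; as callee-saved, while the GAS original expects the System V order
+ ; rdi, rsi, rdx, rcx. The pushes and moves below therefore save rdi/rsi
+ ; and shuffle r -> rdi, a -> rsi, len -> rdx, digit -> rcx, after which
+ ; the body can mirror the GAS version register for register.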
+ + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor rax, rax + test rdx, rdx + jz L17 + mov r8, rdx + xor r9, r9 + +L15: + cmp r8, 8 + jb L16 + mov rax, [rsi] + mov r11, [8+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [0+rdi], rax + mov r9, rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi, 64 + add rdi, 64 + sub r8, 8 + jz L17 + jmp L15 + +L16: + mov rax, [0+rsi] + mul rcx + add rax, r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [8+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [16+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [24+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [24+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [32+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [32+rdi],rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [40+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [40+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [48+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L17 + +L17: + mov rax, r9 + pop rsi + pop rdi + ret + +s_mpv_mul_set_vec64 ENDP + + +;------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_add_vec which exploits +; the 64X64->128 bit unsigned multiply instruction. +; +;------------------------------------------------------------------------ + +; r += a * digit, r and a are vectors of length len +; returns the carry digit +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +s_mpv_mul_add_vec64 PROC + + ; compatibility shim for parameter registers + ; + ; GAS and MASM assign parameter registers differently.
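+ ;
+ ; (Illustrative note.) Per word this routine computes the 128-bit value
+ ; p = a[i] * digit + r[i] + cy, stores lo(p) back into r[i], and feeds
+ ; hi(p) forward as the next carry; the final carry is returned in rax.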
+ + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor rax, rax + test rdx, rdx + jz L27 + mov r8, rdx + xor r9, r9 + +L25: + cmp r8, 8 + jb L26 + mov rax, [0+rsi] + mov r10, [0+rdi] + mov r11, [8+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [8+rdi] + add rax,r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [16+rdi] + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [24+rdi] + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [32+rdi] + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [40+rdi] + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [48+rdi] + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [56+rdi] + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi,64 + add rdi,64 + sub r8, 8 + jz L27 + jmp L25 + +L26: + mov rax, [0+rsi] + mov r10, [0+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [8+rsi] + mov r10, [8+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [16+rsi] + mov r10, [16+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [24+rsi] + mov r10, [24+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [32+rsi] + mov r10, [32+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [40+rsi] + mov r10, [40+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [48+rsi] + mov r10, [48+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L27 + +L27: + mov rax, r9 + + pop rsi + pop rdi + ret + +s_mpv_mul_add_vec64 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s new file mode 100644 index 0000000000..ddd5c40fda --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s @@ -0,0 +1,385 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_set_vec which exploits +/ the 64X64->128 bit unsigned multiply instruction. +/ +/ ------------------------------------------------------------------------ + +/ r = a * digit, r and a are vectors of length len +/ returns the carry digit +/ r and a are 64 bit aligned. 
+/ +/ uint64_t +/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: + + xorq %rax, %rax / if (len == 0) return (0) + testq %rdx, %rdx + jz .L17 + + movq %rdx, %r8 / Use r8 for len; %rdx is used by mul + xorq %r9, %r9 / cy = 0 + +.L15: + cmpq $8, %r8 / 8 - len + jb .L16 + movq 0(%rsi), %rax / rax = a[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_add_vec 
which exploits +/ the 64X64->128 bit unsigned multiply instruction. +/ +/ ------------------------------------------------------------------------ + +/ r += a * digit, r and a are vectors of length len +/ returns the carry digit +/ r and a are 64 bit aligned. +/ +/ uint64_t +/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: + + xorq %rax, %rax / if (len == 0) return (0) + testq %rdx, %rdx + jz .L27 + + movq %rdx, %r8 / Use r8 for len; %rdx is used by mul + xorq %r9, %r9 / cy = 0 + +.L25: + cmpq $8, %r8 / 8 - len + jb .L26 + movq 0(%rsi), %rax / rax = a[0] + movq 0(%rdi), %r10 / r10 = r[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + movq 8(%rdi), %r10 / prefetch r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + movq 16(%rdi), %r10 / prefetch r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[2] + movq 24(%rdi), %r10 / prefetch r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + movq 32(%rdi), %r10 / prefetch r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + movq 40(%rdi), %r10 / prefetch r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] + movq 48(%rdi), %r10 / prefetch r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + movq 56(%rdi), %r10 / prefetch r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx / p = a[2] * digit 
+ addq %r10, %rax + adcq $0, %rdx / p += r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx / p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi), %rax + movq 40(%rdi), %r10 + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c new file mode 100644 index 0000000000..27e4efdad1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_arm.c @@ -0,0 +1,175 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This inlined version is for 32-bit ARM platform only */ + +#if !defined(__arm__) +#error "This is for ARM only" +#endif + +/* 16-bit thumb doesn't work inlined assember version */ +#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__) + +#include "mpi-priv.h" + +#ifdef MP_ASSEMBLY_MULTIPLY +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /* r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%2]\n" + : "+r"(a), "+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /* r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%2]\n" + : "+r"(a), "+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r5, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %3\n" + "str r5, [%2], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r4, 3f\n" +#else + "cmp r4, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + 
"ldr r6, [%2]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "str r5, [%2], #4\n" + "movs r5, r4\n" + "bne 2b\n" + + "3:\n" + : "+r"(a), "+l"(a_len), "+r"(c) + : "r"(b) + : "memory", "cc", "%r4", "%r5", "%r6"); +} +#endif + +#ifdef MP_ASSEMBLY_SQUARE +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r3, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "umlal r3, r4, r6, r6\n" /* w = r3:r4 */ + "str r3, [%2], #4\n" + + "ldr r5, [%2]\n" + "adds r3, r4, r5\n" + "mov r4, #0\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "mov r3, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r3, 3f\n" +#else + "cmp r3, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "movs r3, r4\n" + "bne 2b\n" + + "3:" + : "+r"(pa), "+r"(a_len), "+r"(ps) + : + : "memory", "cc", "%r3", "%r4", "%r5", "%r6"); +} +#endif +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c new file mode 100644 index 0000000000..0cea7685d6 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_hp.c @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file contains routines that perform vector multiplication. */ + +#include "mpi-priv.h" +#include + +#include +/* #include */ +#include + +extern void multacc512( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector. */ + mp_digit *result); /* Where to accumulate the result. */ + +extern void maxpy_little( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector. */ + mp_digit *result); /* Where to accumulate the result. */ + +extern void add_diag_little( + int length, /* doublewords in input vector. */ + const mp_digit *root, /* The vector to square. */ + mp_digit *result); /* Where to accumulate the result. */ + +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ + add_diag_little(a_len, pa, ps); +} + +#define MAX_STACK_DIGITS 258 +#define MULTACC512_LEN (512 / MP_DIGIT_BIT) +#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little) + +/* c = a * b */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + mp_digit x[MAX_STACK_DIGITS]; + mp_digit *px = x; + size_t xSize = 0; + + if (a == c) { + if (a_len > MAX_STACK_DIGITS) { + xSize = sizeof(mp_digit) * (a_len + 2); + px = malloc(xSize); + if (!px) + return; + } + memcpy(px, a, a_len * sizeof(*a)); + a = px; + } + s_mp_setz(c, a_len + 1); + HP_MPY_ADD_FN(a_len, &b, a, c); + if (px != x && px) { + memset(px, 0, xSize); + free(px); + } +} + +/* c += a * b, where a is a_len words long. */ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + c[a_len] = 0; /* so carry propagation stops here. */ + HP_MPY_ADD_FN(a_len, &b, a, c); +} + +/* c += a * b, where a is y words long. 
*/ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c) +{ + HP_MPY_ADD_FN(a_len, &b, a, c); +} diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s new file mode 100644 index 0000000000..f800396596 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s @@ -0,0 +1,313 @@ +/ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.text + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L2 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L1: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L1 / jmp if a_len != 0 +L2: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L4 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L3: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L3 / jmp if a_len != 0 +L4: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 
L6 / jmp if a_len == 0 + cld + mov 8(%ebp),%esi / esi = a +L5: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L5 / jmp if a_len != 0 +L6: + cmp $0,%ebx / is carry zero? + jz L8 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L8 +L7: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L7 +L8: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 20: caller's esi + / ebp - 16: caller's edi + / ebp - 12: + / ebp - 8: carry + / ebp - 4: a_len local + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: pa argument + / ebp + 12: a_len argument + / ebp + 16: ps argument + / ebp + 20: + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / a_len + mov 16(%ebp),%edi / edi = ps + cmp $0,%ecx + je L11 / jump if a_len == 0 + cld + mov 8(%ebp),%esi / esi = pa +L10: + lodsl / %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax / add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax / add low word from result + mov 4(%edi),%ebx + stosl / [es:di] = %eax; di += 4; + adc %ebx,%edx / add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl / [es:di] = %eax; di += 4; + dec %ecx / --a_len + jnz L10 / jmp if a_len != 0 +L11: + cmp $0,%ebx / is carry zero? + jz L14 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L14 +L12: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L12 +L14: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / + / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + / so its high bit is 1. This code is from NSPR. + / + / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + / mp_digit *qp, mp_digit *rp) + + / esp + 0: Caller's ebx + / esp + 4: return address + / esp + 8: Nhi argument + / esp + 12: Nlo argument + / esp + 16: divisor argument + / esp + 20: qp argument + / esp + 24: rp argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + / + +.globl s_mpv_div_2dx1d +.type s_mpv_div_2dx1d,@function +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax / return zero + pop %ebx + ret + nop + diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s new file mode 100644 index 0000000000..455792bbba --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_mips.s @@ -0,0 +1,472 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include + .set noreorder + .set noat + + .section .text, 1, 0x00000006, 4, 4 +.text: + .section .text + + .ent s_mpv_mul_d_add + .globl s_mpv_mul_d_add + +s_mpv_mul_d_add: + #/* c += a * b */ + #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.L.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.L.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.L.3 + # a1 = a[1]; + lwu a5,4(a0) +.L.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.L.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.L.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.L.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .L.6 + addiu a3,a3,4 + # } else { +.L.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .L.6 + dsrl32 t2,t0,0 + # } + # } else { +.L.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.L.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.L.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add + + .ent s_mpv_mul_d_add_prop + .globl s_mpv_mul_d_add_prop + +s_mpv_mul_d_add_prop: + #/* c += a * b */ + #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.M.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. 
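+ # (Note added for clarity.) mp_digit is 32 bits here but the MIPS64
+ # registers are 64 bits wide, so the caller may legally leave a sign
+ # extension in the upper half of a2. The dsll32/dsrl32 pair zero-extends
+ # b so the unsigned 64-bit dmultu sees only the true 32-bit value.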
+ # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.M.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.M.3 + # a1 = a[1]; + lwu a5,4(a0) +.M.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.M.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.M.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.M.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .M.6 + addiu a3,a3,8 + # } else { +.M.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + b .M.6 + addiu a3,a3,4 + # } + # } else { +.M.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + addiu a3,a3,4 + # } +.M.6: + + # while (cy) { + beq t2,zero,.M.1 + nop +.M.7: + # mp_word w = (mp_word)*c + cy; + lwu a6,0(a3) + daddu t2,t2,a6 + # *c++ = ACCUM(w); + sw t2,0(a3) + # cy = CARRYOUT(w); + dsrl32 t2,t2,0 + bne t2,zero,.M.7 + addiu a3,a3,4 + + # } +.M.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add_prop + + .ent s_mpv_mul_d + .globl s_mpv_mul_d + +s_mpv_mul_d: + #/* c = a * b */ + #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.N.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. 
+ # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.N.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.N.3 + # a1 = a[1]; + lwu a5,4(a0) +.N.4: + # a_len -= 2; + addiu a1,a1,-2 + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # w0 = (mp_word)b * a0; + dmultu a2,a4 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.N.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.N.3: + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.N.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .N.6 + addiu a3,a3,4 + # } else { +.N.5: + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .N.6 + dsrl32 t2,t0,0 + # } + # } else { +.N.2: + mflo t0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.N.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.N.1: + jr ra + nop + #} + # + .end s_mpv_mul_d + + + .ent s_mpv_sqr_add_prop + .globl s_mpv_sqr_add_prop + #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); + # registers + # a0 *a + # a1 a_len + # a2 *sqr + # a3 digit from *a, a_i + # a4 square of digit from a + # a5,a6 next 2 digits in sqr + # a7,t0 carry +s_mpv_sqr_add_prop: + move a7,zero + move t0,zero + lwu a3,0(a0) + addiu a1,a1,-1 # --a_len + dmultu a3,a3 + beq a1,zero,.P.3 # jump if we've already done the only sqr + addiu a0,a0,4 # ++a +.P.2: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + lwu a3,0(a0) + addiu a0,a0,4 # ++a + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + dmultu a3,a3 + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + addiu a1,a1,-1 # --a_len + dsrl32 a4,a4,0 + bne a1,zero,.P.2 # loop if a_len > 0 + sw a4,-4(a2) +.P.3: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + beq t0,zero,.P.9 # jump if no carry + dsrl32 a4,a4,0 +.P.8: + sw a4,-4(a2) + /* propagate final carry */ + lwu a5,0(a2) + daddu a6,a5,t0 + sltu t0,a6,a5 + bne t0,zero,.P.8 # loop if carry persists + addiu a2,a2,4 # sqrs++ +.P.9: + jr ra + sw a4,-4(a2) + + .end s_mpv_sqr_add_prop diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c new file mode 100644 index 0000000000..1e88357af8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sparc.c @@ -0,0 +1,226 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Multiplication performance enhancements for sparc v8+vis CPUs. 
*/ + +#include "mpi-priv.h" +#include +#include +#include + +/* In the functions below, */ +/* vector y must be 8-byte aligned, and n must be even */ +/* returns carry out of high order word of result */ +/* maximum n is 256 */ + +/* vector x += vector y * scaler a; where y is of length n words. */ +extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a); + +/* vector z = vector x + vector y * scaler a; where y is of length n words. */ +extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y, + int n, mp_digit a); + +/* v8 versions of these functions run on any Sparc v8 CPU. */ + +/* This trick works on Sparc V8 CPUs with the Workshop compilers. */ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + unsigned long long product = (unsigned long long)a * b; \ + Plo = (mp_digit)product; \ + Phi = (mp_digit)(product >> MP_DIGIT_BIT); \ + } + +/* c = a * b */ +static void +v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* c += a * b */ +static void +v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + a0b0 += a_i = *c; + if (a0b0 < a_i) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +static void +v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + + while (d) { + mp_word w = (mp_word)*c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + + a0b0 += a_i = *c; + if (a0b0 < a_i) + ++a1b1; + + *c++ = a0b0; + carry = a1b1; + } + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; + } +#endif +} + +/* These functions run only on v8plus+vis or v9+vis CPUs. */ + +/* c = a * b */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + mp_digit d; + mp_digit x[258]; + if (a_len <= 256) { + if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { + mp_digit *px; + px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; + memcpy(px, a, a_len * sizeof(*a)); + a = px; + if (a_len & 1) { + px[a_len] = 0; + } + } + s_mp_setz(c, a_len + 1); + d = mul_add_inp(c, a, a_len, b); + c[a_len] = d; + } else { + v8_mpv_mul_d(a, a_len, b, c); + } +} + +/* c += a * b, where a is a_len words long. 
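+ * (Clarifying note: as in s_mpv_mul_d above, the copy into the stack
+ * buffer re-aligns a to an 8-byte boundary and zero-pads a_len to an even
+ * count, satisfying the documented preconditions of mul_add_inp -- an
+ * 8-byte-aligned vector, an even n, and n <= 256; longer vectors fall
+ * back to the portable v8 loop.)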
*/ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + mp_digit d; + mp_digit x[258]; + if (a_len <= 256) { + if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { + mp_digit *px; + px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; + memcpy(px, a, a_len * sizeof(*a)); + a = px; + if (a_len & 1) { + px[a_len] = 0; + } + } + d = mul_add_inp(c, a, a_len, b); + c[a_len] = d; + } else { + v8_mpv_mul_d_add(a, a_len, b, c); + } +} + +/* c += a * b, where a is y words long. */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + mp_digit d; + mp_digit x[258]; + if (a_len <= 256) { + if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { + mp_digit *px; + px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; + memcpy(px, a, a_len * sizeof(*a)); + a = px; + if (a_len & 1) { + px[a_len] = 0; + } + } + d = mul_add_inp(c, a, a_len, b); + if (d) { + c += a_len; + do { + mp_digit sum = d + *c; + *c++ = sum; + d = sum < d; + } while (d); + } + } else { + v8_mpv_mul_d_add_prop(a, a_len, b, c); + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s new file mode 100644 index 0000000000..16a47019c3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sse2.s @@ -0,0 +1,294 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef DARWIN +#define s_mpv_mul_d _s_mpv_mul_d +#define s_mpv_mul_d_add _s_mpv_mul_d_add +#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop +#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop +#define s_mpv_div_2dx1d _s_mpv_div_2dx1d +#define TYPE_FUNCTION(x) +#else +#define TYPE_FUNCTION(x) .type x, @function +#endif + +.text + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.private_extern s_mpv_mul_d +TYPE_FUNCTION(s_mpv_mul_d) +s_mpv_mul_d: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.private_extern s_mpv_mul_d_add +TYPE_FUNCTION(s_mpv_mul_d_add) +s_mpv_mul_d_add: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry 
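+ # (Note added for clarity: neither the paddq above nor the one below
+ # can wrap, since the largest product is (2^32-1)^2 and adding two more
+ # 32-bit values keeps the sum at most 2^64-1, so the high half of %mm2
+ # always holds the exact carry extracted by the psrlq below.)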
+ movd 0(%edi), %mm0 + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.private_extern s_mpv_mul_d_add_prop +TYPE_FUNCTION(s_mpv_mul_d_add_prop) +s_mpv_mul_d_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add *c++ + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_sqr_add_prop +.private_extern s_mpv_sqr_add_prop +TYPE_FUNCTION(s_mpv_sqr_add_prop) +s_mpv_sqr_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + mov 16(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm0, %mm0 # mm0 = sqr(a) + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add the low word + movd 4(%edi), %mm3 + movd %mm2, 0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3, %mm2 # add the high word + movd %mm2, 4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8, %edi + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. 
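+ # (Clarifying note: `div` divides the 64-bit value edx:eax by its 32-bit
+ # operand, leaving the quotient in eax and the remainder in edx. It
+ # faults unless the quotient fits in 32 bits, i.e. unless Nhi < divisor,
+ # a precondition the callers are expected to meet. The code below loads
+ # Nhi:Nlo into edx:eax, divides, and stores eax through qp and edx
+ # through rp.)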
+ # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # +.globl s_mpv_div_2dx1d +.private_extern s_mpv_div_2dx1d +TYPE_FUNCTION(s_mpv_div_2dx1d) +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp), %edx + mov 12(%esp), %eax + mov 16(%esp), %ebx + div %ebx + mov 20(%esp), %ebx + mov %eax, 0(%ebx) + mov 24(%esp), %ebx + mov %edx, 0(%ebx) + xor %eax, %eax # return zero + pop %ebx + ret + nop + +#ifndef DARWIN + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s new file mode 100644 index 0000000000..8f7e2130c3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86.s @@ -0,0 +1,541 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.data +.align 4 + # + # -1 means to call s_mpi_is_sse to determine if we support sse + # instructions. + # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +.ifndef NO_PIC +.macro GET var,reg + movl \var@GOTOFF(%ebx),\reg +.endm +.macro PUT reg,var + movl \reg,\var@GOTOFF(%ebx) +.endm +.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + 
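+    # pmuludq multiplies the low 32 bits of each 64-bit MMX operand and
+    # yields the full 64-bit product, so product and carry fit in one
+    # register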
pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec 
%ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? + jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 16(%ebp),%edi # edi = ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. 
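+    # each input digit contributes a 64-bit square that is folded into
+    # two consecutive result digits, so the output pointer advances by 8
+    # per iteration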
+ add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. + # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # + +.globl s_mpv_div_2dx1d +.type s_mpv_div_2dx1d,@function +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax # return zero + pop %ebx + ret + nop + + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c new file mode 100644 index 0000000000..4faeef30ca --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c @@ -0,0 +1,531 @@ +/* + * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" + +static int is_sse = -1; +extern unsigned long s_mpi_is_sse2(); + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_2 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_1: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_1 ; jmp if a_len != 0 +L_2: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_6 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_5: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_5 ; jmp if a_len != 0 +L_6: + movd [edi], mm2 ; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_11 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_10: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_10 ; jmp if a_len != 0 +L_11: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, 
[ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_16 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_15: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd mm0, [edi] + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_15 ; jmp if a_len != 0 +L_16: + movd [edi], mm2 ; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_21 ; jmp if a_len == 0 + cld + mov esi,[ebp+8] ; esi = a +L_20: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_20 ; jmp if a_len != 0 +L_21: + cmp ebx,0 ; is carry zero? + jz L_23 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_23 +L_22: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_22 +L_23: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_26 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_25: + movd mm0, [esi] ; mm0 = *a++ + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add *c++ + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_25 ; jmp if a_len != 0 +L_26: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? 
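+        ; mov and stosd leave the carry flag untouched, so the adc at
+        ; L_27 below consumes the carry produced by the preceding add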
+ jz L_28 + mov eax, [edi] + add eax, ebx + stosd + jnc L_28 +L_27: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_27 +L_28: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 20: caller's esi + * ebp - 16: caller's edi + * ebp - 12: + * ebp - 8: carry + * ebp - 4: a_len local + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: pa argument + * ebp + 12: a_len argument + * ebp + 16: ps argument + * ebp + 20: + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push ebp + mov ebp,esp + sub esp,12 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; a_len + mov edi,[ebp+16] ; edi = ps + cmp ecx,0 + je L_31 ; jump if a_len == 0 + cld + mov esi,[ebp+8] ; esi = pa +L_30: + lodsd ; eax = [ds:si]; si += 4; + mul eax + + add eax,ebx ; add "carry" + adc edx,0 + mov ebx,[edi] + add eax,ebx ; add low word from result + mov ebx,[edi+4] + stosd ; [es:di] = eax; di += 4; + adc edx,ebx ; add high word from result + mov ebx,0 + mov eax,edx + adc ebx,0 + stosd ; [es:di] = eax; di += 4; + dec ecx ; --a_len + jnz L_30 ; jmp if a_len != 0 +L_31: + cmp ebx,0 ; is carry zero? + jz L_34 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_34 +L_32: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_32 +L_34: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + mov edi, [ebp+16] + cmp ecx, 0 + je L_36 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_35: + movd mm0, [esi] ; mm0 = *a + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm0 ; mm0 = sqr(a) + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add the low word + movd mm3, [edi+4] + movd [edi], mm2 ; store the 32bit result + psrlq mm2, 32 + paddq mm2, mm3 ; add the high word + movd [edi+4], mm2 ; store the 32bit result + psrlq mm2, 32 ; save the carry. + add edi, 8 + dec ecx ; --a_len + jnz L_35 ; jmp if a_len != 0 +L_36: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? + jz L_38 + mov eax, [edi] + add eax, ebx + stosd + jnc L_38 +L_37: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_37 +L_38: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + * so its high bit is 1. This code is from NSPR. 
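+ * (The x86 div instruction faults if the quotient does not fit in 32
+ * bits, so besides normalizing the divisor the caller must keep
+ * Nhi < divisor.)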
+ * + * Dump of assembler code for function s_mpv_div_2dx1d: + * + * esp + 0: Caller's ebx + * esp + 4: return address + * esp + 8: Nhi argument + * esp + 12: Nlo argument + * esp + 16: divisor argument + * esp + 20: qp argument + * esp + 24: rp argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) mp_err + s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + mp_digit *qp, mp_digit *rp) +{ + __asm { + push ebx + mov edx,[esp+8] + mov eax,[esp+12] + mov ebx,[esp+16] + div ebx + mov ebx,[esp+20] + mov [ebx],eax + mov ebx,[esp+24] + mov [ebx],edx + xor eax,eax ; return zero + pop ebx + ret + nop + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s new file mode 100644 index 0000000000..b903e2564a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s @@ -0,0 +1,538 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.data +.align 4 + # + # -1 means to call _s_mpi_is_sse to determine if we support sse + # instructions. + # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +#.ifndef NO_PIC +#.macro GET var,reg +# movl \var@GOTOFF(%ebx),\reg +#.endm +#.macro PUT reg,var +# movl \reg,\var@GOTOFF(%ebx) +#.endm +#.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +#.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d +.type _s_mpv_mul_d,@function +_s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_x86 + jg _s_mpv_mul_d_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_sse2 +_s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq 
$32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add +.type _s_mpv_mul_d_add,@function +_s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_x86 + jg _s_mpv_mul_d_add_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_sse2 +_s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add_prop +.type _s_mpv_mul_d_add_prop,@function +_s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_prop_x86 + jg _s_mpv_mul_d_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_prop_sse2 +_s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? 
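+    # the loop at 22: propagates the carry one digit at a time and stops
+    # at the first digit that does not overflow; the caller is expected
+    # to leave enough high digits in c to absorb it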
+ jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl _s_mpv_sqr_add_prop +.type _s_mpv_sqr_add_prop,@function +_s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_sqr_add_prop_x86 + jg _s_mpv_sqr_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_sqr_add_prop_sse2 +_s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 16(%ebp),%edi # edi = ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. 
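+    # as in mpi_x86.s: each a_i * a_i spans the two result digits at
+    # 0(%edi) and 4(%edi), so the output pointer advances by 8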
+ add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. + # + # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # + +.globl _s_mpv_div_2dx1d +.type _s_mpv_div_2dx1d,@function +_s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax # return zero + pop %ebx + ret + nop + diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c new file mode 100644 index 0000000000..db19cff138 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mplogic.c @@ -0,0 +1,460 @@ +/* + * mplogic.c + * + * Bitwise logical operations on MPI values + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi-priv.h" +#include "mplogic.h" + +/* {{{ Lookup table for population count */ + +static unsigned char bitc[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_not(a, b) - compute b = ~a + mpl_and(a, b, c) - compute c = a & b + mpl_or(a, b, c) - compute c = a | b + mpl_xor(a, b, c) - compute c = a ^ b + */ + +/* {{{ mpl_not(a, b) */ + +mp_err +mpl_not(mp_int *a, mp_int *b) +{ + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + /* This relies on the fact that the digit type is unsigned */ + for (ix = 0; ix < USED(b); ix++) + DIGIT(b, ix) = ~DIGIT(b, ix); + + s_mp_clamp(b); + + return MP_OKAY; + +} /* end mpl_not() */ + +/* }}} */ + +/* {{{ mpl_and(a, b, c) */ + +mp_err +mpl_and(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) <= USED(b)) { + which = a; + other = b; + } else { + which 
= b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); ix++) + DIGIT(c, ix) &= DIGIT(other, ix); + + s_mp_clamp(c); + + return MP_OKAY; + +} /* end mpl_and() */ + +/* }}} */ + +/* {{{ mpl_or(a, b, c) */ + +mp_err +mpl_or(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) >= USED(b)) { + which = a; + other = b; + } else { + which = b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); ix++) + DIGIT(c, ix) |= DIGIT(other, ix); + + return MP_OKAY; + +} /* end mpl_or() */ + +/* }}} */ + +/* {{{ mpl_xor(a, b, c) */ + +mp_err +mpl_xor(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int *which, *other; + mp_err res; + unsigned int ix; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (USED(a) >= USED(b)) { + which = a; + other = b; + } else { + which = b; + other = a; + } + + if ((res = mp_copy(which, c)) != MP_OKAY) + return res; + + for (ix = 0; ix < USED(which); ix++) + DIGIT(c, ix) ^= DIGIT(other, ix); + + s_mp_clamp(c); + + return MP_OKAY; + +} /* end mpl_xor() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_rsh(a, b, d) - b = a >> d + mpl_lsh(a, b, d) - b = a << d + */ + +/* {{{ mpl_rsh(a, b, d) */ + +mp_err +mpl_rsh(const mp_int *a, mp_int *b, mp_digit d) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + s_mp_div_2d(b, d); + + return MP_OKAY; + +} /* end mpl_rsh() */ + +/* }}} */ + +/* {{{ mpl_lsh(a, b, d) */ + +mp_err +mpl_lsh(const mp_int *a, mp_int *b, mp_digit d) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + return s_mp_mul_2d(b, d); + +} /* end mpl_lsh() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_num_set(a, num) + + Count the number of set bits in the binary representation of a. + Returns MP_OKAY and sets 'num' to be the number of such bits, if + possible. If num is NULL, the result is thrown away, but it is + not considered an error. + + mpl_num_clear() does basically the same thing for clear bits. + */ + +/* {{{ mpl_num_set(a, num) */ + +mp_err +mpl_num_set(mp_int *a, unsigned int *num) +{ + unsigned int ix, db, nset = 0; + mp_digit cur; + unsigned char reg; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + cur = DIGIT(a, ix); + + for (db = 0; db < sizeof(mp_digit); db++) { + reg = (unsigned char)(cur >> (CHAR_BIT * db)); + + nset += bitc[reg]; + } + } + + if (num) + *num = nset; + + return MP_OKAY; + +} /* end mpl_num_set() */ + +/* }}} */ + +/* {{{ mpl_num_clear(a, num) */ + +mp_err +mpl_num_clear(mp_int *a, unsigned int *num) +{ + unsigned int ix, db, nset = 0; + mp_digit cur; + unsigned char reg; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + cur = DIGIT(a, ix); + + for (db = 0; db < sizeof(mp_digit); db++) { + reg = (unsigned char)(cur >> (CHAR_BIT * db)); + + nset += bitc[UCHAR_MAX - reg]; + } + } + + if (num) + *num = nset; + + return MP_OKAY; + +} /* end mpl_num_clear() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* + mpl_parity(a) + + Determines the bitwise parity of the value given. 
Returns MP_EVEN + if an even number of digits are set, MP_ODD if an odd number are + set. + */ + +/* {{{ mpl_parity(a) */ + +mp_err +mpl_parity(mp_int *a) +{ + unsigned int ix; + int par = 0; + mp_digit cur; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = 0; ix < USED(a); ix++) { + int shft = (sizeof(mp_digit) * CHAR_BIT) / 2; + + cur = DIGIT(a, ix); + + /* Compute parity for current digit */ + while (shft != 0) { + cur ^= (cur >> shft); + shft >>= 1; + } + cur &= 1; + + /* XOR with running parity so far */ + par ^= cur; + } + + if (par) + return MP_ODD; + else + return MP_EVEN; + +} /* end mpl_parity() */ + +/* }}} */ + +/* + mpl_set_bit + + Returns MP_OKAY or some error code. + Grows a if needed to set a bit to 1. + */ +mp_err +mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value) +{ + mp_size ix; + mp_err rv; + mp_digit mask; + + ARGCHK(a != NULL, MP_BADARG); + + ix = bitNum / MP_DIGIT_BIT; + if (ix + 1 > MP_USED(a)) { + rv = s_mp_pad(a, ix + 1); + if (rv != MP_OKAY) + return rv; + } + + bitNum = bitNum % MP_DIGIT_BIT; + mask = (mp_digit)1 << bitNum; + if (value) + MP_DIGIT(a, ix) |= mask; + else + MP_DIGIT(a, ix) &= ~mask; + s_mp_clamp(a); + return MP_OKAY; +} + +/* + mpl_get_bit + + returns 0 or 1 or some (negative) error code. + */ +mp_err +mpl_get_bit(const mp_int *a, mp_size bitNum) +{ + mp_size bit, ix; + mp_err rv; + + ARGCHK(a != NULL, MP_BADARG); + + ix = bitNum / MP_DIGIT_BIT; + ARGCHK(ix <= MP_USED(a) - 1, MP_RANGE); + + bit = bitNum % MP_DIGIT_BIT; + rv = (mp_err)(MP_DIGIT(a, ix) >> bit) & 1; + return rv; +} + +/* + mpl_get_bits + - Extracts numBits bits from a, where the least significant extracted bit + is bit lsbNum. Returns a negative value if error occurs. + - Because sign bit is used to indicate error, maximum number of bits to + be returned is the lesser of (a) the number of bits in an mp_digit, or + (b) one less than the number of bits in an mp_err. + - lsbNum + numbits can be greater than the number of significant bits in + integer a, as long as bit lsbNum is in the high order digit of a. + */ +mp_err +mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits) +{ + mp_size rshift = (lsbNum % MP_DIGIT_BIT); + mp_size lsWndx = (lsbNum / MP_DIGIT_BIT); + mp_digit *digit = MP_DIGITS(a) + lsWndx; + mp_digit mask = ((1 << numBits) - 1); + + ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG); + ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE); + + if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) || + (lsWndx + 1 >= MP_USED(a))) { + mask &= (digit[0] >> rshift); + } else { + mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift))); + } + return (mp_err)mask; +} + +#define LZCNTLOOP(i) \ + do { \ + x = d >> (i); \ + mask = (0 - x); \ + mask = (0 - (mask >> (MP_DIGIT_BIT - 1))); \ + bits += (i)&mask; \ + d ^= (x ^ d) & mask; \ + } while (0) + +/* + mpl_significant_bits + returns number of significant bits in abs(a). + In other words: floor(lg(abs(a))) + 1. + returns 1 if value is zero. + */ +mp_size +mpl_significant_bits(const mp_int *a) +{ + /* + start bits at 1. + lg(0) = 0 => bits = 1 by function semantics. + below does a binary search for the _position_ of the top bit set, + which is floor(lg(abs(a))) for a != 0. 
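+       each LZCNTLOOP(i) is one branch-free step of that search: when
+       (d >> i) is nonzero it adds i to the count and keeps only the
+       top part of d, selecting with an all-ones/all-zeros mask rather
+       than a conditional.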
+ */ + mp_size bits = 1; + int ix; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = MP_USED(a); ix > 0;) { + mp_digit d, x, mask; + if ((d = MP_DIGIT(a, --ix)) == 0) + continue; +#if !defined(MP_USE_UINT_DIGIT) + LZCNTLOOP(32); +#endif + LZCNTLOOP(16); + LZCNTLOOP(8); + LZCNTLOOP(4); + LZCNTLOOP(2); + LZCNTLOOP(1); + break; + } + bits += ix * MP_DIGIT_BIT; + return bits; +} + +#undef LZCNTLOOP + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h new file mode 100644 index 0000000000..71b7551392 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mplogic.h @@ -0,0 +1,55 @@ +/* + * mplogic.h + * + * Bitwise logical operations on MPI values + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _H_MPLOGIC_ +#define _H_MPLOGIC_ + +#include "mpi.h" +SEC_BEGIN_PROTOS + +/* + The logical operations treat an mp_int as if it were a bit vector, + without regard to its sign (an mp_int is represented in a signed + magnitude format). Values are treated as if they had an infinite + string of zeros left of the most-significant bit. + */ + +/* Parity results */ + +#define MP_EVEN MP_YES +#define MP_ODD MP_NO + +/* Bitwise functions */ + +mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */ +mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */ +mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */ +mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */ + +/* Shift functions */ + +mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */ +mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */ + +/* Bit count and parity */ + +mp_err mpl_num_set(mp_int *a, unsigned int *num); /* count set bits */ +mp_err mpl_num_clear(mp_int *a, unsigned int *num); /* count clear bits */ +mp_err mpl_parity(mp_int *a); /* determine parity */ + +/* Get & Set the value of a bit */ + +mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value); +mp_err mpl_get_bit(const mp_int *a, mp_size bitNum); +mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits); +mp_size mpl_significant_bits(const mp_int *a); + +SEC_END_PROTOS + +#endif /* end _H_MPLOGIC_ */ diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c new file mode 100644 index 0000000000..63842c6314 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpmontg.c @@ -0,0 +1,1160 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file implements moduluar exponentiation using Montgomery's + * method for modular reduction. This file implements the method + * described as "Improvement 2" in the paper "A Cryptogrpahic Library for + * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr. + * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90" + * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244, + * published by Springer Verlag. 
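+ * (In REDC, R = RADIX^n for an n-digit modulus N. s_mp_redc below makes
+ * the low n digits of T zero with n single-digit multiply-add passes,
+ * then shifts right by n digits, which divides exactly by R; at most
+ * one final subtraction of N is needed.)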
+ */ + +#define MP_USING_CACHE_SAFE_MOD_EXP 1 +#include +#include "mpi-priv.h" +#include "mplogic.h" +#include "mpprime.h" +#ifdef MP_USING_MONT_MULF +#include "montmulf.h" +#endif +#include /* ptrdiff_t */ +#include + +#define STATIC + +#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */ + +/*! computes T = REDC(T), 2^b == R + \param T < RN +*/ +mp_err +s_mp_redc(mp_int *T, mp_mont_modulus *mmm) +{ + mp_err res; + mp_size i; + + i = (MP_USED(&mmm->N) << 1) + 1; + MP_CHECKOK(s_mp_pad(T, i)); + for (i = 0; i < MP_USED(&mmm->N); ++i) { + mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime; + /* T += N * m_i * (MP_RADIX ** i); */ + s_mp_mul_d_add_offset(&mmm->N, m_i, T, i); + } + s_mp_clamp(T); + + /* T /= R */ + s_mp_rshd(T, MP_USED(&mmm->N)); + + if ((res = s_mp_cmp(T, &mmm->N)) >= 0) { + /* T = T - N */ + MP_CHECKOK(s_mp_sub(T, &mmm->N)); +#ifdef DEBUG + if ((res = mp_cmp(T, &mmm->N)) >= 0) { + res = MP_UNDEF; + goto CLEANUP; + } +#endif + } + res = MP_OKAY; +CLEANUP: + return res; +} + +#if !defined(MP_MONT_USE_MP_MUL) + +/*! c <- REDC( a * b ) mod N + \param a < N i.e. "reduced" + \param b < N i.e. "reduced" + \param mmm modulus N and n0' of N +*/ +mp_err +s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, + mp_mont_modulus *mmm) +{ + mp_digit *pb; + mp_digit m_i; + mp_err res; + mp_size ib; /* "index b": index of current digit of B */ + mp_size useda, usedb; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + ib = (MP_USED(&mmm->N) << 1) + 1; + if ((res = s_mp_pad(c, ib)) != MP_OKAY) + goto CLEANUP; + + useda = MP_USED(a); + pb = MP_DIGITS(b); + s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c)); + s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1)); + m_i = MP_DIGIT(c, 0) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0); + + /* Outer loop: Digits of b */ + usedb = MP_USED(b); + for (ib = 1; ib < usedb; ib++) { + mp_digit b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + if (usedb < MP_USED(&mmm->N)) { + for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) { + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + } + s_mp_clamp(c); + s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */ + if (s_mp_cmp(c, &mmm->N) >= 0) { + MP_CHECKOK(s_mp_sub(c, &mmm->N)); + } + res = MP_OKAY; + +CLEANUP: + return res; +} +#endif + +mp_err +mp_to_mont(const mp_int *x, const mp_int *N, mp_int *xMont) +{ + mp_err res; + + /* xMont = x * R mod N where N is modulus */ + if (x != xMont) { + MP_CHECKOK(mp_copy(x, xMont)); + } + MP_CHECKOK(s_mp_lshd(xMont, MP_USED(N))); /* xMont = x << b */ + MP_CHECKOK(mp_div(xMont, N, 0, xMont)); /* mod N */ +CLEANUP: + return res; +} + +mp_digit +mp_calculate_mont_n0i(const mp_int *N) +{ + return 0 - s_mp_invmod_radix(MP_DIGIT(N, 0)); +} + +#ifdef MP_USING_MONT_MULF + +/* the floating point multiply is already cache safe, + * don't turn on cache safe unless we specifically + * force it */ +#ifndef MP_FORCE_CACHE_SAFE +#undef MP_USING_CACHE_SAFE_MOD_EXP +#endif + +unsigned int mp_using_mont_mulf = 1; + +/* computes montgomery square of the integer in mResult */ +#define SQR \ + conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, d16Tmp, \ + dTmp, dn, 
MP_DIGITS(modulus), nLen, dn0) + +/* computes montgomery product of x and the integer in mResult */ +#define MUL(x) \ + conv_i32_to_d32(dm1, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, oddPowers[x], \ + dTmp, dn, MP_DIGITS(modulus), nLen, dn0) + +/* Do modular exponentiation using floating point multiply code. */ +mp_err +mp_exptmod_f(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_digit *mResult; + double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp; + double dn0; + mp_size i; + mp_err res; + int expOff; + int dSize = 0, oddPowSize, dTmpSize; + mp_int accum1; + double *oddPowers[MAX_ODD_INTS]; + + /* function for computing n0prime only works if n0 is odd */ + + MP_DIGITS(&accum1) = 0; + + for (i = 0; i < MAX_ODD_INTS; ++i) + oddPowers[i] = 0; + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + + mp_set(&accum1, 1); + MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); + MP_CHECKOK(s_mp_pad(&accum1, nLen)); + + oddPowSize = 2 * nLen + 1; + dTmpSize = 2 * oddPowSize; + dSize = sizeof(double) * (nLen * 4 + 1 + + ((odd_ints + 1) * oddPowSize) + dTmpSize); + dBuf = malloc(dSize); + if (!dBuf) { + res = MP_MEM; + goto CLEANUP; + } + dm1 = dBuf; /* array of d32 */ + dn = dBuf + nLen; /* array of d32 */ + dSqr = dn + nLen; /* array of d32 */ + d16Tmp = dSqr + nLen; /* array of d16 */ + dTmp = d16Tmp + oddPowSize; + + for (i = 0; i < odd_ints; ++i) { + oddPowers[i] = dTmp; + dTmp += oddPowSize; + } + mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */ + + /* Make dn and dn0 */ + conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen); + dn0 = (double)(mmm->n0prime & 0xffff); + + /* Make dSqr */ + conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen); + mont_mulf_noconv(mResult, dm1, oddPowers[0], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d32(dSqr, mResult, nLen); + + for (i = 1; i < odd_ints; ++i) { + mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d16(oddPowers[i], mResult, nLen); + } + + s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */ + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR; + } else if (smallExp & 1) { + SQR; + MUL(0); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 
6) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 0x20) { + SQR; + MUL(smallExp / 64); + SQR; + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else { + abort(); + } + } + + s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */ + + res = s_mp_redc(&accum1, mmm); + mp_exch(&accum1, result); + +CLEANUP: + mp_clear(&accum1); + if (dBuf) { + if (dSize) + memset(dBuf, 0, dSize); + free(dBuf); + } + + return res; +} +#undef SQR +#undef MUL +#endif + +#define SQR(a, b) \ + MP_CHECKOK(mp_sqr(a, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) + +#if defined(MP_MONT_USE_MP_MUL) +#define MUL(x, a, b) \ + MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) +#else +#define MUL(x, a, b) \ + MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm)) +#endif + +#define SWAPPA \ + ptmp = pa1; \ + pa1 = pa2; \ + pa2 = ptmp + +/* Do modular exponentiation using integer multiply code. */ +mp_err +mp_exptmod_i(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_int *pa1, *pa2, *ptmp; + mp_size i; + mp_err res; + int expOff; + mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS]; + + /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */ + /* oddPowers[i] = base ** (2*i + 1); */ + + MP_DIGITS(&accum1) = 0; + MP_DIGITS(&accum2) = 0; + MP_DIGITS(&power2) = 0; + for (i = 0; i < MAX_ODD_INTS; ++i) { + MP_DIGITS(oddPowers + i) = 0; + } + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2)); + + MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase)); + + MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2)); + MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */ + MP_CHECKOK(s_mp_redc(&power2, mmm)); + + for (i = 1; i < odd_ints; ++i) { + MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2)); + MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i)); + MP_CHECKOK(s_mp_redc(oddPowers + i, mmm)); + } + + /* set accumulator to montgomery residue of 1 */ + mp_set(&accum1, 1); + MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1)); + pa1 = &accum1; + pa2 = &accum2; + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + MUL(0, pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + 
SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 8) { + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 2, pa2, pa1); + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 4, pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 8, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 16, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + MUL(smallExp / 32, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 6) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 32, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x20) { + SQR(pa1, pa2); + MUL(smallExp / 64, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else { + abort(); + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&power2); + for (i = 0; i < odd_ints; ++i) { + mp_clear(oddPowers + i); + } + return res; +} +#undef SQR +#undef MUL + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +unsigned int mp_using_cache_safe_exp = 1; +#endif + +mp_err +mp_set_safe_modexp(int value) +{ +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + mp_using_cache_safe_exp = value; + return MP_OKAY; +#else + if (value == 0) { + return MP_OKAY; + } + return MP_BADARG; +#endif +} + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +#define WEAVE_WORD_SIZE 4 + +/* + * mpi_to_weave takes an array of bignums, a matrix in which each bignum + * occupies all the columns of a row, and transposes it into a matrix in + * which each bignum occupies a column of every row. The first row of the + * input matrix becomes the first column of the output matrix. The n'th + * row of input becomes the n'th column of output. The input data is said + * to be "interleaved" or "woven" into the output matrix. + * + * The array of bignums is left in this woven form. 
Each time a single
+ * bignum value is needed, it is recreated by fetching the n'th column,
+ * forming a single row which is the new bignum.
+ *
+ * The purpose of this interleaving is to make it impossible to determine
+ * which of the bignums is being used in any one operation by examining
+ * the pattern of cache misses.
+ *
+ * The weaving function does not transpose the entire input matrix in one call.
+ * It transposes 4 rows of mp_ints into their respective columns of output.
+ *
+ * This implementation treats each mp_int bignum as an array of mp_digits.
+ * It stores those digits as a column of mp_digits in the output matrix. It
+ * doesn't care if the machine uses big-endian or little-endian byte ordering
+ * within mp_digits.
+ *
+ * "bignums" is an array of mp_ints.
+ * It points to four rows, four mp_ints, a subset of a larger array of mp_ints.
+ *
+ * "weaved" is the weaved output matrix.
+ * The first byte of bignums[0] is stored in weaved[0].
+ *
+ * "nBignums" is the total number of bignums in the array of which "bignums"
+ * is a part.
+ *
+ * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array.
+ * mp_ints that use less than nDigits digits are logically padded with zeros
+ * while being stored in the weaved array.
+ */
+mp_err
+mpi_to_weave(const mp_int *bignums,
+             mp_digit *weaved,
+             mp_size nDigits,  /* in each mp_int of input */
+             mp_size nBignums) /* in the entire source array */
+{
+    mp_size i;
+    mp_digit *endDest = weaved + (nDigits * nBignums);
+
+    for (i = 0; i < WEAVE_WORD_SIZE; i++) {
+        mp_size used = MP_USED(&bignums[i]);
+        mp_digit *pSrc = MP_DIGITS(&bignums[i]);
+        mp_digit *endSrc = pSrc + used;
+        mp_digit *pDest = weaved + i;
+
+        ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG);
+        ARGCHK(used <= nDigits, MP_BADARG);
+
+        for (; pSrc < endSrc; pSrc++) {
+            *pDest = *pSrc;
+            pDest += nBignums;
+        }
+        while (pDest < endDest) {
+            *pDest = 0;
+            pDest += nBignums;
+        }
+    }
+
+    return MP_OKAY;
+}
+
+/*
+ * These macros yield an all-ones value if the condition is true,
+ * and 0 otherwise, without branching.
+ */
+#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
+#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
+#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
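A minimal standalone sketch of how these masks behave, assuming unsigned long as the digit type (the names digit, table and index below are illustrative only, not part of this patch). It selects table[index] while reading every element, exactly as the inner loop of weave_to_mpi below does:

    #include <stdio.h>

    typedef unsigned long digit; /* stand-in for mp_digit in this sketch */

    #define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
    #define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
    #define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))

    int main(void)
    {
        digit table[4] = { 111, 222, 333, 444 };
        digit index = 2;
        digit d = 0;
        digit j;

        /* every j is read; the mask is all-ones only when j == index */
        for (j = 0; j < 4; ++j)
            d |= table[j] & CONST_TIME_EQ(j, index);

        printf("%lu\n", d); /* prints 333 */
        return 0;
    }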
+/* Reverse the operation above for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * Every read accesses every element of the weaved array, in order to
+ * avoid timing attacks based on patterns of memory accesses.
+ */
+mp_err
+weave_to_mpi(mp_int *a,              /* out, result */
+             const mp_digit *weaved, /* in, byte matrix */
+             mp_size index,          /* which column to read */
+             mp_size nDigits,        /* number of mp_digits in each bignum */
+             mp_size nBignums)       /* width of the matrix */
+{
+    /* these are indices, but need to be the same size as mp_digit
+     * because of the CONST_TIME operations */
+    mp_digit i, j;
+    mp_digit d;
+    mp_digit *pDest = MP_DIGITS(a);
+
+    MP_SIGN(a) = MP_ZPOS;
+    MP_USED(a) = nDigits;
+
+    assert(weaved != NULL);
+
+    /* Fetch the proper column in constant time, indexing over the whole array */
+    for (i = 0; i < nDigits; ++i) {
+        d = 0;
+        for (j = 0; j < nBignums; ++j) {
+            d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
+        }
+        pDest[i] = d;
+    }
+
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+#define SQR(a, b) \
+    MP_CHECKOK(mp_sqr(a, b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL_NOWEAVE(x, a, b) \
+    MP_CHECKOK(mp_mul(a, x, b)); \
+    MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL_NOWEAVE(x, a, b) \
+    MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm))
+#endif
+
+#define MUL(x, a, b) \
+    MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
+    MUL_NOWEAVE(&tmp, a, b)
+
+#define SWAPPA \
+    ptmp = pa1; \
+    pa1 = pa2; \
+    pa2 = ptmp
+#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y)))
+
+/* Do modular exponentiation using integer multiply code. */
+mp_err
+mp_exptmod_safe_i(const mp_int *montBase,
+                  const mp_int *exponent,
+                  const mp_int *modulus,
+                  mp_int *result,
+                  mp_mont_modulus *mmm,
+                  int nLen,
+                  mp_size bits_in_exponent,
+                  mp_size window_bits,
+                  mp_size num_powers)
+{
+    mp_int *pa1, *pa2, *ptmp;
+    mp_size i;
+    mp_size first_window;
+    mp_err res;
+    int expOff;
+    mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
+    mp_int tmp;
+    mp_digit *powersArray = NULL;
+    mp_digit *powers = NULL;
+
+    MP_DIGITS(&accum1) = 0;
+    MP_DIGITS(&accum2) = 0;
+    MP_DIGITS(&accum[0]) = 0;
+    MP_DIGITS(&accum[1]) = 0;
+    MP_DIGITS(&accum[2]) = 0;
+    MP_DIGITS(&accum[3]) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    /* grab the first window value. This allows us to preload accumulator1
+     * and save a conversion, some squares and a multiply */
+    MP_CHECKOK(mpl_get_bits(exponent,
+                            bits_in_exponent - window_bits, window_bits));
+    first_window = (mp_size)res;
+
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+    MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+    /* build the first WEAVE_WORD powers inline */
+    /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
+    if (num_powers > 2) {
+        MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
+        MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
+        mp_set(&accum[0], 1);
+        MP_CHECKOK(mp_to_mont(&accum[0], &(mmm->N), &accum[0]));
+        MP_CHECKOK(mp_copy(montBase, &accum[1]));
+        SQR(montBase, &accum[2]);
+        MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+        powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
+        if (!powersArray) {
+            res = MP_MEM;
+            goto CLEANUP;
+        }
+        /* powers[i] = base ** (i); */
+        powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
+        MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
+        if (first_window < 4) {
+            MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
+            first_window = num_powers;
+        }
+    } else {
+        if (first_window == 0) {
+            mp_set(&accum1, 1);
+            MP_CHECKOK(mp_to_mont(&accum1, &(mmm->N), &accum1));
+        } else {
+            /* assert first_window == 1? */
+            MP_CHECKOK(mp_copy(montBase, &accum1));
+        }
+    }
+
+    /*
+     * calculate all the powers in the powers array.
+     * this adds 2**(k-1)-2 square operations over just calculating the
+     * odd powers where k is the window size in the two other mp_exptmod
+     * implementations in this file. We will get some of that
+     * back by not needing the first 'k' squares and one multiply for the
+     * first window.
+     * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
+     * num_powers > 2, in which case powers will have been allocated.
+     */
+    for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
+        int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
+        if (i & 1) {
+            MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
+            /* we've filled the array; do our 'per array' processing */
+            if (acc_index == (WEAVE_WORD_SIZE - 1)) {
+                MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
+                                        nLen, num_powers));
+
+                if (first_window <= i) {
+                    MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
+                                       &accum1));
+                    first_window = num_powers;
+                }
+            }
+        } else {
+            /* up to 8 we can find 2^i-1 in the accum array, but at 8 our source
+             * and target are the same, so we need to copy. After that, the
+             * value is overwritten, so we need to fetch it from the stored
+             * weave array */
+            if (i > 2 * WEAVE_WORD_SIZE) {
+                MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
+                SQR(&accum2, &accum[acc_index]);
+            } else {
+                int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
+                if (half_power_index == acc_index) {
+                    /* copy is cheaper than weave_to_mpi */
+                    MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+                    SQR(&accum2, &accum[acc_index]);
+                } else {
+                    SQR(&accum[half_power_index], &accum[acc_index]);
+                }
+            }
+        }
+    }
+/* If accum1 isn't set, then there is something wrong with our logic
+ * above; this is an internal programming error.
+ */
+#if MP_ARGCHK == 2
+    assert(MP_USED(&accum1) != 0);
+#endif
+
+    /* set accumulator to montgomery residue of 1 */
+    pa1 = &accum1;
+    pa2 = &accum2;
+
+    /* tmp is not used if window_bits == 1. */
+    if (window_bits != 1) {
+        MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
+    }
+
+    for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        /* hand-unroll the loops */
+        switch (window_bits) {
+            case 1:
+                if (!smallExp) {
+                    SQR(pa1, pa2);
+                    SWAPPA;
+                } else if (smallExp & 1) {
+                    SQR(pa1, pa2);
+                    MUL_NOWEAVE(montBase, pa2, pa1);
+                } else {
+                    abort();
+                }
+                break;
+            case 6:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+            /* fall through */
+            case 4:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                MUL(smallExp, pa1, pa2);
+                SWAPPA;
+                break;
+            case 5:
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                SQR(pa2, pa1);
+                SQR(pa1, pa2);
+                MUL(smallExp, pa2, pa1);
+                break;
+            default:
+                abort(); /* could do a loop?
*/ + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&accum[0]); + mp_clear(&accum[1]); + mp_clear(&accum[2]); + mp_clear(&accum[3]); + mp_clear(&tmp); + /* zero required by FIPS here, can't use PORT_ZFree + * because mpi doesn't link with util */ + if (powers) { + PORT_Memset(powers, 0, num_powers * sizeof(mp_digit)); + } + free(powersArray); + return res; +} +#undef SQR +#undef MUL +#endif + +mp_err +mp_exptmod(const mp_int *inBase, const mp_int *exponent, + const mp_int *modulus, mp_int *result) +{ + const mp_int *base; + mp_size bits_in_exponent, i, window_bits, odd_ints; + mp_err res; + int nLen; + mp_int montBase, goodBase; + mp_mont_modulus mmm; +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + static unsigned int max_window_bits; +#endif + + /* function for computing n0prime only works if n0 is odd */ + if (!mp_isodd(modulus)) + return s_mp_exptmod(inBase, exponent, modulus, result); + + if (mp_cmp_z(inBase) == MP_LT) + return MP_RANGE; + MP_DIGITS(&montBase) = 0; + MP_DIGITS(&goodBase) = 0; + + if (mp_cmp(inBase, modulus) < 0) { + base = inBase; + } else { + MP_CHECKOK(mp_init(&goodBase)); + base = &goodBase; + MP_CHECKOK(mp_mod(inBase, modulus, &goodBase)); + } + + nLen = MP_USED(modulus); + MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2)); + + mmm.N = *modulus; /* a copy of the mp_int struct */ + + /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX + ** where n0 = least significant mp_digit of N, the modulus. + */ + mmm.n0prime = mp_calculate_mont_n0i(modulus); + + MP_CHECKOK(mp_to_mont(base, modulus, &montBase)); + + bits_in_exponent = mpl_significant_bits(exponent); +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + if (mp_using_cache_safe_exp) { + if (bits_in_exponent > 780) + window_bits = 6; + else if (bits_in_exponent > 256) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + } else +#endif + if (bits_in_exponent > 480) + window_bits = 6; + else if (bits_in_exponent > 160) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + /* + * clamp the window size based on + * the cache line size. + */ + if (!max_window_bits) { + unsigned long cache_size = s_mpi_getProcessorLineSize(); + /* processor has no cache, use 'fast' code always */ + if (cache_size == 0) { + mp_using_cache_safe_exp = 0; + } + if ((cache_size == 0) || (cache_size >= 64)) { + max_window_bits = 6; + } else if (cache_size >= 32) { + max_window_bits = 5; + } else if (cache_size >= 16) { + max_window_bits = 4; + } else + max_window_bits = 1; /* should this be an assert? 
*/
+    }
+
+    /* clamp the window size down before we calculate bits_in_exponent */
+    if (mp_using_cache_safe_exp) {
+        if (window_bits > max_window_bits) {
+            window_bits = max_window_bits;
+        }
+    }
+#endif
+
+    odd_ints = 1 << (window_bits - 1);
+    i = bits_in_exponent % window_bits;
+    if (i != 0) {
+        bits_in_exponent += window_bits - i;
+    }
+
+#ifdef MP_USING_MONT_MULF
+    if (mp_using_mont_mulf) {
+        MP_CHECKOK(s_mp_pad(&montBase, nLen));
+        res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
+                           bits_in_exponent, window_bits, odd_ints);
+    } else
+#endif
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+        if (mp_using_cache_safe_exp) {
+        res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
+                                bits_in_exponent, window_bits, 1 << window_bits);
+    } else
+#endif
+        res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
+                           bits_in_exponent, window_bits, odd_ints);
+
+CLEANUP:
+    mp_clear(&montBase);
+    mp_clear(&goodBase);
+    /* Don't mp_clear mmm.N because it is merely a copy of modulus.
+    ** Just zap it.
+    */
+    memset(&mmm, 0, sizeof mmm);
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c
new file mode 100644
index 0000000000..b757150e79
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.c
@@ -0,0 +1,610 @@
+/*
+ * mpprime.c
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
+
+#define RANDOM() rand()
+
+#include "primes.c" /* pull in the prime digit table */
+
+/*
+  Test if any of a given vector of digits divides a. If not, MP_NO
+  is returned; otherwise, MP_YES is returned and 'which' is set to
+  the index of the integer in the vector which divided a.
+ */
+mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
+
+/* {{{ mpp_divis(a, b) */
+
+/*
+  mpp_divis(a, b)
+
+  Returns MP_YES if a is divisible by b, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis(mp_int *a, mp_int *b)
+{
+    mp_err res;
+    mp_int rem;
+
+    if ((res = mp_init(&rem)) != MP_OKAY)
+        return res;
+
+    if ((res = mp_mod(a, b, &rem)) != MP_OKAY)
+        goto CLEANUP;
+
+    if (mp_cmp_z(&rem) == 0)
+        res = MP_YES;
+    else
+        res = MP_NO;
+
+CLEANUP:
+    mp_clear(&rem);
+    return res;
+
+} /* end mpp_divis() */
+
+/* }}} */
+
+/* {{{ mpp_divis_d(a, d) */
+
+/*
+  mpp_divis_d(a, d)
+
+  Return MP_YES if a is divisible by d, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_d(mp_int *a, mp_digit d)
+{
+    mp_err res;
+    mp_digit rem;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    if (d == 0)
+        return MP_NO;
+
+    if ((res = mp_mod_d(a, d, &rem)) != MP_OKAY)
+        return res;
+
+    if (rem == 0)
+        return MP_YES;
+    else
+        return MP_NO;
+
+} /* end mpp_divis_d() */
+
+/* }}} */
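Note that these predicates are three-valued: they can return MP_YES, MP_NO, or a genuine mp_err error code, so callers must distinguish all three cases. A hedged usage sketch (check_divis is an illustrative name, not part of this patch):

    #include "mpi.h"
    #include "mpprime.h"

    /* Returns 1 if a is divisible by d, 0 if not, -1 on library error. */
    static int
    check_divis(mp_int *a, mp_digit d)
    {
        mp_err res = mpp_divis_d(a, d);
        if (res == MP_YES)
            return 1; /* divisible */
        if (res == MP_NO)
            return 0; /* not divisible */
        return -1;    /* a real error such as MP_BADARG */
    }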
+/* {{{ mpp_random(a) */
+
+/*
+  mpp_random(a)
+
+  Assigns a random value to a. This value is generated using the
+  standard C library's rand() function, so it should not be used for
+  cryptographic purposes, but it should be fine for primality testing,
+  since all we really care about there are good statistical properties.
+
+  As many digits as a currently has are filled with random digits.
+ */
+
+mp_err
+mpp_random(mp_int *a)
+{
+    mp_digit next = 0;
+    unsigned int ix, jx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        for (jx = 0; jx < sizeof(mp_digit); jx++) {
+            next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+        }
+        DIGIT(a, ix) = next;
+    }
+
+    return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+static mpp_random_fn mpp_random_insecure = &mpp_random;
+
+/* {{{ mpp_random_size(a, prec) */
+
+mp_err
+mpp_random_size(mp_int *a, mp_size prec)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+    if ((res = s_mp_pad(a, prec)) != MP_OKAY)
+        return res;
+
+    return (*mpp_random_insecure)(a);
+
+} /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+  mpp_divis_vector(a, vec, size, which)
+
+  Determines if a is divisible by any of the 'size' digits in vec.
+  Returns MP_YES and sets 'which' to the index of the offending digit,
+  if it is; returns MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+
+    return s_mpp_divp(a, vec, size, which);
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+  mpp_divis_primes(a, np)
+
+  Test whether a is divisible by any of the first 'np' primes. If it
+  is, returns MP_YES and sets *np to the value of the digit that did
+  it. If not, returns MP_NO.
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+    int size, which;
+    mp_err res;
+
+    ARGCHK(a != NULL && np != NULL, MP_BADARG);
+
+    size = (int)*np;
+    if (size > prime_tab_size)
+        size = prime_tab_size;
+
+    res = mpp_divis_vector(a, prime_tab, size, &which);
+    if (res == MP_YES)
+        *np = prime_tab[which];
+
+    return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+  Using w as a witness, try pseudo-primality testing based on Fermat's
+  little theorem. If a is prime, and (w, a) = 1, then w^a == w (mod
+  a). So, we compute z = w^a (mod a) and compare z to w; if they are
+  equal, the test passes and we return MP_YES. Otherwise, we return
+  MP_NO.
+ */
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+    mp_int base, test;
+    mp_err res;
+
+    if ((res = mp_init(&base)) != MP_OKAY)
+        return res;
+
+    mp_set(&base, w);
+
+    if ((res = mp_init(&test)) != MP_OKAY)
+        goto TEST;
+
+    /* Compute test = base^a (mod a) */
+    if ((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
+        goto CLEANUP;
+
+    if (mp_cmp(&base, &test) == 0)
+        res = MP_YES;
+    else
+        res = MP_NO;
+
+CLEANUP:
+    mp_clear(&test);
+TEST:
+    mp_clear(&base);
+
+    return res;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+  Perform the Fermat test on each of the primes in a list until
+  a) one of them shows a is not prime, or
+  b) the list is exhausted.
+  Returns: MP_YES if it passes tests.
+           MP_NO if the Fermat test reveals it is composite
+           Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+    mp_err rv = MP_YES;
+
+    while (nPrimes-- > 0 && rv == MP_YES) {
+        rv = mpp_fermat(a, *primes++);
+    }
+    return rv;
+}
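For intuition on why the Fermat test alone is not conclusive: 341 = 11 * 31 is composite, yet 2^341 == 2 (mod 341), so mpp_fermat(a, 2) would pass it. A self-contained sketch in plain 64-bit arithmetic (powmod is an illustrative helper, not part of this patch; the small operands here cannot overflow):

    #include <stdio.h>
    #include <stdint.h>

    /* square-and-multiply b^e mod m; fine for the small moduli used here */
    static uint64_t powmod(uint64_t b, uint64_t e, uint64_t m)
    {
        uint64_t r = 1 % m;
        b %= m;
        while (e) {
            if (e & 1)
                r = (r * b) % m;
            b = (b * b) % m;
            e >>= 1;
        }
        return r;
    }

    int main(void)
    {
        printf("2^341 mod 341 = %llu\n",
               (unsigned long long)powmod(2, 341, 341));
        /* prints 2: 341 is a base-2 Fermat pseudoprime, which is why
           mpp_make_prime follows the Fermat test with Miller-Rabin */
        return 0;
    }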
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+  mpp_pprime(a, nt)
+
+  Performs nt iterations of the Miller-Rabin probabilistic primality
+  test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
+  If MP_NO is returned, the number is definitely composite. If MP_YES
+  is returned, it is probably prime (but that is not guaranteed).
+ */
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+    return mpp_pprime_ext_random(a, nt, mpp_random_insecure);
+}
+
+mp_err
+mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random)
+{
+    mp_err res;
+    mp_int x, amo, m, z; /* "amo" = "a minus one" */
+    int iter;
+    unsigned int jx;
+    mp_size b;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&amo) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&z) = 0;
+
+    /* Initialize temporaries... */
+    MP_CHECKOK(mp_init(&amo));
+    /* Compute amo = a - 1 for what follows... */
+    MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+    b = mp_trailing_zeros(&amo);
+    if (!b) { /* a was even? */
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+    MP_CHECKOK(mp_init(&z));
+    MP_CHECKOK(mp_init(&m));
+    MP_CHECKOK(mp_div_2d(&amo, b, &m, 0));
+
+    /* Do the test nt times... */
+    for (iter = 0; iter < nt; iter++) {
+
+        /* Choose a random value for 1 < x < a */
+        MP_CHECKOK(s_mp_pad(&x, USED(a)));
+        MP_CHECKOK((*random)(&x));
+        MP_CHECKOK(mp_mod(&x, a, &x));
+        if (mp_cmp_d(&x, 1) <= 0) {
+            iter--;   /* don't count this iteration */
+            continue; /* choose a new x */
+        }
+
+        /* Compute z = (x ** m) mod a */
+        MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+        if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+            res = MP_YES;
+            continue;
+        }
+
+        res = MP_NO; /* just in case the following for loop never executes. */
+        for (jx = 1; jx < b; jx++) {
+            /* z = z^2 (mod a) */
+            MP_CHECKOK(mp_sqrmod(&z, a, &z));
+            res = MP_NO; /* previous line set res to MP_YES */
+
+            if (mp_cmp_d(&z, 1) == 0) {
+                break;
+            }
+            if (mp_cmp(&z, &amo) == 0) {
+                res = MP_YES;
+                break;
+            }
+        } /* end testing loop */
+
+        /* If the test passes, we will continue iterating, but a failed
+           test means the candidate is definitely NOT prime, so we will
+           immediately break out of this loop
+         */
+        if (res == MP_NO)
+            break;
+
+    } /* end iterations loop */
+
+CLEANUP:
+    mp_clear(&m);
+    mp_clear(&z);
+    mp_clear(&x);
+    mp_clear(&amo);
+    return res;
+
+} /* end mpp_pprime() */
+
+/* }}} */
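A miniature of one Miller-Rabin round as performed above, using 64-bit integers small enough not to overflow (mr_round and powmod are illustrative names, not part of this patch): write a - 1 = 2^b * m with m odd, compute z = x^m mod a, then square z up to b - 1 times looking for a - 1.

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t powmod(uint64_t x, uint64_t e, uint64_t a)
    {
        uint64_t r = 1 % a;
        x %= a;
        while (e) {
            if (e & 1)
                r = (r * x) % a;
            x = (x * x) % a;
            e >>= 1;
        }
        return r;
    }

    /* One Miller-Rabin round; returns 1 if a looks prime to witness x. */
    static int mr_round(uint64_t a, uint64_t x)
    {
        uint64_t m = a - 1, z;
        unsigned b = 0;
        while ((m & 1) == 0) { /* mp_trailing_zeros + mp_div_2d above */
            m >>= 1;
            b++;
        }
        z = powmod(x, m, a);
        if (z == 1 || z == a - 1)
            return 1;
        while (--b) {
            z = (z * z) % a; /* mp_sqrmod in the real code */
            if (z == a - 1)
                return 1;
            if (z == 1)
                return 0; /* nontrivial square root of 1: composite */
        }
        return 0;
    }

    int main(void)
    {
        printf("341: %d, 347: %d\n", mr_round(341, 2), mr_round(347, 2));
        /* prints "341: 0, 347: 1" -- Miller-Rabin catches the base-2
           Fermat pseudoprime 341, and passes the prime 347 */
        return 0;
    }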
+/* Produce table of composites from list of primes and trial value.
+** trial must be odd. List of primes must not include 2.
+** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
+** prime in list of primes. After this function is finished,
+** if sieve[i] is non-zero, then (trial + 2*i) is composite.
+** Each prime used in the sieve costs one division of trial, and eliminates
+** one or more values from the search space. (3 eliminates 1/3 of the values
+** alone!) Each value left in the search space costs 1 or more modular
+** exponentiations. So, these divisions are a bargain!
+*/
+mp_err
+mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+          unsigned char *sieve, mp_size nSieve)
+{
+    mp_err res;
+    mp_digit rem;
+    mp_size ix;
+    unsigned long offset;
+
+    memset(sieve, 0, nSieve);
+
+    for (ix = 0; ix < nPrimes; ix++) {
+        mp_digit prime = primes[ix];
+        mp_size i;
+        if ((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY)
+            return res;
+
+        if (rem == 0) {
+            offset = 0;
+        } else {
+            offset = prime - rem;
+        }
+
+        for (i = offset; i < nSieve * 2; i += prime) {
+            if (i % 2 == 0) {
+                sieve[i / 2] = 1;
+            }
+        }
+    }
+
+    return MP_OKAY;
+}
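To see what the sieve buys, here is a self-contained model with small numbers (all names illustrative, not part of this patch): after sieving the odd trial value 101 against a few small primes, the unmarked slots are exactly the candidates with no small factor, and only those need a modular exponentiation.

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        static const unsigned primes[] = { 3, 5, 7, 11, 13 };
        unsigned char sieve[16]; /* slot i stands for trial + 2*i */
        unsigned trial = 101;    /* odd candidate, as mpp_sieve requires */
        memset(sieve, 0, sizeof sieve);

        for (size_t k = 0; k < sizeof primes / sizeof primes[0]; ++k) {
            unsigned p = primes[k];
            unsigned rem = trial % p;         /* one division per prime */
            unsigned off = rem ? p - rem : 0; /* first i with p | trial+i */
            for (unsigned i = off; i < 2 * sizeof sieve; i += p)
                if (i % 2 == 0) /* only odd candidates trial+2*(i/2) matter */
                    sieve[i / 2] = 1;
        }
        for (unsigned i = 0; i < sizeof sieve; ++i)
            if (!sieve[i])
                printf("%u survives trial division\n", trial + 2 * i);
        /* prints 101 103 107 109 113 127 131 -- all prime */
        return 0;
    }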
+#define SIEVE_SIZE 32 * 1024
+
+mp_err
+mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong)
+{
+    return mpp_make_prime_ext_random(start, nBits, strong, mpp_random_insecure);
+}
+
+mp_err
+mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random)
+{
+    mp_digit np;
+    mp_err res;
+    unsigned int i = 0;
+    mp_int trial;
+    mp_int q;
+    mp_size num_tests;
+    unsigned char *sieve;
+
+    ARGCHK(start != 0, MP_BADARG);
+    ARGCHK(nBits > 16, MP_RANGE);
+
+    sieve = malloc(SIEVE_SIZE);
+    ARGCHK(sieve != NULL, MP_MEM);
+
+    MP_DIGITS(&trial) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_CHECKOK(mp_init(&trial));
+    MP_CHECKOK(mp_init(&q));
+    /* values originally taken from table 4.4,
+     * Handbook of Applied Cryptography, augmented by FIPS-186
+     * requirements, Table C.2 and C.3 */
+    if (nBits >= 2000) {
+        num_tests = 3;
+    } else if (nBits >= 1536) {
+        num_tests = 4;
+    } else if (nBits >= 1024) {
+        num_tests = 5;
+    } else if (nBits >= 550) {
+        num_tests = 6;
+    } else if (nBits >= 450) {
+        num_tests = 7;
+    } else if (nBits >= 400) {
+        num_tests = 8;
+    } else if (nBits >= 350) {
+        num_tests = 9;
+    } else if (nBits >= 300) {
+        num_tests = 10;
+    } else if (nBits >= 250) {
+        num_tests = 20;
+    } else if (nBits >= 200) {
+        num_tests = 41;
+    } else if (nBits >= 100) {
+        num_tests = 38; /* funny anomaly in the FIPS tables: for aux primes,
+                         * they required more iterations for larger aux primes */
+    } else
+        num_tests = 50;
+
+    if (strong)
+        --nBits;
+    MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+    MP_CHECKOK(mpl_set_bit(start, 0, 1));
+    for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+        MP_CHECKOK(mpl_set_bit(start, i, 0));
+    }
+    /* start sieving with prime value of 3. */
+    MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+                         sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+    res = 0;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (!sieve[i])
+            ++res;
+    }
+    fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+    res = MP_NO;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (sieve[i]) /* this number is composite */
+            continue;
+        MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+        FPUTC('.', stderr);
+        /* run a Fermat test */
+        res = mpp_fermat(&trial, 2);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        FPUTC('+', stderr);
+        /* If that passed, run some Miller-Rabin tests */
+        res = mpp_pprime_ext_random(&trial, num_tests, random);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+        FPUTC('!', stderr);
+
+        if (!strong)
+            break; /* success !! */
+
+        /* At this point, we have strong evidence that our candidate
+           is itself prime. If we want a strong prime, we need now
+           to test q = 2p + 1 for primality...
+         */
+        MP_CHECKOK(mp_mul_2(&trial, &q));
+        MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+        /* Test q for small prime divisors ... */
+        np = prime_tab_size;
+        res = mpp_divis_primes(&q, &np);
+        if (res == MP_YES) { /* is composite */
+            mp_clear(&q);
+            continue;
+        }
+        if (res != MP_NO)
+            goto CLEANUP;
+
+        /* And test with Fermat, as with its parent ... */
+        res = mpp_fermat(&q, 2);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* And test with Miller-Rabin, as with its parent ... */
+        res = mpp_pprime_ext_random(&q, num_tests, random);
+        if (res != MP_YES) {
+            mp_clear(&q);
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* If it passed, we've got a winner */
+        mp_exch(&q, &trial);
+        mp_clear(&q);
+        break;
+
+    } /* end of loop through sieved values */
+    if (res == MP_YES)
+        mp_exch(&trial, start);
+CLEANUP:
+    mp_clear(&trial);
+    mp_clear(&q);
+    if (sieve != NULL) {
+        memset(sieve, 0, SIEVE_SIZE);
+        free(sieve);
+    }
+    return res;
+}
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+  Test for divisibility by members of a vector of digits. Returns
+  MP_NO if a is not divisible by any of them; returns MP_YES and sets
+  'which' to the index of the offender, if it is. Will stop on the
+  first digit against which a is divisible.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    mp_err res;
+    mp_digit rem;
+
+    int ix;
+
+    for (ix = 0; ix < size; ix++) {
+        if ((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
+            return res;
+
+        if (rem == 0) {
+            if (which)
+                *which = ix;
+            return MP_YES;
+        }
+    }
+
+    return MP_NO;
+
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 0000000000..0bdc6598ce
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,48 @@
+/*
+ * mpprime.h
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _H_MP_PRIME_ +#define _H_MP_PRIME_ + +#include "mpi.h" + +SEC_BEGIN_PROTOS + +extern const int prime_tab_size; /* number of primes available */ +extern const mp_digit prime_tab[]; + +/* Tests for divisibility */ +mp_err mpp_divis(mp_int *a, mp_int *b); +mp_err mpp_divis_d(mp_int *a, mp_digit d); + +/* Random selection */ +mp_err mpp_random(mp_int *a); +mp_err mpp_random_size(mp_int *a, mp_size prec); + +/* Type for a pointer to a user-provided mpp_random implementation */ +typedef mp_err (*mpp_random_fn)(mp_int *); + +/* Pseudo-primality testing */ +mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which); +mp_err mpp_divis_primes(mp_int *a, mp_digit *np); +mp_err mpp_fermat(mp_int *a, mp_digit w); +mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes); +mp_err mpp_pprime(mp_int *a, int nt); +mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes, + unsigned char *sieve, mp_size nSieve); +mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong); + +/* Pseudo-primality tests using a user-provided mpp_random implementation */ +mp_err mpp_pprime_ext_random(mp_int *a, int nt, mpp_random_fn random); +mp_err mpp_make_prime_ext_random(mp_int *start, mp_size nBits, mp_size strong, mpp_random_fn random); + +SEC_END_PROTOS + +#endif /* end _H_MP_PRIME_ */ diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c new file mode 100644 index 0000000000..423311b65b --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparc.c @@ -0,0 +1,221 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "vis_proto.h" + +/***************************************************************/ + +typedef int t_s32; +typedef unsigned int t_u32; +#if defined(__sparcv9) +typedef long t_s64; +typedef unsigned long t_u64; +#else +typedef long long t_s64; +typedef unsigned long long t_u64; +#endif +typedef double t_d64; + +/***************************************************************/ + +typedef union { + t_d64 d64; + struct { + t_s32 i0; + t_s32 i1; + } i32s; +} d64_2_i32; + +/***************************************************************/ + +#define BUFF_SIZE 256 + +#define A_BITS 19 +#define A_MASK ((1 << A_BITS) - 1) + +/***************************************************************/ + +static t_u64 mask_cnst[] = { + 0x8000000080000000ull +}; + +/***************************************************************/ + +#define DEF_VARS(N) \ + t_d64 *py = (t_d64 *)y; \ + t_d64 mask = *((t_d64 *)mask_cnst); \ + t_d64 ca = (1u << 31) - 1; \ + t_d64 da = (t_d64)a; \ + t_s64 buff[N], s; \ + d64_2_i32 dy + +/***************************************************************/ + +#define MUL_U32_S64_2(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \ + buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da + +#define MUL_U32_S64_2_D(i) \ + dy.d64 = vis_fxnor(mask, py[i]); \ + d0 = ca - (t_d64)dy.i32s.i0; \ + d1 = ca - (t_d64)dy.i32s.i1; \ + buff[4 * (i)] = (t_s64)(d0 * da); \ + buff[4 * (i) + 1] = (t_s64)(d0 * db); \ + buff[4 * (i) + 2] = (t_s64)(d1 * da); \ + buff[4 * (i) + 3] = (t_s64)(d1 * db) + +/***************************************************************/ + +#define ADD_S64_U32(i) \ + s = buff[i] + x[i] + c; \ + z[i] = s; \ + c = (s >> 32) + +#define ADD_S64_U32_D(i) \ + s = buff[2 * (i)] + (((t_s64)(buff[2 
* (i) + 1])) << A_BITS) + x[i] + uc; \ + z[i] = s; \ + uc = ((t_u64)s >> 32) + +/***************************************************************/ + +#define MUL_U32_S64_8(i) \ + MUL_U32_S64_2(i); \ + MUL_U32_S64_2(i + 1); \ + MUL_U32_S64_2(i + 2); \ + MUL_U32_S64_2(i + 3) + +#define MUL_U32_S64_D_8(i) \ + MUL_U32_S64_2_D(i); \ + MUL_U32_S64_2_D(i + 1); \ + MUL_U32_S64_2_D(i + 2); \ + MUL_U32_S64_2_D(i + 3) + +/***************************************************************/ + +#define ADD_S64_U32_8(i) \ + ADD_S64_U32(i); \ + ADD_S64_U32(i + 1); \ + ADD_S64_U32(i + 2); \ + ADD_S64_U32(i + 3); \ + ADD_S64_U32(i + 4); \ + ADD_S64_U32(i + 5); \ + ADD_S64_U32(i + 6); \ + ADD_S64_U32(i + 7) + +#define ADD_S64_U32_D_8(i) \ + ADD_S64_U32_D(i); \ + ADD_S64_U32_D(i + 1); \ + ADD_S64_U32_D(i + 2); \ + ADD_S64_U32_D(i + 3); \ + ADD_S64_U32_D(i + 4); \ + ADD_S64_U32_D(i + 5); \ + ADD_S64_U32_D(i + 6); \ + ADD_S64_U32_D(i + 7) + +/***************************************************************/ + +t_u32 +mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + if (a < (1 << A_BITS)) { + + if (n == 8) { + DEF_VARS(8); + t_s32 c = 0; + + MUL_U32_S64_8(0); + ADD_S64_U32_8(0); + + return c; + + } else if (n == 16) { + DEF_VARS(16); + t_s32 c = 0; + + MUL_U32_S64_8(0); + MUL_U32_S64_8(4); + ADD_S64_U32_8(0); + ADD_S64_U32_8(8); + + return c; + + } else { + DEF_VARS(BUFF_SIZE); + t_s32 i, c = 0; + +#pragma pipeloop(0) + for (i = 0; i < (n + 1) / 2; i++) { + MUL_U32_S64_2(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32(i); + } + + return c; + } + } else { + + if (n == 8) { + DEF_VARS(2 * 8); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + ADD_S64_U32_D_8(0); + + return uc; + + } else if (n == 16) { + DEF_VARS(2 * 16); + t_d64 d0, d1, db; + t_u32 uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + + MUL_U32_S64_D_8(0); + MUL_U32_S64_D_8(4); + ADD_S64_U32_D_8(0); + ADD_S64_U32_D_8(8); + + return uc; + + } else { + DEF_VARS(2 * BUFF_SIZE); + t_d64 d0, d1, db; + t_u32 i, uc = 0; + + da = (t_d64)(a & A_MASK); + db = (t_d64)(a >> A_BITS); + +#pragma pipeloop(0) + for (i = 0; i < (n + 1) / 2; i++) { + MUL_U32_S64_2_D(i); + } + +#pragma pipeloop(0) + for (i = 0; i < n; i++) { + ADD_S64_U32_D(i); + } + + return uc; + } + } +} + +/***************************************************************/ + +t_u32 +mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a) +{ + return mul_add(x, x, y, n, a); +} + +/***************************************************************/ diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s new file mode 100644 index 0000000000..66122a1d9d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s @@ -0,0 +1,1607 @@ +! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs. +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr +/* 000000 3 ( 0 0) */ .file "mpv_sparc.c" +/* 000000 14 ( 0 0) */ .align 8 +! +! SUBROUTINE .L_const_seg_900000106 +! +! 
OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */ +/* 000000 19 ( 0 0) */ .word 1127219200,0 +/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304 +/* 0x0010 21 ( 0 0) */ .align 16 +/* 0x0010 27 ( 0 0) */ .global mul_add + +! +! ENTRY mul_add +! + + .global mul_add + mul_add: /* frequency 1.0 confidence 0.0 */ +/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1 +/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2 +/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1 +/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2 +/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp + +! +! ENTRY .L900000154 +! + + .L900000154: /* frequency 1.0 confidence 0.0 */ +/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = ! Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3 +/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1 +/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84] +/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3 +/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3 +/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2 +/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2 +/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4 +/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0 +/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5 +/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3 +/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0 +/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0 +/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0068 ( 7 8) */ subcc %o2,8,%g0 +/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! tprob=0.50 +/* 0x0070 ( 8 12) */ ldd [%o0],%f8 +/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4 +/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6 +/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10 +/* 0x0080 198 (11 14) */ fmovs %f8,%f12 +/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4 +/* 0x0088 200 (12 14) */ ld [%fp+84],%f13 +/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14 +/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6 +/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16 +/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10 +/* 0x009c 208 (15 17) */ ld [%i1],%g2 +/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8 +/* 0x00a4 210 (16 21) */ fitod %f4,%f18 +/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3 +/* 0x00ac 212 (17 22) */ fitod %f5,%f4 +/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4 +/* 0x00b4 214 (18 23) */ fitod %f6,%f20 +/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5 +/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0 +/* 0x00c0 217 (19 24) */ fitod %f7,%f6 +/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1 +/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16 +/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12 +/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2 +/* 0x00d4 222 (22 27) */ fsubd %f14,%f4,%f4 +/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3 +/* 0x00dc 224 (23 28) */ fitod %f10,%f18 +/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20 +/* 0x00e4 226 (25 30) */ fitod %f11,%f10 +/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6 +/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12 +/* 0x00f0 229 (27 32) */ fitod %f16,%f22 +/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4 +/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18 +/* 0x00fc 232 (29 34) */ fitod %f17,%f16 +/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20 +/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10 +/* 0x0108 235 (31 36) */ fdtox %f12,%f12 +/* 0x010c 236 (31 32) */ std %f12,[%sp+152] +/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6 +/* 0x0114 238 (32 37) */ fdtox %f4,%f4 +/* 0x0118 239 (32 33) */ std %f4,[%sp+144] +/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4 
+/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12 +/* 0x0124 242 (34 39) */ fdtox %f20,%f18 +/* 0x0128 243 (34 35) */ std %f18,[%sp+136] +/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4 +/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14 +/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10 +/* 0x0138 247 (36 41) */ fdtox %f6,%f6 +/* 0x013c 248 (36 37) */ std %f6,[%sp+128] +/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5 +/* 0x0144 250 (37 38) */ add %o4,%g2,%o4 +/* 0x0148 251 (38 39) */ st %o4,[%i0] +/* 0x014c 252 (38 39) */ srax %o4,32,%g2 +/* 0x0150 253 (38 43) */ fdtox %f12,%f6 +/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4 +/* 0x0158 255 (39 40) */ std %f6,[%sp+120] +/* 0x015c 256 (39 40) */ add %o5,%g3,%g3 +/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7 +/* 0x0164 258 (40 41) */ add %g3,%g2,%g2 +/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6 +/* 0x016c 260 (40 45) */ fdtox %f10,%f8 +/* 0x0170 261 (41 42) */ std %f8,[%sp+112] +/* 0x0174 262 (41 42) */ srax %g2,32,%o5 +/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3 +/* 0x017c 264 (42 43) */ add %o7,%g4,%g4 +/* 0x0180 265 (43 44) */ st %g2,[%i0+4] +/* 0x0184 266 (43 44) */ add %g4,%o5,%g4 +/* 0x0188 267 (43 48) */ fdtox %f4,%f4 +/* 0x018c 268 (44 46) */ ldx [%sp+120],%o5 +/* 0x0190 269 (44 45) */ add %g3,%g5,%g3 +/* 0x0194 270 (44 45) */ srax %g4,32,%g5 +/* 0x0198 271 (45 46) */ std %f4,[%sp+104] +/* 0x019c 272 (45 46) */ add %g3,%g5,%g3 +/* 0x01a0 273 (45 50) */ fdtox %f6,%f4 +/* 0x01a4 274 (46 47) */ std %f4,[%sp+96] +/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0 +/* 0x01ac 276 (46 47) */ srax %g3,32,%o5 +/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5 +/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0 +/* 0x01b8 279 (48 49) */ st %g4,[%i0+8] +/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5 +/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1 +/* 0x01c4 282 (49 50) */ srax %o0,32,%g5 +/* 0x01c8 283 (50 51) */ st %o0,[%i0+16] +/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1 +/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5 +/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2 +/* 0x01d8 287 (51 52) */ srax %o1,32,%o5 +/* 0x01dc 288 (52 53) */ st %o1,[%i0+20] +/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2 +/* 0x01e4 290 (53 54) */ st %o2,[%i0+24] +/* 0x01e8 291 (53 54) */ srax %o2,32,%g4 +/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2 +/* 0x01f0 293 (54 55) */ st %g3,[%i0+12] +/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2 +/* 0x01f8 295 (55 56) */ st %g2,[%i0+28] +/* 0x01fc 299 (55 56) */ srax %g2,32,%o7 +/* 0x0200 300 (56 57) */ or %g0,%o7,%i0 +/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (59 61) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000037 +! + + .L77000037: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! 
tprob=0.50 +/* 0x0214 ( 1 5) */ ldd [%o0],%f8 +/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4 +/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6 +/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14 +/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12 +/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13 +/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4 +/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6 +/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16 +/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20 +/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8 +/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7 +/* 0x0248 332 ( 9 14) */ fitod %f4,%f18 +/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22 +/* 0x0250 334 (10 15) */ fitod %f5,%f4 +/* 0x0254 335 (11 12) */ stx %o7,[%sp+96] +/* 0x0258 336 (11 16) */ fitod %f6,%f24 +/* 0x025c 337 (12 14) */ ld [%i1+44],%o7 +/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10 +/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26 +/* 0x0268 340 (13 18) */ fitod %f7,%f6 +/* 0x026c 341 (14 15) */ stx %o7,[%sp+104] +/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18 +/* 0x0274 343 (15 17) */ ld [%i1+48],%o7 +/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4 +/* 0x027c 345 (16 18) */ ld [%i1+36],%o5 +/* 0x0280 346 (16 21) */ fitod %f10,%f28 +/* 0x0284 347 (17 18) */ stx %o7,[%sp+112] +/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16 +/* 0x028c 349 (18 20) */ ld [%i1],%g2 +/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24 +/* 0x0294 351 (19 20) */ stx %o5,[%sp+120] +/* 0x0298 352 (19 24) */ fitod %f11,%f10 +/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18 +/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5 +/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6 +/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4 +/* 0x02ac 357 (21 26) */ fitod %f16,%f30 +/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20 +/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3 +/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2 +/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28 +/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24 +/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128] +/* 0x02c8 364 (24 29) */ fdtox %f18,%f18 +/* 0x02cc 365 (25 26) */ std %f18,[%sp+272] +/* 0x02d0 366 (25 30) */ fitod %f17,%f16 +/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6 +/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10 +/* 0x02dc 369 (27 32) */ fitod %f20,%f18 +/* 0x02e0 370 (28 33) */ fdtox %f4,%f4 +/* 0x02e4 371 (28 29) */ std %f4,[%sp+264] +/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28 +/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4 +/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4 +/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22 +/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5 +/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0 +/* 0x0300 378 (31 36) */ fitod %f21,%f20 +/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10 +/* 0x0308 380 (32 34) */ ld [%i1+20],%o1 +/* 0x030c 381 (32 37) */ fdtox %f24,%f24 +/* 0x0310 382 (33 34) */ std %f24,[%sp+256] +/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16 +/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7 +/* 0x031c 385 (34 39) */ fdtox %f6,%f6 +/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4 +/* 0x0324 387 (35 36) */ std %f6,[%sp+248] +/* 0x0328 388 (35 40) */ fitod %f22,%f24 +/* 0x032c 389 (36 38) */ ld [%i1+32],%o4 +/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6 +/* 0x0334 391 (36 37) */ add %o7,%g2,%g2 +/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7 +/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26 +/* 0x0340 394 (37 38) */ srax %g2,32,%o5 +/* 0x0344 395 (38 39) */ st %g2,[%i0] +/* 0x0348 396 (38 43) */ fitod %f23,%f18 +/* 0x034c 397 (38 43) */ fmuld %f16,%f8,%f16 +/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2 +/* 0x0354 399 (39 44) */ fdtox %f28,%f22 +/* 0x0358 400 (39 40) */ add %o7,%g3,%g3 +/* 
0x035c 401 (40 42) */ ldx [%sp+256],%o7 +/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20 +/* 0x0364 403 (40 41) */ add %g3,%o5,%g3 +/* 0x0368 404 (41 42) */ std %f22,[%sp+240] +/* 0x036c 405 (41 46) */ fitod %f26,%f22 +/* 0x0370 406 (41 42) */ srax %g3,32,%o5 +/* 0x0374 407 (41 42) */ add %g2,%g5,%g2 +/* 0x0378 408 (42 43) */ st %g3,[%i0+4] +/* 0x037c 409 (42 47) */ fdtox %f10,%f10 +/* 0x0380 410 (42 43) */ add %o7,%g4,%g4 +/* 0x0384 411 (42 47) */ fmuld %f6,%f8,%f6 +/* 0x0388 412 (43 44) */ std %f10,[%sp+232] +/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12 +/* 0x0390 414 (43 44) */ add %g4,%o5,%g4 +/* 0x0394 415 (44 45) */ st %g4,[%i0+8] +/* 0x0398 416 (44 45) */ srax %g4,32,%o5 +/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10 +/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7 +/* 0x03a4 419 (45 50) */ fdtox %f4,%f4 +/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2 +/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20 +/* 0x03b0 422 (46 47) */ std %f4,[%sp+224] +/* 0x03b4 423 (46 47) */ srax %g2,32,%g5 +/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4 +/* 0x03bc 425 (47 48) */ st %g2,[%i0+12] +/* 0x03c0 426 (47 52) */ fitod %f27,%f24 +/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3 +/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5 +/* 0x03cc 429 (48 53) */ fdtox %f16,%f16 +/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2 +/* 0x03d4 431 (49 50) */ std %f16,[%sp+216] +/* 0x03d8 432 (49 50) */ srax %g2,32,%g4 +/* 0x03dc 433 (49 54) */ fitod %f12,%f18 +/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10 +/* 0x03e4 435 (50 51) */ st %g2,[%i0+16] +/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16 +/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2 +/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2 +/* 0x03f4 439 (51 56) */ fitod %f13,%f12 +/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2 +/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22 +/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3 +/* 0x0404 443 (52 53) */ srax %g2,32,%g4 +/* 0x0408 444 (52 57) */ fdtox %f6,%f6 +/* 0x040c 445 (53 54) */ std %f6,[%sp+208] +/* 0x0410 446 (53 58) */ fdtox %f20,%f6 +/* 0x0414 447 (54 55) */ stx %o4,[%sp+136] +/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4 +/* 0x041c 449 (55 56) */ std %f6,[%sp+200] +/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6 +/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16 +/* 0x0428 452 (56 57) */ st %g2,[%i0+20] +/* 0x042c 453 (56 57) */ add %g3,%o2,%g2 +/* 0x0430 454 (56 61) */ fdtox %f10,%f10 +/* 0x0434 455 (57 59) */ ld [%i1+28],%o3 +/* 0x0438 456 (57 58) */ add %g2,%g4,%g2 +/* 0x043c 457 (58 60) */ ldx [%sp+216],%g5 +/* 0x0440 458 (58 59) */ srax %g2,32,%g4 +/* 0x0444 459 (59 60) */ std %f10,[%sp+192] +/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10 +/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4 +/* 0x0450 462 (60 61) */ st %g2,[%i0+24] +/* 0x0454 463 (60 61) */ add %g5,%o3,%g2 +/* 0x0458 464 (60 65) */ fdtox %f22,%f12 +/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6 +/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0 +/* 0x0464 467 (61 62) */ add %g2,%g4,%g2 +/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3 +/* 0x046c 469 (62 63) */ srax %g2,32,%g4 +/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1 +/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5 +/* 0x0478 472 (64 65) */ add %g3,%o0,%g3 +/* 0x047c 473 (64 69) */ fdtox %f4,%f4 +/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8 +/* 0x0484 475 (65 66) */ std %f12,[%sp+184] +/* 0x0488 476 (65 66) */ add %g3,%g4,%g3 +/* 0x048c 477 (65 70) */ fdtox %f16,%f12 +/* 0x0490 478 (66 67) */ std %f12,[%sp+176] +/* 0x0494 479 (66 67) */ srax %g3,32,%o0 +/* 0x0498 480 (66 67) */ add %g5,%o1,%g5 +/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2 +/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5 +/* 0x04a4 483 (68 
70) */ ldx [%sp+96],%g4 +/* 0x04a8 484 (68 69) */ srax %g5,32,%o1 +/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4 +/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0 +/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4 +/* 0x04b8 488 (71 72) */ std %f4,[%sp+168] +/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4 +/* 0x04c0 490 (71 76) */ fdtox %f6,%f4 +/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3 +/* 0x04c8 492 (72 73) */ srax %g4,32,%o2 +/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1 +/* 0x04d0 494 (74 75) */ std %f4,[%sp+160] +/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0 +/* 0x04d8 496 (74 79) */ fdtox %f8,%f4 +/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5 +/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0 +/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144] +/* 0x04e8 500 (77 78) */ st %g2,[%i0+28] +/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2 +/* 0x04f0 502 (77 78) */ srax %o0,32,%o1 +/* 0x04f4 503 (78 79) */ std %f4,[%sp+152] +/* 0x04f8 504 (78 79) */ add %g2,%o1,%o1 +/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7 +/* 0x0500 506 (79 80) */ srax %o1,32,%o3 +/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2 +/* 0x0508 508 (81 83) */ ld [%i1+60],%o4 +/* 0x050c 509 (82 83) */ add %o7,%o2,%o2 +/* 0x0510 510 (83 84) */ add %o2,%o3,%o2 +/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5 +/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2 +/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3 +/* 0x0520 514 (86 87) */ st %g3,[%i0+32] +/* 0x0524 515 (86 87) */ add %g2,%o5,%g2 +/* 0x0528 516 (86 87) */ srax %o2,32,%o5 +/* 0x052c 517 (87 88) */ st %g5,[%i0+36] +/* 0x0530 518 (87 88) */ add %g2,%o5,%g2 +/* 0x0534 519 (87 88) */ add %o3,%o4,%g3 +/* 0x0538 520 (88 89) */ st %o0,[%i0+44] +/* 0x053c 521 (88 89) */ srax %g2,32,%g5 +/* 0x0540 522 (89 90) */ st %o1,[%i0+48] +/* 0x0544 523 (89 90) */ add %g3,%g5,%g3 +/* 0x0548 524 (90 91) */ st %o2,[%i0+52] +/* 0x054c 528 (90 91) */ srax %g3,32,%o7 +/* 0x0550 529 (91 92) */ st %g4,[%i0+40] +/* 0x0554 530 (92 93) */ st %g2,[%i0+56] +/* 0x0558 531 (93 94) */ st %g3,[%i0+60] +/* 0x055c 532 (93 94) */ or %g0,%o7,%i0 +/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0564 (96 98) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000076 +! + + .L77000076: /* frequency 1.0 confidence 0.0 */ +/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6 +/* 0x056c 546 ( 0 1) */ add %o2,1,%g2 +/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14 +/* 0x0574 548 ( 0 1) */ or %g0,0,%o7 +/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9 +/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3 +/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5 +/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18 +/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4 +/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8 +/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1 +/* 0x0598 557 ( 3 4) */ or %g0,0,%g2 +/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0 +/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2 +/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0 +/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16 +/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50 +/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0 +/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0 +/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! 
tprob=0.50 +/* 0x05bc ( 7 8) */ sub %o1,2,%o1 +/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2 +/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4 +/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2 +/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0 +/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2 +/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12 +/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6 +/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10 +/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8 +/* 0x05e4 579 (15 20) */ fitod %f3,%f0 +/* 0x05e8 580 (16 21) */ fitod %f2,%f4 +/* 0x05ec 581 (17 22) */ fitod %f7,%f2 +/* 0x05f0 582 (18 23) */ fitod %f6,%f6 +/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0 +/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4 + +! +! ENTRY .L900000149 +! + + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22 +/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4 +/* 0x0604 588 ( 0 1) */ add %g2,2,%g2 +/* 0x0608 589 ( 0 1) */ add %o4,32,%o4 +/* 0x060c 590 ( 1 6) */ fitod %f9,%f24 +/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20 +/* 0x0614 592 ( 1 2) */ add %o0,8,%o0 +/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0 +/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12 +/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0 +/* 0x0624 596 ( 2 3) */ add %o5,32,%o5 +/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2 +/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4 +/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6 +/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32] +/* 0x0638 601 ( 7 12) */ fitod %f8,%f4 +/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32] +/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8 +/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6 +/* 0x0648 605 ( 9 14) */ fitod %f23,%f2 +/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20 +/* 0x0650 607 ( 9 10) */ add %o0,8,%o0 +/* 0x0654 608 (10 14) */ ldd [%o0],%f10 +/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0 +/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4 +/* 0x0660 611 (13 18) */ fdtox %f6,%f6 +/* 0x0664 612 (14 19) */ fdtox %f20,%f20 +/* 0x0668 613 (14 15) */ std %f6,[%o5-16] +/* 0x066c 614 (15 20) */ fitod %f22,%f6 +/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50 +/* 0x0674 (15 16) */ std %f20,[%o4-16] + +! +! ENTRY .L900000152 +! 
+ + .L900000152: /* frequency 1.0 confidence 0.0 */ +/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12 +/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22 +/* 0x0680 620 ( 0 1) */ add %o5,80,%o5 +/* 0x0684 621 ( 0 1) */ add %o4,80,%o4 +/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0 +/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24 +/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0694 625 ( 1 2) */ add %o0,8,%g5 +/* 0x0698 626 ( 2 7) */ fitod %f8,%f20 +/* 0x069c 627 ( 3 8) */ fitod %f9,%f8 +/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6 +/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26 +/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4 +/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12 +/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2 +/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0 +/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10 +/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20 +/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8 +/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6 +/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12 +/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4 +/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2 +/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0 +/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10 +/* 0x06dc 643 (16 21) */ fdtox %f24,%f24 +/* 0x06e0 644 (16 17) */ std %f24,[%o5-80] +/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8 +/* 0x06e8 646 (17 22) */ fdtox %f22,%f22 +/* 0x06ec 647 (17 18) */ std %f22,[%o4-80] +/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6 +/* 0x06f4 649 (18 23) */ fdtox %f20,%f20 +/* 0x06f8 650 (18 19) */ std %f20,[%o5-64] +/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4 +/* 0x0700 652 (19 24) */ fdtox %f12,%f12 +/* 0x0704 653 (19 20) */ std %f12,[%o4-64] +/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2 +/* 0x070c 655 (20 25) */ fdtox %f10,%f10 +/* 0x0710 656 (20 21) */ std %f10,[%o5-48] +/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0 +/* 0x0718 658 (21 26) */ fdtox %f8,%f8 +/* 0x071c 659 (21 22) */ std %f8,[%o4-48] +/* 0x0720 660 (22 27) */ fdtox %f6,%f6 +/* 0x0724 661 (22 23) */ std %f6,[%o5-32] +/* 0x0728 662 (23 28) */ fdtox %f4,%f4 +/* 0x072c 663 (23 24) */ std %f4,[%o4-32] +/* 0x0730 664 (24 29) */ fdtox %f2,%f2 +/* 0x0734 665 (24 25) */ std %f2,[%o5-16] +/* 0x0738 666 (25 30) */ fdtox %f0,%f0 +/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0740 (25 26) */ std %f0,[%o4-16] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0 + +! +! ENTRY .L900000160 +! + + .L900000160: /* frequency 1.0 confidence 0.0 */ +/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0 +/* 0x074c 673 ( 0 1) */ add %g2,1,%g2 +/* 0x0750 674 ( 0 1) */ add %g5,8,%g5 +/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0758 676 ( 4 9) */ fitod %f0,%f2 +/* 0x075c 677 ( 5 10) */ fitod %f1,%f0 +/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2 +/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0 +/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2 +/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0 +/* 0x0770 682 (19 24) */ fdtox %f2,%f2 +/* 0x0774 683 (19 20) */ std %f2,[%o5] +/* 0x0778 684 (19 20) */ add %o5,16,%o5 +/* 0x077c 685 (20 25) */ fdtox %f0,%f0 +/* 0x0780 686 (20 21) */ std %f0,[%o4] +/* 0x0784 687 (20 21) */ add %o4,16,%o4 +/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50 +/* 0x078c (23 27) */ ldd [%g5],%f0 + +! +! ENTRY .L77000043 +! + + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0 + +! +! ENTRY .L900000161 +! + + .L900000161: /* frequency 1.0 confidence 0.0 */ +/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! 
tprob=0.50 +/* 0x0798 ( 0 1) */ or %g0,%o7,%i0 +/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2 +/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3 +/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5 +/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4 +/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5 +/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2 +/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0 +/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4 +/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! tprob=0.50 +/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0 +/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1 +/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2 +/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4 +/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3 +/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3 +/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5 +/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0 +/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1 +/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4] +/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000145 +! + + .L900000145: /* frequency 1.0 confidence 0.0 */ +/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7 +/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2 +/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3 +/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0 +/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2 +/* 0x0800 729 ( 1 2) */ add %g4,3,%g4 +/* 0x0804 730 ( 2 3) */ st %o2,[%g2] +/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3 +/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0 +/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2 +/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96] +/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1 +/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2 +/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1 +/* 0x0824 738 ( 5 6) */ add %g2,12,%g2 +/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8] +/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7 +/* 0x0830 741 ( 6 7) */ add %g3,12,%g3 +/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3 +/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1 +/* 0x083c 744 (10 11) */ add %o0,%o1,%o0 +/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1 +/* 0x0844 746 (11 12) */ add %o0,%o7,%o0 +/* 0x0848 747 (11 12) */ add %g5,24,%g5 +/* 0x084c 748 (11 12) */ st %o0,[%g2-4] +/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50 +/* 0x0854 (12 13) */ srax %o0,32,%o0 + +! +! ENTRY .L900000148 +! + + .L900000148: /* frequency 1.0 confidence 0.0 */ +/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2 +/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3 +/* 0x0860 754 ( 0 2) */ ld [%g3],%o0 +/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3 +/* 0x0868 756 ( 1 2) */ add %g2,8,%g2 +/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2 +/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8] +/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0 +/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0 +/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4] +/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0 +/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50 +/* 0x0888 ( 4 5) */ srax %o0,32,%o7 +/* 0x088c 765 ( 4 5) */ add %g3,4,%g3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0890 767 ( 0 2) */ ld [%g3],%o2 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0 +/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1 +/* 0x089c 771 ( 0 1) */ add %g4,1,%g4 +/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3 +/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5 +/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0 +/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0 +/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0 +/* 0x08b4 777 ( 3 4) */ st %o0,[%g2] +/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2 +/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7 +/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50 +/* 0x08c4 ( 4 6) */ ld [%g3],%o2 + +! +! ENTRY .L77000047 +! 
+ + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0 +/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50 +/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4 +/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3 +/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240] +/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2 +/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8 +/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244] +/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4 +/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6 +/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18 +/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12 +/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16 +/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20 +/* 0x0910 819 ( 7 12) */ fitod %f4,%f14 +/* 0x0914 823 ( 8 10) */ ld [%i1],%g2 +/* 0x0918 824 ( 8 13) */ fitod %f5,%f4 +/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13 +/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6 +/* 0x0924 827 (10 12) */ ld [%sp+244],%f17 +/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10 +/* 0x092c 829 (11 13) */ ld [%i1+28],%o3 +/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20 +/* 0x0934 831 (12 14) */ ld [%i1+4],%g3 +/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12 +/* 0x093c 833 (13 14) */ stx %o3,[%sp+96] +/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14 +/* 0x0944 835 (14 16) */ ld [%i1+8],%g4 +/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8 +/* 0x094c 837 (15 17) */ ld [%i1+12],%g5 +/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4 +/* 0x0954 839 (16 18) */ ld [%i1+16],%o0 +/* 0x0958 840 (16 21) */ fitod %f6,%f22 +/* 0x095c 841 (17 19) */ ld [%i1+20],%o1 +/* 0x0960 842 (17 22) */ fitod %f7,%f6 +/* 0x0964 843 (18 20) */ ld [%i1+24],%o2 +/* 0x0968 844 (18 23) */ fitod %f10,%f16 +/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24 +/* 0x0970 846 (19 24) */ fitod %f20,%f28 +/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14 +/* 0x0978 848 (20 25) */ fitod %f11,%f10 +/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26 +/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22 +/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4 +/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6 +/* 0x098c 853 (23 28) */ fdtox %f24,%f24 +/* 0x0990 854 (23 24) */ std %f24,[%sp+224] +/* 0x0994 855 (24 29) */ fdtox %f14,%f14 +/* 0x0998 856 (24 25) */ std %f14,[%sp+232] +/* 0x099c 857 (25 30) */ fdtox %f26,%f14 +/* 0x09a0 858 (25 26) */ std %f14,[%sp+208] +/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4 +/* 0x09a8 860 (26 31) */ fitod %f21,%f20 +/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30 +/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5 +/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16 +/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22 +/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4 +/* 0x09c0 866 (28 33) */ fdtox %f4,%f4 +/* 0x09c4 867 (28 29) */ std %f4,[%sp+216] +/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24 +/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26 +/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4 +/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6 +/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10 +/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2 +/* 0x09e0 874 (30 31) */ st %g2,[%i0] +/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7 +/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5 +/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18 +/* 0x09f0 878 (32 37) */ fdtox %f30,%f28 +/* 0x09f4 879 (32 33) */ std %f28,[%sp+192] +/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14 +/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4 +/* 0x0a00 882 (33 35) */ 
ldx [%sp+216],%o7 +/* 0x0a04 883 (33 38) */ fdtox %f22,%f20 +/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16 +/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200] +/* 0x0a10 886 (34 39) */ fdtox %f24,%f20 +/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22 +/* 0x0a18 888 (35 36) */ std %f20,[%sp+176] +/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4 +/* 0x0a20 890 (35 40) */ fdtox %f6,%f6 +/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4 +/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3 +/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3 +/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10 +/* 0x0a34 895 (37 38) */ std %f6,[%sp+184] +/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3 +/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6 +/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20 +/* 0x0a44 899 (38 40) */ ldx [%sp+200],%o4 +/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3 +/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5 +/* 0x0a50 902 (38 43) */ fdtox %f16,%f14 +/* 0x0a54 903 (39 40) */ std %f6,[%sp+160] +/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12 +/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7 +/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3 +/* 0x0a64 907 (40 45) */ fdtox %f4,%f16 +/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18 +/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168] +/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4 +/* 0x0a74 911 (41 46) */ fdtox %f10,%f4 +/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3 +/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4 +/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4 +/* 0x0a84 915 (42 47) */ fdtox %f22,%f14 +/* 0x0a88 916 (43 44) */ std %f16,[%sp+144] +/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5 +/* 0x0a90 918 (43 48) */ fdtox %f20,%f6 +/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7 +/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3 +/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16 +/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152] +/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5 +/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8 +/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3 +/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4 +/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5 +/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128] +/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5 +/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136] +/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3 +/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112] +/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0 +/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7 +/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0 +/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3 +/* 0x0adc 937 (52 53) */ std %f8,[%sp+120] +/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4 +/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7 +/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104] +/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3 +/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5 +/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1 +/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0 +/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1 +/* 0x0b00 946 (56 57) */ st %g3,[%i0+4] +/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3 +/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4 +/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3 +/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0 +/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2 +/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3 +/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20] +/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2 +/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2 +/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4 +/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3 +/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2 +/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0 +/* 0x0b38 960 (62 63) */ st %o2,[%i0+24] +/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2 +/* 0x0b40 962 (63 64) */ st %o0,[%i0+16] +/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2 +/* 0x0b48 964 (64 65) */ st %g4,[%i0+8] +/* 0x0b4c 968 
(64 65) */ srlx %g2,32,%o7 +/* 0x0b50 969 (65 66) */ st %g5,[%i0+12] +/* 0x0b54 970 (66 67) */ st %g2,[%i0+28] +/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0 +/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0b60 (69 71) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50 +/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4 +/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6 +/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2 +/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3 +/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8 +/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4 +/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356] +/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20 +/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6 +/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352] +/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8 +/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30 +/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22 +/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10 +/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24 +/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12 +/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4 +/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14 +/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26 +/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16 +/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25 +/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28 +/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27 +/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32 +/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22 +/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18 +/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24 +/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20 +/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4 +/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26 +/* 0x0be8 1024 (19 24) */ fitod %f7,%f6 +/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28 +/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32 +/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22 +/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10 +/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44 +/* 0x0c00 1030 (22 27) */ fitod %f9,%f8 +/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4 +/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12 +/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6 +/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46 +/* 0x0c14 1035 (25 30) */ fitod %f10,%f34 +/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22 +/* 0x0c1c 1037 (26 27) */ std %f22,[%sp+336] +/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22 +/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26 +/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328] +/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4 +/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320] +/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26 +/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8 +/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4 +/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312] +/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4 +/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32 +/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344] +/* 0x0c50 1050 (32 37) */ fitod %f11,%f10 +/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32 +/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34 +/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6 +/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304] +/* 0x0c64 1058 (35 40) */ fitod %f12,%f36 +/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6 +/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22 +/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296] +/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22 +/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4 +/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280] +/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8 +/* 
0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10 +/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4 +/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12 +/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36 +/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6 +/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272] +/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14 +/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6 +/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22 +/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264] +/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8 +/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256] +/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22 +/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4 +/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248] +/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8 +/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4 +/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10 +/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38 +/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6 +/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240] +/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12 +/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232] +/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6 +/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8 +/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224] +/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22 +/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216] +/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8 +/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14 +/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4 +/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22 +/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16 +/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6 +/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208] +/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6 +/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200] +/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10 +/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40 +/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6 +/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4 +/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192] +/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16 +/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4 +/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18 +/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32 +/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288] +/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8 +/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6 +/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184] +/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22 +/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6 +/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4 +/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176] +/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10 +/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4 +/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168] +/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4 +/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160] +/* 0x0d70 1125 (69 74) */ fitod %f18,%f42 +/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4 +/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22 +/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6 +/* 0x0d80 1129 (72 77) */ fmuld %f10,%f20,%f8 +/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4 +/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152] +/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4 +/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6 +/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136] +/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22 +/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144] +/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22 +/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128] +/* 0x0da8 1139 (79 84) */ fitod %f19,%f22 +/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6 +/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4 +/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22 +/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6 +/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120] +/* 0x0dc0 1145 (86 
91) */ fdtox %f4,%f4 +/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112] +/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2 +/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3 +/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4 +/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2 +/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4 +/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5 +/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2 +/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6 +/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3 +/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4 +/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0 +/* 0x0df4 1161 (93 94) */ st %g4,[%i0] +/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2 +/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4 +/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3 +/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2 +/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4 +/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5 +/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6 +/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1 +/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2 +/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3 +/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2 +/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2 +/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3 +/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4 +/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0 +/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5 +/* 0x0e38 1178 (100 101) */ st %g2,[%i0+4] +/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2 +/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3 +/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2 +/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4 +/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3 +/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0 +/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1 +/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4 +/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5 +/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8] +/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3 +/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4 +/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0 +/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2 +/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4 +/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5 +/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1 +/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2 +/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2 +/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0 +/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3 +/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2 +/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5 +/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1 +/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12] +/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4 +/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3 +/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0 +/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4 +/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3 +/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2 +/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1 +/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5 +/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4 +/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16] +/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2 +/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4 +/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0 +/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1 +/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2 +/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2 +/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5 +/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2 +/* 0x0ee8 1225 (118 119) */ st %g3,[%i0+20] +/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3 +/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2 +/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0 +/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1 +/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3 
+/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2 +/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5 +/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3 +/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24] +/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4 +/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3 +/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0 +/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1 +/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4 +/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2 +/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5 +/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4 +/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28] +/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2 +/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4 +/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0 +/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1 +/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2 +/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2 +/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5 +/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2 +/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32] +/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3 +/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2 +/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0 +/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1 +/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3 +/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2 +/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5 +/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3 +/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36] +/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4 +/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3 +/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0 +/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1 +/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4 +/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2 +/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5 +/* 0x0f98 1269 (137 138) */ add %g4,%o0,%g4 +/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96] +/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4 +/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40] +/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2 +/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1 +/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0 +/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2 +/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104] +/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5 +/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2 +/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44] +/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2 +/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2 +/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1 +/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0 +/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48] +/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3 +/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2 +/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3 +/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5 +/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3 +/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0 +/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4 +/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3 +/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52] +/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1 +/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4 +/* 0x1008 1297 (151 152) */ st %g3,[%i0+56] +/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2 +/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2 +/* 0x1014 1300 (152 153) */ st %g2,[%i0+60] +/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ +/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0 + +! +! ENTRY .L900000159 +! + + .L900000159: /* frequency 1.0 confidence 0.0 */ +/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! 
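+! [editorial annotation, not part of the compiler listing] .L77000073
+! appears to be the general-length entry of the large-multiplier branch
+! (length neither 8 nor 16). It repacks the arguments and falls through
+! to .L77000052, which splits the multiplier into a low 19-bit half and
+! a high half so that every half-by-limb product fits exactly in a
+! double; %f18 and %f16 hold the two halves after conversion.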
+ + .L77000073: /* frequency 1.0 confidence 0.0 */ + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! + + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! 
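+! [annotation, not compiler output] .L990000154 is the software-pipelined
+! main loop: three limb pairs per iteration are converted to doubles
+! (fxnor with the 0x80000000 mask, fitod, then a subtract from the
+! constant double 2^31-1 recovers the unsigned 32-bit value exactly),
+! multiplied by both halves of the split multiplier (%f16/%f18), and
+! spilled as 64-bit fdtox results into four scratch streams for the
+! integer recombination pass below.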
+ + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
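+! [annotation, not compiler output] .L990000157 drains the pipeline: it
+! completes the conversions and multiplies still in flight from the loop
+! above and stores the final fdtox results before branching to the
+! cleanup code.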
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
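+! [annotation, not compiler output] .L990000161 is the scalar remainder
+! loop: one limb pair per pass (two fitod conversions, four
+! fmuld/fdtox/std sequences) until the pair count in %g4 reaches %o3.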
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
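+! [annotation, not compiler output] .L990000142 is the integer
+! recombination loop (four result words per iteration): the spilled
+! high-half product is reloaded and shifted left 19 bits, added to the
+! low-half product and to the corresponding word of the input array, the
+! 32-bit sum is stored, and the new carry is taken from bits 63:32 with
+! srlx.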
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! + + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! 
Result = %o1 %o0 %f0 %f1 +/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0 + +/* 0x11a8 1441 ( 0 0) */ .type mul_add,2 +/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add) +/* 0x11a8 1445 ( 0 0) */ .align 16 +/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp + +! +! ENTRY mul_add_inp +! + + .global mul_add_inp + mul_add_inp: /* frequency 1.0 confidence 0.0 */ +/* 0x11b0 1453 ( 0 1) */ or %g0,%o2,%g1 +/* 0x11b4 1454 ( 0 1) */ or %g0,%o3,%o4 +/* 0x11b8 1455 ( 1 2) */ or %g0,%o0,%g3 +/* 0x11bc 1456 ( 1 2) */ or %g0,%o1,%g2 +/* 0x11c0 1466 ( 2 3) */ or %g0,%g1,%o3 +/* 0x11c4 1467 ( 2 3) */ or %g0,%g3,%o1 +/* 0x11c8 1468 ( 3 4) */ or %g0,%g2,%o2 +/* 0x11cc 1469 ( 3 4) */ or %g0,%o7,%g1 +/* 0x11d0 1470 ( 4 6) */ call mul_add ! params = ! Result = +/* 0x11d4 ( 5 6) */ or %g0,%g1,%o7 +/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2 +/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) + + .section ".data",#alloc,#write +/* 0x11d8 6 ( 0 0) */ .align 8 + +! +! ENTRY mask_cnst +! + + mask_cnst: /* frequency 1.0 confidence 0.0 */ +/* 0x11d8 8 ( 0 0) */ .word -2147483648 +/* 0x11dc 9 ( 0 0) */ .word -2147483648 +/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object +/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8 + diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s new file mode 100644 index 0000000000..e2fbe0bd00 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s @@ -0,0 +1,1645 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .register %g2,#scratch +/* 000000 ( 0 0) */ .register %g3,#scratch +/* 000000 3 ( 0 0) */ .file "mpv_sparc.c" +/* 000000 15 ( 0 0) */ .align 8 +! +! SUBROUTINE .L_const_seg_900000101 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 */ +/* 000000 20 ( 0 0) */ .word 1127219200,0 +/* 0x0008 21 ( 0 0) */ .word 1105199103,-4194304 +/* 0x0010 22 ( 0 0) */ .align 8 +/* 0x0010 28 ( 0 0) */ .global mul_add + +! +! ENTRY mul_add +! + + .global mul_add + mul_add: /* frequency 1.0 confidence 0.0 */ +/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1 +/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2 +/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1 +/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2 +/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp + +! +! ENTRY .L900000149 +! + + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3 +/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3 +/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1 +/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4 +/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2 +/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2 +/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0 +/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0 +/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7 +/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5 +/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0 +/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0060 ( 7 8) */ subcc %i3,8,%g0 +/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! 
tprob=0.50 +/* 0x0068 ( 8 9) */ subcc %i3,16,%g0 +/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4 +/* 0x0070 195 (10 11) */ st %i4,[%sp+2287] +/* 0x0074 196 (11 14) */ ldd [%o0],%f8 +/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4 +/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10 +/* 0x0080 199 (13 16) */ fitod %f4,%f12 +/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14 +/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7 +/* 0x008c 202 (14 17) */ fitod %f5,%f4 +/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10 +/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16 +/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18 +/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4 +/* 0x00a0 210 (17 20) */ ld [%i1],%g2 +/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16 +/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3 +/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4 +/* 0x00b0 214 (20 23) */ fitod %f16,%f20 +/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0 +/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5 +/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1 +/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2 +/* 0x00c4 219 (24 25) */ fmovs %f8,%f6 +/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3 +/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6 +/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8 +/* 0x00d4 223 (28 31) */ fitod %f10,%f12 +/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4 +/* 0x00dc 225 (29 32) */ fitod %f11,%f10 +/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8 +/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12 +/* 0x00e8 228 (32 35) */ fdtox %f4,%f4 +/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271] +/* 0x00f0 230 (33 36) */ fdtox %f8,%f8 +/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279] +/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12 +/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10 +/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4 +/* 0x0104 235 (36 39) */ fitod %f17,%f8 +/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16 +/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4 +/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10 +/* 0x0114 239 (38 41) */ fdtox %f12,%f12 +/* 0x0118 240 (38 39) */ std %f12,[%sp+2263] +/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4 +/* 0x0120 242 (39 42) */ fitod %f16,%f18 +/* 0x0124 243 (39 40) */ add %o4,%g2,%g2 +/* 0x0128 244 (39 40) */ st %g2,[%i0] +/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4 +/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8 +/* 0x0134 247 (40 41) */ srax %g2,32,%o5 +/* 0x0138 248 (41 44) */ fdtox %f10,%f10 +/* 0x013c 249 (41 42) */ std %f10,[%sp+2255] +/* 0x0140 250 (42 45) */ fdtox %f4,%f4 +/* 0x0144 251 (42 43) */ std %f4,[%sp+2247] +/* 0x0148 252 (42 43) */ add %o4,%g3,%o4 +/* 0x014c 253 (43 46) */ fitod %f17,%f12 +/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2 +/* 0x0154 255 (43 44) */ add %o4,%o5,%g3 +/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8 +/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10 +/* 0x0160 258 (44 45) */ st %g3,[%i0+4] +/* 0x0164 259 (44 45) */ srax %g3,32,%g3 +/* 0x0168 260 (45 46) */ add %g2,%g4,%g4 +/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2 +/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4 +/* 0x0174 263 (46 47) */ add %g4,%g3,%g3 +/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4 +/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10 +/* 0x0180 266 (47 50) */ fdtox %f8,%f8 +/* 0x0184 267 (47 48) */ std %f8,[%sp+2239] +/* 0x0188 268 (48 49) */ add %g4,%o0,%g4 +/* 0x018c 269 (48 49) */ add %g2,%g5,%g2 +/* 0x0190 270 (48 49) */ st %g3,[%i0+8] +/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4 +/* 0x0198 272 (49 50) */ srax %g3,32,%o0 +/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5 +/* 0x01a0 274 (50 53) */ fdtox %f10,%f6 +/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231] +/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2 +/* 0x01ac 277 (51 52) */ srax %g2,32,%g3 +/* 0x01b0 
278 (51 52) */ add %g5,%o1,%o1 +/* 0x01b4 279 (51 52) */ st %g2,[%i0+12] +/* 0x01b8 280 (52 55) */ fdtox %f4,%f4 +/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223] +/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3 +/* 0x01c4 283 (53 54) */ srax %g3,32,%g4 +/* 0x01c8 284 (53 54) */ st %g3,[%i0+16] +/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0 +/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4 +/* 0x01d4 287 (55 56) */ srax %g4,32,%g2 +/* 0x01d8 288 (55 57) */ ldx [%sp+2223],%g5 +/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2 +/* 0x01e0 290 (56 57) */ st %g4,[%i0+20] +/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2 +/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5 +/* 0x01ec 293 (57 58) */ st %g2,[%i0+24] +/* 0x01f0 294 (58 59) */ srax %g2,32,%g3 +/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2 +/* 0x01f8 296 (59 60) */ st %g2,[%i0+28] +/* 0x01fc 300 (60 61) */ srax %g2,32,%o3 +/* 0x0200 301 (61 62) */ srl %o3,0,%i0 +/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (64 65) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50 +/* 0x0210 ( 0 1) */ st %i4,[%sp+2223] +/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4 +/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351] +/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8 +/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10 +/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14 +/* 0x022c 321 ( 5 8) */ fitod %f4,%f12 +/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7 +/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10 +/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16 +/* 0x023c 325 ( 7 10) */ fitod %f5,%f4 +/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18 +/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20 +/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16 +/* 0x024c 335 (10 13) */ ld [%i1],%g2 +/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4 +/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22 +/* 0x0258 338 (11 14) */ fitod %f16,%f28 +/* 0x025c 339 (12 15) */ ld [%i1+4],%g3 +/* 0x0260 340 (13 16) */ ld [%i1+8],%g4 +/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22 +/* 0x0268 342 (14 17) */ ld [%i1+12],%g5 +/* 0x026c 343 (15 18) */ ld [%i1+16],%o0 +/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24 +/* 0x0274 345 (17 20) */ ld [%i1+20],%o1 +/* 0x0278 346 (17 18) */ fmovs %f8,%f6 +/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26 +/* 0x0280 348 (19 22) */ ld [%i1+24],%o2 +/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6 +/* 0x0288 350 (20 23) */ ld [%i1+28],%o3 +/* 0x028c 351 (20 23) */ fsubd %f14,%f12,%f8 +/* 0x0290 355 (21 24) */ ld [%i1+32],%o4 +/* 0x0294 356 (21 24) */ fitod %f10,%f12 +/* 0x0298 357 (22 25) */ ld [%i1+36],%o7 +/* 0x029c 358 (22 25) */ fitod %f11,%f10 +/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4 +/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1 +/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8 +/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5 +/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12 +/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10 +/* 0x02b8 365 (26 29) */ fdtox %f8,%f8 +/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343] +/* 0x02c0 367 (27 30) */ fitod %f17,%f8 +/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12 +/* 0x02c8 369 (28 31) */ fdtox %f4,%f4 +/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335] +/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10 +/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16 +/* 0x02d8 373 (30 33) */ fdtox %f12,%f12 +/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327] +/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5 +/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8 +/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4 +/* 0x02ec 378 (33 36) */ fitod %f17,%f12 +/* 0x02f0 379 (33 34) */ add 
%o5,%g2,%g2 +/* 0x02f4 380 (33 34) */ st %g2,[%i0] +/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5 +/* 0x02fc 382 (34 37) */ fitod %f16,%f18 +/* 0x0300 383 (34 35) */ srax %g2,32,%l0 +/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16 +/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20 +/* 0x030c 386 (36 39) */ fdtox %f10,%f10 +/* 0x0310 387 (36 37) */ std %f10,[%sp+2319] +/* 0x0314 388 (36 37) */ add %o5,%g3,%g3 +/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4 +/* 0x031c 390 (37 40) */ fitod %f16,%f8 +/* 0x0320 391 (37 38) */ add %g3,%l0,%g3 +/* 0x0324 392 (37 38) */ st %g3,[%i0+4] +/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5 +/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18 +/* 0x0330 395 (38 39) */ srax %g3,32,%l3 +/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2 +/* 0x0338 397 (39 42) */ fdtox %f4,%f4 +/* 0x033c 398 (40 41) */ std %f4,[%sp+2311] +/* 0x0340 399 (40 43) */ fdtox %f20,%f20 +/* 0x0344 400 (40 41) */ add %o5,%g4,%g4 +/* 0x0348 401 (41 42) */ std %f20,[%sp+2303] +/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4 +/* 0x0350 403 (41 42) */ add %g4,%l3,%g4 +/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18 +/* 0x0358 405 (42 43) */ st %g4,[%i0+8] +/* 0x035c 406 (42 45) */ fitod %f17,%f16 +/* 0x0360 407 (42 43) */ srax %g4,32,%l4 +/* 0x0364 408 (43 46) */ ld [%i1+44],%l0 +/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20 +/* 0x036c 410 (43 44) */ add %l2,%g5,%l2 +/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5 +/* 0x0374 412 (44 47) */ fitod %f22,%f8 +/* 0x0378 413 (44 45) */ add %l2,%l4,%l2 +/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4 +/* 0x0380 415 (45 46) */ st %l2,[%i0+12] +/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10 +/* 0x0388 417 (46 49) */ ld [%i1+52],%l3 +/* 0x038c 418 (46 49) */ fdtox %f18,%f18 +/* 0x0390 419 (46 47) */ add %g5,%o0,%l4 +/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12 +/* 0x0398 421 (47 48) */ std %f18,[%sp+2295] +/* 0x039c 422 (47 48) */ srax %l2,32,%o0 +/* 0x03a0 423 (47 50) */ fitod %f23,%f16 +/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5 +/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8 +/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4 +/* 0x03b0 427 (49 50) */ st %l4,[%i0+16] +/* 0x03b4 428 (49 50) */ srax %l4,32,%o0 +/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18 +/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5 +/* 0x03c0 431 (50 53) */ fdtox %f4,%f4 +/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287] +/* 0x03c8 433 (51 54) */ fdtox %f12,%f12 +/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4 +/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279] +/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12 +/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2 +/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8 +/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5 +/* 0x03e4 440 (53 56) */ fitod %f18,%f10 +/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2 +/* 0x03ec 442 (54 55) */ st %g2,[%i0+20] +/* 0x03f0 443 (54 57) */ fitod %f19,%f16 +/* 0x03f4 444 (54 55) */ srax %g2,32,%o0 +/* 0x03f8 445 (55 58) */ fdtox %f8,%f8 +/* 0x03fc 446 (55 56) */ std %f8,[%sp+2263] +/* 0x0400 447 (55 56) */ add %g5,%o2,%g3 +/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5 +/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10 +/* 0x040c 450 (56 57) */ add %g3,%o0,%g3 +/* 0x0410 451 (57 58) */ st %g3,[%i0+24] +/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8 +/* 0x0418 453 (57 58) */ srax %g3,32,%o0 +/* 0x041c 454 (58 61) */ fdtox %f4,%f4 +/* 0x0420 455 (58 59) */ std %f4,[%sp+2271] +/* 0x0424 456 (58 59) */ add %g5,%o3,%g4 +/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18 +/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4 +/* 0x0430 459 (59 60) */ add %g4,%o0,%g4 +/* 0x0434 460 (60 61) */ st %g4,[%i0+28] +/* 0x0438 461 (60 63) */ fmuld 
%f10,%f6,%f10 +/* 0x043c 462 (60 61) */ srax %g4,32,%o0 +/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5 +/* 0x0444 464 (61 64) */ fitod %f18,%f12 +/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8 +/* 0x044c 466 (62 65) */ fdtox %f4,%f4 +/* 0x0450 467 (62 63) */ std %f4,[%sp+2255] +/* 0x0454 468 (63 64) */ add %g5,%o4,%l2 +/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5 +/* 0x045c 470 (63 66) */ fdtox %f10,%f16 +/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4 +/* 0x0464 472 (64 65) */ std %f16,[%sp+2247] +/* 0x0468 473 (64 65) */ add %l2,%o0,%l2 +/* 0x046c 474 (65 68) */ fdtox %f8,%f8 +/* 0x0470 475 (65 66) */ std %f8,[%sp+2239] +/* 0x0474 476 (65 66) */ add %g5,%o7,%l4 +/* 0x0478 477 (66 69) */ fitod %f19,%f10 +/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5 +/* 0x0480 479 (66 67) */ srax %l2,32,%o0 +/* 0x0484 480 (67 68) */ add %l4,%o0,%l4 +/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4 +/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0 +/* 0x0490 483 (68 69) */ srax %l4,32,%o1 +/* 0x0494 484 (68 69) */ add %g5,%l1,%l1 +/* 0x0498 485 (68 69) */ st %l2,[%i0+32] +/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8 +/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3 +/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1 +/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5 +/* 0x04ac 490 (70 71) */ srax %o1,32,%o2 +/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0 +/* 0x04b4 492 (71 74) */ fdtox %f4,%f4 +/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231] +/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2 +/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3 +/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4 +/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5 +/* 0x04cc 498 (73 74) */ srax %o2,32,%o3 +/* 0x04d0 499 (73 74) */ st %l4,[%i0+36] +/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2 +/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0 +/* 0x04dc 502 (75 76) */ srax %g2,32,%g3 +/* 0x04e0 503 (75 78) */ fdtox %f4,%f4 +/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223] +/* 0x04e8 505 (76 77) */ st %o1,[%i0+40] +/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3 +/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5 +/* 0x04f4 508 (77 78) */ st %o2,[%i0+44] +/* 0x04f8 509 (77 78) */ srax %g3,32,%g4 +/* 0x04fc 510 (78 79) */ st %g2,[%i0+48] +/* 0x0500 511 (78 79) */ add %g5,%g4,%g4 +/* 0x0504 512 (79 80) */ st %g3,[%i0+52] +/* 0x0508 513 (79 80) */ srax %g4,32,%g5 +/* 0x050c 514 (80 83) */ ld [%i1+60],%g3 +/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2 +/* 0x0514 516 (82 83) */ st %g4,[%i0+56] +/* 0x0518 517 (83 84) */ add %g2,%g3,%g2 +/* 0x051c 518 (84 85) */ add %g2,%g5,%g2 +/* 0x0520 519 (84 85) */ st %g2,[%i0+60] +/* 0x0524 523 (85 86) */ srax %g2,32,%o3 +/* 0x0528 524 (86 87) */ srl %o3,0,%i0 +/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0530 (89 90) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000157 +! + + .L900000157: /* frequency 1.0 confidence 0.0 */ +/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14 +/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8 +/* 0x053c 539 ( 0 1) */ add %i3,1,%g2 +/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7 +/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3 +/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4 +/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0550 547 ( 2 3) */ or %g0,0,%g5 +/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18 +/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6 +/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1 +/* 0x0560 551 ( 3 4) */ or %g0,0,%o0 +/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0 +/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3 +/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16 +/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! 
tprob=0.50 +/* 0x0574 ( 6 7) */ subcc %i3,0,%g0 +/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2 +/* 0x057c 558 ( 7 8) */ or %g0,0,%i0 +/* 0x0580 559 ( 7 8) */ or %g0,1,%g3 +/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0 +/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50 +/* 0x058c ( 9 10) */ or %g0,0,%o1 +/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0 +/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3 +/* 0x0598 565 (10 13) */ ldd [%i2],%f2 +/* 0x059c 566 (10 11) */ or %g0,7,%o0 +/* 0x05a0 567 (10 11) */ or %g0,2,%i0 +/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8 +/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4 +/* 0x05ac 570 (11 12) */ or %g0,16,%o2 +/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2 +/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6 +/* 0x05b8 573 (12 13) */ or %g0,48,%o4 +/* 0x05bc 574 (13 16) */ fitod %f8,%f12 +/* 0x05c0 575 (13 14) */ or %g0,24,%o1 +/* 0x05c4 576 (13 14) */ or %g0,3,%g3 +/* 0x05c8 577 (14 17) */ fitod %f2,%f0 +/* 0x05cc 578 (15 18) */ fitod %f3,%f20 +/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2 +/* 0x05d4 580 (16 19) */ fitod %f9,%f10 +/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8 +/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0 +/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22 +/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20 +/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12 +/* 0x05ec 586 (20 23) */ fsubd %f18,%f10,%f10 +/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0 +/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4 +/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22 +/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6 +/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20 +/* 0x0604 592 (23 26) */ fdtox %f0,%f0 +/* 0x0608 593 (23 24) */ std %f0,[%fp-217] +/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10 +/* 0x0610 595 (24 27) */ fdtox %f22,%f22 +/* 0x0614 596 (24 25) */ std %f22,[%fp-209] +/* 0x0618 597 (25 28) */ fitod %f5,%f0 +/* 0x061c 598 (26 29) */ fdtox %f10,%f10 +/* 0x0620 599 (27 30) */ fdtox %f20,%f20 +/* 0x0624 600 (27 28) */ std %f20,[%fp-201] +/* 0x0628 601 (28 31) */ fitod %f4,%f4 +/* 0x062c 602 (28 29) */ std %f10,[%fp-193] +/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10 +/* 0x0634 604 (30 33) */ fitod %f7,%f2 +/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0 +/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4 +/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8 + +! +! ENTRY .L900000144 +! 
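+! [annotation, not compiler output] .L900000144 is the pipelined main
+! loop of the small-multiplier path (%i4 below 2^19, length not 8 or
+! 16): each limb is converted to a double and multiplied by the single
+! multiplier double in %f16; the product stays below 2^51, so fdtox
+! recovers it exactly, and six products per iteration are spilled to the
+! stack scratch area.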
+ + .L900000144: /* frequency 1.0 confidence 0.0 */ +/* 0x0644 609 ( 0 3) */ fitod %f11,%f22 +/* 0x0648 610 ( 0 1) */ add %o0,3,%o0 +/* 0x064c 611 ( 0 1) */ add %g3,6,%g3 +/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0 +/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24 +/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0 +/* 0x065c 615 ( 1 2) */ add %i0,6,%i0 +/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x0664 617 ( 2 5) */ fitod %f6,%f4 +/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0 +/* 0x066c 619 ( 3 4) */ add %o4,8,%i1 +/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20 +/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6 +/* 0x0678 622 ( 4 5) */ add %o2,16,%o4 +/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4 +/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4] +/* 0x0684 625 ( 5 6) */ add %o1,16,%o2 +/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6 +/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4] +/* 0x0690 628 ( 7 10) */ fitod %f9,%f0 +/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2 +/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24 +/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12 +/* 0x06a0 632 ( 9 12) */ fitod %f10,%f4 +/* 0x06a4 633 (10 13) */ fdtox %f2,%f2 +/* 0x06a8 634 (10 11) */ add %i1,8,%o1 +/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22 +/* 0x06b0 636 (11 14) */ fdtox %f24,%f10 +/* 0x06b4 637 (11 12) */ add %o4,16,%i4 +/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4 +/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4] +/* 0x06c0 640 (12 13) */ add %o2,16,%i1 +/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10 +/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4] +/* 0x06cc 643 (14 17) */ fitod %f7,%f2 +/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12 +/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24 +/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0 +/* 0x06dc 647 (16 19) */ fitod %f8,%f4 +/* 0x06e0 648 (17 20) */ fdtox %f12,%f20 +/* 0x06e4 649 (17 18) */ add %o1,8,%o4 +/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12 +/* 0x06ec 651 (18 21) */ fdtox %f24,%f8 +/* 0x06f0 652 (18 19) */ add %i4,16,%o2 +/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4 +/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4] +/* 0x06fc 655 (19 20) */ add %i1,16,%o1 +/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8 +/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50 +/* 0x0708 (20 21) */ std %f20,[%o1+%g4] + +! +! ENTRY .L900000147 +! 
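+! [annotation, not compiler output] .L900000147 drains the pipeline of
+! the loop above, finishing the in-flight conversions and multiplies and
+! storing the remaining products.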
+ + .L900000147: /* frequency 1.0 confidence 0.0 */ +/* 0x070c 660 ( 0 3) */ fitod %f6,%f6 +/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24 +/* 0x0714 662 ( 0 1) */ add %i4,32,%l4 +/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22 +/* 0x0720 665 ( 1 2) */ add %i1,32,%l3 +/* 0x0724 666 ( 2 5) */ fitod %f10,%f28 +/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2 +/* 0x072c 668 ( 2 3) */ add %i4,48,%l2 +/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4 +/* 0x0734 670 ( 3 4) */ add %i1,48,%l1 +/* 0x0738 671 ( 3 4) */ add %i4,64,%l0 +/* 0x073c 672 ( 4 7) */ fitod %f11,%f26 +/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1 +/* 0x0744 674 ( 4 5) */ add %i1,64,%i5 +/* 0x0748 675 ( 5 8) */ fitod %f8,%f6 +/* 0x074c 676 ( 5 6) */ add %i4,80,%i4 +/* 0x0750 677 ( 5 6) */ add %i1,80,%i1 +/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0 +/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20 +/* 0x075c 680 ( 6 7) */ add %i4,16,%o4 +/* 0x0760 681 ( 7 10) */ fitod %f9,%f4 +/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12 +/* 0x0768 683 ( 7 8) */ add %i1,16,%o3 +/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10 +/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0 +/* 0x0774 686 ( 8 9) */ add %g3,12,%g3 +/* 0x0778 687 ( 9 12) */ fitod %f0,%f2 +/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8 +/* 0x0780 689 (11 14) */ fitod %f1,%f0 +/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10 +/* 0x0788 691 (12 15) */ fdtox %f24,%f24 +/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4] +/* 0x0790 693 (12 13) */ add %i0,12,%i0 +/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6 +/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8 +/* 0x079c 696 (14 17) */ fdtox %f22,%f22 +/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4] +/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4 +/* 0x07a8 699 (16 19) */ fdtox %f20,%f20 +/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4] +/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6 +/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2 +/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0 +/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4 +/* 0x07c0 705 (19 22) */ fdtox %f12,%f12 +/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4] +/* 0x07c8 707 (20 23) */ fdtox %f10,%f10 +/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4] +/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2 +/* 0x07d4 710 (21 24) */ fdtox %f8,%f8 +/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4] +/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0 +/* 0x07e0 713 (22 25) */ fdtox %f6,%f6 +/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4] +/* 0x07e8 715 (23 26) */ fdtox %f4,%f4 +/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4] +/* 0x07f0 717 (24 27) */ fdtox %f2,%f2 +/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4] +/* 0x07f8 719 (25 28) */ fdtox %f0,%f0 +/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0800 (25 26) */ std %f0,[%o3+%g4] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L900000155 +! 
+ + .L900000155: /* frequency 1.0 confidence 0.0 */ +/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1 +/* 0x0810 727 ( 0 1) */ add %o0,1,%o0 +/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4 +/* 0x0818 729 ( 1 2) */ add %i0,2,%i0 +/* 0x081c 730 ( 2 5) */ fitod %f0,%f2 +/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1 +/* 0x0824 732 ( 2 3) */ add %g3,2,%g3 +/* 0x0828 733 ( 3 6) */ fitod %f1,%f0 +/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1 +/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0 +/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2 +/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2 +/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1 +/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0 +/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0 +/* 0x084c 742 (11 14) */ fdtox %f2,%f2 +/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4] +/* 0x0854 744 (12 15) */ fdtox %f0,%f0 +/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4] +/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50 +/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L77000043 +! + + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0 + +! +! ENTRY .L900000156 +! + + .L900000156: /* frequency 1.0 confidence 0.0 */ +/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50 +/* 0x086c ( 0 1) */ or %g0,%g5,%o3 +/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1 +/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3 +/* 0x0878 763 ( 1 2) */ or %g0,0,%i0 +/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0 +/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50 +/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2 +/* 0x0888 767 ( 3 6) */ ld [%o5],%i3 +/* 0x088c 768 ( 3 4) */ or %g0,8,%g2 +/* 0x0890 769 ( 3 4) */ or %g0,16,%o4 +/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3 +/* 0x0898 771 ( 4 5) */ or %g0,3,%i0 +/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1 +/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2 +/* 0x08a4 774 ( 6 7) */ st %o1,[%o7] +/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1 +/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2 +/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0 +/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1 +/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1 +/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0 +/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4] +/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0 + +! +! ENTRY .L900000140 +! + + .L900000140: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1 +/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4 +/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2 +/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5 +/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1 +/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0 +/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1 +/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0 +/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2] +/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5 +/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0 +/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2 +/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4 +/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2 +/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0 +/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1 +/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0 +/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1] +/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50 +/* 0x0914 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000143 +! + + .L900000143: /* frequency 1.0 confidence 0.0 */ +/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3 +/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0 +/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0 +/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2] +/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5 +/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! 
tprob=0.50 +/* 0x0934 ( 3 4) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0 + +! +! ENTRY .L900000154 +! + + .L900000154: /* frequency 1.0 confidence 0.0 */ +/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2 +/* 0x0940 817 ( 0 1) */ add %i0,1,%i0 +/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4 +/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2 +/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0 +/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1 +/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0 +/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0 +/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2] +/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5 +/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50 +/* 0x096c ( 7 8) */ sra %i0,0,%o0 + +! +! ENTRY .L77000047 +! + + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ + +/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0 +/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x097c ( 4 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50 +/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4 +/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6 +/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3 +/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351] +/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4 +/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355] +/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12 +/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10 +/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4 +/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18 +/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12 +/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9 +/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10 +/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15 +/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22 +/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20 +/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12 +/* 0x09d4 876 (10 13) */ ld [%i1],%g2 +/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3 +/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22 +/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4 +/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18 +/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5 +/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12 +/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0 +/* 0x09f8 885 (14 17) */ fitod %f18,%f26 +/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1 +/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20 +/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2 +/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3 +/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8 +/* 0x0a10 891 (20 21) */ fmovs %f6,%f14 +/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8 +/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6 +/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14 +/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10 +/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24 +/* 0x0a28 897 (28 31) */ fdtox %f14,%f14 +/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335] +/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28 +/* 0x0a34 900 (29 32) */ fitod %f19,%f14 +/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18 +/* 0x0a3c 902 (30 33) */ fdtox %f10,%f10 +/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343] +/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4 +/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22 +/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18 +/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311] +/* 0x0a54 908 (32 35) */ fmuld 
%f12,%f6,%f10 +/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4 +/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12 +/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319] +/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12 +/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5 +/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4 +/* 0x0a70 915 (35 38) */ fdtox %f4,%f4 +/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327] +/* 0x0a78 917 (36 39) */ fdtox %f28,%f24 +/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303] +/* 0x0a80 919 (37 40) */ fitod %f20,%f4 +/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4 +/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24 +/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7 +/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14 +/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4 +/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12 +/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5 +/* 0x0aa0 927 (39 42) */ fitod %f21,%f18 +/* 0x0aa4 928 (40 41) */ st %o4,[%i0] +/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7 +/* 0x0aac 930 (40 43) */ fdtox %f22,%f20 +/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287] +/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10 +/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5 +/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20 +/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295] +/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7 +/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4 +/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14 +/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2 +/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10 +/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3 +/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271] +/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3 +/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12 +/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0 +/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10 +/* 0x0af0 947 (45 48) */ fmuld %f4,%f8,%f16 +/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279] +/* 0x0af8 949 (46 49) */ fdtox %f20,%f12 +/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4 +/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255] +/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0 +/* 0x0b08 953 (47 50) */ fdtox %f14,%f12 +/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5 +/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2 +/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8 +/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1 +/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0 +/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4 +/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6 +/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263] +/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2 +/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4 +/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0 +/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5 +/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2 +/* 0x0b40 967 (52 53) */ st %g3,[%i0+4] +/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2 +/* 0x0b48 969 (52 55) */ fdtox %f16,%f10 +/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7 +/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2 +/* 0x0b54 972 (53 56) */ fdtox %f4,%f4 +/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239] +/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7 +/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247] +/* 0x0b64 976 (55 58) */ fdtox %f8,%f4 +/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7 +/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5 +/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0 +/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223] +/* 0x0b78 981 (57 60) */ fdtox %f6,%f4 +/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5 +/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7 +/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231] +/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0 +/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5 +/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1 +/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5 +/* 0x0b98 989 (61 62) */ st %g2,[%i0+12] +/* 0x0b9c 990 (62 
64) */ ldx [%sp+2239],%l0 +/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4 +/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1 +/* 0x0ba8 993 (63 64) */ add %o1,%o4,%o1 +/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7 +/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3 +/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5 +/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4 +/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16] +/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2 +/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20] +/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4 +/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8] +/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2 +/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4 +/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24] +/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2 +/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3 +/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2 +/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2 +/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28] +/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3 +/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0 +/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0bfc (76 77) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0 +/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50 +/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4 +/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483] +/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2 +/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479] +/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8 +/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10 +/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14 +/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4 +/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13 +/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7 +/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10 +/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14 +/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12 +/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6 +/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12 +/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6 +/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8 +/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18 +/* 0x0c64 1056 (20 23) */ fitod %f15,%f14 +/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10 +/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8 +/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20 +/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4 +/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14 +/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10 +/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463] +/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10 +/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18 +/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471] +/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8 +/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4 +/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447] +/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4 +/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18 +/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455] +/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10 +/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439] +/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14 +/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8 +/* 0x0cb8 1077 (29 30) */ std %f8,[%sp+2431] +/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10 +/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4 +/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8 +/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14 +/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423] +/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10 +/* 0x0cd4 1084 (33 35) */ fxnor 
%f0,%f8,%f4 +/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415] +/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8 +/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10 +/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14 +/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8 +/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10 +/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14 +/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18 +/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4 +/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8 +/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20 +/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10 +/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4 +/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8 +/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399] +/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8 +/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18 +/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407] +/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10 +/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383] +/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14 +/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10 +/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18 +/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391] +/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8 +/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375] +/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4 +/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14 +/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367] +/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8 +/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10 +/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4 +/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351] +/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8 +/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14 +/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359] +/* 0x0d68 1124 (54 57) */ fitod %f8,%f4 +/* 0x0d6c 1125 (55 57) */ fxnor %f0,%f14,%f10 +/* 0x0d70 1126 (56 59) */ fitod %f9,%f8 +/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4 +/* 0x0d78 1128 (58 61) */ fitod %f10,%f14 +/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8 +/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18 +/* 0x0d84 1131 (60 63) */ fitod %f11,%f10 +/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4 +/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14 +/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20 +/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8 +/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10 +/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4 +/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335] +/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4 +/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18 +/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343] +/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8 +/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319] +/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14 +/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8 +/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18 +/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327] +/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4 +/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311] +/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10 +/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14 +/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303] +/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4 +/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8 +/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10 +/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287] +/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4 +/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14 +/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295] +/* 0x0df8 1160 (74 77) */ fitod %f4,%f10 +/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4 +/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8 +/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10 +/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4 +/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14 +/* 0x0e10 1166 (80 83) */ fmuld 
%f10,%f12,%f18 +/* 0x0e14 1167 (80 83) */ fitod %f9,%f8 +/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10 +/* 0x0e1c 1169 (82 85) */ fmuld %f4,%f12,%f20 +/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14 +/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18 +/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279] +/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4 +/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10 +/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271] +/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10 +/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263] +/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4 +/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255] +/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10 +/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4 +/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10 +/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247] +/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4 +/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239] +/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2 +/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4 +/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3 +/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4 +/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2 +/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5 +/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2 +/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6 +/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3 +/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4 +/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4 +/* 0x0e8c 1200 (96 97) */ st %g4,[%i0] +/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4 +/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0 +/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2 +/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6 +/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3 +/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2 +/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4 +/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5 +/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1 +/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2 +/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3 +/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4 +/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4] +/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223] +/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2 +/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3 +/* 0x0ed0 1217 (104 105) */ add %g5,%g2,%g2 +/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5 +/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2 +/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231] +/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0 +/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4 +/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5 +/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2 +/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8] +/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0 +/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3 +/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5 +/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3 +/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1 +/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3 +/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12] +/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5 +/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0 +/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2 +/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5 +/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4 +/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2 +/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1 +/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5 +/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2 +/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16] +/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0 +/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3 +/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5 +/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3 +/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4 +/* 0x0f4c 1248 (117 
118) */ add %g3,%o0,%g3 +/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1 +/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5 +/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20] +/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2 +/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5 +/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0 +/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2 +/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4 +/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2 +/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1 +/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5 +/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24] +/* 0x0f80 1261 (123 124) */ add %g4,%g5,%g3 +/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5 +/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0 +/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3 +/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4 +/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3 +/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5 +/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1 +/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2 +/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5 +/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4 +/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0 +/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2 +/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28] +/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5 +/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2 +/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1 +/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3 +/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5 +/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0 +/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4 +/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3 +/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5 +/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32] +/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3 +/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2 +/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5 +/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4 +/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0 +/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1 +/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5 +/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36] +/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3 +/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5 +/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2 +/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4 +/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2 +/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1 +/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5 +/* 0x101c 1303 (141 142) */ st %g2,[%i0+40] +/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0 +/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2 +/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5 +/* 0x102c 1307 (142 143) */ add %g3,%o1,%g3 +/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4 +/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3 +/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1 +/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5 +/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0 +/* 0x1044 1313 (145 146) */ st %g3,[%i0+44] +/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3 +/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5 +/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2 +/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1 +/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2 +/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4 +/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5 +/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0 +/* 0x1068 1322 (149 150) */ st %g2,[%i0+48] +/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3 +/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1 +/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2 +/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3 +/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5 +/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0 +/* 0x1084 
1329 (152 154) */ ldx [%sp+2231],%g4 +/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2 +/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5 +/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1 +/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2 +/* 0x1098 1334 (154 155) */ st %g3,[%i0+52] +/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3 +/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56] +/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2 +/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3 +/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2 +/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60] +/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3 +/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0 +/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x10c0 (161 162) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! + + .L77000073: /* frequency 1.0 confidence 0.0 */ + + + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! + + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! 
+ + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! + + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! 
Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+
+/* 0x124c 1476 ( 0 0) */ .type mul_add,2
+/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x124c 1480 ( 0 0) */ .align 8
+/* 0x1250 1486 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */
+/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp
+/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3
+/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2
+/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0
+/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1
+/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result =
+/* 0x1268 ( 4 5) */ srl %i3,0,%o4
+/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0
+/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0
+/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2
+/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
+
+ .section ".data",#alloc,#write
+/* 0x1278 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160
+/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object
+/* 0x1280 10 ( 0 0) */ .size mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 0000000000..94e86eedb9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo)              \
+    {                                           \
+        Plo = asm("mulq %a0, %a1, %v0", a, b);  \
+        Phi = asm("umulh %a0, %a1, %v0", a, b); \
+    }
+
+/* This is empty for the loop in s_mpv_mul_d */
+#define CARRY_ADD
+
+#define ONE_MUL                     \
+    a_i = *a++;                     \
+    MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+    a0b0 += carry;                  \
+    if (a0b0 < carry)               \
+        ++a1b1;                     \
+    CARRY_ADD                       \
+    *c++ = a0b0;                    \
+    carry = a1b1;
+
+#define FOUR_MUL \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL      \
+    ONE_MUL
+
+#define SIXTEEN_MUL \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL        \
+    FOUR_MUL
+
+#define THIRTYTWO_MUL \
+    SIXTEEN_MUL       \
+    SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL          \
+    THIRTYTWO_MUL
+
+#define EXPAND_256(CALL)                     \
+    mp_digit carry = 0;                      \
+    mp_digit a_i;                            \
+    mp_digit a0b0, a1b1;                     \
+    if (a_len & 255) {                       \
+        if (a_len & 1) {                     \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 2) {                     \
+            ONE_MUL                          \
+            ONE_MUL                          \
+        }                                    \
+        if (a_len & 4) {                     \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 8) {                     \
+            FOUR_MUL                         \
+            FOUR_MUL                         \
+        }                                    \
+        if (a_len & 16) {                    \
+            SIXTEEN_MUL                      \
+        }                                    \
+        if (a_len & 32) {                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 64) {                    \
+            THIRTYTWO_MUL                    \
+            THIRTYTWO_MUL                    \
+        }                                    \
+        if (a_len & 128) {                   \
+            ONETWENTYEIGHT_MUL               \
+        }                                    \
+        a_len = a_len & (-256);              \
+    }                                        \
+    if (a_len >= 256) {                      \
+        carry = CALL(a, a_len, b, c, carry); \
+        c += a_len;                          \
+    }
+
+#define FUNC_NAME(NAME)                    \
+    mp_digit NAME(const mp_digit *a,       \
+                  mp_size a_len,           \
+                  mp_digit b, mp_digit *c, \
+                  mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) \
+    FUNC_NAME(FNAME)           \
+    {                          \
+        mp_digit a_i;          \
+        mp_digit a0b0, a1b1;   \
+        while (a_len) {        \
+            ONETWENTYEIGHT_MUL \
+            ONETWENTYEIGHT_MUL \
+            a_len -= 256;      \
+        }                      \
+        return carry;          \
+    }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+   (admittedly) small number of tests (i.e., timetest) used to
+   measure performance, so this define disables that optimization.
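+
+   (Illustration of what the expansion would do when enabled, using
+   only the macros defined above: EXPAND_256 peels the low bits of
+   a_len off with the fixed-size blocks, then hands any remaining
+   256-digit multiples to the unrolled MUL256 routine.  For
+   a_len = 300, say:
+
+       300 & 255 = 44 = 4 + 8 + 32   ... FOUR_MUL, FOUR_MUL twice, THIRTYTWO_MUL
+       300 & -256 = 256              ... one pass of the MUL256 loop
+
+   so the expansion trades code size for per-digit loop overhead.)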
+*/
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+   the routine that uses it; this helps locality somewhat */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+            mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1;
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry)
+            ++a1b1;
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+#else
+    EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+    *c = carry;
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD     \
+    a0b0 += a_i = *c; \
+    if (a0b0 < a_i)   \
+        ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+   two routines that use it; this helps locality somewhat */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+                mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    *c = carry;
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+                     mp_digit b, mp_digit *c)
+{
+    EXPAND_256(s_mpv_mul_d_add_MUL256)
+    while (carry) {
+        mp_digit c_i = *c;
+        carry += c_i;
+        *c++ = carry;
+        carry = carry < c_i;
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c
new file mode 100644
index 0000000000..461d40ab36
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mulsqr.c
@@ -0,0 +1,84 @@
+/*
+ * Test whether to include squaring code given the current settings
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
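+ *
+ * (How the comparison works: both timing passes below reseed the PRNG
+ * with the same seed, so mp_mul(&a, &a, &c) and mp_sqr(&a, &c) are
+ * timed over identical sequences of random prec-digit values;
+ * whichever runs faster tells you whether the squaring code pays for
+ * itself on this platform.)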
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <limits.h>
+
+#define MP_SQUARE 1 /* make sure squaring code is included */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+int
+main(int argc, char *argv[])
+{
+    int ntests, prec, ix;
+    unsigned int seed;
+    clock_t start, stop;
+    double multime, sqrtime;
+    mp_int a, c;
+
+    seed = (unsigned int)time(NULL);
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
+        return 1;
+    }
+
+    if ((ntests = abs(atoi(argv[1]))) == 0) {
+        fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
+        return 1;
+    }
+    if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
+        fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
+                CHAR_BIT);
+        return 1;
+    }
+
+    prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+
+    mp_init_size(&a, prec);
+    mp_init_size(&c, 2 * prec);
+
+    /* Test multiplication by self */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+    }
+    stop = clock();
+
+    multime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    /* Test squaring */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_sqr(&a, &c);
+    }
+    stop = clock();
+
+    sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    printf("Multiply: %.4f\n", multime);
+    printf("Square: %.4f\n", sqrtime);
+    if (multime < sqrtime) {
+        printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime));
+        printf("Prefer: multiply\n");
+    } else {
+        printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime));
+        printf("Prefer: square\n");
+    }
+
+    mp_clear(&a);
+    mp_clear(&c);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c
new file mode 100644
index 0000000000..3e64a2acaa
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/primes.c
@@ -0,0 +1,841 @@
+/*
+ * These tables of primes were generated using the 'sieve' program
+ * (sieve.c) and converted to this format with 'ptab.pl'.
+ *
+ * The 'small' table is just the first 128 primes. The 'large' table
+ * is a table of all the prime values that will fit into a single
+ * mp_digit (given the current size of an mp_digit, which is two bytes).
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
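+ *
+ * (For reference: with a two-byte mp_digit the large table holds the
+ * 6542 primes below 65536, the last of which, 0xFFF1 = 65521, is the
+ * largest prime that fits in 16 bits; a SMALL_TABLE build keeps only
+ * the first 128 entries, ending at 0x02CF = 719.)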
*/ + +#if SMALL_TABLE +#define MP_PRIME_TAB_SIZE 128 +#else +#define MP_PRIME_TAB_SIZE 6542 +#endif + +const int prime_tab_size = MP_PRIME_TAB_SIZE; +const mp_digit prime_tab[] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, +#if !SMALL_TABLE + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653, + 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D, + 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3, + 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713, + 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755, + 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D, + 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3, + 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815, + 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851, + 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B, + 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB, + 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907, + 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949, + 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977, + 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7, + 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13, + 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61, + 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85, + 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5, + 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 
0x0AED, 0x0AF1, + 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D, + 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B, + 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3, + 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07, + 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F, + 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95, + 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5, + 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13, + 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55, + 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3, + 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5, + 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17, + 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57, + 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F, + 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5, + 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17, + 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53, + 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3, + 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9, + 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021, + 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073, + 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3, + 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1, + 0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139, + 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181, + 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3, + 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D, + 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241, + 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F, + 0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD, + 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D, + 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367, + 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393, + 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF, + 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F, + 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471, + 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD, + 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517, + 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543, + 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583, + 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5, + 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615, + 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F, + 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F, + 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9, + 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709, + 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777, + 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9, + 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5, + 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843, + 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F, + 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9, + 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB, + 0x18F5, 0x18FD, 0x1915, 0x191B, 
0x1931, 0x1933, 0x1945, 0x1949, + 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3, + 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED, + 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D, + 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B, + 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1, + 0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB, + 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B, + 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73, + 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1, + 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27, + 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55, + 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5, + 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21, + 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D, + 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89, + 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5, + 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B, + 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D, + 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD, + 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF, + 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49, + 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97, + 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1, + 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027, + 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063, + 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1, + 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB, + 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159, + 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1, + 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5, + 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B, + 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263, + 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D, + 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED, + 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333, + 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383, + 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9, + 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B, + 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F, + 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1, + 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9, + 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513, + 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573, + 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1, + 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605, + 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F, + 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681, + 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9, + 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735, + 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773, + 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3, + 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807, + 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F, + 0x2849, 0x2851, 
0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881, + 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB, + 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923, + 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983, + 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3, + 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13, + 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B, + 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9, + 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB, + 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F, + 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99, + 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3, + 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35, + 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87, + 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB, + 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F, + 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89, + 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3, + 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25, + 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F, + 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5, + 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB, + 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45, + 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81, + 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB, + 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023, + 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071, + 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9, + 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF, + 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D, + 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D, + 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3, + 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215, + 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B, + 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7, + 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9, + 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335, + 0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379, + 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB, + 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419, + 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D, + 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB, + 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D, + 0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571, + 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B, + 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7, + 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637, + 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D, + 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1, + 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F, + 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D, + 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3, + 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847, + 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D, + 
0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5, + 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923, + 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B, + 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1, + 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5, + 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27, + 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D, + 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1, + 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21, + 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71, + 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD, + 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3, + 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29, + 0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71, + 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1, + 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B, + 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F, + 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D, + 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3, + 0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F, + 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87, + 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7, + 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37, + 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79, + 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD, + 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B, + 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D, + 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB, + 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115, + 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B, + 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB, + 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F, + 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261, + 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D, + 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307, + 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F, + 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF, + 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED, + 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B, + 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F, + 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF, + 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513, + 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577, + 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7, + 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609, + 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645, + 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D, + 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF, + 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F, + 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771, + 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF, + 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807, + 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855, + 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 
0x48A1, + 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919, + 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969, + 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1, + 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41, + 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89, + 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF, + 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B, + 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD, + 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF, + 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D, + 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55, + 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93, + 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05, + 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51, + 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B, + 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9, + 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35, + 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85, + 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5, + 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09, + 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D, + 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9, + 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF, + 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047, + 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7, + 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B, + 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149, + 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7, + 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7, + 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243, + 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297, + 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9, + 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341, + 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387, + 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9, + 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413, + 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455, + 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D, + 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7, + 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D, + 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585, + 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED, + 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F, + 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B, + 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD, + 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707, + 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761, + 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5, + 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F, + 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D, + 0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB, + 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1, + 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B, + 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 
0x5989, 0x5999, 0x599F, + 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED, + 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17, + 0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77, + 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB, + 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19, + 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79, + 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1, + 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29, + 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F, + 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF, + 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05, + 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47, + 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95, + 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7, + 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B, + 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49, + 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5, + 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27, + 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65, + 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5, + 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F, + 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085, + 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9, + 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B, + 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191, + 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1, + 0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241, + 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295, + 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD, + 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F, + 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377, + 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB, + 0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427, + 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475, + 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7, + 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521, + 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F, + 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD, + 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607, + 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B, + 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B, + 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5, + 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F, + 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791, + 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809, + 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B, + 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871, + 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7, + 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917, + 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B, + 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB, + 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07, + 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B, + 0x6A87, 0x6A8D, 0x6A91, 
0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1, + 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39, + 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93, + 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF, + 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59, + 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83, + 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3, + 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25, + 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61, + 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7, + 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15, + 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B, + 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3, + 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F, + 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D, + 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5, + 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9, + 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027, + 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079, + 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7, + 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121, + 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183, + 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB, + 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219, + 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277, + 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB, + 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303, + 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F, + 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1, + 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D, + 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D, + 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1, + 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D, + 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D, + 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5, + 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633, + 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685, + 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1, + 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741, + 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD, + 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF, + 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853, + 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B, + 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD, + 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F, + 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993, + 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF, + 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11, + 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59, + 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1, + 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D, + 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71, + 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7, + 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19, + 0x7C1B, 
0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81, + 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7, + 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F, + 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D, + 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5, + 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F, + 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B, + 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7, + 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7, + 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D, + 0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87, + 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED, + 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F, + 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095, + 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB, + 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D, + 0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D, + 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB, + 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231, + 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F, + 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7, + 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B, + 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D, + 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F, + 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7, + 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425, + 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483, + 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF, + 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D, + 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9, + 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9, + 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639, + 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699, + 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1, + 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B, + 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777, + 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5, + 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F, + 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B, + 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED, + 0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927, + 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981, + 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB, + 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35, + 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79, + 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD, + 0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13, + 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF, + 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF, + 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53, + 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F, + 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9, + 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41, + 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7, 
+ 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5, + 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69, + 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF, + 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB, + 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45, + 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D, + 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9, + 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023, + 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053, + 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF, + 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113, + 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167, + 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9, + 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215, + 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275, + 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD, + 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7, + 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D, + 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5, + 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409, + 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469, + 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1, + 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535, + 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589, + 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD, + 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635, + 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9, + 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD, + 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739, + 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787, + 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9, + 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F, + 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887, + 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD, + 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929, + 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D, + 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7, + 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13, + 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75, + 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF, + 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F, + 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D, + 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD, + 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23, + 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67, + 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1, + 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15, + 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97, + 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1, + 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47, + 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95, + 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05, + 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D, + 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 
0x9F95, 0x9FA3, + 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5, + 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057, + 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5, + 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1, + 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129, + 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D, + 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED, + 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243, + 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B, + 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5, + 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B, + 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387, + 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5, + 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F, + 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459, + 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3, + 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3, + 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553, + 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D, + 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9, + 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D, + 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691, + 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD, + 0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F, + 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759, + 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9, + 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805, + 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F, + 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7, + 0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937, + 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3, + 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15, + 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59, + 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB, + 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07, + 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73, + 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5, + 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09, + 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57, + 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9, + 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED, + 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D, + 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1, + 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05, + 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61, + 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7, + 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD, + 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57, + 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3, + 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015, + 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069, + 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF, + 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119, + 0xB11D, 0xB11F, 0xB131, 0xB141, 
0xB14D, 0xB15B, 0xB165, 0xB173, + 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD, + 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D, + 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281, + 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9, + 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D, + 0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B, + 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B, + 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B, + 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5, + 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501, + 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F, + 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF, + 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609, + 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659, + 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5, + 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF, + 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755, + 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3, + 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827, + 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893, + 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5, + 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F, + 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D, + 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1, + 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25, + 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65, + 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF, + 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F, + 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61, + 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF, + 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33, + 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F, + 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3, + 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F, + 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F, + 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3, + 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27, + 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D, + 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9, + 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39, + 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89, + 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD, + 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029, + 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D, + 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7, + 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119, + 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179, + 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD, + 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221, + 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B, + 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD, + 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313, + 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371, + 0xC37F, 0xC383, 
0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5, + 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF, + 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461, + 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7, + 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509, + 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F, + 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3, + 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641, + 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1, + 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1, + 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757, + 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD, + 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803, + 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857, + 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7, + 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF, + 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937, + 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989, + 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9, + 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37, + 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F, + 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9, + 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29, + 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87, + 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7, + 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B, + 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91, + 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15, + 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57, + 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB, + 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD, + 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65, + 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7, + 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07, + 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61, + 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9, + 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7, + 0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F, + 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB, + 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D, + 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171, + 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD, + 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217, + 0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279, + 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3, + 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315, + 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B, + 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD, + 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B, + 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D, + 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1, + 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F, + 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565, + 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1, + 
0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D, + 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687, + 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1, + 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F, + 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7, + 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB, + 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D, + 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF, + 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933, + 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F, + 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9, + 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09, + 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51, + 0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D, + 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5, + 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43, + 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD, + 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D, + 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F, + 0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9, + 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF, + 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53, + 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B, + 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9, + 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D, + 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77, + 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1, + 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F, + 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67, + 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5, + 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005, + 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071, + 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7, + 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135, + 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F, + 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1, + 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219, + 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279, + 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD, + 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327, + 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363, + 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9, + 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419, + 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455, + 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD, + 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537, + 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585, + 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615, + 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657, + 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695, + 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3, + 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F, + 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B, + 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 
0xE7DF, + 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B, + 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869, + 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB, + 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915, + 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963, + 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7, + 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D, + 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3, + 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB, + 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B, + 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1, + 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B, + 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B, + 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD, + 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F, + 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B, + 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3, + 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19, + 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81, + 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1, + 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71, + 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5, + 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B, + 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071, + 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF, + 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7, + 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175, + 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7, + 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227, + 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277, + 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF, + 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319, + 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B, + 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409, + 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455, + 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9, + 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B, + 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B, + 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF, + 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D, + 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F, + 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB, + 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755, + 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D, + 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7, + 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D, + 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877, + 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7, + 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9, + 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941, + 0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3, + 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13, + 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B, + 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 
0xFABD, 0xFAD9, 0xFADF, + 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F, + 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7, + 0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37, + 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73, + 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD, + 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51, + 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99, + 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03, + 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D, + 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93, + 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3, + 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D, + 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D, + 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1 +#endif +}; diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il new file mode 100644 index 0000000000..d2e8024ac2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_32.il @@ -0,0 +1,1291 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! The interface to the VIS instructions as declared below (and in the VIS +! User's Manual) will not change, but the macro implementation might change +! in the future. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,8 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,8 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,8 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,8 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,8 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,8 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,8 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8lcc,8 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,8 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,8 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,8 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,8 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! 
void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,8 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,8 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + faligndata %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple16 %f4,%f10,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne16 %f4,%f10,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple32 %f4,%f10,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne32 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt16 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq16 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt32 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq32 %f4,%f10,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + st %o2,[%sp+0x4c] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16au %f4,%f10,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16al %f4,%f10,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fmul8ulx16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + std %o0,[%sp+0x40] + ldd [%sp+0x40],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd16 %f4,%f10,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd16s %f4,%f10,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd32 %f4,%f10,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd32s %f4,%f10,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub16 %f4,%f10,%f0 + .end +! +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub16s %f4,%f10,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub32 %f4,%f10,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub32s %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + std %f0,[%o4] + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpackfix %f4,%f0 + fpackfix %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fpack16_to_hi,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_lo,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack32 %f4,%f10,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpackfix %f4,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f6 + fpackfix %f4,%f0 + fpackfix %f6,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); +! + .inline vis_pdist,24 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + pdist %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpmerge %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_lo(double /*frs2*/); +! + .inline vis_fexpand_lo,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fmovs %f5, %f2 + fexpand %f2,%f0 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnor %f4,%f10,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnors %f4,%f10,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fandnot1 %f4,%f10,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fandnot1s %f4,%f10,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fnot1 %f4,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fnot1s %f4,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxor %f4,%f10,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! 
+ .inline vis_fxors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxors %f4,%f10,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnand %f4,%f10,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnands %f4,%f10,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fand %f4,%f10,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fands %f4,%f10,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxnor %f4,%f10,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxnors %f4,%f10,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fsrc1 %f4,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fsrc1s %f4,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fornot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fornot1 %f4,%f10,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fornot1s %f4,%f10,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + for %f4,%f10,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fors %f4,%f10,%f0 + .end +! +! double vis_fzero(/* void */) +! + .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,16 + ld [%o0],%f4 + ld [%o1],%f5 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S + .end +! +! 
vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! +! vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! +! double vis_lddfa_ASI_FL8P(void *rs1) +! + .inline vis_lddfa_ASI_FL8P,4 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,8 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,8 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,8 + sll %o1,16,%o1 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,4 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! 
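+! 16-bit short load through ASI_FL16_P; vis_proto.h aliases this entry
+! point as vis_ld_u16.  Illustrative C usage (pointer name is
+! hypothetical):
+!
+!     double t = vis_ld_u16(src);  /* one 16-bit element into an FP reg */
+!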
+ .inline vis_lddfa_ASI_FL16P,4 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,8 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,4 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,4 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,8 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,4 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,4 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,8 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,4 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array8 %o3,%o2,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array16 %o3,%o2,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array32 %o3,%o2,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; return %f0; + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f0 ! %f0 = float frs2; return %f0:f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f1 ! 
%f1 = float frs2; return %f0:f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! + .inline vis_freg_pair,8 + st %o0,[%sp+0x48] ! store float frs1 + ld [%sp+0x48],%f0 + st %o1,[%sp+0x48] ! store float frs2 + ld [%sp+0x48],%f1 + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,4 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,4 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,4 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,4 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,4 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,4 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,4 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,4 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,4 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! 
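+! Loads an unsigned halfword from rs1 plus a byte offset, through the
+! primary address space (ASI 0x80).  Illustrative C usage (names are
+! hypothetical):
+!
+!     unsigned short v = vis_lduha_ASI_P_index(base, 2 * i);
+!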
+ .inline vis_lduha_ASI_P_index,8 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! + .inline vis_lduha_ASI_PL_index,8 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,4 + prefetch [%o0+0],0 + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,4 + prefetch [%o0+0],2 + .end diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il new file mode 100644 index 0000000000..cbe2b5aa27 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_64.il @@ -0,0 +1,997 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! This file is to be used in place of vis.il in 64-bit builds. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,16 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,16 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,16 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,16 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,16 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,16 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,16 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8lcc,16 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,16 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,16 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,16 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,16 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,12 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,12 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + faligndata %f0,%f2,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! 
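+! The fcmp*16 forms compare the four signed 16-bit fields of the two
+! operands pairwise and return a 4-bit mask in an integer register, one
+! bit per field; the fcmp*32 forms return a 2-bit mask.  Illustrative C
+! usage (names are hypothetical):
+!
+!     int mask = vis_fcmple16(x, y);  /* bits set where a field of x <= y */
+!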
+ .inline vis_fcmple16,16 + fcmple16 %f0,%f2,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + fcmpne16 %f0,%f2,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + fcmple32 %f0,%f2,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + fcmpne32 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + fcmpgt16 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + fcmpeq16 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + fcmpgt32 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + fcmpeq32 %f0,%f2,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + fmul8x16 %f1,%f2,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + fmul8x16 %f1,%f4,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + fmul8x16au %f1,%f3,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + fmul8x16al %f1,%f3,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + fmul8sux16 %f0,%f2,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + fmul8ulx16 %f0,%f2,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + fmuld8sux16 %f1,%f3,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + fmuld8ulx16 %f1,%f3,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + fpadd16 %f0,%f2,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + fpadd16s %f1,%f3,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + fpadd32 %f0,%f2,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + fpadd32s %f1,%f3,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + fpsub16 %f0,%f2,%f0 + .end +! +! float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + fpsub16s %f1,%f3,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + fpsub32 %f0,%f2,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + fpsub32s %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + fpack16 %f0,%f0 + .end +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + std %f0,[%o2] + .end +! 
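+! Note that, unlike the 32-bit templates in vis_32.il, which bounce
+! operands through the stack, the templates in this file read their
+! arguments directly from the V9 floating-point argument registers
+! (%f0/%f2 for doubles, %f1/%f3 for floats), so most of them reduce to
+! a single instruction.
+!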
+! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,24 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + fpack16 %f2,%f0 + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_lo,16 + fpack16 %f2,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + fpack32 %f0,%f2,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + fpackfix %f0,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/, +! double pxls2 /*frs2*/); +! + .inline vis_pxldist64,24 + pdist %f2,%f4,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + fpmerge %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + fexpand %f1,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + fexpand %f0,%f0 + .end + +! double vis_fexpand_lo(double /*frs2*/); +! + .inline vis_fexpand_lo,8 + fexpand %f1,%f0 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + fnor %f0,%f2,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + fnors %f1,%f3,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + fandnot1 %f0,%f2,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + fandnot1s %f1,%f3,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + fnot1 %f0,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + fnot1s %f1,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + fxor %f0,%f2,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + fxors %f1,%f3,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + fnand %f0,%f2,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnands,8 + fnands %f1,%f3,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + fand %f0,%f2,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + fands %f1,%f3,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + fxnor %f0,%f2,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + fxnors %f1,%f3,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + fsrc1 %f0,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + fsrc1s %f1,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! 
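+! fornot1 computes (~frs1) | frs2 bitwise.  Illustrative C usage (names
+! are hypothetical):
+!
+!     double r = vis_fornot(a, b);  /* r = ~a | b */
+!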
+ .inline vis_fornot,16 + fornot1 %f0,%f2,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + fornot1s %f1,%f3,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + for %f0,%f2,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + fors %f1,%f3,%f0 + .end +! +! double vis_fzero(/* void */) +! + .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,20 + stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,20 + stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,28 + ld [%o0],%f4 + ld [%o1],%f5 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,20 + stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S + .end +! +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,20 + stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,20 + stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,20 + stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,20 + stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! +! vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,16 + stda %f0,[%o1]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,24 + stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,16 + stda %f0,[%o1]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,16 + stda %f0,[%o1]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,24 + stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,16 + stda %f0,[%o1]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,16 + stda %f0,[%o1]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,16 + stda %f0,[%o1]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,16 + stda %f0,[%o1]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,16 + stda %f0,[%o1]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! +! double vis_lddfa_ASI_FL8P(void *rs1) +! 
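+! 8-bit short load through ASI_FL8_P; vis_proto.h aliases this entry
+! point as vis_ld_u8.  Illustrative C usage (pointer name is
+! hypothetical):
+!
+!     double px = vis_ld_u8(src);  /* one byte into an FP register */
+!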
+ .inline vis_lddfa_ASI_FL8P,8 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,16 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,12 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,12 + sll %o1,16,%o1 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,8 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,8 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,16 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,8 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,8 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,16 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,8 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,8 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,16 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,8 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + array8 %o0,%o1,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + array16 %o0,%o1,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + array32 %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + fmovs %f0,%f0 + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! 
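+! Concatenates two floats into one double: frs1 supplies the upper word
+! and frs2 the lower.  Illustrative C usage (names are hypothetical):
+!
+!     double d = vis_freg_pair(hi, lo);
+!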
+ .inline vis_freg_pair,8 + fmovs %f1,%f0 ! %f1 = float frs1; put in hi; + fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1; + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + st %o1,[%sp+2183] + ld [%sp+2183],%f1 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + stx %o0,[%sp+2183] + ldd [%sp+2183],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,8 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,8 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,8 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,8 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,8 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,8 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,12 + sta %f1,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,12 + sta %f1,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,12 + sta %f1,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,16 + stda %f0,[%o1+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,16 + stda %f0,[%o1]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,16 + stda %f0,[%o1]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,8 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,8 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,8 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,16 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! + .inline vis_lduha_ASI_PL_index,16 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! 
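+! Issues a V9 prefetch with function code 0 ("for several reads"); the
+! write variant below uses function code 2 ("for several writes").
+! Illustrative C usage (pointer name is hypothetical):
+!
+!     vis_prefetch_read(src);  /* warm the line before the loads */
+!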
+ .inline vis_prefetch_read,8 + prefetch [%o0+0],0 + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,8 + prefetch [%o0+0],2 + .end diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h new file mode 100644 index 0000000000..275de59df8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_proto.h @@ -0,0 +1,234 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Prototypes for the inline templates in vis.il + */ + +#ifndef VIS_PROTO_H +#define VIS_PROTO_H + +#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Pure edge handling instructions */ +int vis_edge8(void * /*frs1*/, void * /*frs2*/); +int vis_edge8l(void * /*frs1*/, void * /*frs2*/); +int vis_edge16(void * /*frs1*/, void * /*frs2*/); +int vis_edge16l(void * /*frs1*/, void * /*frs2*/); +int vis_edge32(void * /*frs1*/, void * /*frs2*/); +int vis_edge32l(void * /*frs1*/, void * /*frs2*/); + +/* Edge handling instructions with negative return values if cc set. */ +int vis_edge8cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/); +int vis_edge16cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/); +int vis_edge32cc(void * /*frs1*/, void * /*frs2*/); +int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/); + +/* Alignment instructions. */ +void *vis_alignaddr(void * /*rs1*/, int /*rs2*/); +void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/); +double vis_faligndata(double /*frs1*/, double /*frs2*/); + +/* Partitioned comparison instructions. */ +int vis_fcmple16(double /*frs1*/, double /*frs2*/); +int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +int vis_fcmple32(double /*frs1*/, double /*frs2*/); +int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); + +/* Partitioned multiplication. */ +#if 0 +double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +#endif +double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); + +/* Partitioned addition & subtraction. */ +double vis_fpadd16(double /*frs1*/, double /*frs2*/); +float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +double vis_fpadd32(double /*frs1*/, double /*frs2*/); +float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +double vis_fpsub16(double /*frs1*/, double /*frs2*/); +float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +double vis_fpsub32(double /*frs1*/, double /*frs2*/); +float vis_fpsub32s(float /*frs1*/, float /*frs2*/); + +/* Pixel packing & clamping. */ +float vis_fpack16(double /*frs2*/); +double vis_fpack32(double /*frs1*/, double /*frs2*/); +float vis_fpackfix(double /*frs2*/); + +/* Combined pack ops. 
*/ +double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +void vis_st2_fpack16(double, double, double *); +void vis_std_fpack16(double, double, double *); +void vis_st2_fpackfix(double, double, double *); + +double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); + +/* Motion estimation. */ +double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); + +/* Channel merging. */ +double vis_fpmerge(float /*frs1*/, float /*frs2*/); + +/* Pixel expansion. */ +double vis_fexpand(float /*frs2*/); +double vis_fexpand_hi(double /*frs2*/); +double vis_fexpand_lo(double /*frs2*/); + +/* Bitwise logical operators. */ +double vis_fnor(double /*frs1*/, double /*frs2*/); +float vis_fnors(float /*frs1*/, float /*frs2*/); +double vis_fandnot(double /*frs1*/, double /*frs2*/); +float vis_fandnots(float /*frs1*/, float /*frs2*/); +double vis_fnot(double /*frs1*/); +float vis_fnots(float /*frs1*/); +double vis_fxor(double /*frs1*/, double /*frs2*/); +float vis_fxors(float /*frs1*/, float /*frs2*/); +double vis_fnand(double /*frs1*/, double /*frs2*/); +float vis_fnands(float /*frs1*/, float /*frs2*/); +double vis_fand(double /*frs1*/, double /*frs2*/); +float vis_fands(float /*frs1*/, float /*frs2*/); +double vis_fxnor(double /*frs1*/, double /*frs2*/); +float vis_fxnors(float /*frs1*/, float /*frs2*/); +double vis_fsrc(double /*frs1*/); +float vis_fsrcs(float /*frs1*/); +double vis_fornot(double /*frs1*/, double /*frs2*/); +float vis_fornots(float /*frs1*/, float /*frs2*/); +double vis_for(double /*frs1*/, double /*frs2*/); +float vis_fors(float /*frs1*/, float /*frs2*/); +double vis_fzero(void); +float vis_fzeros(void); +double vis_fone(void); +float vis_fones(void); + +/* Partial stores. */ +void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/, + void * /*rs3*/, int /*rmask*/); +void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/); +void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/); + +/* Byte & short stores. */ +void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/); +void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/); +void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/); +void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/); + +/* Byte & short loads. 
*/ +double vis_lddfa_ASI_FL8P(void * /*rs1*/); +double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/); +double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/); +double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/); +double vis_lddfa_ASI_FL8S(void * /*rs1*/); +double vis_lddfa_ASI_FL16P(void * /*rs1*/); +double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/); +double vis_lddfa_ASI_FL16S(void * /*rs1*/); +double vis_lddfa_ASI_FL8PL(void * /*rs1*/); +double vis_lddfa_ASI_FL8SL(void * /*rs1*/); +double vis_lddfa_ASI_FL16PL(void * /*rs1*/); +double vis_lddfa_ASI_FL16SL(void * /*rs1*/); + +/* Direct write to GSR, read from GSR */ +void vis_write_gsr(unsigned int /*GSR*/); +unsigned int vis_read_gsr(void); + +/* Voxel texture mapping. */ +#if !defined(_NO_LONGLONG) +unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/); +unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/); +unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/); +#endif /* !defined(_NO_LONGLONG) */ + +/* Register aliasing and type casts. */ +float vis_read_hi(double /*frs1*/); +float vis_read_lo(double /*frs1*/); +double vis_write_hi(double /*frs1*/, float /*frs2*/); +double vis_write_lo(double /*frs1*/, float /*frs2*/); +double vis_freg_pair(float /*frs1*/, float /*frs2*/); +float vis_to_float(unsigned int /*value*/); +double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +double vis_to_double_dup(unsigned int /*value*/); +#if !defined(_NO_LONGLONG) +double vis_ll_to_double(unsigned long long /*value*/); +#endif /* !defined(_NO_LONGLONG) */ + +/* Miscellany (no inlines) */ +void vis_error(char * /*fmt*/, int /*a0*/); +void vis_sim_init(void); + +/* For better performance */ +#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg)) + +/* Nicknames for explicit ASI loads and stores. */ +#define vis_st_u8 vis_stdfa_ASI_FL8P +#define vis_st_u8_i vis_stdfa_ASI_FL8P_index +#define vis_st_u8_le vis_stdfa_ASI_FL8PL +#define vis_st_u16 vis_stdfa_ASI_FL16P +#define vis_st_u16_i vis_stdfa_ASI_FL16P_index +#define vis_st_u16_le vis_stdfa_ASI_FL16PL + +#define vis_ld_u8 vis_lddfa_ASI_FL8P +#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index +#define vis_ld_u8_le vis_lddfa_ASI_FL8PL +#define vis_ld_u16 vis_lddfa_ASI_FL16P +#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index +#define vis_ld_u16_le vis_lddfa_ASI_FL16PL + +#define vis_pst_8 vis_stdfa_ASI_PST8P +#define vis_pst_16 vis_stdfa_ASI_PST16P +#define vis_pst_32 vis_stdfa_ASI_PST32P + +#define vis_st_u8s vis_stdfa_ASI_FL8S +#define vis_st_u8s_le vis_stdfa_ASI_FL8SL +#define vis_st_u16s vis_stdfa_ASI_FL16S +#define vis_st_u16s_le vis_stdfa_ASI_FL16SL + +#define vis_ld_u8s vis_lddfa_ASI_FL8S +#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL +#define vis_ld_u16s vis_lddfa_ASI_FL16S +#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL + +#define vis_pst_8s vis_stdfa_ASI_PST8S +#define vis_pst_16s vis_stdfa_ASI_PST16S +#define vis_pst_32s vis_stdfa_ASI_PST32S + +/* "<" and ">=" may be implemented in terms of ">" and "<=". */ +#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a)) +#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a)) +#define vis_fcmpge16(a, b) vis_fcmple16((b), (a)) +#define vis_fcmpge32(a, b) vis_fcmple32((b), (a)) + +#ifdef __cplusplus +} // End of extern "C" +#endif /* __cplusplus */ + +#endif /* VIS_PROTO_H */ -- cgit v1.2.3